Content-Length: 1135647 | pFad | http://github.com/tonybelloni/postgres/commit/5262f7a4fc44f651241d2ff1fa688dd664a34874

D3 Add optimizer and executor support for parallel index scans. · tonybelloni/postgres@5262f7a · GitHub
Skip to content

Commit 5262f7a

Browse files
committed
Add optimizer and executor support for parallel index scans.
In combination with 569174f, which taught the btree AM how to perform parallel index scans, this allows parallel index scan plans on btree indexes. This infrastructure should be general enough to support parallel index scans for other index AMs as well, if someone updates them to support parallel scans. Amit Kapila, reviewed and tested by Anastasia Lubennikova, Tushar Ahuja, and Haribabu Kommi, and me.
1 parent 51ee6f3 commit 5262f7a

File tree

29 files changed

+366
-55
lines changed

29 files changed

+366
-55
lines changed

contrib/bloom/blcost.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
void
2525
blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
2626
Cost *indexStartupCost, Cost *indexTotalCost,
27-
Selectivity *indexSelectivity, double *indexCorrelation)
27+
Selectivity *indexSelectivity, double *indexCorrelation,
28+
double *indexPages)
2829
{
2930
IndexOptInfo *index = path->indexinfo;
3031
List *qinfos;
@@ -45,4 +46,5 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
4546
*indexTotalCost = costs.indexTotalCost;
4647
*indexSelectivity = costs.indexSelectivity;
4748
*indexCorrelation = costs.indexCorrelation;
49+
*indexPages = costs.numIndexPages;
4850
}

contrib/bloom/bloom.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,6 @@ extern bytea *bloptions(Datum reloptions, bool validate);
208208
extern void blcostestimate(PlannerInfo *root, IndexPath *path,
209209
double loop_count, Cost *indexStartupCost,
210210
Cost *indexTotalCost, Selectivity *indexSelectivity,
211-
double *indexCorrelation);
211+
double *indexCorrelation, double *indexPages);
212212

213213
#endif

contrib/bloom/blutils.c

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ blhandler(PG_FUNCTION_ARGS)
119119
amroutine->amstorage = false;
120120
amroutine->amclusterable = false;
121121
amroutine->ampredlocks = false;
122+
amroutine->amcanparallel = false;
122123
amroutine->amkeytype = InvalidOid;
123124

124125
amroutine->ambuild = blbuild;

doc/src/sgml/indexam.sgml

+2
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ typedef struct IndexAmRoutine
110110
bool amclusterable;
111111
/* does AM handle predicate locks? */
112112
bool ampredlocks;
113+
/* does AM support parallel scan? */
114+
bool amcanparallel;
113115
/* type of data stored in index, or InvalidOid if variable */
114116
Oid amkeytype;
115117

src/backend/access/brin/brin.c

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ brinhandler(PG_FUNCTION_ARGS)
9393
amroutine->amstorage = true;
9494
amroutine->amclusterable = false;
9595
amroutine->ampredlocks = false;
96+
amroutine->amcanparallel = false;
9697
amroutine->amkeytype = InvalidOid;
9798

9899
amroutine->ambuild = brinbuild;

src/backend/access/gin/ginutil.c

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ ginhandler(PG_FUNCTION_ARGS)
5050
amroutine->amstorage = true;
5151
amroutine->amclusterable = false;
5252
amroutine->ampredlocks = false;
53+
amroutine->amcanparallel = false;
5354
amroutine->amkeytype = InvalidOid;
5455

5556
amroutine->ambuild = ginbuild;

src/backend/access/gist/gist.c

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ gisthandler(PG_FUNCTION_ARGS)
7171
amroutine->amstorage = true;
7272
amroutine->amclusterable = true;
7373
amroutine->ampredlocks = false;
74+
amroutine->amcanparallel = false;
7475
amroutine->amkeytype = InvalidOid;
7576

7677
amroutine->ambuild = gistbuild;

src/backend/access/hash/hash.c

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ hashhandler(PG_FUNCTION_ARGS)
6767
amroutine->amstorage = false;
6868
amroutine->amclusterable = false;
6969
amroutine->ampredlocks = false;
70+
amroutine->amcanparallel = false;
7071
amroutine->amkeytype = INT4OID;
7172

7273
amroutine->ambuild = hashbuild;

src/backend/access/nbtree/nbtree.c

+1
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ bthandler(PG_FUNCTION_ARGS)
140140
amroutine->amstorage = false;
141141
amroutine->amclusterable = true;
142142
amroutine->ampredlocks = true;
143+
amroutine->amcanparallel = true;
143144
amroutine->amkeytype = InvalidOid;
144145

145146
amroutine->ambuild = btbuild;

src/backend/access/spgist/spgutils.c

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ spghandler(PG_FUNCTION_ARGS)
4949
amroutine->amstorage = false;
5050
amroutine->amclusterable = false;
5151
amroutine->ampredlocks = false;
52+
amroutine->amcanparallel = false;
5253
amroutine->amkeytype = InvalidOid;
5354

5455
amroutine->ambuild = spgbuild;

src/backend/executor/execParallel.c

+12
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "executor/nodeCustom.h"
2929
#include "executor/nodeForeignscan.h"
3030
#include "executor/nodeSeqscan.h"
31+
#include "executor/nodeIndexscan.h"
3132
#include "executor/tqueue.h"
3233
#include "nodes/nodeFuncs.h"
3334
#include "optimizer/planmain.h"
@@ -197,6 +198,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
197198
ExecSeqScanEstimate((SeqScanState *) planstate,
198199
e->pcxt);
199200
break;
201+
case T_IndexScanState:
202+
ExecIndexScanEstimate((IndexScanState *) planstate,
203+
e->pcxt);
204+
break;
200205
case T_ForeignScanState:
201206
ExecForeignScanEstimate((ForeignScanState *) planstate,
202207
e->pcxt);
@@ -249,6 +254,10 @@ ExecParallelInitializeDSM(PlanState *planstate,
249254
ExecSeqScanInitializeDSM((SeqScanState *) planstate,
250255
d->pcxt);
251256
break;
257+
case T_IndexScanState:
258+
ExecIndexScanInitializeDSM((IndexScanState *) planstate,
259+
d->pcxt);
260+
break;
252261
case T_ForeignScanState:
253262
ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
254263
d->pcxt);
@@ -725,6 +734,9 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc)
725734
case T_SeqScanState:
726735
ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
727736
break;
737+
case T_IndexScanState:
738+
ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc);
739+
break;
728740
case T_ForeignScanState:
729741
ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
730742
toc);

src/backend/executor/nodeIndexscan.c

+137-16
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
* ExecEndIndexScan releases all storage.
2323
* ExecIndexMarkPos marks scan position.
2424
* ExecIndexRestrPos restores scan position.
25+
* ExecIndexScanEstimate estimates DSM space needed for parallel index scan
26+
* ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
27+
* ExecIndexScanInitializeWorker attach to DSM info in parallel worker
2528
*/
2629
#include "postgres.h"
2730

@@ -514,6 +517,18 @@ ExecIndexScan(IndexScanState *node)
514517
void
515518
ExecReScanIndexScan(IndexScanState *node)
516519
{
520+
bool reset_parallel_scan = true;
521+
522+
/*
523+
* If we are here to just update the scan keys, then don't reset parallel
524+
* scan. We don't want each of the participating process in the parallel
525+
* scan to update the shared parallel scan state at the start of the scan.
526+
* It is quite possible that one of the participants has already begun
527+
* scanning the index when another has yet to start it.
528+
*/
529+
if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
530+
reset_parallel_scan = false;
531+
517532
/*
518533
* If we are doing runtime key calculations (ie, any of the index key
519534
* values weren't simple Consts), compute the new key values. But first,
@@ -539,10 +554,21 @@ ExecReScanIndexScan(IndexScanState *node)
539554
reorderqueue_pop(node);
540555
}
541556

542-
/* reset index scan */
543-
index_rescan(node->iss_ScanDesc,
544-
node->iss_ScanKeys, node->iss_NumScanKeys,
545-
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
557+
/*
558+
* Reset (parallel) index scan. For parallel-aware nodes, the scan
559+
* descriptor is initialized during actual execution of node and we can
560+
* reach here before that (ex. during execution of nest loop join). So,
561+
* avoid updating the scan descriptor at that time.
562+
*/
563+
if (node->iss_ScanDesc)
564+
{
565+
index_rescan(node->iss_ScanDesc,
566+
node->iss_ScanKeys, node->iss_NumScanKeys,
567+
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
568+
569+
if (reset_parallel_scan && node->iss_ScanDesc->parallel_scan)
570+
index_parallelrescan(node->iss_ScanDesc);
571+
}
546572
node->iss_ReachedEnd = false;
547573

548574
ExecScanReScan(&node->ss);
@@ -1013,22 +1039,29 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
10131039
}
10141040

10151041
/*
1016-
* Initialize scan descriptor.
1042+
* for parallel-aware node, we initialize the scan descriptor after
1043+
* initializing the shared memory for parallel execution.
10171044
*/
1018-
indexstate->iss_ScanDesc = index_beginscan(currentRelation,
1019-
indexstate->iss_RelationDesc,
1020-
estate->es_snapshot,
1021-
indexstate->iss_NumScanKeys,
1045+
if (!node->scan.plan.parallel_aware)
1046+
{
1047+
/*
1048+
* Initialize scan descriptor.
1049+
*/
1050+
indexstate->iss_ScanDesc = index_beginscan(currentRelation,
1051+
indexstate->iss_RelationDesc,
1052+
estate->es_snapshot,
1053+
indexstate->iss_NumScanKeys,
10221054
indexstate->iss_NumOrderByKeys);
10231055

1024-
/*
1025-
* If no run-time keys to calculate, go ahead and pass the scankeys to the
1026-
* index AM.
1027-
*/
1028-
if (indexstate->iss_NumRuntimeKeys == 0)
1029-
index_rescan(indexstate->iss_ScanDesc,
1030-
indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
1056+
/*
1057+
* If no run-time keys to calculate, go ahead and pass the scankeys to
1058+
* the index AM.
1059+
*/
1060+
if (indexstate->iss_NumRuntimeKeys == 0)
1061+
index_rescan(indexstate->iss_ScanDesc,
1062+
indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
10311063
indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
1064+
}
10321065

10331066
/*
10341067
* all done.
@@ -1590,3 +1623,91 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
15901623
else if (n_array_keys != 0)
15911624
elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
15921625
}
1626+
1627+
/* ----------------------------------------------------------------
1628+
* Parallel Scan Support
1629+
* ----------------------------------------------------------------
1630+
*/
1631+
1632+
/* ----------------------------------------------------------------
1633+
* ExecIndexScanEstimate
1634+
*
1635+
* estimates the space required to serialize indexscan node.
1636+
* ----------------------------------------------------------------
1637+
*/
1638+
void
1639+
ExecIndexScanEstimate(IndexScanState *node,
1640+
ParallelContext *pcxt)
1641+
{
1642+
EState *estate = node->ss.ps.state;
1643+
1644+
node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
1645+
estate->es_snapshot);
1646+
shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
1647+
shm_toc_estimate_keys(&pcxt->estimator, 1);
1648+
}
1649+
1650+
/* ----------------------------------------------------------------
1651+
* ExecIndexScanInitializeDSM
1652+
*
1653+
* Set up a parallel index scan descriptor.
1654+
* ----------------------------------------------------------------
1655+
*/
1656+
void
1657+
ExecIndexScanInitializeDSM(IndexScanState *node,
1658+
ParallelContext *pcxt)
1659+
{
1660+
EState *estate = node->ss.ps.state;
1661+
ParallelIndexScanDesc piscan;
1662+
1663+
piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
1664+
index_parallelscan_initialize(node->ss.ss_currentRelation,
1665+
node->iss_RelationDesc,
1666+
estate->es_snapshot,
1667+
piscan);
1668+
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
1669+
node->iss_ScanDesc =
1670+
index_beginscan_parallel(node->ss.ss_currentRelation,
1671+
node->iss_RelationDesc,
1672+
node->iss_NumScanKeys,
1673+
node->iss_NumOrderByKeys,
1674+
piscan);
1675+
1676+
/*
1677+
* If no run-time keys to calculate, go ahead and pass the scankeys to the
1678+
* index AM.
1679+
*/
1680+
if (node->iss_NumRuntimeKeys == 0)
1681+
index_rescan(node->iss_ScanDesc,
1682+
node->iss_ScanKeys, node->iss_NumScanKeys,
1683+
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1684+
}
1685+
1686+
/* ----------------------------------------------------------------
1687+
* ExecIndexScanInitializeWorker
1688+
*
1689+
* Copy relevant information from TOC into planstate.
1690+
* ----------------------------------------------------------------
1691+
*/
1692+
void
1693+
ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
1694+
{
1695+
ParallelIndexScanDesc piscan;
1696+
1697+
piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
1698+
node->iss_ScanDesc =
1699+
index_beginscan_parallel(node->ss.ss_currentRelation,
1700+
node->iss_RelationDesc,
1701+
node->iss_NumScanKeys,
1702+
node->iss_NumOrderByKeys,
1703+
piscan);
1704+
1705+
/*
1706+
* If no run-time keys to calculate, go ahead and pass the scankeys to the
1707+
* index AM.
1708+
*/
1709+
if (node->iss_NumRuntimeKeys == 0)
1710+
index_rescan(node->iss_ScanDesc,
1711+
node->iss_ScanKeys, node->iss_NumScanKeys,
1712+
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1713+
}

src/backend/optimizer/path/allpaths.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,6 @@ static void subquery_push_qual(Query *subquery,
127127
static void recurse_push_qual(Node *setOp, Query *topquery,
128128
RangeTblEntry *rte, Index rti, Node *qual);
129129
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
130-
static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
131-
BlockNumber index_pages);
132130

133131

134132
/*
@@ -2885,7 +2883,7 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
28852883
* "heap_pages" is the number of pages from the table that we expect to scan.
28862884
* "index_pages" is the number of pages from the index that we expect to scan.
28872885
*/
2888-
static int
2886+
int
28892887
compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
28902888
BlockNumber index_pages)
28912889
{

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/tonybelloni/postgres/commit/5262f7a4fc44f651241d2ff1fa688dd664a34874

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy