| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * nodeSort.c |
| 4 | * Routines to handle sorting of relations. |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/executor/nodeSort.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | |
| 16 | #include "postgres.h" |
| 17 | |
| 18 | #include "access/parallel.h" |
| 19 | #include "executor/execdebug.h" |
| 20 | #include "executor/nodeSort.h" |
| 21 | #include "miscadmin.h" |
| 22 | #include "utils/tuplesort.h" |
| 23 | |
| 24 | |
| 25 | /* ---------------------------------------------------------------- |
| 26 | * ExecSort |
| 27 | * |
| 28 | * Sorts tuples from the outer subtree of the node using tuplesort, |
| 29 | * which saves the results in a temporary file or memory. After the |
| 30 | * initial call, returns a tuple from the file with each call. |
| 31 | * |
| 32 | * Conditions: |
| 33 | * -- none. |
| 34 | * |
| 35 | * Initial States: |
| 36 | * -- the outer child is prepared to return the first tuple. |
| 37 | * ---------------------------------------------------------------- |
| 38 | */ |
| 39 | static TupleTableSlot * |
| 40 | ExecSort(PlanState *pstate) |
| 41 | { |
| 42 | SortState *node = castNode(SortState, pstate); |
| 43 | EState *estate; |
| 44 | ScanDirection dir; |
| 45 | Tuplesortstate *tuplesortstate; |
| 46 | TupleTableSlot *slot; |
| 47 | |
| 48 | CHECK_FOR_INTERRUPTS(); |
| 49 | |
| 50 | /* |
| 51 | * get state info from node |
| 52 | */ |
| 53 | SO1_printf("ExecSort: %s\n" , |
| 54 | "entering routine" ); |
| 55 | |
| 56 | estate = node->ss.ps.state; |
| 57 | dir = estate->es_direction; |
| 58 | tuplesortstate = (Tuplesortstate *) node->tuplesortstate; |
| 59 | |
| 60 | /* |
| 61 | * If first time through, read all tuples from outer plan and pass them to |
| 62 | * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. |
| 63 | */ |
| 64 | |
| 65 | if (!node->sort_Done) |
| 66 | { |
| 67 | Sort *plannode = (Sort *) node->ss.ps.plan; |
| 68 | PlanState *outerNode; |
| 69 | TupleDesc tupDesc; |
| 70 | |
| 71 | SO1_printf("ExecSort: %s\n" , |
| 72 | "sorting subplan" ); |
| 73 | |
| 74 | /* |
| 75 | * Want to scan subplan in the forward direction while creating the |
| 76 | * sorted data. |
| 77 | */ |
| 78 | estate->es_direction = ForwardScanDirection; |
| 79 | |
| 80 | /* |
| 81 | * Initialize tuplesort module. |
| 82 | */ |
| 83 | SO1_printf("ExecSort: %s\n" , |
| 84 | "calling tuplesort_begin" ); |
| 85 | |
| 86 | outerNode = outerPlanState(node); |
| 87 | tupDesc = ExecGetResultType(outerNode); |
| 88 | |
| 89 | tuplesortstate = tuplesort_begin_heap(tupDesc, |
| 90 | plannode->numCols, |
| 91 | plannode->sortColIdx, |
| 92 | plannode->sortOperators, |
| 93 | plannode->collations, |
| 94 | plannode->nullsFirst, |
| 95 | work_mem, |
| 96 | NULL, node->randomAccess); |
| 97 | if (node->bounded) |
| 98 | tuplesort_set_bound(tuplesortstate, node->bound); |
| 99 | node->tuplesortstate = (void *) tuplesortstate; |
| 100 | |
| 101 | /* |
| 102 | * Scan the subplan and feed all the tuples to tuplesort. |
| 103 | */ |
| 104 | |
| 105 | for (;;) |
| 106 | { |
| 107 | slot = ExecProcNode(outerNode); |
| 108 | |
| 109 | if (TupIsNull(slot)) |
| 110 | break; |
| 111 | |
| 112 | tuplesort_puttupleslot(tuplesortstate, slot); |
| 113 | } |
| 114 | |
| 115 | /* |
| 116 | * Complete the sort. |
| 117 | */ |
| 118 | tuplesort_performsort(tuplesortstate); |
| 119 | |
| 120 | /* |
| 121 | * restore to user specified direction |
| 122 | */ |
| 123 | estate->es_direction = dir; |
| 124 | |
| 125 | /* |
| 126 | * finally set the sorted flag to true |
| 127 | */ |
| 128 | node->sort_Done = true; |
| 129 | node->bounded_Done = node->bounded; |
| 130 | node->bound_Done = node->bound; |
| 131 | if (node->shared_info && node->am_worker) |
| 132 | { |
| 133 | TuplesortInstrumentation *si; |
| 134 | |
| 135 | Assert(IsParallelWorker()); |
| 136 | Assert(ParallelWorkerNumber <= node->shared_info->num_workers); |
| 137 | si = &node->shared_info->sinstrument[ParallelWorkerNumber]; |
| 138 | tuplesort_get_stats(tuplesortstate, si); |
| 139 | } |
| 140 | SO1_printf("ExecSort: %s\n" , "sorting done" ); |
| 141 | } |
| 142 | |
| 143 | SO1_printf("ExecSort: %s\n" , |
| 144 | "retrieving tuple from tuplesort" ); |
| 145 | |
| 146 | /* |
| 147 | * Get the first or next tuple from tuplesort. Returns NULL if no more |
| 148 | * tuples. Note that we only rely on slot tuple remaining valid until the |
| 149 | * next fetch from the tuplesort. |
| 150 | */ |
| 151 | slot = node->ss.ps.ps_ResultTupleSlot; |
| 152 | (void) tuplesort_gettupleslot(tuplesortstate, |
| 153 | ScanDirectionIsForward(dir), |
| 154 | false, slot, NULL); |
| 155 | return slot; |
| 156 | } |
| 157 | |
| 158 | /* ---------------------------------------------------------------- |
| 159 | * ExecInitSort |
| 160 | * |
| 161 | * Creates the run-time state information for the sort node |
| 162 | * produced by the planner and initializes its outer subtree. |
| 163 | * ---------------------------------------------------------------- |
| 164 | */ |
| 165 | SortState * |
| 166 | ExecInitSort(Sort *node, EState *estate, int eflags) |
| 167 | { |
| 168 | SortState *sortstate; |
| 169 | |
| 170 | SO1_printf("ExecInitSort: %s\n" , |
| 171 | "initializing sort node" ); |
| 172 | |
| 173 | /* |
| 174 | * create state structure |
| 175 | */ |
| 176 | sortstate = makeNode(SortState); |
| 177 | sortstate->ss.ps.plan = (Plan *) node; |
| 178 | sortstate->ss.ps.state = estate; |
| 179 | sortstate->ss.ps.ExecProcNode = ExecSort; |
| 180 | |
| 181 | /* |
| 182 | * We must have random access to the sort output to do backward scan or |
| 183 | * mark/restore. We also prefer to materialize the sort output if we |
| 184 | * might be called on to rewind and replay it many times. |
| 185 | */ |
| 186 | sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND | |
| 187 | EXEC_FLAG_BACKWARD | |
| 188 | EXEC_FLAG_MARK)) != 0; |
| 189 | |
| 190 | sortstate->bounded = false; |
| 191 | sortstate->sort_Done = false; |
| 192 | sortstate->tuplesortstate = NULL; |
| 193 | |
| 194 | /* |
| 195 | * Miscellaneous initialization |
| 196 | * |
| 197 | * Sort nodes don't initialize their ExprContexts because they never call |
| 198 | * ExecQual or ExecProject. |
| 199 | */ |
| 200 | |
| 201 | /* |
| 202 | * initialize child nodes |
| 203 | * |
| 204 | * We shield the child node from the need to support REWIND, BACKWARD, or |
| 205 | * MARK/RESTORE. |
| 206 | */ |
| 207 | eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK); |
| 208 | |
| 209 | outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags); |
| 210 | |
| 211 | /* |
| 212 | * Initialize scan slot and type. |
| 213 | */ |
| 214 | ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual); |
| 215 | |
| 216 | /* |
| 217 | * Initialize return slot and type. No need to initialize projection info |
| 218 | * because this node doesn't do projections. |
| 219 | */ |
| 220 | ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple); |
| 221 | sortstate->ss.ps.ps_ProjInfo = NULL; |
| 222 | |
| 223 | SO1_printf("ExecInitSort: %s\n" , |
| 224 | "sort node initialized" ); |
| 225 | |
| 226 | return sortstate; |
| 227 | } |
| 228 | |
| 229 | /* ---------------------------------------------------------------- |
| 230 | * ExecEndSort(node) |
| 231 | * ---------------------------------------------------------------- |
| 232 | */ |
| 233 | void |
| 234 | ExecEndSort(SortState *node) |
| 235 | { |
| 236 | SO1_printf("ExecEndSort: %s\n" , |
| 237 | "shutting down sort node" ); |
| 238 | |
| 239 | /* |
| 240 | * clean out the tuple table |
| 241 | */ |
| 242 | ExecClearTuple(node->ss.ss_ScanTupleSlot); |
| 243 | /* must drop pointer to sort result tuple */ |
| 244 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
| 245 | |
| 246 | /* |
| 247 | * Release tuplesort resources |
| 248 | */ |
| 249 | if (node->tuplesortstate != NULL) |
| 250 | tuplesort_end((Tuplesortstate *) node->tuplesortstate); |
| 251 | node->tuplesortstate = NULL; |
| 252 | |
| 253 | /* |
| 254 | * shut down the subplan |
| 255 | */ |
| 256 | ExecEndNode(outerPlanState(node)); |
| 257 | |
| 258 | SO1_printf("ExecEndSort: %s\n" , |
| 259 | "sort node shutdown" ); |
| 260 | } |
| 261 | |
| 262 | /* ---------------------------------------------------------------- |
| 263 | * ExecSortMarkPos |
| 264 | * |
| 265 | * Calls tuplesort to save the current position in the sorted file. |
| 266 | * ---------------------------------------------------------------- |
| 267 | */ |
| 268 | void |
| 269 | ExecSortMarkPos(SortState *node) |
| 270 | { |
| 271 | /* |
| 272 | * if we haven't sorted yet, just return |
| 273 | */ |
| 274 | if (!node->sort_Done) |
| 275 | return; |
| 276 | |
| 277 | tuplesort_markpos((Tuplesortstate *) node->tuplesortstate); |
| 278 | } |
| 279 | |
| 280 | /* ---------------------------------------------------------------- |
| 281 | * ExecSortRestrPos |
| 282 | * |
| 283 | * Calls tuplesort to restore the last saved sort file position. |
| 284 | * ---------------------------------------------------------------- |
| 285 | */ |
| 286 | void |
| 287 | ExecSortRestrPos(SortState *node) |
| 288 | { |
| 289 | /* |
| 290 | * if we haven't sorted yet, just return. |
| 291 | */ |
| 292 | if (!node->sort_Done) |
| 293 | return; |
| 294 | |
| 295 | /* |
| 296 | * restore the scan to the previously marked position |
| 297 | */ |
| 298 | tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate); |
| 299 | } |
| 300 | |
| 301 | void |
| 302 | ExecReScanSort(SortState *node) |
| 303 | { |
| 304 | PlanState *outerPlan = outerPlanState(node); |
| 305 | |
| 306 | /* |
| 307 | * If we haven't sorted yet, just return. If outerplan's chgParam is not |
| 308 | * NULL then it will be re-scanned by ExecProcNode, else no reason to |
| 309 | * re-scan it at all. |
| 310 | */ |
| 311 | if (!node->sort_Done) |
| 312 | return; |
| 313 | |
| 314 | /* must drop pointer to sort result tuple */ |
| 315 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
| 316 | |
| 317 | /* |
| 318 | * If subnode is to be rescanned then we forget previous sort results; we |
| 319 | * have to re-read the subplan and re-sort. Also must re-sort if the |
| 320 | * bounded-sort parameters changed or we didn't select randomAccess. |
| 321 | * |
| 322 | * Otherwise we can just rewind and rescan the sorted output. |
| 323 | */ |
| 324 | if (outerPlan->chgParam != NULL || |
| 325 | node->bounded != node->bounded_Done || |
| 326 | node->bound != node->bound_Done || |
| 327 | !node->randomAccess) |
| 328 | { |
| 329 | node->sort_Done = false; |
| 330 | tuplesort_end((Tuplesortstate *) node->tuplesortstate); |
| 331 | node->tuplesortstate = NULL; |
| 332 | |
| 333 | /* |
| 334 | * if chgParam of subnode is not null then plan will be re-scanned by |
| 335 | * first ExecProcNode. |
| 336 | */ |
| 337 | if (outerPlan->chgParam == NULL) |
| 338 | ExecReScan(outerPlan); |
| 339 | } |
| 340 | else |
| 341 | tuplesort_rescan((Tuplesortstate *) node->tuplesortstate); |
| 342 | } |
| 343 | |
| 344 | /* ---------------------------------------------------------------- |
| 345 | * Parallel Query Support |
| 346 | * ---------------------------------------------------------------- |
| 347 | */ |
| 348 | |
| 349 | /* ---------------------------------------------------------------- |
| 350 | * ExecSortEstimate |
| 351 | * |
| 352 | * Estimate space required to propagate sort statistics. |
| 353 | * ---------------------------------------------------------------- |
| 354 | */ |
| 355 | void |
| 356 | ExecSortEstimate(SortState *node, ParallelContext *pcxt) |
| 357 | { |
| 358 | Size size; |
| 359 | |
| 360 | /* don't need this if not instrumenting or no workers */ |
| 361 | if (!node->ss.ps.instrument || pcxt->nworkers == 0) |
| 362 | return; |
| 363 | |
| 364 | size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation)); |
| 365 | size = add_size(size, offsetof(SharedSortInfo, sinstrument)); |
| 366 | shm_toc_estimate_chunk(&pcxt->estimator, size); |
| 367 | shm_toc_estimate_keys(&pcxt->estimator, 1); |
| 368 | } |
| 369 | |
| 370 | /* ---------------------------------------------------------------- |
| 371 | * ExecSortInitializeDSM |
| 372 | * |
| 373 | * Initialize DSM space for sort statistics. |
| 374 | * ---------------------------------------------------------------- |
| 375 | */ |
| 376 | void |
| 377 | ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt) |
| 378 | { |
| 379 | Size size; |
| 380 | |
| 381 | /* don't need this if not instrumenting or no workers */ |
| 382 | if (!node->ss.ps.instrument || pcxt->nworkers == 0) |
| 383 | return; |
| 384 | |
| 385 | size = offsetof(SharedSortInfo, sinstrument) |
| 386 | + pcxt->nworkers * sizeof(TuplesortInstrumentation); |
| 387 | node->shared_info = shm_toc_allocate(pcxt->toc, size); |
| 388 | /* ensure any unfilled slots will contain zeroes */ |
| 389 | memset(node->shared_info, 0, size); |
| 390 | node->shared_info->num_workers = pcxt->nworkers; |
| 391 | shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, |
| 392 | node->shared_info); |
| 393 | } |
| 394 | |
| 395 | /* ---------------------------------------------------------------- |
| 396 | * ExecSortInitializeWorker |
| 397 | * |
| 398 | * Attach worker to DSM space for sort statistics. |
| 399 | * ---------------------------------------------------------------- |
| 400 | */ |
| 401 | void |
| 402 | ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt) |
| 403 | { |
| 404 | node->shared_info = |
| 405 | shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true); |
| 406 | node->am_worker = true; |
| 407 | } |
| 408 | |
| 409 | /* ---------------------------------------------------------------- |
| 410 | * ExecSortRetrieveInstrumentation |
| 411 | * |
| 412 | * Transfer sort statistics from DSM to private memory. |
| 413 | * ---------------------------------------------------------------- |
| 414 | */ |
| 415 | void |
| 416 | ExecSortRetrieveInstrumentation(SortState *node) |
| 417 | { |
| 418 | Size size; |
| 419 | SharedSortInfo *si; |
| 420 | |
| 421 | if (node->shared_info == NULL) |
| 422 | return; |
| 423 | |
| 424 | size = offsetof(SharedSortInfo, sinstrument) |
| 425 | + node->shared_info->num_workers * sizeof(TuplesortInstrumentation); |
| 426 | si = palloc(size); |
| 427 | memcpy(si, node->shared_info, size); |
| 428 | node->shared_info = si; |
| 429 | } |
| 430 | |