/*-------------------------------------------------------------------------
 *
 * nodeSort.c
 *	  Routines to handle sorting of relations.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSort.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/parallel.h"
#include "executor/execdebug.h"
#include "executor/nodeSort.h"
#include "miscadmin.h"
#include "utils/tuplesort.h"
24 | |
25 | /* ---------------------------------------------------------------- |
26 | * ExecSort |
27 | * |
28 | * Sorts tuples from the outer subtree of the node using tuplesort, |
29 | * which saves the results in a temporary file or memory. After the |
30 | * initial call, returns a tuple from the file with each call. |
31 | * |
32 | * Conditions: |
33 | * -- none. |
34 | * |
35 | * Initial States: |
36 | * -- the outer child is prepared to return the first tuple. |
37 | * ---------------------------------------------------------------- |
38 | */ |
39 | static TupleTableSlot * |
40 | ExecSort(PlanState *pstate) |
41 | { |
42 | SortState *node = castNode(SortState, pstate); |
43 | EState *estate; |
44 | ScanDirection dir; |
45 | Tuplesortstate *tuplesortstate; |
46 | TupleTableSlot *slot; |
47 | |
48 | CHECK_FOR_INTERRUPTS(); |
49 | |
50 | /* |
51 | * get state info from node |
52 | */ |
53 | SO1_printf("ExecSort: %s\n" , |
54 | "entering routine" ); |
55 | |
56 | estate = node->ss.ps.state; |
57 | dir = estate->es_direction; |
58 | tuplesortstate = (Tuplesortstate *) node->tuplesortstate; |
59 | |
60 | /* |
61 | * If first time through, read all tuples from outer plan and pass them to |
62 | * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. |
63 | */ |
64 | |
65 | if (!node->sort_Done) |
66 | { |
67 | Sort *plannode = (Sort *) node->ss.ps.plan; |
68 | PlanState *outerNode; |
69 | TupleDesc tupDesc; |
70 | |
71 | SO1_printf("ExecSort: %s\n" , |
72 | "sorting subplan" ); |
73 | |
74 | /* |
75 | * Want to scan subplan in the forward direction while creating the |
76 | * sorted data. |
77 | */ |
78 | estate->es_direction = ForwardScanDirection; |
79 | |
80 | /* |
81 | * Initialize tuplesort module. |
82 | */ |
83 | SO1_printf("ExecSort: %s\n" , |
84 | "calling tuplesort_begin" ); |
85 | |
86 | outerNode = outerPlanState(node); |
87 | tupDesc = ExecGetResultType(outerNode); |
88 | |
89 | tuplesortstate = tuplesort_begin_heap(tupDesc, |
90 | plannode->numCols, |
91 | plannode->sortColIdx, |
92 | plannode->sortOperators, |
93 | plannode->collations, |
94 | plannode->nullsFirst, |
95 | work_mem, |
96 | NULL, node->randomAccess); |
97 | if (node->bounded) |
98 | tuplesort_set_bound(tuplesortstate, node->bound); |
99 | node->tuplesortstate = (void *) tuplesortstate; |
100 | |
101 | /* |
102 | * Scan the subplan and feed all the tuples to tuplesort. |
103 | */ |
104 | |
105 | for (;;) |
106 | { |
107 | slot = ExecProcNode(outerNode); |
108 | |
109 | if (TupIsNull(slot)) |
110 | break; |
111 | |
112 | tuplesort_puttupleslot(tuplesortstate, slot); |
113 | } |
114 | |
115 | /* |
116 | * Complete the sort. |
117 | */ |
118 | tuplesort_performsort(tuplesortstate); |
119 | |
120 | /* |
121 | * restore to user specified direction |
122 | */ |
123 | estate->es_direction = dir; |
124 | |
125 | /* |
126 | * finally set the sorted flag to true |
127 | */ |
128 | node->sort_Done = true; |
129 | node->bounded_Done = node->bounded; |
130 | node->bound_Done = node->bound; |
131 | if (node->shared_info && node->am_worker) |
132 | { |
133 | TuplesortInstrumentation *si; |
134 | |
135 | Assert(IsParallelWorker()); |
136 | Assert(ParallelWorkerNumber <= node->shared_info->num_workers); |
137 | si = &node->shared_info->sinstrument[ParallelWorkerNumber]; |
138 | tuplesort_get_stats(tuplesortstate, si); |
139 | } |
140 | SO1_printf("ExecSort: %s\n" , "sorting done" ); |
141 | } |
142 | |
143 | SO1_printf("ExecSort: %s\n" , |
144 | "retrieving tuple from tuplesort" ); |
145 | |
146 | /* |
147 | * Get the first or next tuple from tuplesort. Returns NULL if no more |
148 | * tuples. Note that we only rely on slot tuple remaining valid until the |
149 | * next fetch from the tuplesort. |
150 | */ |
151 | slot = node->ss.ps.ps_ResultTupleSlot; |
152 | (void) tuplesort_gettupleslot(tuplesortstate, |
153 | ScanDirectionIsForward(dir), |
154 | false, slot, NULL); |
155 | return slot; |
156 | } |
157 | |
158 | /* ---------------------------------------------------------------- |
159 | * ExecInitSort |
160 | * |
161 | * Creates the run-time state information for the sort node |
162 | * produced by the planner and initializes its outer subtree. |
163 | * ---------------------------------------------------------------- |
164 | */ |
165 | SortState * |
166 | ExecInitSort(Sort *node, EState *estate, int eflags) |
167 | { |
168 | SortState *sortstate; |
169 | |
170 | SO1_printf("ExecInitSort: %s\n" , |
171 | "initializing sort node" ); |
172 | |
173 | /* |
174 | * create state structure |
175 | */ |
176 | sortstate = makeNode(SortState); |
177 | sortstate->ss.ps.plan = (Plan *) node; |
178 | sortstate->ss.ps.state = estate; |
179 | sortstate->ss.ps.ExecProcNode = ExecSort; |
180 | |
181 | /* |
182 | * We must have random access to the sort output to do backward scan or |
183 | * mark/restore. We also prefer to materialize the sort output if we |
184 | * might be called on to rewind and replay it many times. |
185 | */ |
186 | sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND | |
187 | EXEC_FLAG_BACKWARD | |
188 | EXEC_FLAG_MARK)) != 0; |
189 | |
190 | sortstate->bounded = false; |
191 | sortstate->sort_Done = false; |
192 | sortstate->tuplesortstate = NULL; |
193 | |
194 | /* |
195 | * Miscellaneous initialization |
196 | * |
197 | * Sort nodes don't initialize their ExprContexts because they never call |
198 | * ExecQual or ExecProject. |
199 | */ |
200 | |
201 | /* |
202 | * initialize child nodes |
203 | * |
204 | * We shield the child node from the need to support REWIND, BACKWARD, or |
205 | * MARK/RESTORE. |
206 | */ |
207 | eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK); |
208 | |
209 | outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags); |
210 | |
211 | /* |
212 | * Initialize scan slot and type. |
213 | */ |
214 | ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual); |
215 | |
216 | /* |
217 | * Initialize return slot and type. No need to initialize projection info |
218 | * because this node doesn't do projections. |
219 | */ |
220 | ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple); |
221 | sortstate->ss.ps.ps_ProjInfo = NULL; |
222 | |
223 | SO1_printf("ExecInitSort: %s\n" , |
224 | "sort node initialized" ); |
225 | |
226 | return sortstate; |
227 | } |
228 | |
229 | /* ---------------------------------------------------------------- |
230 | * ExecEndSort(node) |
231 | * ---------------------------------------------------------------- |
232 | */ |
233 | void |
234 | ExecEndSort(SortState *node) |
235 | { |
236 | SO1_printf("ExecEndSort: %s\n" , |
237 | "shutting down sort node" ); |
238 | |
239 | /* |
240 | * clean out the tuple table |
241 | */ |
242 | ExecClearTuple(node->ss.ss_ScanTupleSlot); |
243 | /* must drop pointer to sort result tuple */ |
244 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
245 | |
246 | /* |
247 | * Release tuplesort resources |
248 | */ |
249 | if (node->tuplesortstate != NULL) |
250 | tuplesort_end((Tuplesortstate *) node->tuplesortstate); |
251 | node->tuplesortstate = NULL; |
252 | |
253 | /* |
254 | * shut down the subplan |
255 | */ |
256 | ExecEndNode(outerPlanState(node)); |
257 | |
258 | SO1_printf("ExecEndSort: %s\n" , |
259 | "sort node shutdown" ); |
260 | } |
261 | |
262 | /* ---------------------------------------------------------------- |
263 | * ExecSortMarkPos |
264 | * |
265 | * Calls tuplesort to save the current position in the sorted file. |
266 | * ---------------------------------------------------------------- |
267 | */ |
268 | void |
269 | ExecSortMarkPos(SortState *node) |
270 | { |
271 | /* |
272 | * if we haven't sorted yet, just return |
273 | */ |
274 | if (!node->sort_Done) |
275 | return; |
276 | |
277 | tuplesort_markpos((Tuplesortstate *) node->tuplesortstate); |
278 | } |
279 | |
280 | /* ---------------------------------------------------------------- |
281 | * ExecSortRestrPos |
282 | * |
283 | * Calls tuplesort to restore the last saved sort file position. |
284 | * ---------------------------------------------------------------- |
285 | */ |
286 | void |
287 | ExecSortRestrPos(SortState *node) |
288 | { |
289 | /* |
290 | * if we haven't sorted yet, just return. |
291 | */ |
292 | if (!node->sort_Done) |
293 | return; |
294 | |
295 | /* |
296 | * restore the scan to the previously marked position |
297 | */ |
298 | tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate); |
299 | } |
300 | |
/* ----------------------------------------------------------------
 *		ExecReScanSort
 *
 *		Prepares the node for a rescan: either discard the existing
 *		sorted output and arrange to re-read and re-sort the subplan,
 *		or -- when the sorted output is still usable -- just rewind it.
 * ----------------------------------------------------------------
 */
void
ExecReScanSort(SortState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/*
	 * If we haven't sorted yet, just return. If outerplan's chgParam is not
	 * NULL then it will be re-scanned by ExecProcNode, else no reason to
	 * re-scan it at all.
	 */
	if (!node->sort_Done)
		return;

	/* must drop pointer to sort result tuple */
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);

	/*
	 * If subnode is to be rescanned then we forget previous sort results; we
	 * have to re-read the subplan and re-sort. Also must re-sort if the
	 * bounded-sort parameters changed or we didn't select randomAccess.
	 *
	 * Otherwise we can just rewind and rescan the sorted output.
	 */
	if (outerPlan->chgParam != NULL ||
		node->bounded != node->bounded_Done ||
		node->bound != node->bound_Done ||
		!node->randomAccess)
	{
		/* throw away the old tuplesort; ExecSort will build a fresh one */
		node->sort_Done = false;
		tuplesort_end((Tuplesortstate *) node->tuplesortstate);
		node->tuplesortstate = NULL;

		/*
		 * if chgParam of subnode is not null then plan will be re-scanned by
		 * first ExecProcNode.
		 */
		if (outerPlan->chgParam == NULL)
			ExecReScan(outerPlan);
	}
	else
		tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
}
343 | |
/* ----------------------------------------------------------------
 *						Parallel Query Support
 * ----------------------------------------------------------------
 */
349 | /* ---------------------------------------------------------------- |
350 | * ExecSortEstimate |
351 | * |
352 | * Estimate space required to propagate sort statistics. |
353 | * ---------------------------------------------------------------- |
354 | */ |
355 | void |
356 | ExecSortEstimate(SortState *node, ParallelContext *pcxt) |
357 | { |
358 | Size size; |
359 | |
360 | /* don't need this if not instrumenting or no workers */ |
361 | if (!node->ss.ps.instrument || pcxt->nworkers == 0) |
362 | return; |
363 | |
364 | size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation)); |
365 | size = add_size(size, offsetof(SharedSortInfo, sinstrument)); |
366 | shm_toc_estimate_chunk(&pcxt->estimator, size); |
367 | shm_toc_estimate_keys(&pcxt->estimator, 1); |
368 | } |
369 | |
370 | /* ---------------------------------------------------------------- |
371 | * ExecSortInitializeDSM |
372 | * |
373 | * Initialize DSM space for sort statistics. |
374 | * ---------------------------------------------------------------- |
375 | */ |
376 | void |
377 | ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt) |
378 | { |
379 | Size size; |
380 | |
381 | /* don't need this if not instrumenting or no workers */ |
382 | if (!node->ss.ps.instrument || pcxt->nworkers == 0) |
383 | return; |
384 | |
385 | size = offsetof(SharedSortInfo, sinstrument) |
386 | + pcxt->nworkers * sizeof(TuplesortInstrumentation); |
387 | node->shared_info = shm_toc_allocate(pcxt->toc, size); |
388 | /* ensure any unfilled slots will contain zeroes */ |
389 | memset(node->shared_info, 0, size); |
390 | node->shared_info->num_workers = pcxt->nworkers; |
391 | shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, |
392 | node->shared_info); |
393 | } |
394 | |
395 | /* ---------------------------------------------------------------- |
396 | * ExecSortInitializeWorker |
397 | * |
398 | * Attach worker to DSM space for sort statistics. |
399 | * ---------------------------------------------------------------- |
400 | */ |
401 | void |
402 | ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt) |
403 | { |
404 | node->shared_info = |
405 | shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true); |
406 | node->am_worker = true; |
407 | } |
408 | |
409 | /* ---------------------------------------------------------------- |
410 | * ExecSortRetrieveInstrumentation |
411 | * |
412 | * Transfer sort statistics from DSM to private memory. |
413 | * ---------------------------------------------------------------- |
414 | */ |
415 | void |
416 | ExecSortRetrieveInstrumentation(SortState *node) |
417 | { |
418 | Size size; |
419 | SharedSortInfo *si; |
420 | |
421 | if (node->shared_info == NULL) |
422 | return; |
423 | |
424 | size = offsetof(SharedSortInfo, sinstrument) |
425 | + node->shared_info->num_workers * sizeof(TuplesortInstrumentation); |
426 | si = palloc(size); |
427 | memcpy(si, node->shared_info, size); |
428 | node->shared_info = si; |
429 | } |
430 | |