1/*-------------------------------------------------------------------------
2 *
3 * nodeSort.c
4 * Routines to handle sorting of relations.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeSort.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "access/parallel.h"
19#include "executor/execdebug.h"
20#include "executor/nodeSort.h"
21#include "miscadmin.h"
22#include "utils/tuplesort.h"
23
24
25/* ----------------------------------------------------------------
26 * ExecSort
27 *
28 * Sorts tuples from the outer subtree of the node using tuplesort,
29 * which saves the results in a temporary file or memory. After the
30 * initial call, returns a tuple from the file with each call.
31 *
32 * Conditions:
33 * -- none.
34 *
35 * Initial States:
36 * -- the outer child is prepared to return the first tuple.
37 * ----------------------------------------------------------------
38 */
39static TupleTableSlot *
40ExecSort(PlanState *pstate)
41{
42 SortState *node = castNode(SortState, pstate);
43 EState *estate;
44 ScanDirection dir;
45 Tuplesortstate *tuplesortstate;
46 TupleTableSlot *slot;
47
48 CHECK_FOR_INTERRUPTS();
49
50 /*
51 * get state info from node
52 */
53 SO1_printf("ExecSort: %s\n",
54 "entering routine");
55
56 estate = node->ss.ps.state;
57 dir = estate->es_direction;
58 tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
59
60 /*
61 * If first time through, read all tuples from outer plan and pass them to
62 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
63 */
64
65 if (!node->sort_Done)
66 {
67 Sort *plannode = (Sort *) node->ss.ps.plan;
68 PlanState *outerNode;
69 TupleDesc tupDesc;
70
71 SO1_printf("ExecSort: %s\n",
72 "sorting subplan");
73
74 /*
75 * Want to scan subplan in the forward direction while creating the
76 * sorted data.
77 */
78 estate->es_direction = ForwardScanDirection;
79
80 /*
81 * Initialize tuplesort module.
82 */
83 SO1_printf("ExecSort: %s\n",
84 "calling tuplesort_begin");
85
86 outerNode = outerPlanState(node);
87 tupDesc = ExecGetResultType(outerNode);
88
89 tuplesortstate = tuplesort_begin_heap(tupDesc,
90 plannode->numCols,
91 plannode->sortColIdx,
92 plannode->sortOperators,
93 plannode->collations,
94 plannode->nullsFirst,
95 work_mem,
96 NULL, node->randomAccess);
97 if (node->bounded)
98 tuplesort_set_bound(tuplesortstate, node->bound);
99 node->tuplesortstate = (void *) tuplesortstate;
100
101 /*
102 * Scan the subplan and feed all the tuples to tuplesort.
103 */
104
105 for (;;)
106 {
107 slot = ExecProcNode(outerNode);
108
109 if (TupIsNull(slot))
110 break;
111
112 tuplesort_puttupleslot(tuplesortstate, slot);
113 }
114
115 /*
116 * Complete the sort.
117 */
118 tuplesort_performsort(tuplesortstate);
119
120 /*
121 * restore to user specified direction
122 */
123 estate->es_direction = dir;
124
125 /*
126 * finally set the sorted flag to true
127 */
128 node->sort_Done = true;
129 node->bounded_Done = node->bounded;
130 node->bound_Done = node->bound;
131 if (node->shared_info && node->am_worker)
132 {
133 TuplesortInstrumentation *si;
134
135 Assert(IsParallelWorker());
136 Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
137 si = &node->shared_info->sinstrument[ParallelWorkerNumber];
138 tuplesort_get_stats(tuplesortstate, si);
139 }
140 SO1_printf("ExecSort: %s\n", "sorting done");
141 }
142
143 SO1_printf("ExecSort: %s\n",
144 "retrieving tuple from tuplesort");
145
146 /*
147 * Get the first or next tuple from tuplesort. Returns NULL if no more
148 * tuples. Note that we only rely on slot tuple remaining valid until the
149 * next fetch from the tuplesort.
150 */
151 slot = node->ss.ps.ps_ResultTupleSlot;
152 (void) tuplesort_gettupleslot(tuplesortstate,
153 ScanDirectionIsForward(dir),
154 false, slot, NULL);
155 return slot;
156}
157
158/* ----------------------------------------------------------------
159 * ExecInitSort
160 *
161 * Creates the run-time state information for the sort node
162 * produced by the planner and initializes its outer subtree.
163 * ----------------------------------------------------------------
164 */
165SortState *
166ExecInitSort(Sort *node, EState *estate, int eflags)
167{
168 SortState *sortstate;
169
170 SO1_printf("ExecInitSort: %s\n",
171 "initializing sort node");
172
173 /*
174 * create state structure
175 */
176 sortstate = makeNode(SortState);
177 sortstate->ss.ps.plan = (Plan *) node;
178 sortstate->ss.ps.state = estate;
179 sortstate->ss.ps.ExecProcNode = ExecSort;
180
181 /*
182 * We must have random access to the sort output to do backward scan or
183 * mark/restore. We also prefer to materialize the sort output if we
184 * might be called on to rewind and replay it many times.
185 */
186 sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
187 EXEC_FLAG_BACKWARD |
188 EXEC_FLAG_MARK)) != 0;
189
190 sortstate->bounded = false;
191 sortstate->sort_Done = false;
192 sortstate->tuplesortstate = NULL;
193
194 /*
195 * Miscellaneous initialization
196 *
197 * Sort nodes don't initialize their ExprContexts because they never call
198 * ExecQual or ExecProject.
199 */
200
201 /*
202 * initialize child nodes
203 *
204 * We shield the child node from the need to support REWIND, BACKWARD, or
205 * MARK/RESTORE.
206 */
207 eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);
208
209 outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
210
211 /*
212 * Initialize scan slot and type.
213 */
214 ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
215
216 /*
217 * Initialize return slot and type. No need to initialize projection info
218 * because this node doesn't do projections.
219 */
220 ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
221 sortstate->ss.ps.ps_ProjInfo = NULL;
222
223 SO1_printf("ExecInitSort: %s\n",
224 "sort node initialized");
225
226 return sortstate;
227}
228
229/* ----------------------------------------------------------------
230 * ExecEndSort(node)
231 * ----------------------------------------------------------------
232 */
233void
234ExecEndSort(SortState *node)
235{
236 SO1_printf("ExecEndSort: %s\n",
237 "shutting down sort node");
238
239 /*
240 * clean out the tuple table
241 */
242 ExecClearTuple(node->ss.ss_ScanTupleSlot);
243 /* must drop pointer to sort result tuple */
244 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
245
246 /*
247 * Release tuplesort resources
248 */
249 if (node->tuplesortstate != NULL)
250 tuplesort_end((Tuplesortstate *) node->tuplesortstate);
251 node->tuplesortstate = NULL;
252
253 /*
254 * shut down the subplan
255 */
256 ExecEndNode(outerPlanState(node));
257
258 SO1_printf("ExecEndSort: %s\n",
259 "sort node shutdown");
260}
261
262/* ----------------------------------------------------------------
263 * ExecSortMarkPos
264 *
265 * Calls tuplesort to save the current position in the sorted file.
266 * ----------------------------------------------------------------
267 */
268void
269ExecSortMarkPos(SortState *node)
270{
271 /*
272 * if we haven't sorted yet, just return
273 */
274 if (!node->sort_Done)
275 return;
276
277 tuplesort_markpos((Tuplesortstate *) node->tuplesortstate);
278}
279
280/* ----------------------------------------------------------------
281 * ExecSortRestrPos
282 *
283 * Calls tuplesort to restore the last saved sort file position.
284 * ----------------------------------------------------------------
285 */
286void
287ExecSortRestrPos(SortState *node)
288{
289 /*
290 * if we haven't sorted yet, just return.
291 */
292 if (!node->sort_Done)
293 return;
294
295 /*
296 * restore the scan to the previously marked position
297 */
298 tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate);
299}
300
301void
302ExecReScanSort(SortState *node)
303{
304 PlanState *outerPlan = outerPlanState(node);
305
306 /*
307 * If we haven't sorted yet, just return. If outerplan's chgParam is not
308 * NULL then it will be re-scanned by ExecProcNode, else no reason to
309 * re-scan it at all.
310 */
311 if (!node->sort_Done)
312 return;
313
314 /* must drop pointer to sort result tuple */
315 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
316
317 /*
318 * If subnode is to be rescanned then we forget previous sort results; we
319 * have to re-read the subplan and re-sort. Also must re-sort if the
320 * bounded-sort parameters changed or we didn't select randomAccess.
321 *
322 * Otherwise we can just rewind and rescan the sorted output.
323 */
324 if (outerPlan->chgParam != NULL ||
325 node->bounded != node->bounded_Done ||
326 node->bound != node->bound_Done ||
327 !node->randomAccess)
328 {
329 node->sort_Done = false;
330 tuplesort_end((Tuplesortstate *) node->tuplesortstate);
331 node->tuplesortstate = NULL;
332
333 /*
334 * if chgParam of subnode is not null then plan will be re-scanned by
335 * first ExecProcNode.
336 */
337 if (outerPlan->chgParam == NULL)
338 ExecReScan(outerPlan);
339 }
340 else
341 tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
342}
343
344/* ----------------------------------------------------------------
345 * Parallel Query Support
346 * ----------------------------------------------------------------
347 */
348
349/* ----------------------------------------------------------------
350 * ExecSortEstimate
351 *
352 * Estimate space required to propagate sort statistics.
353 * ----------------------------------------------------------------
354 */
355void
356ExecSortEstimate(SortState *node, ParallelContext *pcxt)
357{
358 Size size;
359
360 /* don't need this if not instrumenting or no workers */
361 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
362 return;
363
364 size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
365 size = add_size(size, offsetof(SharedSortInfo, sinstrument));
366 shm_toc_estimate_chunk(&pcxt->estimator, size);
367 shm_toc_estimate_keys(&pcxt->estimator, 1);
368}
369
370/* ----------------------------------------------------------------
371 * ExecSortInitializeDSM
372 *
373 * Initialize DSM space for sort statistics.
374 * ----------------------------------------------------------------
375 */
376void
377ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
378{
379 Size size;
380
381 /* don't need this if not instrumenting or no workers */
382 if (!node->ss.ps.instrument || pcxt->nworkers == 0)
383 return;
384
385 size = offsetof(SharedSortInfo, sinstrument)
386 + pcxt->nworkers * sizeof(TuplesortInstrumentation);
387 node->shared_info = shm_toc_allocate(pcxt->toc, size);
388 /* ensure any unfilled slots will contain zeroes */
389 memset(node->shared_info, 0, size);
390 node->shared_info->num_workers = pcxt->nworkers;
391 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
392 node->shared_info);
393}
394
395/* ----------------------------------------------------------------
396 * ExecSortInitializeWorker
397 *
398 * Attach worker to DSM space for sort statistics.
399 * ----------------------------------------------------------------
400 */
401void
402ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
403{
404 node->shared_info =
405 shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
406 node->am_worker = true;
407}
408
409/* ----------------------------------------------------------------
410 * ExecSortRetrieveInstrumentation
411 *
412 * Transfer sort statistics from DSM to private memory.
413 * ----------------------------------------------------------------
414 */
415void
416ExecSortRetrieveInstrumentation(SortState *node)
417{
418 Size size;
419 SharedSortInfo *si;
420
421 if (node->shared_info == NULL)
422 return;
423
424 size = offsetof(SharedSortInfo, sinstrument)
425 + node->shared_info->num_workers * sizeof(TuplesortInstrumentation);
426 si = palloc(size);
427 memcpy(si, node->shared_info, size);
428 node->shared_info = si;
429}
430