/*-------------------------------------------------------------------------
 *
 * nodeGatherMerge.c
 *		Scan a plan in multiple workers, and do order-preserving merge.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeGatherMerge.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/relscan.h"
#include "access/xact.h"
#include "executor/execdebug.h"
#include "executor/execParallel.h"
#include "executor/nodeGatherMerge.h"
#include "executor/nodeSubplan.h"
#include "executor/tqueue.h"
#include "lib/binaryheap.h"
#include "miscadmin.h"
#include "optimizer/optimizer.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/*
 * When we read tuples from workers, it's a good idea to read several at once
 * for efficiency when possible: this minimizes context-switching overhead.
 * But reading too many at a time wastes memory without improving performance.
 * We'll read up to MAX_TUPLE_STORE tuples (in addition to the first one).
 */
#define MAX_TUPLE_STORE 10

/*
 * Pending-tuple array for each worker.  This holds additional tuples that
 * we were able to fetch from the worker, but can't process yet.  In addition,
 * this struct holds the "done" flag indicating the worker is known to have
 * no more tuples.  (We do not use this struct for the leader; we don't keep
 * any pending tuples for the leader, and the need_to_scan_locally flag serves
 * as its "done" indicator.)
 */
typedef struct GMReaderTupleBuffer
{
    HeapTuple  *tuple;          /* array of length MAX_TUPLE_STORE */
    int         nTuples;        /* number of tuples currently stored */
    int         readCounter;    /* index of next tuple to extract */
    bool        done;           /* true if reader is known exhausted */
} GMReaderTupleBuffer;

static TupleTableSlot *ExecGatherMerge(PlanState *pstate);
static int32 heap_compare_slots(Datum a, Datum b, void *arg);
static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state);
static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader,
                                   bool nowait, bool *done);
static void ExecShutdownGatherMergeWorkers(GatherMergeState *node);
static void gather_merge_setup(GatherMergeState *gm_state);
static void gather_merge_init(GatherMergeState *gm_state);
static void gather_merge_clear_tuples(GatherMergeState *gm_state);
static bool gather_merge_readnext(GatherMergeState *gm_state, int reader,
                                  bool nowait);
static void load_tuple_array(GatherMergeState *gm_state, int reader);
/* ----------------------------------------------------------------
 *		ExecInitGatherMerge
 * ----------------------------------------------------------------
 */
GatherMergeState *
ExecInitGatherMerge(GatherMerge *node, EState *estate, int eflags)
{
    GatherMergeState *gm_state;
    Plan       *outerNode;
    TupleDesc   tupDesc;

    /* Gather merge node doesn't have innerPlan node. */
    Assert(innerPlan(node) == NULL);

    /*
     * create state structure
     */
    gm_state = makeNode(GatherMergeState);
    gm_state->ps.plan = (Plan *) node;
    gm_state->ps.state = estate;
    gm_state->ps.ExecProcNode = ExecGatherMerge;

    gm_state->initialized = false;
    gm_state->gm_initialized = false;
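    /* -1 means "no tuple bound"; ExecSetTupleBound may install one later */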
    gm_state->tuples_needed = -1;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &gm_state->ps);

    /*
     * GatherMerge doesn't support checking a qual (it's always more efficient
     * to do it in the child node).
     */
    Assert(!node->plan.qual);

    /*
     * now initialize outer plan
     */
    outerNode = outerPlan(node);
    outerPlanState(gm_state) = ExecInitNode(outerNode, estate, eflags);

    /*
     * Leader may access ExecProcNode result directly (if
     * need_to_scan_locally), or from workers via tuple queue.  So we can't
     * trivially rely on the slot type being fixed for expressions evaluated
     * within this node.
     */
    gm_state->ps.outeropsset = true;
    gm_state->ps.outeropsfixed = false;

    /*
     * Store the tuple descriptor into gather merge state, so we can use it
     * while initializing the gather merge slots.
     */
    tupDesc = ExecGetResultType(outerPlanState(gm_state));
    gm_state->tupDesc = tupDesc;

    /*
     * Initialize result type and projection.
     */
    ExecInitResultTypeTL(&gm_state->ps);
    ExecConditionalAssignProjectionInfo(&gm_state->ps, tupDesc, OUTER_VAR);

    /*
     * Without a projection, the result slot type is not trivially known; see
     * the comment above.
     */
    if (gm_state->ps.ps_ProjInfo == NULL)
    {
        gm_state->ps.resultopsset = true;
        gm_state->ps.resultopsfixed = false;
    }

    /*
     * initialize sort-key information
     */
    if (node->numCols)
    {
        int         i;

        gm_state->gm_nkeys = node->numCols;
        gm_state->gm_sortkeys =
            palloc0(sizeof(SortSupportData) * node->numCols);

        for (i = 0; i < node->numCols; i++)
        {
            SortSupport sortKey = gm_state->gm_sortkeys + i;

            sortKey->ssup_cxt = CurrentMemoryContext;
            sortKey->ssup_collation = node->collations[i];
            sortKey->ssup_nulls_first = node->nullsFirst[i];
            sortKey->ssup_attno = node->sortColIdx[i];

            /*
             * We don't perform abbreviated key conversion here, for the same
             * reasons that it isn't used in MergeAppend
             */
            sortKey->abbreviate = false;

            PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey);
        }
    }

    /* Now allocate the workspace for gather merge */
    gather_merge_setup(gm_state);

    return gm_state;
}

/* ----------------------------------------------------------------
 *		ExecGatherMerge(node)
 *
 *		Scans the relation via multiple workers and returns
 *		the next qualifying tuple.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecGatherMerge(PlanState *pstate)
{
    GatherMergeState *node = castNode(GatherMergeState, pstate);
    TupleTableSlot *slot;
    ExprContext *econtext;

    CHECK_FOR_INTERRUPTS();

    /*
     * As with Gather, we don't launch workers until this node is actually
     * executed.
     */
    if (!node->initialized)
    {
        EState     *estate = node->ps.state;
        GatherMerge *gm = castNode(GatherMerge, node->ps.plan);

        /*
         * Sometimes we might have to run without parallelism; but if parallel
         * mode is active then we can try to fire up some workers.
         */
        if (gm->num_workers > 0 && estate->es_use_parallel_mode)
        {
            ParallelContext *pcxt;

            /* Initialize, or re-initialize, shared state needed by workers. */
            if (!node->pei)
                node->pei = ExecInitParallelPlan(node->ps.lefttree,
                                                 estate,
                                                 gm->initParam,
                                                 gm->num_workers,
                                                 node->tuples_needed);
            else
                ExecParallelReinitialize(node->ps.lefttree,
                                         node->pei,
                                         gm->initParam);

            /* Try to launch workers. */
            pcxt = node->pei->pcxt;
            LaunchParallelWorkers(pcxt);
            /* We save # workers launched for the benefit of EXPLAIN */
            node->nworkers_launched = pcxt->nworkers_launched;

            /* Set up tuple queue readers to read the results. */
            if (pcxt->nworkers_launched > 0)
            {
                ExecParallelCreateReaders(node->pei);
                /* Make a working array showing the active readers */
                node->nreaders = pcxt->nworkers_launched;
                node->reader = (TupleQueueReader **)
                    palloc(node->nreaders * sizeof(TupleQueueReader *));
                memcpy(node->reader, node->pei->reader,
                       node->nreaders * sizeof(TupleQueueReader *));
            }
            else
            {
                /* No workers? Then never mind. */
                node->nreaders = 0;
                node->reader = NULL;
            }
        }

        /* allow leader to participate if enabled or no choice */
        if (parallel_leader_participation || node->nreaders == 0)
            node->need_to_scan_locally = true;
        node->initialized = true;
    }

    /*
     * Reset per-tuple memory context to free any expression evaluation
     * storage allocated in the previous tuple cycle.
     */
    econtext = node->ps.ps_ExprContext;
    ResetExprContext(econtext);

    /*
     * Get next tuple, either from one of our workers, or by running the plan
     * ourselves.
     */
    slot = gather_merge_getnext(node);
    if (TupIsNull(slot))
        return NULL;

    /* If no projection is required, we're done. */
    if (node->ps.ps_ProjInfo == NULL)
        return slot;

    /*
     * Form the result tuple using ExecProject(), and return it.
     */
    econtext->ecxt_outertuple = slot;
    return ExecProject(node->ps.ps_ProjInfo);
}

/* ----------------------------------------------------------------
 *		ExecEndGatherMerge
 *
 *		frees any storage allocated through C routines.
 * ----------------------------------------------------------------
 */
void
ExecEndGatherMerge(GatherMergeState *node)
{
    ExecEndNode(outerPlanState(node));  /* let children clean up first */
    ExecShutdownGatherMerge(node);
    ExecFreeExprContext(&node->ps);
    if (node->ps.ps_ResultTupleSlot)
        ExecClearTuple(node->ps.ps_ResultTupleSlot);
}

/* ----------------------------------------------------------------
 *		ExecShutdownGatherMerge
 *
 *		Destroy the setup for parallel workers including parallel context.
 * ----------------------------------------------------------------
 */
void
ExecShutdownGatherMerge(GatherMergeState *node)
{
    ExecShutdownGatherMergeWorkers(node);

    /* Now destroy the parallel context. */
    if (node->pei != NULL)
    {
        ExecParallelCleanup(node->pei);
        node->pei = NULL;
    }
}

/* ----------------------------------------------------------------
 *		ExecShutdownGatherMergeWorkers
 *
 *		Stop all the parallel workers.
 * ----------------------------------------------------------------
 */
static void
ExecShutdownGatherMergeWorkers(GatherMergeState *node)
{
    if (node->pei != NULL)
        ExecParallelFinish(node->pei);

    /* Flush local copy of reader array */
    if (node->reader)
        pfree(node->reader);
    node->reader = NULL;
}

/* ----------------------------------------------------------------
 *		ExecReScanGatherMerge
 *
 *		Prepare to re-scan the result of a GatherMerge.
 * ----------------------------------------------------------------
 */
void
ExecReScanGatherMerge(GatherMergeState *node)
{
    GatherMerge *gm = (GatherMerge *) node->ps.plan;
    PlanState  *outerPlan = outerPlanState(node);

    /* Make sure any existing workers are gracefully shut down */
    ExecShutdownGatherMergeWorkers(node);

    /* Free any unused tuples, so we don't leak memory across rescans */
    gather_merge_clear_tuples(node);

    /* Mark node so that shared state will be rebuilt at next call */
    node->initialized = false;
    node->gm_initialized = false;

    /*
     * Set child node's chgParam to tell it that the next scan might deliver a
     * different set of rows within the leader process.  (The overall rowset
     * shouldn't change, but the leader process's subset might; hence nodes
     * between here and the parallel table scan node mustn't optimize on the
     * assumption of an unchanging rowset.)
     */
    if (gm->rescan_param >= 0)
        outerPlan->chgParam = bms_add_member(outerPlan->chgParam,
                                             gm->rescan_param);

    /*
     * If chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.  Note: because this does nothing if we have a
     * rescan_param, it's currently guaranteed that parallel-aware child nodes
     * will not see a ReScan call until after they get a ReInitializeDSM call.
     * That ordering might not be something to rely on, though.  A good rule
     * of thumb is that ReInitializeDSM should reset only shared state, ReScan
     * should reset only local state, and anything that depends on both of
     * those steps being finished must wait until the first ExecProcNode call.
     */
    if (outerPlan->chgParam == NULL)
        ExecReScan(outerPlan);
}

/*
 * Set up the data structures that we'll need for Gather Merge.
 *
 * We allocate these once, on the basis of gm->num_workers, which is an
 * upper bound for the number of workers we'll actually have.  During
 * a rescan, we simply reset the structures to empty rather than
 * reallocating them, which avoids leaking memory across rescans.
 *
 * In the gm_slots[] array, index 0 is for the leader, and indexes 1 to n
 * are for workers.  The values placed into gm_heap correspond to indexes
 * in gm_slots[].  The gm_tuple_buffers[] array, however, is indexed from
 * 0 to n-1; it has no entry for the leader.
 */
static void
gather_merge_setup(GatherMergeState *gm_state)
{
    GatherMerge *gm = castNode(GatherMerge, gm_state->ps.plan);
    int         nreaders = gm->num_workers;
    int         i;

    /*
     * Allocate gm_slots for the number of workers + one more slot for leader.
     * Slot 0 is always for the leader.  Leader always calls ExecProcNode() to
     * read the tuple, and then stores it directly into its gm_slots entry.
     * For other slots, code below will call ExecInitExtraTupleSlot() to
     * create a slot for the worker's results.  Note that during any single
     * scan, we might have fewer than num_workers available workers, in which
     * case the extra array entries go unused.
     */
    gm_state->gm_slots = (TupleTableSlot **)
        palloc0((nreaders + 1) * sizeof(TupleTableSlot *));

    /* Allocate the tuple slot and tuple array for each worker */
    gm_state->gm_tuple_buffers = (GMReaderTupleBuffer *)
        palloc0(nreaders * sizeof(GMReaderTupleBuffer));

    for (i = 0; i < nreaders; i++)
    {
        /* Allocate the tuple array with length MAX_TUPLE_STORE */
        gm_state->gm_tuple_buffers[i].tuple =
            (HeapTuple *) palloc0(sizeof(HeapTuple) * MAX_TUPLE_STORE);

        /* Initialize tuple slot for worker */
        gm_state->gm_slots[i + 1] =
            ExecInitExtraTupleSlot(gm_state->ps.state, gm_state->tupDesc,
                                   &TTSOpsHeapTuple);
    }

    /* Allocate the resources for the merge */
    gm_state->gm_heap = binaryheap_allocate(nreaders + 1,
                                            heap_compare_slots,
                                            gm_state);
}

/*
 * Initialize the Gather Merge.
 *
 * Reset data structures to ensure they're empty.  Then pull at least one
 * tuple from leader + each worker (or set its "done" indicator), and set up
 * the heap.
 */
static void
gather_merge_init(GatherMergeState *gm_state)
{
    int         nreaders = gm_state->nreaders;
    bool        nowait = true;
    int         i;

    /* Assert that gather_merge_setup made enough space */
    Assert(nreaders <= castNode(GatherMerge, gm_state->ps.plan)->num_workers);

    /* Reset leader's tuple slot to empty */
    gm_state->gm_slots[0] = NULL;

    /* Reset the tuple slot and tuple array for each worker */
    for (i = 0; i < nreaders; i++)
    {
        /* Reset tuple array to empty */
        gm_state->gm_tuple_buffers[i].nTuples = 0;
        gm_state->gm_tuple_buffers[i].readCounter = 0;
        /* Reset done flag to not-done */
        gm_state->gm_tuple_buffers[i].done = false;
        /* Ensure output slot is empty */
        ExecClearTuple(gm_state->gm_slots[i + 1]);
    }

    /* Reset binary heap to empty */
    binaryheap_reset(gm_state->gm_heap);

    /*
     * First, try to read a tuple from each worker (including leader) in
     * nowait mode.  After this, if not all workers were able to produce a
     * tuple (or a "done" indication), then re-read from remaining workers,
     * this time using wait mode.  Add all live readers (those producing at
     * least one tuple) to the heap.
     */
reread:
    for (i = 0; i <= nreaders; i++)
    {
        CHECK_FOR_INTERRUPTS();

        /* skip this source if already known done */
        if ((i == 0) ? gm_state->need_to_scan_locally :
            !gm_state->gm_tuple_buffers[i - 1].done)
        {
            if (TupIsNull(gm_state->gm_slots[i]))
            {
                /* Don't have a tuple yet, try to get one */
                if (gather_merge_readnext(gm_state, i, nowait))
                    binaryheap_add_unordered(gm_state->gm_heap,
                                             Int32GetDatum(i));
            }
            else
            {
                /*
                 * We already got at least one tuple from this worker, but
                 * might as well see if it has any more ready by now.
                 */
                load_tuple_array(gm_state, i);
            }
        }
    }

    /* need not recheck leader, since nowait doesn't matter for it */
    for (i = 1; i <= nreaders; i++)
    {
        if (!gm_state->gm_tuple_buffers[i - 1].done &&
            TupIsNull(gm_state->gm_slots[i]))
        {
            nowait = false;
            goto reread;
        }
    }

    /* Now heapify the heap. */
    binaryheap_build(gm_state->gm_heap);

    gm_state->gm_initialized = true;
}

/*
 * Clear out the tuple table slot, and any unused pending tuples,
 * for each gather merge input.
 */
static void
gather_merge_clear_tuples(GatherMergeState *gm_state)
{
    int         i;

    for (i = 0; i < gm_state->nreaders; i++)
    {
        GMReaderTupleBuffer *tuple_buffer = &gm_state->gm_tuple_buffers[i];

        while (tuple_buffer->readCounter < tuple_buffer->nTuples)
            heap_freetuple(tuple_buffer->tuple[tuple_buffer->readCounter++]);

        ExecClearTuple(gm_state->gm_slots[i + 1]);
    }
}

/*
 * Read the next tuple for gather merge.
 *
 * Fetch the sorted tuple out of the heap.
 */
static TupleTableSlot *
gather_merge_getnext(GatherMergeState *gm_state)
{
    int         i;

    if (!gm_state->gm_initialized)
    {
        /*
         * First time through: pull the first tuple from each participant, and
         * set up the heap.
         */
        gather_merge_init(gm_state);
    }
    else
    {
        /*
         * Otherwise, pull the next tuple from whichever participant we
         * returned from last time, and reinsert that participant's index into
         * the heap, because it might now compare differently against the
         * other elements of the heap.
         */
        i = DatumGetInt32(binaryheap_first(gm_state->gm_heap));

        if (gather_merge_readnext(gm_state, i, false))
            binaryheap_replace_first(gm_state->gm_heap, Int32GetDatum(i));
        else
        {
            /* reader exhausted, remove it from heap */
            (void) binaryheap_remove_first(gm_state->gm_heap);
        }
    }

    if (binaryheap_empty(gm_state->gm_heap))
    {
        /* All the queues are exhausted, and so is the heap */
        gather_merge_clear_tuples(gm_state);
        return NULL;
    }
    else
    {
        /* Return next tuple from whichever participant has the leading one */
        i = DatumGetInt32(binaryheap_first(gm_state->gm_heap));
        return gm_state->gm_slots[i];
    }
}

/*
 * Read tuple(s) for given reader in nowait mode, and load into its tuple
 * array, until we have MAX_TUPLE_STORE of them or would have to block.
 */
static void
load_tuple_array(GatherMergeState *gm_state, int reader)
{
    GMReaderTupleBuffer *tuple_buffer;
    int         i;

    /* Don't do anything if this is the leader. */
    if (reader == 0)
        return;

    tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];

    /* If there's nothing in the array, reset the counters to zero. */
    if (tuple_buffer->nTuples == tuple_buffer->readCounter)
        tuple_buffer->nTuples = tuple_buffer->readCounter = 0;

    /* Try to fill additional slots in the array. */
    for (i = tuple_buffer->nTuples; i < MAX_TUPLE_STORE; i++)
    {
        HeapTuple   tuple;

        tuple = gm_readnext_tuple(gm_state,
                                  reader,
                                  true,
                                  &tuple_buffer->done);
        if (!HeapTupleIsValid(tuple))
            break;
        tuple_buffer->tuple[i] = tuple;
        tuple_buffer->nTuples++;
    }
}

/*
 * Store the next tuple for a given reader into the appropriate slot.
 *
 * Returns true if successful, false if not (either reader is exhausted,
 * or we didn't want to wait for a tuple).  Sets done flag if reader
 * is found to be exhausted.
 */
static bool
gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
{
    GMReaderTupleBuffer *tuple_buffer;
    HeapTuple   tup;

    /*
     * If we're being asked to generate a tuple from the leader, then we just
     * call ExecProcNode as normal to produce one.
     */
    if (reader == 0)
    {
        if (gm_state->need_to_scan_locally)
        {
            PlanState  *outerPlan = outerPlanState(gm_state);
            TupleTableSlot *outerTupleSlot;
            EState     *estate = gm_state->ps.state;

            /* Install our DSA area while executing the plan. */
            estate->es_query_dsa = gm_state->pei ? gm_state->pei->area : NULL;
            outerTupleSlot = ExecProcNode(outerPlan);
            estate->es_query_dsa = NULL;

            if (!TupIsNull(outerTupleSlot))
            {
                gm_state->gm_slots[0] = outerTupleSlot;
                return true;
            }
            /* need_to_scan_locally serves as "done" flag for leader */
            gm_state->need_to_scan_locally = false;
        }
        return false;
    }

    /* Otherwise, check the state of the relevant tuple buffer. */
    tuple_buffer = &gm_state->gm_tuple_buffers[reader - 1];

    if (tuple_buffer->nTuples > tuple_buffer->readCounter)
    {
        /* Return any tuple previously read that is still buffered. */
        tup = tuple_buffer->tuple[tuple_buffer->readCounter++];
    }
    else if (tuple_buffer->done)
    {
        /* Reader is known to be exhausted. */
        return false;
    }
    else
    {
        /* Read and buffer next tuple. */
        tup = gm_readnext_tuple(gm_state,
                                reader,
                                nowait,
                                &tuple_buffer->done);
        if (!HeapTupleIsValid(tup))
            return false;

        /*
         * Attempt to read more tuples in nowait mode and store them in the
         * pending-tuple array for the reader.
         */
        load_tuple_array(gm_state, reader);
    }

    Assert(HeapTupleIsValid(tup));

    /* Build the TupleTableSlot for the given tuple */
    ExecStoreHeapTuple(tup,     /* tuple to store */
                       gm_state->gm_slots[reader],  /* slot in which to store
                                                     * the tuple */
                       true);   /* pfree tuple when done with it */

    return true;
}

/*
 * Attempt to read a tuple from given worker.
 */
static HeapTuple
gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
                  bool *done)
{
    TupleQueueReader *reader;
    HeapTuple   tup;

    /* Check for async events, particularly messages from workers. */
    CHECK_FOR_INTERRUPTS();

    /*
     * Attempt to read a tuple.
     *
     * Note that TupleQueueReaderNext will just return NULL for a worker which
     * fails to initialize.  We'll treat that worker as having produced no
     * tuples; WaitForParallelWorkersToFinish will error out when we get
     * there.
     */
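    /* reader[] has no entry for the leader, hence the "nreader - 1" */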
    reader = gm_state->reader[nreader - 1];
    tup = TupleQueueReaderNext(reader, nowait, done);

    return tup;
}

/*
 * We have one slot for each item in the heap array.  We use SlotNumber
 * to store slot indexes.  This doesn't actually provide any formal
 * type-safety, but it makes the code more self-documenting.
 */
typedef int32 SlotNumber;

/*
 * Compare the tuples in the two given slots.
 */
static int32
heap_compare_slots(Datum a, Datum b, void *arg)
{
    GatherMergeState *node = (GatherMergeState *) arg;
    SlotNumber  slot1 = DatumGetInt32(a);
    SlotNumber  slot2 = DatumGetInt32(b);

    TupleTableSlot *s1 = node->gm_slots[slot1];
    TupleTableSlot *s2 = node->gm_slots[slot2];
    int         nkey;

    Assert(!TupIsNull(s1));
    Assert(!TupIsNull(s2));

    for (nkey = 0; nkey < node->gm_nkeys; nkey++)
    {
        SortSupport sortKey = node->gm_sortkeys + nkey;
        AttrNumber  attno = sortKey->ssup_attno;
        Datum       datum1,
                    datum2;
        bool        isNull1,
                    isNull2;
        int         compare;

        datum1 = slot_getattr(s1, attno, &isNull1);
        datum2 = slot_getattr(s2, attno, &isNull2);

        compare = ApplySortComparator(datum1, isNull1,
                                      datum2, isNull2,
                                      sortKey);
        if (compare != 0)
        {
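            /*
             * binaryheap keeps the "largest" element (per this comparator) at
             * its top, so invert the result to make the heap surface the
             * smallest tuple in the sort order first.
             */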
            INVERT_COMPARE_RESULT(compare);
            return compare;
        }
    }
    return 0;
}