tuplesort.h source code [PostgreSQL/src/include/utils/tuplesort.h]

1	/*-------------------------------------------------------------------------
2	*
3	* tuplesort.h
4	* Generalized tuple sorting routines.
5	*
6	* This module handles sorting of heap tuples, index tuples, or single
7	* Datums (and could easily support other kinds of sortable objects,
8	* if necessary). It works efficiently for both small and large amounts
9	* of data. Small amounts are sorted in-memory using qsort(). Large
10	* amounts are sorted using temporary files and a standard external sort
11	* algorithm. Parallel sorts use a variant of this external sort
12	* algorithm, and are typically only used for large amounts of data.
13	*
14	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
15	* Portions Copyright (c) 1994, Regents of the University of California
16	*
17	* src/include/utils/tuplesort.h
18	*
19	*-------------------------------------------------------------------------
20	*/
21	#ifndef TUPLESORT_H
22	#define TUPLESORT_H
23
24	#include "access/itup.h"
25	#include "executor/tuptable.h"
26	#include "fmgr.h"
27	#include "storage/dsm.h"
28	#include "utils/relcache.h"
29
30
/*
 * Tuplesortstate and Sharedsort are opaque types whose details are not
 * known outside tuplesort.c.
 */
typedef struct Tuplesortstate Tuplesortstate;
typedef struct Sharedsort Sharedsort;

/*
 * Tuplesort parallel coordination state, allocated by each participant in
 * local memory.  Participant caller initializes everything.  See usage notes
 * below.
 */
typedef struct SortCoordinateData
{
	/* Worker process?  If not, must be leader. */
	bool		isWorker;

	/*
	 * Leader-process-passed number of participants known launched (workers
	 * set this to -1).  Includes state within leader needed for it to
	 * participate as a worker, if any.
	 */
	int			nParticipants;

	/* Private opaque state (points to shared memory) */
	Sharedsort *sharedsort;
} SortCoordinateData;

/* Pointer alias handed to the tuplesort_begin_* routines as "coordinate". */
typedef struct SortCoordinateData *SortCoordinate;
60
/*
 * Data structures for reporting sort statistics.  Note that
 * TuplesortInstrumentation can't contain any pointers because we
 * sometimes put it in shared memory.
 */

/* Sort algorithm used; the first member is pinned to 0. */
typedef enum
{
	SORT_TYPE_STILL_IN_PROGRESS = 0,
	SORT_TYPE_TOP_N_HEAPSORT,
	SORT_TYPE_QUICKSORT,
	SORT_TYPE_EXTERNAL_SORT,
	SORT_TYPE_EXTERNAL_MERGE
} TuplesortMethod;

/* Kind of space that TuplesortInstrumentation.spaceUsed measures. */
typedef enum
{
	SORT_SPACE_TYPE_DISK,
	SORT_SPACE_TYPE_MEMORY
} TuplesortSpaceType;

typedef struct TuplesortInstrumentation
{
	TuplesortMethod sortMethod; /* sort algorithm used */
	TuplesortSpaceType spaceType;	/* type of space spaceUsed represents */
	long		spaceUsed;		/* space consumption, in kB */
} TuplesortInstrumentation;
87
88
89	/*
90	* We provide multiple interfaces to what is essentially the same code,
91	* since different callers have different data to be sorted and want to
92	* specify the sort key information differently. There are two APIs for
93	* sorting HeapTuples and two more for sorting IndexTuples. Yet another
94	* API supports sorting bare Datums.
95	*
96	* Serial sort callers should pass NULL for their coordinate argument.
97	*
98	* The "heap" API actually stores/sorts MinimalTuples, which means it doesn't
99	* preserve the system columns (tuple identity and transaction visibility
100	* info). The sort keys are specified by column numbers within the tuples
101	* and sort operator OIDs. We save some cycles by passing and returning the
102	* tuples in TupleTableSlots, rather than forming actual HeapTuples (which'd
103	* have to be converted to MinimalTuples). This API works well for sorts
104	* executed as parts of plan trees.
105	*
106	* The "cluster" API stores/sorts full HeapTuples including all visibility
107	* info. The sort keys are specified by reference to a btree index that is
108	* defined on the relation to be sorted. Note that putheaptuple/getheaptuple
109	* go with this API, not the "begin_heap" one!
110	*
111	* The "index_btree" API stores/sorts IndexTuples (preserving all their
112	* header fields). The sort keys are specified by a btree index definition.
113	*
114	* The "index_hash" API is similar to index_btree, but the tuples are
115	* actually sorted by their hash codes not the raw data.
116	*
117	* Parallel sort callers are required to coordinate multiple tuplesort states
118	* in a leader process and one or more worker processes. The leader process
119	* must launch workers, and have each perform an independent "partial"
120	* tuplesort, typically fed by the parallel heap interface. The leader later
121	* produces the final output (internally, it merges runs output by workers).
122	*
123	* Callers must do the following to perform a sort in parallel using multiple
124	* worker processes:
125	*
126	* 1. Request tuplesort-private shared memory for n workers. Use
127	* tuplesort_estimate_shared() to get the required size.
128	* 2. Have leader process initialize allocated shared memory using
129	* tuplesort_initialize_shared(). Launch workers.
130	* 3. Initialize a coordinate argument within both the leader process, and
131	* for each worker process. This has a pointer to the shared
132	* tuplesort-private structure, as well as some caller-initialized fields.
133	* Leader's coordinate argument reliably indicates number of workers
134	* launched (this is unused by workers).
135	* 4. Begin a tuplesort using some appropriate tuplesort_begin* routine,
136	* (passing the coordinate argument) within each worker. The workMem
137	* arguments need not be identical. All other arguments should match
138	* exactly, though.
139	* 5. tuplesort_attach_shared() should be called by all workers. Feed tuples
140	* to each worker, and call tuplesort_performsort() within each when input
141	* is exhausted.
142	* 6. Call tuplesort_end() in each worker process. Worker processes can shut
143	* down once tuplesort_end() returns.
144	* 7. Begin a tuplesort in the leader using the same tuplesort_begin*
145	* routine, passing a leader-appropriate coordinate argument (this can
146	* happen as early as during step 3, actually, since we only need to know
147	* the number of workers successfully launched). The leader must now wait
148	* for workers to finish. Caller must use own mechanism for ensuring that
149	* next step isn't reached until all workers have called and returned from
150	* tuplesort_performsort(). (Note that it's okay if workers have already
151	* also called tuplesort_end() by then.)
152	* 8. Call tuplesort_performsort() in leader. Consume output using the
153	* appropriate tuplesort_get* routine. Leader can skip this step if
154	* tuplesort turns out to be unnecessary.
155	* 9. Call tuplesort_end() in leader.
156	*
157	* This division of labor assumes nothing about how input tuples are produced,
158	* but does require that caller combine the state of multiple tuplesorts for
159	* any purpose other than producing the final output. For example, callers
160	* must consider that tuplesort_get_stats() reports on only one worker's role
161	* in a sort (or the leader's role), and not statistics for the sort as a
162	* whole.
163	*
164	* Note that callers may use the leader process to sort runs as if it was an
165	* independent worker process (prior to the process performing a leader sort
166	* to produce the final sorted output). Doing so only requires a second
167	* "partial" tuplesort within the leader process, initialized like that of a
168	* worker process. The steps above don't touch on this directly. The only
169	* difference is that the tuplesort_attach_shared() call is never needed within
170	* leader process, because the backend as a whole holds the shared fileset
171	* reference. A worker Tuplesortstate in leader is expected to do exactly the
172	* same amount of total initial processing work as a worker process
173	* Tuplesortstate, since the leader process has nothing else to do before
174	* workers finish.
175	*
176	* Note that only a very small amount of memory will be allocated prior to
177	* the leader state first consuming input, and that workers will free the
178	* vast majority of their memory upon returning from tuplesort_performsort().
179	* Callers can rely on this to arrange for memory to be used in a way that
180	* respects a workMem-style budget across an entire parallel sort operation.
181	*
182	* Callers are responsible for parallel safety in general. However, they
183	* can at least rely on there being no parallel safety hazards within
184	* tuplesort, because tuplesort thinks of the sort as several independent
185	* sorts whose results are combined. Since, in general, the behavior of
186	* sort operators is immutable, caller need only worry about the parallel
187	* safety of whatever the process is through which input tuples are
188	* generated (typically, caller uses a parallel heap scan).
189	*/
190
191	extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
192	int nkeys, AttrNumber *attNums,
193	Oid sortOperators, Oid sortCollations,
194	bool *nullsFirstFlags,
195	int workMem, SortCoordinate coordinate,
196	bool randomAccess);
197	extern Tuplesortstate *tuplesort_begin_cluster(TupleDesc tupDesc,
198	Relation indexRel, int workMem,
199	SortCoordinate coordinate, bool randomAccess);
200	extern Tuplesortstate *tuplesort_begin_index_btree(Relation heapRel,
201	Relation indexRel,
202	bool enforceUnique,
203	int workMem, SortCoordinate coordinate,
204	bool randomAccess);
205	extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
206	Relation indexRel,
207	uint32 high_mask,
208	uint32 low_mask,
209	uint32 max_buckets,
210	int workMem, SortCoordinate coordinate,
211	bool randomAccess);
212	extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
213	Oid sortOperator, Oid sortCollation,
214	bool nullsFirstFlag,
215	int workMem, SortCoordinate coordinate,
216	bool randomAccess);
217
218	extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
219
220	extern void tuplesort_puttupleslot(Tuplesortstate *state,
221	TupleTableSlot *slot);
222	extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
223	extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
224	Relation rel, ItemPointer self,
225	Datum values, bool isnull);
226	extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
227	bool isNull);
228
229	extern void tuplesort_performsort(Tuplesortstate *state);
230
231	extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
232	bool copy, TupleTableSlot slot, Datum abbrev);
233	extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
234	extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
235	extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
236	Datum val, bool isNull, Datum *abbrev);
237
238	extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
239	bool forward);
240
241	extern void tuplesort_end(Tuplesortstate *state);
242
243	extern void tuplesort_get_stats(Tuplesortstate *state,
244	TuplesortInstrumentation *stats);
245	extern const char *tuplesort_method_name(TuplesortMethod m);
246	extern const char *tuplesort_space_type_name(TuplesortSpaceType t);
247
248	extern int tuplesort_merge_order(int64 allowedMem);
249
250	extern Size tuplesort_estimate_shared(int nworkers);
251	extern void tuplesort_initialize_shared(Sharedsort shared, int* nWorkers,
252	dsm_segment *seg);
253	extern void tuplesort_attach_shared(Sharedsort shared, dsm_segment seg);
254
255	/*
256	* These routines may only be called if randomAccess was specified 'true'.
257	* Likewise, backwards scan in gettuple/getdatum is only allowed if
258	* randomAccess was specified. Note that parallel sorts do not support
259	* randomAccess.
260	*/
261
262	extern void tuplesort_rescan(Tuplesortstate *state);
263	extern void tuplesort_markpos(Tuplesortstate *state);
264	extern void tuplesort_restorepos(Tuplesortstate *state);
265
266	#endif /* TUPLESORT_H */
267

Browse the source code of PostgreSQL/src/include/utils/tuplesort.h