execGrouping.c source code [PostgreSQL/src/backend/executor/execGrouping.c]

1	/-------------------------------------------------------------------------*
2	*
3	* execGrouping.c
4	* executor utility routines for grouping, hashing, and aggregation
5	*
6	* Note: we currently assume that equality and hashing functions are not
7	* collation-sensitive, so the code in this file has no support for passing
8	* collation settings through from callers. That may have to change someday.
9	*
10	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
11	* Portions Copyright (c) 1994, Regents of the University of California
12	*
13	*
14	* IDENTIFICATION
15	* src/backend/executor/execGrouping.c
16	*
17	*-------------------------------------------------------------------------
18	*/
19	#include "postgres.h"
20
21	#include "access/parallel.h"
22	#include "executor/executor.h"
23	#include "miscadmin.h"
24	#include "utils/lsyscache.h"
25	#include "utils/hashutils.h"
26	#include "utils/memutils.h"
27
28	static uint32 TupleHashTableHash(struct tuplehash_hash tb, const* MinimalTuple tuple);
29	static int TupleHashTableMatch(struct tuplehash_hash tb, const* MinimalTuple tuple1, const MinimalTuple tuple2);
30
31	/*
32	* Define parameters for tuple hash table code generation. The interface is
33	* also declared in execnodes.h (to generate the types, which are externally
34	* visible).
35	*/
36	#define SH_PREFIX tuplehash
37	#define SH_ELEMENT_TYPE TupleHashEntryData
38	#define SH_KEY_TYPE MinimalTuple
39	#define SH_KEY firstTuple
40	#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
41	#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42	#define SH_SCOPE extern
43	#define SH_STORE_HASH
44	#define SH_GET_HASH(tb, a) a->hash
45	#define SH_DEFINE
46	#include "lib/simplehash.h"
47
48
49	/*****************************************************************************
50	* Utility routines for grouping tuples together
51	*****************************************************************************/
52
53	/*
54	* execTuplesMatchPrepare
55	* Build expression that can be evaluated using ExecQual(), returning
56	* whether an ExprContext's inner/outer tuples are NOT DISTINCT
57	*/
58	ExprState *
59	execTuplesMatchPrepare(TupleDesc desc,
60	int numCols,
61	const AttrNumber *keyColIdx,
62	const Oid *eqOperators,
63	const Oid *collations,
64	PlanState *parent)
65	{
66	Oid eqFunctions = (Oid ) palloc(numCols * sizeof(Oid));
67	int i;
68	ExprState *expr;
69
70	if (numCols == `0`)
71	return NULL;
72
73	/ lookup equality functions /
74	for (i = `0`; i < numCols; i++)
75	eqFunctions[i] = get_opcode(eqOperators[i]);
76
77	/ build actual expression /
78	expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79	numCols, keyColIdx, eqFunctions, collations,
80	parent);
81
82	return expr;
83	}
84
85	/*
86	* execTuplesHashPrepare
87	* Look up the equality and hashing functions needed for a TupleHashTable.
88	*
89	* This is similar to execTuplesMatchPrepare, but we also need to find the
90	* hash functions associated with the equality operators. *eqFunctions and
91	* *hashFunctions receive the palloc'd result arrays.
92	*
93	* Note: we expect that the given operators are not cross-type comparisons.
94	*/
95	void
96	execTuplesHashPrepare(int numCols,
97	const Oid *eqOperators,
98	Oid **eqFuncOids,
99	FmgrInfo **hashFunctions)
100	{
101	int i;
102
103	eqFuncOids = (Oid ) palloc(numCols * sizeof(Oid));
104	hashFunctions = (FmgrInfo ) palloc(numCols * sizeof(FmgrInfo));
105
106	for (i = `0`; i < numCols; i++)
107	{
108	Oid eq_opr = eqOperators[i];
109	Oid eq_function;
110	Oid left_hash_function;
111	Oid right_hash_function;
112
113	eq_function = get_opcode(eq_opr);
114	if (!get_op_hash_functions(eq_opr,
115	&left_hash_function, &right_hash_function))
116	elog(ERROR, "could not find hash function for hash operator %u",
117	eq_opr);
118	/ We're not supporting cross-type cases here /
119	Assert(left_hash_function == right_hash_function);
120	(*eqFuncOids)[i] = eq_function;
121	fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122	}
123	}
124
125
126	/*****************************************************************************
127	* Utility routines for all-in-memory hash tables
128	*
129	* These routines build hash tables for grouping tuples together (eg, for
130	* hash aggregation). There is one entry for each not-distinct set of tuples
131	* presented.
132	*****************************************************************************/
133
134	/*
135	* Construct an empty TupleHashTable
136	*
137	* numCols, keyColIdx: identify the tuple fields to use as lookup key
138	* eqfunctions: equality comparison functions to use
139	* hashfunctions: datatype-specific hashing functions to use
140	* nbuckets: initial estimate of hashtable size
141	* additionalsize: size of data stored in ->additional
142	* metacxt: memory context for long-lived allocation, but not per-entry data
143	* tablecxt: memory context in which to store table entries
144	* tempcxt: short-lived context for evaluation hash and comparison functions
145	*
146	* The function arrays may be made with execTuplesHashPrepare(). Note they
147	* are not cross-type functions, but expect to see the table datatype(s)
148	* on both sides.
149	*
150	* Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151	* storage that will live as long as the hashtable does.
152	*/
153	TupleHashTable
154	BuildTupleHashTableExt(PlanState *parent,
155	TupleDesc inputDesc,
156	int numCols, AttrNumber *keyColIdx,
157	const Oid *eqfuncoids,
158	FmgrInfo *hashfunctions,
159	Oid *collations,
160	long nbuckets, Size additionalsize,
161	MemoryContext metacxt,
162	MemoryContext tablecxt,
163	MemoryContext tempcxt,
164	bool use_variable_hash_iv)
165	{
166	TupleHashTable hashtable;
167	Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168	MemoryContext oldcontext;
169	bool allow_jit;
170
171	Assert(nbuckets > `0`);
172
173	/ Limit initial table size request to not more than work_mem /
174	nbuckets = Min(nbuckets, (long) ((work_mem * `1024L`) / entrysize));
175
176	oldcontext = MemoryContextSwitchTo(metacxt);
177
178	hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
179
180	hashtable->numCols = numCols;
181	hashtable->keyColIdx = keyColIdx;
182	hashtable->tab_hash_funcs = hashfunctions;
183	hashtable->tab_collations = collations;
184	hashtable->tablecxt = tablecxt;
185	hashtable->tempcxt = tempcxt;
186	hashtable->entrysize = entrysize;
187	hashtable->tableslot = NULL; / will be made on first lookup /
188	hashtable->inputslot = NULL;
189	hashtable->in_hash_funcs = NULL;
190	hashtable->cur_eq_func = NULL;
191
192	/*
193	* If parallelism is in use, even if the master backend is performing the
194	* scan itself, we don't want to create the hashtable exactly the same way
195	* in all workers. As hashtables are iterated over in keyspace-order,
196	* doing so in all processes in the same way is likely to lead to
197	* "unbalanced" hashtables when the table size initially is
198	* underestimated.
199	*/
200	if (use_variable_hash_iv)
201	hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
202	else
203	hashtable->hash_iv = `0`;
204
205	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
206
207	/*
208	* We copy the input tuple descriptor just for safety --- we assume all
209	* input tuples will have equivalent descriptors.
210	*/
211	hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
212	&TTSOpsMinimalTuple);
213
214	/*
215	* If the old reset interface is used (i.e. BuildTupleHashTable, rather
216	* than BuildTupleHashTableExt), allowing JIT would lead to the generated
217	* functions to a) live longer than the query b) be re-generated each time
218	* the table is being reset. Therefore prevent JIT from being used in that
219	* case, by not providing a parent node (which prevents accessing the
220	* JitContext in the EState).
221	*/
222	allow_jit = metacxt != tablecxt;
223
224	/ build comparator for all columns /
225	/ XXX: should we support non-minimal tuples for the inputslot? /
226	hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
227	&TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
228	numCols,
229	keyColIdx, eqfuncoids, collations,
230	allow_jit ? parent : NULL);
231
232	/*
233	* While not pretty, it's ok to not shut down this context, but instead
234	* rely on the containing memory context being reset, as
235	* ExecBuildGroupingEqual() only builds a very simple expression calling
236	* functions (i.e. nothing that'd employ RegisterExprContextCallback()).
237	*/
238	hashtable->exprcontext = CreateStandaloneExprContext();
239
240	MemoryContextSwitchTo(oldcontext);
241
242	return hashtable;
243	}
244
245	/*
246	* BuildTupleHashTable is a backwards-compatibilty wrapper for
247	* BuildTupleHashTableExt(), that allocates the hashtable's metadata in
248	* tablecxt. Note that hashtables created this way cannot be reset leak-free
249	* with ResetTupleHashTable().
250	*/
251	TupleHashTable
252	BuildTupleHashTable(PlanState *parent,
253	TupleDesc inputDesc,
254	int numCols, AttrNumber *keyColIdx,
255	const Oid *eqfuncoids,
256	FmgrInfo *hashfunctions,
257	Oid *collations,
258	long nbuckets, Size additionalsize,
259	MemoryContext tablecxt,
260	MemoryContext tempcxt,
261	bool use_variable_hash_iv)
262	{
263	return BuildTupleHashTableExt(parent,
264	inputDesc,
265	numCols, keyColIdx,
266	eqfuncoids,
267	hashfunctions,
268	collations,
269	nbuckets, additionalsize,
270	tablecxt,
271	tablecxt,
272	tempcxt,
273	use_variable_hash_iv);
274	}
275
276	/*
277	* Reset contents of the hashtable to be empty, preserving all the non-content
278	* state. Note that the tablecxt passed to BuildTupleHashTableExt() should
279	* also be reset, otherwise there will be leaks.
280	*/
281	void
282	ResetTupleHashTable(TupleHashTable hashtable)
283	{
284	tuplehash_reset(hashtable->hashtab);
285	}
286
287	/*
288	* Find or create a hashtable entry for the tuple group containing the
289	* given tuple. The tuple must be the same type as the hashtable entries.
290	*
291	* If isnew is NULL, we do not create new entries; we return NULL if no
292	* match is found.
293	*
294	* If isnew isn't NULL, then a new entry is created if no existing entry
295	* matches. On return, *isnew is true if the entry is newly created,
296	* false if it existed already. ->additional_data in the new entry has
297	* been zeroed.
298	*/
299	TupleHashEntry
300	LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
301	bool *isnew)
302	{
303	TupleHashEntryData *entry;
304	MemoryContext oldContext;
305	bool found;
306	MinimalTuple key;
307
308	/ Need to run the hash functions in short-lived context /
309	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
310
311	/ set up data needed by hash and match functions /
312	hashtable->inputslot = slot;
313	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
314	hashtable->cur_eq_func = hashtable->tab_eq_func;
315
316	key = NULL; / flag to reference inputslot /
317
318	if (isnew)
319	{
320	entry = tuplehash_insert(hashtable->hashtab, key, &found);
321
322	if (found)
323	{
324	/ found pre-existing entry /
325	*isnew = false;
326	}
327	else
328	{
329	/ created new entry /
330	*isnew = true;
331	/ zero caller data /
332	entry->additional = NULL;
333	MemoryContextSwitchTo(hashtable->tablecxt);
334	/ Copy the first tuple into the table context /
335	entry->firstTuple = ExecCopySlotMinimalTuple(slot);
336	}
337	}
338	else
339	{
340	entry = tuplehash_lookup(hashtable->hashtab, key);
341	}
342
343	MemoryContextSwitchTo(oldContext);
344
345	return entry;
346	}
347
348	/*
349	* Search for a hashtable entry matching the given tuple. No entry is
350	* created if there's not a match. This is similar to the non-creating
351	* case of LookupTupleHashEntry, except that it supports cross-type
352	* comparisons, in which the given tuple is not of the same type as the
353	* table entries. The caller must provide the hash functions to use for
354	* the input tuple, as well as the equality functions, since these may be
355	* different from the table's internal functions.
356	*/
357	TupleHashEntry
358	FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
359	ExprState *eqcomp,
360	FmgrInfo *hashfunctions)
361	{
362	TupleHashEntry entry;
363	MemoryContext oldContext;
364	MinimalTuple key;
365
366	/ Need to run the hash functions in short-lived context /
367	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
368
369	/ Set up data needed by hash and match functions /
370	hashtable->inputslot = slot;
371	hashtable->in_hash_funcs = hashfunctions;
372	hashtable->cur_eq_func = eqcomp;
373
374	/ Search the hash table /
375	key = NULL; / flag to reference inputslot /
376	entry = tuplehash_lookup(hashtable->hashtab, key);
377	MemoryContextSwitchTo(oldContext);
378
379	return entry;
380	}
381
382	/*
383	* Compute the hash value for a tuple
384	*
385	* The passed-in key is a pointer to TupleHashEntryData. In an actual hash
386	* table entry, the firstTuple field points to a tuple (in MinimalTuple
387	* format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
388	* NULL firstTuple field --- that cues us to look at the inputslot instead.
389	* This convention avoids the need to materialize virtual input tuples unless
390	* they actually need to get copied into the table.
391	*
392	* Also, the caller must select an appropriate memory context for running
393	* the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
394	*/
395	static uint32
396	TupleHashTableHash(struct tuplehash_hash tb, const* MinimalTuple tuple)
397	{
398	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
399	int numCols = hashtable->numCols;
400	AttrNumber *keyColIdx = hashtable->keyColIdx;
401	uint32 hashkey = hashtable->hash_iv;
402	TupleTableSlot *slot;
403	FmgrInfo *hashfunctions;
404	int i;
405
406	if (tuple == NULL)
407	{
408	/ Process the current input tuple for the table /
409	slot = hashtable->inputslot;
410	hashfunctions = hashtable->in_hash_funcs;
411	}
412	else
413	{
414	/*
415	* Process a tuple already stored in the table.
416	*
417	* (this case never actually occurs due to the way simplehash.h is
418	* used, as the hash-value is stored in the entries)
419	*/
420	slot = hashtable->tableslot;
421	ExecStoreMinimalTuple(tuple, slot, false);
422	hashfunctions = hashtable->tab_hash_funcs;
423	}
424
425	for (i = `0`; i < numCols; i++)
426	{
427	AttrNumber att = keyColIdx[i];
428	Datum attr;
429	bool isNull;
430
431	/ rotate hashkey left 1 bit at each step /
432	hashkey = (hashkey << `1`) \| ((hashkey & `0x80000000`) ? `1` : `0`);
433
434	attr = slot_getattr(slot, att, &isNull);
435
436	if (!isNull) / treat nulls as having hash key 0 /
437	{
438	uint32 hkey;
439
440	hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
441	hashtable->tab_collations[i],
442	attr));
443	hashkey ^= hkey;
444	}
445	}
446
447	/*
448	* The way hashes are combined above, among each other and with the IV,
449	* doesn't lead to good bit perturbation. As the IV's goal is to lead to
450	* achieve that, perform a round of hashing of the combined hash -
451	* resulting in near perfect perturbation.
452	*/
453	return murmurhash32(hashkey);
454	}
455
456	/*
457	* See whether two tuples (presumably of the same hash value) match
458	*
459	* As above, the passed pointers are pointers to TupleHashEntryData.
460	*/
461	static int
462	TupleHashTableMatch(struct tuplehash_hash tb, const* MinimalTuple tuple1, const MinimalTuple tuple2)
463	{
464	TupleTableSlot *slot1;
465	TupleTableSlot *slot2;
466	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
467	ExprContext *econtext = hashtable->exprcontext;
468
469	/*
470	* We assume that simplehash.h will only ever call us with the first
471	* argument being an actual table entry, and the second argument being
472	* LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
473	* could be supported too, but is not currently required.
474	*/
475	Assert(tuple1 != NULL);
476	slot1 = hashtable->tableslot;
477	ExecStoreMinimalTuple(tuple1, slot1, false);
478	Assert(tuple2 == NULL);
479	slot2 = hashtable->inputslot;
480
481	/ For crosstype comparisons, the inputslot must be first /
482	econtext->ecxt_innertuple = slot2;
483	econtext->ecxt_outertuple = slot1;
484	return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
485	}
486

Browse the source code of PostgreSQL/src/backend/executor/execGrouping.c