1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * execGrouping.c |
4 | * executor utility routines for grouping, hashing, and aggregation |
5 | * |
6 | * Note: we currently assume that equality and hashing functions are not |
7 | * collation-sensitive, so the code in this file has no support for passing |
8 | * collation settings through from callers. That may have to change someday. |
9 | * |
10 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
11 | * Portions Copyright (c) 1994, Regents of the University of California |
12 | * |
13 | * |
14 | * IDENTIFICATION |
15 | * src/backend/executor/execGrouping.c |
16 | * |
17 | *------------------------------------------------------------------------- |
18 | */ |
19 | #include "postgres.h" |
20 | |
21 | #include "access/parallel.h" |
22 | #include "executor/executor.h" |
23 | #include "miscadmin.h" |
24 | #include "utils/lsyscache.h" |
25 | #include "utils/hashutils.h" |
26 | #include "utils/memutils.h" |
27 | |
28 | static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple); |
29 | static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2); |
30 | |
31 | /* |
32 | * Define parameters for tuple hash table code generation. The interface is |
33 | * *also* declared in execnodes.h (to generate the types, which are externally |
34 | * visible). |
35 | */ |
36 | #define SH_PREFIX tuplehash |
37 | #define SH_ELEMENT_TYPE TupleHashEntryData |
38 | #define SH_KEY_TYPE MinimalTuple |
39 | #define SH_KEY firstTuple |
40 | #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) |
41 | #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 |
42 | #define SH_SCOPE extern |
43 | #define SH_STORE_HASH |
44 | #define SH_GET_HASH(tb, a) a->hash |
45 | #define SH_DEFINE |
46 | #include "lib/simplehash.h" |
47 | |
48 | |
49 | /***************************************************************************** |
50 | * Utility routines for grouping tuples together |
51 | *****************************************************************************/ |
52 | |
53 | /* |
54 | * execTuplesMatchPrepare |
55 | * Build expression that can be evaluated using ExecQual(), returning |
56 | * whether an ExprContext's inner/outer tuples are NOT DISTINCT |
57 | */ |
58 | ExprState * |
59 | execTuplesMatchPrepare(TupleDesc desc, |
60 | int numCols, |
61 | const AttrNumber *keyColIdx, |
62 | const Oid *eqOperators, |
63 | const Oid *collations, |
64 | PlanState *parent) |
65 | { |
66 | Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); |
67 | int i; |
68 | ExprState *expr; |
69 | |
70 | if (numCols == 0) |
71 | return NULL; |
72 | |
73 | /* lookup equality functions */ |
74 | for (i = 0; i < numCols; i++) |
75 | eqFunctions[i] = get_opcode(eqOperators[i]); |
76 | |
77 | /* build actual expression */ |
78 | expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, |
79 | numCols, keyColIdx, eqFunctions, collations, |
80 | parent); |
81 | |
82 | return expr; |
83 | } |
84 | |
85 | /* |
86 | * execTuplesHashPrepare |
87 | * Look up the equality and hashing functions needed for a TupleHashTable. |
88 | * |
89 | * This is similar to execTuplesMatchPrepare, but we also need to find the |
90 | * hash functions associated with the equality operators. *eqFunctions and |
91 | * *hashFunctions receive the palloc'd result arrays. |
92 | * |
93 | * Note: we expect that the given operators are not cross-type comparisons. |
94 | */ |
95 | void |
96 | execTuplesHashPrepare(int numCols, |
97 | const Oid *eqOperators, |
98 | Oid **eqFuncOids, |
99 | FmgrInfo **hashFunctions) |
100 | { |
101 | int i; |
102 | |
103 | *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid)); |
104 | *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); |
105 | |
106 | for (i = 0; i < numCols; i++) |
107 | { |
108 | Oid eq_opr = eqOperators[i]; |
109 | Oid eq_function; |
110 | Oid left_hash_function; |
111 | Oid right_hash_function; |
112 | |
113 | eq_function = get_opcode(eq_opr); |
114 | if (!get_op_hash_functions(eq_opr, |
115 | &left_hash_function, &right_hash_function)) |
116 | elog(ERROR, "could not find hash function for hash operator %u" , |
117 | eq_opr); |
118 | /* We're not supporting cross-type cases here */ |
119 | Assert(left_hash_function == right_hash_function); |
120 | (*eqFuncOids)[i] = eq_function; |
121 | fmgr_info(right_hash_function, &(*hashFunctions)[i]); |
122 | } |
123 | } |
124 | |
125 | |
126 | /***************************************************************************** |
127 | * Utility routines for all-in-memory hash tables |
128 | * |
129 | * These routines build hash tables for grouping tuples together (eg, for |
130 | * hash aggregation). There is one entry for each not-distinct set of tuples |
131 | * presented. |
132 | *****************************************************************************/ |
133 | |
134 | /* |
135 | * Construct an empty TupleHashTable |
136 | * |
137 | * numCols, keyColIdx: identify the tuple fields to use as lookup key |
138 | * eqfunctions: equality comparison functions to use |
139 | * hashfunctions: datatype-specific hashing functions to use |
140 | * nbuckets: initial estimate of hashtable size |
141 | * additionalsize: size of data stored in ->additional |
142 | * metacxt: memory context for long-lived allocation, but not per-entry data |
143 | * tablecxt: memory context in which to store table entries |
144 | * tempcxt: short-lived context for evaluation hash and comparison functions |
145 | * |
146 | * The function arrays may be made with execTuplesHashPrepare(). Note they |
147 | * are not cross-type functions, but expect to see the table datatype(s) |
148 | * on both sides. |
149 | * |
150 | * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in |
151 | * storage that will live as long as the hashtable does. |
152 | */ |
153 | TupleHashTable |
154 | BuildTupleHashTableExt(PlanState *parent, |
155 | TupleDesc inputDesc, |
156 | int numCols, AttrNumber *keyColIdx, |
157 | const Oid *eqfuncoids, |
158 | FmgrInfo *hashfunctions, |
159 | Oid *collations, |
160 | long nbuckets, Size additionalsize, |
161 | MemoryContext metacxt, |
162 | MemoryContext tablecxt, |
163 | MemoryContext tempcxt, |
164 | bool use_variable_hash_iv) |
165 | { |
166 | TupleHashTable hashtable; |
167 | Size entrysize = sizeof(TupleHashEntryData) + additionalsize; |
168 | MemoryContext oldcontext; |
169 | bool allow_jit; |
170 | |
171 | Assert(nbuckets > 0); |
172 | |
173 | /* Limit initial table size request to not more than work_mem */ |
174 | nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize)); |
175 | |
176 | oldcontext = MemoryContextSwitchTo(metacxt); |
177 | |
178 | hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData)); |
179 | |
180 | hashtable->numCols = numCols; |
181 | hashtable->keyColIdx = keyColIdx; |
182 | hashtable->tab_hash_funcs = hashfunctions; |
183 | hashtable->tab_collations = collations; |
184 | hashtable->tablecxt = tablecxt; |
185 | hashtable->tempcxt = tempcxt; |
186 | hashtable->entrysize = entrysize; |
187 | hashtable->tableslot = NULL; /* will be made on first lookup */ |
188 | hashtable->inputslot = NULL; |
189 | hashtable->in_hash_funcs = NULL; |
190 | hashtable->cur_eq_func = NULL; |
191 | |
192 | /* |
193 | * If parallelism is in use, even if the master backend is performing the |
194 | * scan itself, we don't want to create the hashtable exactly the same way |
195 | * in all workers. As hashtables are iterated over in keyspace-order, |
196 | * doing so in all processes in the same way is likely to lead to |
197 | * "unbalanced" hashtables when the table size initially is |
198 | * underestimated. |
199 | */ |
200 | if (use_variable_hash_iv) |
201 | hashtable->hash_iv = murmurhash32(ParallelWorkerNumber); |
202 | else |
203 | hashtable->hash_iv = 0; |
204 | |
205 | hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); |
206 | |
207 | /* |
208 | * We copy the input tuple descriptor just for safety --- we assume all |
209 | * input tuples will have equivalent descriptors. |
210 | */ |
211 | hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc), |
212 | &TTSOpsMinimalTuple); |
213 | |
214 | /* |
215 | * If the old reset interface is used (i.e. BuildTupleHashTable, rather |
216 | * than BuildTupleHashTableExt), allowing JIT would lead to the generated |
217 | * functions to a) live longer than the query b) be re-generated each time |
218 | * the table is being reset. Therefore prevent JIT from being used in that |
219 | * case, by not providing a parent node (which prevents accessing the |
220 | * JitContext in the EState). |
221 | */ |
222 | allow_jit = metacxt != tablecxt; |
223 | |
224 | /* build comparator for all columns */ |
225 | /* XXX: should we support non-minimal tuples for the inputslot? */ |
226 | hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, |
227 | &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, |
228 | numCols, |
229 | keyColIdx, eqfuncoids, collations, |
230 | allow_jit ? parent : NULL); |
231 | |
232 | /* |
233 | * While not pretty, it's ok to not shut down this context, but instead |
234 | * rely on the containing memory context being reset, as |
235 | * ExecBuildGroupingEqual() only builds a very simple expression calling |
236 | * functions (i.e. nothing that'd employ RegisterExprContextCallback()). |
237 | */ |
238 | hashtable->exprcontext = CreateStandaloneExprContext(); |
239 | |
240 | MemoryContextSwitchTo(oldcontext); |
241 | |
242 | return hashtable; |
243 | } |
244 | |
245 | /* |
246 | * BuildTupleHashTable is a backwards-compatibilty wrapper for |
247 | * BuildTupleHashTableExt(), that allocates the hashtable's metadata in |
248 | * tablecxt. Note that hashtables created this way cannot be reset leak-free |
249 | * with ResetTupleHashTable(). |
250 | */ |
251 | TupleHashTable |
252 | BuildTupleHashTable(PlanState *parent, |
253 | TupleDesc inputDesc, |
254 | int numCols, AttrNumber *keyColIdx, |
255 | const Oid *eqfuncoids, |
256 | FmgrInfo *hashfunctions, |
257 | Oid *collations, |
258 | long nbuckets, Size additionalsize, |
259 | MemoryContext tablecxt, |
260 | MemoryContext tempcxt, |
261 | bool use_variable_hash_iv) |
262 | { |
263 | return BuildTupleHashTableExt(parent, |
264 | inputDesc, |
265 | numCols, keyColIdx, |
266 | eqfuncoids, |
267 | hashfunctions, |
268 | collations, |
269 | nbuckets, additionalsize, |
270 | tablecxt, |
271 | tablecxt, |
272 | tempcxt, |
273 | use_variable_hash_iv); |
274 | } |
275 | |
276 | /* |
277 | * Reset contents of the hashtable to be empty, preserving all the non-content |
278 | * state. Note that the tablecxt passed to BuildTupleHashTableExt() should |
279 | * also be reset, otherwise there will be leaks. |
280 | */ |
281 | void |
282 | ResetTupleHashTable(TupleHashTable hashtable) |
283 | { |
284 | tuplehash_reset(hashtable->hashtab); |
285 | } |
286 | |
287 | /* |
288 | * Find or create a hashtable entry for the tuple group containing the |
289 | * given tuple. The tuple must be the same type as the hashtable entries. |
290 | * |
291 | * If isnew is NULL, we do not create new entries; we return NULL if no |
292 | * match is found. |
293 | * |
294 | * If isnew isn't NULL, then a new entry is created if no existing entry |
295 | * matches. On return, *isnew is true if the entry is newly created, |
296 | * false if it existed already. ->additional_data in the new entry has |
297 | * been zeroed. |
298 | */ |
299 | TupleHashEntry |
300 | LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, |
301 | bool *isnew) |
302 | { |
303 | TupleHashEntryData *entry; |
304 | MemoryContext oldContext; |
305 | bool found; |
306 | MinimalTuple key; |
307 | |
308 | /* Need to run the hash functions in short-lived context */ |
309 | oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
310 | |
311 | /* set up data needed by hash and match functions */ |
312 | hashtable->inputslot = slot; |
313 | hashtable->in_hash_funcs = hashtable->tab_hash_funcs; |
314 | hashtable->cur_eq_func = hashtable->tab_eq_func; |
315 | |
316 | key = NULL; /* flag to reference inputslot */ |
317 | |
318 | if (isnew) |
319 | { |
320 | entry = tuplehash_insert(hashtable->hashtab, key, &found); |
321 | |
322 | if (found) |
323 | { |
324 | /* found pre-existing entry */ |
325 | *isnew = false; |
326 | } |
327 | else |
328 | { |
329 | /* created new entry */ |
330 | *isnew = true; |
331 | /* zero caller data */ |
332 | entry->additional = NULL; |
333 | MemoryContextSwitchTo(hashtable->tablecxt); |
334 | /* Copy the first tuple into the table context */ |
335 | entry->firstTuple = ExecCopySlotMinimalTuple(slot); |
336 | } |
337 | } |
338 | else |
339 | { |
340 | entry = tuplehash_lookup(hashtable->hashtab, key); |
341 | } |
342 | |
343 | MemoryContextSwitchTo(oldContext); |
344 | |
345 | return entry; |
346 | } |
347 | |
348 | /* |
349 | * Search for a hashtable entry matching the given tuple. No entry is |
350 | * created if there's not a match. This is similar to the non-creating |
351 | * case of LookupTupleHashEntry, except that it supports cross-type |
352 | * comparisons, in which the given tuple is not of the same type as the |
353 | * table entries. The caller must provide the hash functions to use for |
354 | * the input tuple, as well as the equality functions, since these may be |
355 | * different from the table's internal functions. |
356 | */ |
357 | TupleHashEntry |
358 | FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, |
359 | ExprState *eqcomp, |
360 | FmgrInfo *hashfunctions) |
361 | { |
362 | TupleHashEntry entry; |
363 | MemoryContext oldContext; |
364 | MinimalTuple key; |
365 | |
366 | /* Need to run the hash functions in short-lived context */ |
367 | oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
368 | |
369 | /* Set up data needed by hash and match functions */ |
370 | hashtable->inputslot = slot; |
371 | hashtable->in_hash_funcs = hashfunctions; |
372 | hashtable->cur_eq_func = eqcomp; |
373 | |
374 | /* Search the hash table */ |
375 | key = NULL; /* flag to reference inputslot */ |
376 | entry = tuplehash_lookup(hashtable->hashtab, key); |
377 | MemoryContextSwitchTo(oldContext); |
378 | |
379 | return entry; |
380 | } |
381 | |
382 | /* |
383 | * Compute the hash value for a tuple |
384 | * |
385 | * The passed-in key is a pointer to TupleHashEntryData. In an actual hash |
386 | * table entry, the firstTuple field points to a tuple (in MinimalTuple |
387 | * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a |
388 | * NULL firstTuple field --- that cues us to look at the inputslot instead. |
389 | * This convention avoids the need to materialize virtual input tuples unless |
390 | * they actually need to get copied into the table. |
391 | * |
392 | * Also, the caller must select an appropriate memory context for running |
393 | * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) |
394 | */ |
395 | static uint32 |
396 | TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) |
397 | { |
398 | TupleHashTable hashtable = (TupleHashTable) tb->private_data; |
399 | int numCols = hashtable->numCols; |
400 | AttrNumber *keyColIdx = hashtable->keyColIdx; |
401 | uint32 hashkey = hashtable->hash_iv; |
402 | TupleTableSlot *slot; |
403 | FmgrInfo *hashfunctions; |
404 | int i; |
405 | |
406 | if (tuple == NULL) |
407 | { |
408 | /* Process the current input tuple for the table */ |
409 | slot = hashtable->inputslot; |
410 | hashfunctions = hashtable->in_hash_funcs; |
411 | } |
412 | else |
413 | { |
414 | /* |
415 | * Process a tuple already stored in the table. |
416 | * |
417 | * (this case never actually occurs due to the way simplehash.h is |
418 | * used, as the hash-value is stored in the entries) |
419 | */ |
420 | slot = hashtable->tableslot; |
421 | ExecStoreMinimalTuple(tuple, slot, false); |
422 | hashfunctions = hashtable->tab_hash_funcs; |
423 | } |
424 | |
425 | for (i = 0; i < numCols; i++) |
426 | { |
427 | AttrNumber att = keyColIdx[i]; |
428 | Datum attr; |
429 | bool isNull; |
430 | |
431 | /* rotate hashkey left 1 bit at each step */ |
432 | hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); |
433 | |
434 | attr = slot_getattr(slot, att, &isNull); |
435 | |
436 | if (!isNull) /* treat nulls as having hash key 0 */ |
437 | { |
438 | uint32 hkey; |
439 | |
440 | hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], |
441 | hashtable->tab_collations[i], |
442 | attr)); |
443 | hashkey ^= hkey; |
444 | } |
445 | } |
446 | |
447 | /* |
448 | * The way hashes are combined above, among each other and with the IV, |
449 | * doesn't lead to good bit perturbation. As the IV's goal is to lead to |
450 | * achieve that, perform a round of hashing of the combined hash - |
451 | * resulting in near perfect perturbation. |
452 | */ |
453 | return murmurhash32(hashkey); |
454 | } |
455 | |
456 | /* |
457 | * See whether two tuples (presumably of the same hash value) match |
458 | * |
459 | * As above, the passed pointers are pointers to TupleHashEntryData. |
460 | */ |
461 | static int |
462 | TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2) |
463 | { |
464 | TupleTableSlot *slot1; |
465 | TupleTableSlot *slot2; |
466 | TupleHashTable hashtable = (TupleHashTable) tb->private_data; |
467 | ExprContext *econtext = hashtable->exprcontext; |
468 | |
469 | /* |
470 | * We assume that simplehash.h will only ever call us with the first |
471 | * argument being an actual table entry, and the second argument being |
472 | * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction |
473 | * could be supported too, but is not currently required. |
474 | */ |
475 | Assert(tuple1 != NULL); |
476 | slot1 = hashtable->tableslot; |
477 | ExecStoreMinimalTuple(tuple1, slot1, false); |
478 | Assert(tuple2 == NULL); |
479 | slot2 = hashtable->inputslot; |
480 | |
481 | /* For crosstype comparisons, the inputslot must be first */ |
482 | econtext->ecxt_innertuple = slot2; |
483 | econtext->ecxt_outertuple = slot1; |
484 | return !ExecQualAndReset(hashtable->cur_eq_func, econtext); |
485 | } |
486 | |