1/*-------------------------------------------------------------------------
2 *
3 * execGrouping.c
4 * executor utility routines for grouping, hashing, and aggregation
5 *
 * Note: the equality and hashing code here is collation-aware: callers pass
 * collation OIDs through (see the collations arguments), and they are
 * forwarded to the equality and hash function invocations.
9 *
10 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/executor/execGrouping.c
16 *
17 *-------------------------------------------------------------------------
18 */
19#include "postgres.h"
20
21#include "access/parallel.h"
22#include "executor/executor.h"
23#include "miscadmin.h"
24#include "utils/lsyscache.h"
25#include "utils/hashutils.h"
26#include "utils/memutils.h"
27
/* callbacks used by the generated tuplehash_* functions below */
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);

/*
 * Define parameters for tuple hash table code generation. The interface is
 * *also* declared in execnodes.h (to generate the types, which are externally
 * visible).
 */
#define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData
#define SH_KEY_TYPE MinimalTuple
#define SH_KEY firstTuple
#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
/* TupleHashTableMatch returns 0 on a match, qsort-style */
#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
#define SH_SCOPE extern
/* cache each entry's hash value, avoiding rehashing on table growth */
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"
47
48
49/*****************************************************************************
50 * Utility routines for grouping tuples together
51 *****************************************************************************/
52
53/*
54 * execTuplesMatchPrepare
55 * Build expression that can be evaluated using ExecQual(), returning
56 * whether an ExprContext's inner/outer tuples are NOT DISTINCT
57 */
58ExprState *
59execTuplesMatchPrepare(TupleDesc desc,
60 int numCols,
61 const AttrNumber *keyColIdx,
62 const Oid *eqOperators,
63 const Oid *collations,
64 PlanState *parent)
65{
66 Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 int i;
68 ExprState *expr;
69
70 if (numCols == 0)
71 return NULL;
72
73 /* lookup equality functions */
74 for (i = 0; i < numCols; i++)
75 eqFunctions[i] = get_opcode(eqOperators[i]);
76
77 /* build actual expression */
78 expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79 numCols, keyColIdx, eqFunctions, collations,
80 parent);
81
82 return expr;
83}
84
85/*
86 * execTuplesHashPrepare
87 * Look up the equality and hashing functions needed for a TupleHashTable.
88 *
89 * This is similar to execTuplesMatchPrepare, but we also need to find the
90 * hash functions associated with the equality operators. *eqFunctions and
91 * *hashFunctions receive the palloc'd result arrays.
92 *
93 * Note: we expect that the given operators are not cross-type comparisons.
94 */
95void
96execTuplesHashPrepare(int numCols,
97 const Oid *eqOperators,
98 Oid **eqFuncOids,
99 FmgrInfo **hashFunctions)
100{
101 int i;
102
103 *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
104 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
105
106 for (i = 0; i < numCols; i++)
107 {
108 Oid eq_opr = eqOperators[i];
109 Oid eq_function;
110 Oid left_hash_function;
111 Oid right_hash_function;
112
113 eq_function = get_opcode(eq_opr);
114 if (!get_op_hash_functions(eq_opr,
115 &left_hash_function, &right_hash_function))
116 elog(ERROR, "could not find hash function for hash operator %u",
117 eq_opr);
118 /* We're not supporting cross-type cases here */
119 Assert(left_hash_function == right_hash_function);
120 (*eqFuncOids)[i] = eq_function;
121 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122 }
123}
124
125
126/*****************************************************************************
127 * Utility routines for all-in-memory hash tables
128 *
129 * These routines build hash tables for grouping tuples together (eg, for
130 * hash aggregation). There is one entry for each not-distinct set of tuples
131 * presented.
132 *****************************************************************************/
133
134/*
135 * Construct an empty TupleHashTable
136 *
137 * numCols, keyColIdx: identify the tuple fields to use as lookup key
138 * eqfunctions: equality comparison functions to use
139 * hashfunctions: datatype-specific hashing functions to use
140 * nbuckets: initial estimate of hashtable size
141 * additionalsize: size of data stored in ->additional
142 * metacxt: memory context for long-lived allocation, but not per-entry data
143 * tablecxt: memory context in which to store table entries
144 * tempcxt: short-lived context for evaluation hash and comparison functions
145 *
146 * The function arrays may be made with execTuplesHashPrepare(). Note they
147 * are not cross-type functions, but expect to see the table datatype(s)
148 * on both sides.
149 *
150 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151 * storage that will live as long as the hashtable does.
152 */
153TupleHashTable
154BuildTupleHashTableExt(PlanState *parent,
155 TupleDesc inputDesc,
156 int numCols, AttrNumber *keyColIdx,
157 const Oid *eqfuncoids,
158 FmgrInfo *hashfunctions,
159 Oid *collations,
160 long nbuckets, Size additionalsize,
161 MemoryContext metacxt,
162 MemoryContext tablecxt,
163 MemoryContext tempcxt,
164 bool use_variable_hash_iv)
165{
166 TupleHashTable hashtable;
167 Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
168 MemoryContext oldcontext;
169 bool allow_jit;
170
171 Assert(nbuckets > 0);
172
173 /* Limit initial table size request to not more than work_mem */
174 nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
175
176 oldcontext = MemoryContextSwitchTo(metacxt);
177
178 hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
179
180 hashtable->numCols = numCols;
181 hashtable->keyColIdx = keyColIdx;
182 hashtable->tab_hash_funcs = hashfunctions;
183 hashtable->tab_collations = collations;
184 hashtable->tablecxt = tablecxt;
185 hashtable->tempcxt = tempcxt;
186 hashtable->entrysize = entrysize;
187 hashtable->tableslot = NULL; /* will be made on first lookup */
188 hashtable->inputslot = NULL;
189 hashtable->in_hash_funcs = NULL;
190 hashtable->cur_eq_func = NULL;
191
192 /*
193 * If parallelism is in use, even if the master backend is performing the
194 * scan itself, we don't want to create the hashtable exactly the same way
195 * in all workers. As hashtables are iterated over in keyspace-order,
196 * doing so in all processes in the same way is likely to lead to
197 * "unbalanced" hashtables when the table size initially is
198 * underestimated.
199 */
200 if (use_variable_hash_iv)
201 hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
202 else
203 hashtable->hash_iv = 0;
204
205 hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
206
207 /*
208 * We copy the input tuple descriptor just for safety --- we assume all
209 * input tuples will have equivalent descriptors.
210 */
211 hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
212 &TTSOpsMinimalTuple);
213
214 /*
215 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
216 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
217 * functions to a) live longer than the query b) be re-generated each time
218 * the table is being reset. Therefore prevent JIT from being used in that
219 * case, by not providing a parent node (which prevents accessing the
220 * JitContext in the EState).
221 */
222 allow_jit = metacxt != tablecxt;
223
224 /* build comparator for all columns */
225 /* XXX: should we support non-minimal tuples for the inputslot? */
226 hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
227 &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
228 numCols,
229 keyColIdx, eqfuncoids, collations,
230 allow_jit ? parent : NULL);
231
232 /*
233 * While not pretty, it's ok to not shut down this context, but instead
234 * rely on the containing memory context being reset, as
235 * ExecBuildGroupingEqual() only builds a very simple expression calling
236 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
237 */
238 hashtable->exprcontext = CreateStandaloneExprContext();
239
240 MemoryContextSwitchTo(oldcontext);
241
242 return hashtable;
243}
244
245/*
246 * BuildTupleHashTable is a backwards-compatibilty wrapper for
247 * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
248 * tablecxt. Note that hashtables created this way cannot be reset leak-free
249 * with ResetTupleHashTable().
250 */
251TupleHashTable
252BuildTupleHashTable(PlanState *parent,
253 TupleDesc inputDesc,
254 int numCols, AttrNumber *keyColIdx,
255 const Oid *eqfuncoids,
256 FmgrInfo *hashfunctions,
257 Oid *collations,
258 long nbuckets, Size additionalsize,
259 MemoryContext tablecxt,
260 MemoryContext tempcxt,
261 bool use_variable_hash_iv)
262{
263 return BuildTupleHashTableExt(parent,
264 inputDesc,
265 numCols, keyColIdx,
266 eqfuncoids,
267 hashfunctions,
268 collations,
269 nbuckets, additionalsize,
270 tablecxt,
271 tablecxt,
272 tempcxt,
273 use_variable_hash_iv);
274}
275
276/*
277 * Reset contents of the hashtable to be empty, preserving all the non-content
278 * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
279 * also be reset, otherwise there will be leaks.
280 */
281void
282ResetTupleHashTable(TupleHashTable hashtable)
283{
284 tuplehash_reset(hashtable->hashtab);
285}
286
287/*
288 * Find or create a hashtable entry for the tuple group containing the
289 * given tuple. The tuple must be the same type as the hashtable entries.
290 *
291 * If isnew is NULL, we do not create new entries; we return NULL if no
292 * match is found.
293 *
294 * If isnew isn't NULL, then a new entry is created if no existing entry
295 * matches. On return, *isnew is true if the entry is newly created,
296 * false if it existed already. ->additional_data in the new entry has
297 * been zeroed.
298 */
299TupleHashEntry
300LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
301 bool *isnew)
302{
303 TupleHashEntryData *entry;
304 MemoryContext oldContext;
305 bool found;
306 MinimalTuple key;
307
308 /* Need to run the hash functions in short-lived context */
309 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
310
311 /* set up data needed by hash and match functions */
312 hashtable->inputslot = slot;
313 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
314 hashtable->cur_eq_func = hashtable->tab_eq_func;
315
316 key = NULL; /* flag to reference inputslot */
317
318 if (isnew)
319 {
320 entry = tuplehash_insert(hashtable->hashtab, key, &found);
321
322 if (found)
323 {
324 /* found pre-existing entry */
325 *isnew = false;
326 }
327 else
328 {
329 /* created new entry */
330 *isnew = true;
331 /* zero caller data */
332 entry->additional = NULL;
333 MemoryContextSwitchTo(hashtable->tablecxt);
334 /* Copy the first tuple into the table context */
335 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
336 }
337 }
338 else
339 {
340 entry = tuplehash_lookup(hashtable->hashtab, key);
341 }
342
343 MemoryContextSwitchTo(oldContext);
344
345 return entry;
346}
347
348/*
349 * Search for a hashtable entry matching the given tuple. No entry is
350 * created if there's not a match. This is similar to the non-creating
351 * case of LookupTupleHashEntry, except that it supports cross-type
352 * comparisons, in which the given tuple is not of the same type as the
353 * table entries. The caller must provide the hash functions to use for
354 * the input tuple, as well as the equality functions, since these may be
355 * different from the table's internal functions.
356 */
357TupleHashEntry
358FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
359 ExprState *eqcomp,
360 FmgrInfo *hashfunctions)
361{
362 TupleHashEntry entry;
363 MemoryContext oldContext;
364 MinimalTuple key;
365
366 /* Need to run the hash functions in short-lived context */
367 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
368
369 /* Set up data needed by hash and match functions */
370 hashtable->inputslot = slot;
371 hashtable->in_hash_funcs = hashfunctions;
372 hashtable->cur_eq_func = eqcomp;
373
374 /* Search the hash table */
375 key = NULL; /* flag to reference inputslot */
376 entry = tuplehash_lookup(hashtable->hashtab, key);
377 MemoryContextSwitchTo(oldContext);
378
379 return entry;
380}
381
382/*
383 * Compute the hash value for a tuple
384 *
385 * The passed-in key is a pointer to TupleHashEntryData. In an actual hash
386 * table entry, the firstTuple field points to a tuple (in MinimalTuple
387 * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
388 * NULL firstTuple field --- that cues us to look at the inputslot instead.
389 * This convention avoids the need to materialize virtual input tuples unless
390 * they actually need to get copied into the table.
391 *
392 * Also, the caller must select an appropriate memory context for running
393 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
394 */
395static uint32
396TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
397{
398 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
399 int numCols = hashtable->numCols;
400 AttrNumber *keyColIdx = hashtable->keyColIdx;
401 uint32 hashkey = hashtable->hash_iv;
402 TupleTableSlot *slot;
403 FmgrInfo *hashfunctions;
404 int i;
405
406 if (tuple == NULL)
407 {
408 /* Process the current input tuple for the table */
409 slot = hashtable->inputslot;
410 hashfunctions = hashtable->in_hash_funcs;
411 }
412 else
413 {
414 /*
415 * Process a tuple already stored in the table.
416 *
417 * (this case never actually occurs due to the way simplehash.h is
418 * used, as the hash-value is stored in the entries)
419 */
420 slot = hashtable->tableslot;
421 ExecStoreMinimalTuple(tuple, slot, false);
422 hashfunctions = hashtable->tab_hash_funcs;
423 }
424
425 for (i = 0; i < numCols; i++)
426 {
427 AttrNumber att = keyColIdx[i];
428 Datum attr;
429 bool isNull;
430
431 /* rotate hashkey left 1 bit at each step */
432 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
433
434 attr = slot_getattr(slot, att, &isNull);
435
436 if (!isNull) /* treat nulls as having hash key 0 */
437 {
438 uint32 hkey;
439
440 hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
441 hashtable->tab_collations[i],
442 attr));
443 hashkey ^= hkey;
444 }
445 }
446
447 /*
448 * The way hashes are combined above, among each other and with the IV,
449 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
450 * achieve that, perform a round of hashing of the combined hash -
451 * resulting in near perfect perturbation.
452 */
453 return murmurhash32(hashkey);
454}
455
456/*
457 * See whether two tuples (presumably of the same hash value) match
458 *
459 * As above, the passed pointers are pointers to TupleHashEntryData.
460 */
461static int
462TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
463{
464 TupleTableSlot *slot1;
465 TupleTableSlot *slot2;
466 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
467 ExprContext *econtext = hashtable->exprcontext;
468
469 /*
470 * We assume that simplehash.h will only ever call us with the first
471 * argument being an actual table entry, and the second argument being
472 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
473 * could be supported too, but is not currently required.
474 */
475 Assert(tuple1 != NULL);
476 slot1 = hashtable->tableslot;
477 ExecStoreMinimalTuple(tuple1, slot1, false);
478 Assert(tuple2 == NULL);
479 slot2 = hashtable->inputslot;
480
481 /* For crosstype comparisons, the inputslot must be first */
482 econtext->ecxt_innertuple = slot2;
483 econtext->ecxt_outertuple = slot1;
484 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
485}
486