1/*-------------------------------------------------------------------------
2 *
3 * hashfn.c
4 * Generic hashing functions, and hash functions for use in dynahash.c
5 * hashtables
6 *
7 *
8 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
9 * Portions Copyright (c) 1994, Regents of the University of California
10 *
11 *
12 * IDENTIFICATION
13 * src/backend/utils/hash/hashfn.c
14 *
15 * NOTES
16 * It is expected that every bit of a hash function's 32-bit result is
17 * as random as every other; failure to ensure this is likely to lead
18 * to poor performance of hash tables. In most cases a hash
19 * function should use hash_any() or its variant hash_uint32().
20 *
21 *-------------------------------------------------------------------------
22 */
23#include "postgres.h"
24
25#include "fmgr.h"
26#include "nodes/bitmapset.h"
27#include "utils/hashutils.h"
28#include "utils/hsearch.h"
29
30
31/*
32 * This hash function was written by Bob Jenkins
33 * (bob_jenkins@burtleburtle.net), and superficially adapted
34 * for PostgreSQL by Neil Conway. For more information on this
35 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
36 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
37 *
38 * In the current code, we have adopted Bob's 2006 update of his hash
39 * function to fetch the data a word at a time when it is suitably aligned.
40 * This makes for a useful speedup, at the cost of having to maintain
41 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
42 * It also uses two separate mixing functions mix() and final(), instead
43 * of a slower multi-purpose function.
44 */
45
46/* Get a bit mask of the bits set in non-uint32 aligned addresses */
47#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
48
49/* Rotate a uint32 value left by k bits - note multiple evaluation! */
50#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
51
52/*----------
53 * mix -- mix 3 32-bit values reversibly.
54 *
55 * This is reversible, so any information in (a,b,c) before mix() is
56 * still in (a,b,c) after mix().
57 *
58 * If four pairs of (a,b,c) inputs are run through mix(), or through
59 * mix() in reverse, there are at least 32 bits of the output that
60 * are sometimes the same for one pair and different for another pair.
61 * This was tested for:
62 * * pairs that differed by one bit, by two bits, in any combination
63 * of top bits of (a,b,c), or in any combination of bottom bits of
64 * (a,b,c).
65 * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
66 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
67 * is commonly produced by subtraction) look like a single 1-bit
68 * difference.
69 * * the base values were pseudorandom, all zero but one bit set, or
70 * all zero plus a counter that starts at zero.
71 *
72 * This does not achieve avalanche. There are input bits of (a,b,c)
73 * that fail to affect some output bits of (a,b,c), especially of a. The
74 * most thoroughly mixed value is c, but it doesn't really even achieve
75 * avalanche in c.
76 *
77 * This allows some parallelism. Read-after-writes are good at doubling
78 * the number of bits affected, so the goal of mixing pulls in the opposite
79 * direction from the goal of parallelism. I did what I could. Rotates
80 * seem to cost as much as shifts on every machine I could lay my hands on,
81 * and rotates are much kinder to the top and bottom bits, so I used rotates.
82 *----------
83 */
84#define mix(a,b,c) \
85{ \
86 a -= c; a ^= rot(c, 4); c += b; \
87 b -= a; b ^= rot(a, 6); a += c; \
88 c -= b; c ^= rot(b, 8); b += a; \
89 a -= c; a ^= rot(c,16); c += b; \
90 b -= a; b ^= rot(a,19); a += c; \
91 c -= b; c ^= rot(b, 4); b += a; \
92}
93
94/*----------
95 * final -- final mixing of 3 32-bit values (a,b,c) into c
96 *
97 * Pairs of (a,b,c) values differing in only a few bits will usually
98 * produce values of c that look totally different. This was tested for
99 * * pairs that differed by one bit, by two bits, in any combination
100 * of top bits of (a,b,c), or in any combination of bottom bits of
101 * (a,b,c).
102 * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
103 * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
104 * is commonly produced by subtraction) look like a single 1-bit
105 * difference.
106 * * the base values were pseudorandom, all zero but one bit set, or
107 * all zero plus a counter that starts at zero.
108 *
109 * The use of separate functions for mix() and final() allow for a
110 * substantial performance increase since final() does not need to
111 * do well in reverse, but is does need to affect all output bits.
112 * mix(), on the other hand, does not need to affect all output
113 * bits (affecting 32 bits is enough). The original hash function had
114 * a single mixing operation that had to satisfy both sets of requirements
115 * and was slower as a result.
116 *----------
117 */
118#define final(a,b,c) \
119{ \
120 c ^= b; c -= rot(b,14); \
121 a ^= c; a -= rot(c,11); \
122 b ^= a; b -= rot(a,25); \
123 c ^= b; c -= rot(b,16); \
124 a ^= c; a -= rot(c, 4); \
125 b ^= a; b -= rot(a,14); \
126 c ^= b; c -= rot(b,24); \
127}
128
129/*
130 * hash_any() -- hash a variable-length key into a 32-bit value
131 * k : the key (the unaligned variable-length array of bytes)
132 * len : the length of the key, counting by bytes
133 *
134 * Returns a uint32 value. Every bit of the key affects every bit of
135 * the return value. Every 1-bit and 2-bit delta achieves avalanche.
136 * About 6*len+35 instructions. The best hash table sizes are powers
137 * of 2. There is no need to do mod a prime (mod is sooo slow!).
138 * If you need less than 32 bits, use a bitmask.
139 *
140 * This procedure must never throw elog(ERROR); the ResourceOwner code
141 * relies on this not to fail.
142 *
143 * Note: we could easily change this function to return a 64-bit hash value
144 * by using the final values of both b and c. b is perhaps a little less
145 * well mixed than c, however.
146 */
147Datum
148hash_any(register const unsigned char *k, register int keylen)
149{
150 register uint32 a,
151 b,
152 c,
153 len;
154
155 /* Set up the internal state */
156 len = keylen;
157 a = b = c = 0x9e3779b9 + len + 3923095;
158
159 /* If the source pointer is word-aligned, we use word-wide fetches */
160 if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
161 {
162 /* Code path for aligned source data */
163 register const uint32 *ka = (const uint32 *) k;
164
165 /* handle most of the key */
166 while (len >= 12)
167 {
168 a += ka[0];
169 b += ka[1];
170 c += ka[2];
171 mix(a, b, c);
172 ka += 3;
173 len -= 12;
174 }
175
176 /* handle the last 11 bytes */
177 k = (const unsigned char *) ka;
178#ifdef WORDS_BIGENDIAN
179 switch (len)
180 {
181 case 11:
182 c += ((uint32) k[10] << 8);
183 /* fall through */
184 case 10:
185 c += ((uint32) k[9] << 16);
186 /* fall through */
187 case 9:
188 c += ((uint32) k[8] << 24);
189 /* fall through */
190 case 8:
191 /* the lowest byte of c is reserved for the length */
192 b += ka[1];
193 a += ka[0];
194 break;
195 case 7:
196 b += ((uint32) k[6] << 8);
197 /* fall through */
198 case 6:
199 b += ((uint32) k[5] << 16);
200 /* fall through */
201 case 5:
202 b += ((uint32) k[4] << 24);
203 /* fall through */
204 case 4:
205 a += ka[0];
206 break;
207 case 3:
208 a += ((uint32) k[2] << 8);
209 /* fall through */
210 case 2:
211 a += ((uint32) k[1] << 16);
212 /* fall through */
213 case 1:
214 a += ((uint32) k[0] << 24);
215 /* case 0: nothing left to add */
216 }
217#else /* !WORDS_BIGENDIAN */
218 switch (len)
219 {
220 case 11:
221 c += ((uint32) k[10] << 24);
222 /* fall through */
223 case 10:
224 c += ((uint32) k[9] << 16);
225 /* fall through */
226 case 9:
227 c += ((uint32) k[8] << 8);
228 /* fall through */
229 case 8:
230 /* the lowest byte of c is reserved for the length */
231 b += ka[1];
232 a += ka[0];
233 break;
234 case 7:
235 b += ((uint32) k[6] << 16);
236 /* fall through */
237 case 6:
238 b += ((uint32) k[5] << 8);
239 /* fall through */
240 case 5:
241 b += k[4];
242 /* fall through */
243 case 4:
244 a += ka[0];
245 break;
246 case 3:
247 a += ((uint32) k[2] << 16);
248 /* fall through */
249 case 2:
250 a += ((uint32) k[1] << 8);
251 /* fall through */
252 case 1:
253 a += k[0];
254 /* case 0: nothing left to add */
255 }
256#endif /* WORDS_BIGENDIAN */
257 }
258 else
259 {
260 /* Code path for non-aligned source data */
261
262 /* handle most of the key */
263 while (len >= 12)
264 {
265#ifdef WORDS_BIGENDIAN
266 a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
267 b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
268 c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
269#else /* !WORDS_BIGENDIAN */
270 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
271 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
272 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
273#endif /* WORDS_BIGENDIAN */
274 mix(a, b, c);
275 k += 12;
276 len -= 12;
277 }
278
279 /* handle the last 11 bytes */
280#ifdef WORDS_BIGENDIAN
281 switch (len)
282 {
283 case 11:
284 c += ((uint32) k[10] << 8);
285 /* fall through */
286 case 10:
287 c += ((uint32) k[9] << 16);
288 /* fall through */
289 case 9:
290 c += ((uint32) k[8] << 24);
291 /* fall through */
292 case 8:
293 /* the lowest byte of c is reserved for the length */
294 b += k[7];
295 /* fall through */
296 case 7:
297 b += ((uint32) k[6] << 8);
298 /* fall through */
299 case 6:
300 b += ((uint32) k[5] << 16);
301 /* fall through */
302 case 5:
303 b += ((uint32) k[4] << 24);
304 /* fall through */
305 case 4:
306 a += k[3];
307 /* fall through */
308 case 3:
309 a += ((uint32) k[2] << 8);
310 /* fall through */
311 case 2:
312 a += ((uint32) k[1] << 16);
313 /* fall through */
314 case 1:
315 a += ((uint32) k[0] << 24);
316 /* case 0: nothing left to add */
317 }
318#else /* !WORDS_BIGENDIAN */
319 switch (len)
320 {
321 case 11:
322 c += ((uint32) k[10] << 24);
323 /* fall through */
324 case 10:
325 c += ((uint32) k[9] << 16);
326 /* fall through */
327 case 9:
328 c += ((uint32) k[8] << 8);
329 /* fall through */
330 case 8:
331 /* the lowest byte of c is reserved for the length */
332 b += ((uint32) k[7] << 24);
333 /* fall through */
334 case 7:
335 b += ((uint32) k[6] << 16);
336 /* fall through */
337 case 6:
338 b += ((uint32) k[5] << 8);
339 /* fall through */
340 case 5:
341 b += k[4];
342 /* fall through */
343 case 4:
344 a += ((uint32) k[3] << 24);
345 /* fall through */
346 case 3:
347 a += ((uint32) k[2] << 16);
348 /* fall through */
349 case 2:
350 a += ((uint32) k[1] << 8);
351 /* fall through */
352 case 1:
353 a += k[0];
354 /* case 0: nothing left to add */
355 }
356#endif /* WORDS_BIGENDIAN */
357 }
358
359 final(a, b, c);
360
361 /* report the result */
362 return UInt32GetDatum(c);
363}
364
365/*
366 * hash_any_extended() -- hash into a 64-bit value, using an optional seed
367 * k : the key (the unaligned variable-length array of bytes)
368 * len : the length of the key, counting by bytes
369 * seed : a 64-bit seed (0 means no seed)
370 *
371 * Returns a uint64 value. Otherwise similar to hash_any.
372 */
373Datum
374hash_any_extended(register const unsigned char *k, register int keylen,
375 uint64 seed)
376{
377 register uint32 a,
378 b,
379 c,
380 len;
381
382 /* Set up the internal state */
383 len = keylen;
384 a = b = c = 0x9e3779b9 + len + 3923095;
385
386 /* If the seed is non-zero, use it to perturb the internal state. */
387 if (seed != 0)
388 {
389 /*
390 * In essence, the seed is treated as part of the data being hashed,
391 * but for simplicity, we pretend that it's padded with four bytes of
392 * zeroes so that the seed constitutes a 12-byte chunk.
393 */
394 a += (uint32) (seed >> 32);
395 b += (uint32) seed;
396 mix(a, b, c);
397 }
398
399 /* If the source pointer is word-aligned, we use word-wide fetches */
400 if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
401 {
402 /* Code path for aligned source data */
403 register const uint32 *ka = (const uint32 *) k;
404
405 /* handle most of the key */
406 while (len >= 12)
407 {
408 a += ka[0];
409 b += ka[1];
410 c += ka[2];
411 mix(a, b, c);
412 ka += 3;
413 len -= 12;
414 }
415
416 /* handle the last 11 bytes */
417 k = (const unsigned char *) ka;
418#ifdef WORDS_BIGENDIAN
419 switch (len)
420 {
421 case 11:
422 c += ((uint32) k[10] << 8);
423 /* fall through */
424 case 10:
425 c += ((uint32) k[9] << 16);
426 /* fall through */
427 case 9:
428 c += ((uint32) k[8] << 24);
429 /* fall through */
430 case 8:
431 /* the lowest byte of c is reserved for the length */
432 b += ka[1];
433 a += ka[0];
434 break;
435 case 7:
436 b += ((uint32) k[6] << 8);
437 /* fall through */
438 case 6:
439 b += ((uint32) k[5] << 16);
440 /* fall through */
441 case 5:
442 b += ((uint32) k[4] << 24);
443 /* fall through */
444 case 4:
445 a += ka[0];
446 break;
447 case 3:
448 a += ((uint32) k[2] << 8);
449 /* fall through */
450 case 2:
451 a += ((uint32) k[1] << 16);
452 /* fall through */
453 case 1:
454 a += ((uint32) k[0] << 24);
455 /* case 0: nothing left to add */
456 }
457#else /* !WORDS_BIGENDIAN */
458 switch (len)
459 {
460 case 11:
461 c += ((uint32) k[10] << 24);
462 /* fall through */
463 case 10:
464 c += ((uint32) k[9] << 16);
465 /* fall through */
466 case 9:
467 c += ((uint32) k[8] << 8);
468 /* fall through */
469 case 8:
470 /* the lowest byte of c is reserved for the length */
471 b += ka[1];
472 a += ka[0];
473 break;
474 case 7:
475 b += ((uint32) k[6] << 16);
476 /* fall through */
477 case 6:
478 b += ((uint32) k[5] << 8);
479 /* fall through */
480 case 5:
481 b += k[4];
482 /* fall through */
483 case 4:
484 a += ka[0];
485 break;
486 case 3:
487 a += ((uint32) k[2] << 16);
488 /* fall through */
489 case 2:
490 a += ((uint32) k[1] << 8);
491 /* fall through */
492 case 1:
493 a += k[0];
494 /* case 0: nothing left to add */
495 }
496#endif /* WORDS_BIGENDIAN */
497 }
498 else
499 {
500 /* Code path for non-aligned source data */
501
502 /* handle most of the key */
503 while (len >= 12)
504 {
505#ifdef WORDS_BIGENDIAN
506 a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
507 b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
508 c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
509#else /* !WORDS_BIGENDIAN */
510 a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
511 b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
512 c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
513#endif /* WORDS_BIGENDIAN */
514 mix(a, b, c);
515 k += 12;
516 len -= 12;
517 }
518
519 /* handle the last 11 bytes */
520#ifdef WORDS_BIGENDIAN
521 switch (len)
522 {
523 case 11:
524 c += ((uint32) k[10] << 8);
525 /* fall through */
526 case 10:
527 c += ((uint32) k[9] << 16);
528 /* fall through */
529 case 9:
530 c += ((uint32) k[8] << 24);
531 /* fall through */
532 case 8:
533 /* the lowest byte of c is reserved for the length */
534 b += k[7];
535 /* fall through */
536 case 7:
537 b += ((uint32) k[6] << 8);
538 /* fall through */
539 case 6:
540 b += ((uint32) k[5] << 16);
541 /* fall through */
542 case 5:
543 b += ((uint32) k[4] << 24);
544 /* fall through */
545 case 4:
546 a += k[3];
547 /* fall through */
548 case 3:
549 a += ((uint32) k[2] << 8);
550 /* fall through */
551 case 2:
552 a += ((uint32) k[1] << 16);
553 /* fall through */
554 case 1:
555 a += ((uint32) k[0] << 24);
556 /* case 0: nothing left to add */
557 }
558#else /* !WORDS_BIGENDIAN */
559 switch (len)
560 {
561 case 11:
562 c += ((uint32) k[10] << 24);
563 /* fall through */
564 case 10:
565 c += ((uint32) k[9] << 16);
566 /* fall through */
567 case 9:
568 c += ((uint32) k[8] << 8);
569 /* fall through */
570 case 8:
571 /* the lowest byte of c is reserved for the length */
572 b += ((uint32) k[7] << 24);
573 /* fall through */
574 case 7:
575 b += ((uint32) k[6] << 16);
576 /* fall through */
577 case 6:
578 b += ((uint32) k[5] << 8);
579 /* fall through */
580 case 5:
581 b += k[4];
582 /* fall through */
583 case 4:
584 a += ((uint32) k[3] << 24);
585 /* fall through */
586 case 3:
587 a += ((uint32) k[2] << 16);
588 /* fall through */
589 case 2:
590 a += ((uint32) k[1] << 8);
591 /* fall through */
592 case 1:
593 a += k[0];
594 /* case 0: nothing left to add */
595 }
596#endif /* WORDS_BIGENDIAN */
597 }
598
599 final(a, b, c);
600
601 /* report the result */
602 PG_RETURN_UINT64(((uint64) b << 32) | c);
603}
604
605/*
606 * hash_uint32() -- hash a 32-bit value to a 32-bit value
607 *
608 * This has the same result as
609 * hash_any(&k, sizeof(uint32))
610 * but is faster and doesn't force the caller to store k into memory.
611 */
612Datum
613hash_uint32(uint32 k)
614{
615 register uint32 a,
616 b,
617 c;
618
619 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
620 a += k;
621
622 final(a, b, c);
623
624 /* report the result */
625 return UInt32GetDatum(c);
626}
627
628/*
629 * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
630 *
631 * Like hash_uint32, this is a convenience function.
632 */
633Datum
634hash_uint32_extended(uint32 k, uint64 seed)
635{
636 register uint32 a,
637 b,
638 c;
639
640 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
641
642 if (seed != 0)
643 {
644 a += (uint32) (seed >> 32);
645 b += (uint32) seed;
646 mix(a, b, c);
647 }
648
649 a += k;
650
651 final(a, b, c);
652
653 /* report the result */
654 PG_RETURN_UINT64(((uint64) b << 32) | c);
655}
656
657/*
658 * string_hash: hash function for keys that are NUL-terminated strings.
659 *
660 * NOTE: this is the default hash function if none is specified.
661 */
662uint32
663string_hash(const void *key, Size keysize)
664{
665 /*
666 * If the string exceeds keysize-1 bytes, we want to hash only that many,
667 * because when it is copied into the hash table it will be truncated at
668 * that length.
669 */
670 Size s_len = strlen((const char *) key);
671
672 s_len = Min(s_len, keysize - 1);
673 return DatumGetUInt32(hash_any((const unsigned char *) key,
674 (int) s_len));
675}
676
677/*
678 * tag_hash: hash function for fixed-size tag values
679 */
680uint32
681tag_hash(const void *key, Size keysize)
682{
683 return DatumGetUInt32(hash_any((const unsigned char *) key,
684 (int) keysize));
685}
686
687/*
688 * uint32_hash: hash function for keys that are uint32 or int32
689 *
690 * (tag_hash works for this case too, but is slower)
691 */
692uint32
693uint32_hash(const void *key, Size keysize)
694{
695 Assert(keysize == sizeof(uint32));
696 return DatumGetUInt32(hash_uint32(*((const uint32 *) key)));
697}
698
699/*
700 * bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
701 *
702 * Note: don't forget to specify bitmap_match as the match function!
703 */
704uint32
705bitmap_hash(const void *key, Size keysize)
706{
707 Assert(keysize == sizeof(Bitmapset *));
708 return bms_hash_value(*((const Bitmapset *const *) key));
709}
710
711/*
712 * bitmap_match: match function to use with bitmap_hash
713 */
714int
715bitmap_match(const void *key1, const void *key2, Size keysize)
716{
717 Assert(keysize == sizeof(Bitmapset *));
718 return !bms_equal(*((const Bitmapset *const *) key1),
719 *((const Bitmapset *const *) key2));
720}
721