1#ifndef AWS_COMMON_PRIVATE_LOOKUP3_INL
2#define AWS_COMMON_PRIVATE_LOOKUP3_INL
3/* clang-format off */
4
5/*
6 * The following public domain code has been modified as follows:
7 * # All functions have been made static.
8 * # The self test harness has been turned off.
9 * # stdint.h include removed for C89 compatibility.
10 *
11 * The original code was retrieved from http://burtleburtle.net/bob/c/lookup3.c
12 */
13
14/*
15-------------------------------------------------------------------------------
16lookup3.c, by Bob Jenkins, May 2006, Public Domain.
17
18These are functions for producing 32-bit hashes for hash table lookup.
19hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
20are externally useful functions. Routines to test the hash are included
21if SELF_TEST is defined. You can use this free for any purpose. It's in
22the public domain. It has no warranty.
23
24You probably want to use hashlittle(). hashlittle() and hashbig()
25hash byte arrays. hashlittle() is is faster than hashbig() on
26little-endian machines. Intel and AMD are little-endian machines.
27On second thought, you probably want hashlittle2(), which is identical to
28hashlittle() except it returns two 32-bit hashes for the price of one.
29You could implement hashbig2() if you wanted but I haven't bothered here.
30
31If you want to find a hash of, say, exactly 7 integers, do
32 a = i1; b = i2; c = i3;
33 mix(a,b,c);
34 a += i4; b += i5; c += i6;
35 mix(a,b,c);
36 a += i7;
37 final(a,b,c);
38then use c as the hash value. If you have a variable length array of
394-byte integers to hash, use hashword(). If you have a byte array (like
40a character string), use hashlittle(). If you have several byte arrays, or
41a mix of things, see the comments above hashlittle().
42
43Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
44then mix those integers. This is fast (you can do a lot more thorough
45mixing with 12*3 instructions on 3 integers than you can with 3 instructions
46on 1 byte), but shoehorning those bytes into integers efficiently is messy.
47-------------------------------------------------------------------------------
48*/
49// #define SELF_TEST 1
50
51#include <stdio.h> /* defines printf for tests */
52#include <time.h> /* defines time_t for timings in the test */
53#ifndef _MSC_VER
54#include <sys/param.h> /* attempt to define endianness */
55#endif
56#ifdef linux
57# include <endian.h> /* attempt to define endianness */
58#endif
59
60#if _MSC_VER
61#pragma warning(push)
62#pragma warning(disable:4127) /*Disable "conditional expression is constant" */
63#endif /* _MSC_VER */
64
65#ifdef CBMC
66# pragma CPROVER check push
67# pragma CPROVER check disable "unsigned-overflow"
68#endif /* CBMC */
69
70/*
71 * My best guess at if you are big-endian or little-endian. This may
72 * need adjustment.
73 */
74 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
75 __BYTE_ORDER == __LITTLE_ENDIAN) || \
76 (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
77 __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
78 (defined(i386) || defined(__i386__) || defined(__i486__) || \
79 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || \
80 defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64) || defined(_M_ARM))
81# define HASH_LITTLE_ENDIAN 1
82# define HASH_BIG_ENDIAN 0
83#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
84 __BYTE_ORDER == __BIG_ENDIAN) || \
85 (defined(sparc) || defined(POWERPC) || defined(_M_PPC) || defined(mc68000) || defined(sel))
86# define HASH_LITTLE_ENDIAN 0
87# define HASH_BIG_ENDIAN 1
88#else
89# define HASH_LITTLE_ENDIAN 0
90# define HASH_BIG_ENDIAN 0
91#endif
92
93#define hashsize(n) ((uint32_t)1<<(n))
94#define hashmask(n) (hashsize(n)-1)
95#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
96
97/*
98-------------------------------------------------------------------------------
99mix -- mix 3 32-bit values reversibly.
100
101This is reversible, so any information in (a,b,c) before mix() is
102still in (a,b,c) after mix().
103
104If four pairs of (a,b,c) inputs are run through mix(), or through
105mix() in reverse, there are at least 32 bits of the output that
106are sometimes the same for one pair and different for another pair.
107This was tested for:
108* pairs that differed by one bit, by two bits, in any combination
109 of top bits of (a,b,c), or in any combination of bottom bits of
110 (a,b,c).
111* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
112 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
113 is commonly produced by subtraction) look like a single 1-bit
114 difference.
115* the base values were pseudorandom, all zero but one bit set, or
116 all zero plus a counter that starts at zero.
117
118Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
119satisfy this are
120 4 6 8 16 19 4
121 9 15 3 18 27 15
122 14 9 3 7 17 3
123Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
124for "differ" defined as + with a one-bit base and a two-bit delta. I
125used http://burtleburtle.net/bob/hash/avalanche.html to choose
126the operations, constants, and arrangements of the variables.
127
128This does not achieve avalanche. There are input bits of (a,b,c)
129that fail to affect some output bits of (a,b,c), especially of a. The
130most thoroughly mixed value is c, but it doesn't really even achieve
131avalanche in c.
132
133This allows some parallelism. Read-after-writes are good at doubling
134the number of bits affected, so the goal of mixing pulls in the opposite
135direction as the goal of parallelism. I did what I could. Rotates
136seem to cost as much as shifts on every machine I could lay my hands
137on, and rotates are much kinder to the top and bottom bits, so I used
138rotates.
139-------------------------------------------------------------------------------
140*/
141#define mix(a,b,c) \
142{ \
143 a -= c; a ^= rot(c, 4); c += b; \
144 b -= a; b ^= rot(a, 6); a += c; \
145 c -= b; c ^= rot(b, 8); b += a; \
146 a -= c; a ^= rot(c,16); c += b; \
147 b -= a; b ^= rot(a,19); a += c; \
148 c -= b; c ^= rot(b, 4); b += a; \
149}
150
151/*
152-------------------------------------------------------------------------------
153final -- final mixing of 3 32-bit values (a,b,c) into c
154
155Pairs of (a,b,c) values differing in only a few bits will usually
156produce values of c that look totally different. This was tested for
157* pairs that differed by one bit, by two bits, in any combination
158 of top bits of (a,b,c), or in any combination of bottom bits of
159 (a,b,c).
160* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
161 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
162 is commonly produced by subtraction) look like a single 1-bit
163 difference.
164* the base values were pseudorandom, all zero but one bit set, or
165 all zero plus a counter that starts at zero.
166
167These constants passed:
168 14 11 25 16 4 14 24
169 12 14 25 16 4 14 24
170and these came close:
171 4 8 15 26 3 22 24
172 10 8 15 26 3 22 24
173 11 8 15 26 3 22 24
174-------------------------------------------------------------------------------
175*/
176#define final(a,b,c) \
177{ \
178 c ^= b; c -= rot(b,14); \
179 a ^= c; a -= rot(c,11); \
180 b ^= a; b -= rot(a,25); \
181 c ^= b; c -= rot(b,16); \
182 a ^= c; a -= rot(c,4); \
183 b ^= a; b -= rot(a,14); \
184 c ^= b; c -= rot(b,24); \
185}
186
187/*
188--------------------------------------------------------------------
189 This works on all machines. To be useful, it requires
190 -- that the key be an array of uint32_t's, and
191 -- that the length be the number of uint32_t's in the key
192
193 The function hashword() is identical to hashlittle() on little-endian
194 machines, and identical to hashbig() on big-endian machines,
195 except that the length has to be measured in uint32_ts rather than in
196 bytes. hashlittle() is more complicated than hashword() only because
197 hashlittle() has to dance around fitting the key bytes into registers.
198--------------------------------------------------------------------
199*/
200static uint32_t hashword(
201const uint32_t *k, /* the key, an array of uint32_t values */
202size_t length, /* the length of the key, in uint32_ts */
203uint32_t initval) /* the previous hash, or an arbitrary value */
204{
205 uint32_t a,b,c;
206
207 /* Set up the internal state */
208 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
209
210 /*------------------------------------------------- handle most of the key */
211 while (length > 3)
212 {
213 a += k[0];
214 b += k[1];
215 c += k[2];
216 mix(a,b,c);
217 length -= 3;
218 k += 3;
219 }
220
221 /*------------------------------------------- handle the last 3 uint32_t's */
222 switch(length) /* all the case statements fall through */
223 {
224 case 3 : c+=k[2];
225 case 2 : b+=k[1];
226 case 1 : a+=k[0];
227 final(a,b,c);
228 case 0: /* case 0: nothing left to add */
229 break;
230 }
231 /*------------------------------------------------------ report the result */
232 return c;
233}
234
235
236/*
237--------------------------------------------------------------------
238hashword2() -- same as hashword(), but take two seeds and return two
23932-bit values. pc and pb must both be nonnull, and *pc and *pb must
240both be initialized with seeds. If you pass in (*pb)==0, the output
241(*pc) will be the same as the return value from hashword().
242--------------------------------------------------------------------
243*/
244static void hashword2 (
245const uint32_t *k, /* the key, an array of uint32_t values */
246size_t length, /* the length of the key, in uint32_ts */
247uint32_t *pc, /* IN: seed OUT: primary hash value */
248uint32_t *pb) /* IN: more seed OUT: secondary hash value */
249{
250 uint32_t a,b,c;
251
252 /* Set up the internal state */
253 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
254 c += *pb;
255
256 /*------------------------------------------------- handle most of the key */
257 while (length > 3)
258 {
259 a += k[0];
260 b += k[1];
261 c += k[2];
262 mix(a,b,c);
263 length -= 3;
264 k += 3;
265 }
266
267 /*------------------------------------------- handle the last 3 uint32_t's */
268 switch(length) /* all the case statements fall through */
269 {
270 case 3 : c+=k[2];
271 case 2 : b+=k[1];
272 case 1 : a+=k[0];
273 final(a,b,c);
274 case 0: /* case 0: nothing left to add */
275 break;
276 }
277 /*------------------------------------------------------ report the result */
278 *pc=c; *pb=b;
279}
280
281
282/*
283-------------------------------------------------------------------------------
284hashlittle() -- hash a variable-length key into a 32-bit value
285 k : the key (the unaligned variable-length array of bytes)
286 length : the length of the key, counting by bytes
287 initval : can be any 4-byte value
288Returns a 32-bit value. Every bit of the key affects every bit of
289the return value. Two keys differing by one or two bits will have
290totally different hash values.
291
292The best hash table sizes are powers of 2. There is no need to do
293mod a prime (mod is sooo slow!). If you need less than 32 bits,
294use a bitmask. For example, if you need only 10 bits, do
295 h = (h & hashmask(10));
296In which case, the hash table should have hashsize(10) elements.
297
298If you are hashing n strings (uint8_t **)k, do it like this:
299 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
300
301By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
302code any way you wish, private, educational, or commercial. It's free.
303
304Use for hash table lookup, or anything where one collision in 2^^32 is
305acceptable. Do NOT use for cryptographic purposes.
306-------------------------------------------------------------------------------
307*/
308
309static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
310{
311 uint32_t a,b,c; /* internal state */
312 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
313
314 /* Set up the internal state */
315 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
316
317 u.ptr = key;
318 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
319 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
320
321 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
322 while (length > 12)
323 {
324 a += k[0];
325 b += k[1];
326 c += k[2];
327 mix(a,b,c);
328 length -= 12;
329 k += 3;
330 }
331
332 /*----------------------------- handle the last (probably partial) block */
333 /*
334 * "k[2]&0xffffff" actually reads beyond the end of the string, but
335 * then masks off the part it's not allowed to read. Because the
336 * string is aligned, the masked-off tail is in the same word as the
337 * rest of the string. Every machine with memory protection I've seen
338 * does it on word boundaries, so is OK with this. But VALGRIND and CBMC
339 * will still catch it and complain. CBMC will ignore this type of error
340 * in the code block between the pragmas "CPROVER check push" and
341 * "CPROVER check pop". The masking trick does make the hash noticably
342 * faster for short strings (like English words).
343 */
344#ifndef VALGRIND
345#ifdef CBMC
346# pragma CPROVER check push
347# pragma CPROVER check disable "pointer"
348#endif
349 // changed in aws-c-common: fix unused variable warning
350
351 switch(length)
352 {
353 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
354 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
355 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
356 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
357 case 8 : b+=k[1]; a+=k[0]; break;
358 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
359 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
360 case 5 : b+=k[1]&0xff; a+=k[0]; break;
361 case 4 : a+=k[0]; break;
362 case 3 : a+=k[0]&0xffffff; break;
363 case 2 : a+=k[0]&0xffff; break;
364 case 1 : a+=k[0]&0xff; break;
365 case 0 : return c; /* zero length strings require no mixing */
366 }
367#ifdef CBMC
368# pragma CPROVER check pop
369#endif
370#else /* make valgrind happy */
371
372 const uint8_t *k8 = (const uint8_t *)k;
373 switch(length)
374 {
375 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
376 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
377 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
378 case 9 : c+=k8[8]; /* fall through */
379 case 8 : b+=k[1]; a+=k[0]; break;
380 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
381 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
382 case 5 : b+=k8[4]; /* fall through */
383 case 4 : a+=k[0]; break;
384 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
385 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
386 case 1 : a+=k8[0]; break;
387 case 0 : return c;
388 }
389
390#endif /* !valgrind */
391
392 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
393 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
394
395 /*--------------- all but last block: aligned reads and different mixing */
396 while (length > 12)
397 {
398 a += k[0] + (((uint32_t)k[1])<<16);
399 b += k[2] + (((uint32_t)k[3])<<16);
400 c += k[4] + (((uint32_t)k[5])<<16);
401 mix(a,b,c);
402 length -= 12;
403 k += 6;
404 }
405
406 /*----------------------------- handle the last (probably partial) block */
407 const uint8_t *k8 = (const uint8_t *)k;
408 switch(length)
409 {
410 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
411 b+=k[2]+(((uint32_t)k[3])<<16);
412 a+=k[0]+(((uint32_t)k[1])<<16);
413 break;
414 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
415 case 10: c+=k[4];
416 b+=k[2]+(((uint32_t)k[3])<<16);
417 a+=k[0]+(((uint32_t)k[1])<<16);
418 break;
419 case 9 : c+=k8[8]; /* fall through */
420 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
421 a+=k[0]+(((uint32_t)k[1])<<16);
422 break;
423 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
424 case 6 : b+=k[2];
425 a+=k[0]+(((uint32_t)k[1])<<16);
426 break;
427 case 5 : b+=k8[4]; /* fall through */
428 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
429 break;
430 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
431 case 2 : a+=k[0];
432 break;
433 case 1 : a+=k8[0];
434 break;
435 case 0 : return c; /* zero length requires no mixing */
436 }
437
438 } else { /* need to read the key one byte at a time */
439 const uint8_t *k = (const uint8_t *)key;
440
441 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
442 while (length > 12)
443 {
444 a += k[0];
445 a += ((uint32_t)k[1])<<8;
446 a += ((uint32_t)k[2])<<16;
447 a += ((uint32_t)k[3])<<24;
448 b += k[4];
449 b += ((uint32_t)k[5])<<8;
450 b += ((uint32_t)k[6])<<16;
451 b += ((uint32_t)k[7])<<24;
452 c += k[8];
453 c += ((uint32_t)k[9])<<8;
454 c += ((uint32_t)k[10])<<16;
455 c += ((uint32_t)k[11])<<24;
456 mix(a,b,c);
457 length -= 12;
458 k += 12;
459 }
460
461 /*-------------------------------- last block: affect all 32 bits of (c) */
462 switch(length) /* all the case statements fall through */
463 {
464 case 12: c+=((uint32_t)k[11])<<24;
465 case 11: c+=((uint32_t)k[10])<<16;
466 case 10: c+=((uint32_t)k[9])<<8;
467 case 9 : c+=k[8];
468 case 8 : b+=((uint32_t)k[7])<<24;
469 case 7 : b+=((uint32_t)k[6])<<16;
470 case 6 : b+=((uint32_t)k[5])<<8;
471 case 5 : b+=k[4];
472 case 4 : a+=((uint32_t)k[3])<<24;
473 case 3 : a+=((uint32_t)k[2])<<16;
474 case 2 : a+=((uint32_t)k[1])<<8;
475 case 1 : a+=k[0];
476 break;
477 case 0 : return c;
478 }
479 }
480
481 final(a,b,c);
482 return c;
483}
484
485
486/*
487 * hashlittle2: return 2 32-bit hash values
488 *
489 * This is identical to hashlittle(), except it returns two 32-bit hash
490 * values instead of just one. This is good enough for hash table
491 * lookup with 2^^64 buckets, or if you want a second hash if you're not
492 * happy with the first, or if you want a probably-unique 64-bit ID for
493 * the key. *pc is better mixed than *pb, so use *pc first. If you want
494 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
495 */
496static void hashlittle2(
497 const void *key, /* the key to hash */
498 size_t length, /* length of the key */
499 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
500 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
501{
502 uint32_t a,b,c; /* internal state */
503 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
504
505 /* Set up the internal state */
506 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
507 c += *pb;
508
509 u.ptr = key;
510 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
511 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
512
513 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
514 while (length > 12)
515 {
516 a += k[0];
517 b += k[1];
518 c += k[2];
519 mix(a,b,c);
520 length -= 12;
521 k += 3;
522 }
523
524 /*----------------------------- handle the last (probably partial) block */
525 /*
526 * "k[2]&0xffffff" actually reads beyond the end of the string, but
527 * then masks off the part it's not allowed to read. Because the
528 * string is aligned, the masked-off tail is in the same word as the
529 * rest of the string. Every machine with memory protection I've seen
530 * does it on word boundaries, so is OK with this. But VALGRIND and CBMC
531 * will still catch it and complain. CBMC will ignore this type of error
532 * in the code block between the pragmas "CPROVER check push" and
533 * "CPROVER check pop". The masking trick does make the hash noticably
534 * faster for short strings (like English words).
535 */
536#ifndef VALGRIND
537#ifdef CBMC
538# pragma CPROVER check push
539# pragma CPROVER check disable "pointer"
540#endif
541 // changed in aws-c-common: fix unused variable warning
542
543 switch(length)
544 {
545 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
546 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
547 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
548 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
549 case 8 : b+=k[1]; a+=k[0]; break;
550 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
551 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
552 case 5 : b+=k[1]&0xff; a+=k[0]; break;
553 case 4 : a+=k[0]; break;
554 case 3 : a+=k[0]&0xffffff; break;
555 case 2 : a+=k[0]&0xffff; break;
556 case 1 : a+=k[0]&0xff; break;
557 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
558 }
559
560#ifdef CBMC
561# pragma CPROVER check pop
562#endif
563#else /* make valgrind happy */
564
565 const uint8_t *k8 = (const uint8_t *)k;
566 switch(length)
567 {
568 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
569 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
570 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
571 case 9 : c+=k8[8]; /* fall through */
572 case 8 : b+=k[1]; a+=k[0]; break;
573 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
574 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
575 case 5 : b+=k8[4]; /* fall through */
576 case 4 : a+=k[0]; break;
577 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
578 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
579 case 1 : a+=k8[0]; break;
580 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
581 }
582
583#endif /* !valgrind */
584
585 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
586 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
587
588 /*--------------- all but last block: aligned reads and different mixing */
589 while (length > 12)
590 {
591 a += k[0] + (((uint32_t)k[1])<<16);
592 b += k[2] + (((uint32_t)k[3])<<16);
593 c += k[4] + (((uint32_t)k[5])<<16);
594 mix(a,b,c);
595 length -= 12;
596 k += 6;
597 }
598
599 /*----------------------------- handle the last (probably partial) block */
600 const uint8_t *k8 = (const uint8_t *)k;
601 switch(length)
602 {
603 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
604 b+=k[2]+(((uint32_t)k[3])<<16);
605 a+=k[0]+(((uint32_t)k[1])<<16);
606 break;
607 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
608 case 10: c+=k[4];
609 b+=k[2]+(((uint32_t)k[3])<<16);
610 a+=k[0]+(((uint32_t)k[1])<<16);
611 break;
612 case 9 : c+=k8[8]; /* fall through */
613 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
614 a+=k[0]+(((uint32_t)k[1])<<16);
615 break;
616 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
617 case 6 : b+=k[2];
618 a+=k[0]+(((uint32_t)k[1])<<16);
619 break;
620 case 5 : b+=k8[4]; /* fall through */
621 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
622 break;
623 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
624 case 2 : a+=k[0];
625 break;
626 case 1 : a+=k8[0];
627 break;
628 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
629 }
630
631 } else { /* need to read the key one byte at a time */
632 const uint8_t *k = (const uint8_t *)key;
633
634 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
635 while (length > 12)
636 {
637 a += k[0];
638 a += ((uint32_t)k[1])<<8;
639 a += ((uint32_t)k[2])<<16;
640 a += ((uint32_t)k[3])<<24;
641 b += k[4];
642 b += ((uint32_t)k[5])<<8;
643 b += ((uint32_t)k[6])<<16;
644 b += ((uint32_t)k[7])<<24;
645 c += k[8];
646 c += ((uint32_t)k[9])<<8;
647 c += ((uint32_t)k[10])<<16;
648 c += ((uint32_t)k[11])<<24;
649 mix(a,b,c);
650 length -= 12;
651 k += 12;
652 }
653
654 /*-------------------------------- last block: affect all 32 bits of (c) */
655 switch(length) /* all the case statements fall through */
656 {
657 case 12: c+=((uint32_t)k[11])<<24;
658 case 11: c+=((uint32_t)k[10])<<16;
659 case 10: c+=((uint32_t)k[9])<<8;
660 case 9 : c+=k[8];
661 case 8 : b+=((uint32_t)k[7])<<24;
662 case 7 : b+=((uint32_t)k[6])<<16;
663 case 6 : b+=((uint32_t)k[5])<<8;
664 case 5 : b+=k[4];
665 case 4 : a+=((uint32_t)k[3])<<24;
666 case 3 : a+=((uint32_t)k[2])<<16;
667 case 2 : a+=((uint32_t)k[1])<<8;
668 case 1 : a+=k[0];
669 break;
670 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
671 }
672 }
673
674 final(a,b,c);
675 *pc=c; *pb=b;
676}
677
678
679
680/*
681 * hashbig():
682 * This is the same as hashword() on big-endian machines. It is different
683 * from hashlittle() on all machines. hashbig() takes advantage of
684 * big-endian byte ordering.
685 */
686static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
687{
688 uint32_t a,b,c;
689 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
690
691 /* Set up the internal state */
692 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
693
694 u.ptr = key;
695 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
696 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
697
698 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
699 while (length > 12)
700 {
701 a += k[0];
702 b += k[1];
703 c += k[2];
704 mix(a,b,c);
705 length -= 12;
706 k += 3;
707 }
708
709 /*----------------------------- handle the last (probably partial) block */
710 /*
711 * "k[2]<<8" actually reads beyond the end of the string, but
712 * then shifts out the part it's not allowed to read. Because the
713 * string is aligned, the illegal read is in the same word as the
714 * rest of the string. Every machine with memory protection I've seen
715 * does it on word boundaries, so is OK with this. But VALGRIND and CBMC
716 * will still catch it and complain. CBMC will ignore this type of error
717 * in the code block between the pragmas "CPROVER check push" and
718 * "CPROVER check pop". The masking trick does make the hash noticably
719 * faster for short strings (like English words).
720 */
721#ifndef VALGRIND
722#ifdef CBMC
723# pragma CPROVER check push
724# pragma CPROVER check disable "pointer"
725#endif
726 // changed in aws-c-common: fix unused variable warning
727
728 switch(length)
729 {
730 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
731 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
732 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
733 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
734 case 8 : b+=k[1]; a+=k[0]; break;
735 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
736 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
737 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
738 case 4 : a+=k[0]; break;
739 case 3 : a+=k[0]&0xffffff00; break;
740 case 2 : a+=k[0]&0xffff0000; break;
741 case 1 : a+=k[0]&0xff000000; break;
742 case 0 : return c; /* zero length strings require no mixing */
743 }
744#ifdef CBMC
745# pragma CPROVER check pop
746#endif
747#else /* make valgrind happy */
748
749 const uint8_t *k8 = (const uint8_t *)k;
750 switch(length) /* all the case statements fall through */
751 {
752 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
753 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
754 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
755 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
756 case 8 : b+=k[1]; a+=k[0]; break;
757 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
758 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
759 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
760 case 4 : a+=k[0]; break;
761 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
762 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
763 case 1 : a+=((uint32_t)k8[0])<<24; break;
764 case 0 : return c;
765 }
766
767#endif /* !VALGRIND */
768
769 } else { /* need to read the key one byte at a time */
770 const uint8_t *k = (const uint8_t *)key;
771
772 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
773 while (length > 12)
774 {
775 a += ((uint32_t)k[0])<<24;
776 a += ((uint32_t)k[1])<<16;
777 a += ((uint32_t)k[2])<<8;
778 a += ((uint32_t)k[3]);
779 b += ((uint32_t)k[4])<<24;
780 b += ((uint32_t)k[5])<<16;
781 b += ((uint32_t)k[6])<<8;
782 b += ((uint32_t)k[7]);
783 c += ((uint32_t)k[8])<<24;
784 c += ((uint32_t)k[9])<<16;
785 c += ((uint32_t)k[10])<<8;
786 c += ((uint32_t)k[11]);
787 mix(a,b,c);
788 length -= 12;
789 k += 12;
790 }
791
792 /*-------------------------------- last block: affect all 32 bits of (c) */
793 switch(length) /* all the case statements fall through */
794 {
795 case 12: c+=k[11];
796 case 11: c+=((uint32_t)k[10])<<8;
797 case 10: c+=((uint32_t)k[9])<<16;
798 case 9 : c+=((uint32_t)k[8])<<24;
799 case 8 : b+=k[7];
800 case 7 : b+=((uint32_t)k[6])<<8;
801 case 6 : b+=((uint32_t)k[5])<<16;
802 case 5 : b+=((uint32_t)k[4])<<24;
803 case 4 : a+=k[3];
804 case 3 : a+=((uint32_t)k[2])<<8;
805 case 2 : a+=((uint32_t)k[1])<<16;
806 case 1 : a+=((uint32_t)k[0])<<24;
807 break;
808 case 0 : return c;
809 }
810 }
811
812 final(a,b,c);
813 return c;
814}
815
816
817#ifdef SELF_TEST
818
819/* used for timings */
820void driver1()
821{
822 uint8_t buf[256];
823 uint32_t i;
824 uint32_t h=0;
825 time_t a,z;
826
827 time(&a);
828 for (i=0; i<256; ++i) buf[i] = 'x';
829 for (i=0; i<1; ++i)
830 {
831 h = hashlittle(&buf[0],1,h);
832 }
833 time(&z);
834 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
835}
836
837/* check that every input bit changes every output bit half the time */
838#define HASHSTATE 1
839#define HASHLEN 1
840#define MAXPAIR 60
841#define MAXLEN 70
842void driver2()
843{
844 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
845 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
846 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
847 uint32_t x[HASHSTATE],y[HASHSTATE];
848 uint32_t hlen;
849
850 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
851 for (hlen=0; hlen < MAXLEN; ++hlen)
852 {
853 z=0;
854 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
855 {
856 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
857 {
858 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
859 {
860 for (l=0; l<HASHSTATE; ++l)
861 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
862
863 /*---- check that every output bit is affected by that input bit */
864 for (k=0; k<MAXPAIR; k+=2)
865 {
866 uint32_t finished=1;
867 /* keys have one bit different */
868 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
869 /* have a and b be two keys differing in only one bit */
870 a[i] ^= (k<<j);
871 a[i] ^= (k>>(8-j));
872 c[0] = hashlittle(a, hlen, m);
873 b[i] ^= ((k+1)<<j);
874 b[i] ^= ((k+1)>>(8-j));
875 d[0] = hashlittle(b, hlen, m);
876 /* check every bit is 1, 0, set, and not set at least once */
877 for (l=0; l<HASHSTATE; ++l)
878 {
879 e[l] &= (c[l]^d[l]);
880 f[l] &= ~(c[l]^d[l]);
881 g[l] &= c[l];
882 h[l] &= ~c[l];
883 x[l] &= d[l];
884 y[l] &= ~d[l];
885 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
886 }
887 if (finished) break;
888 }
889 if (k>z) z=k;
890 if (k==MAXPAIR)
891 {
892 printf("Some bit didn't change: ");
893 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
894 e[0],f[0],g[0],h[0],x[0],y[0]);
895 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
896 }
897 if (z==MAXPAIR) goto done;
898 }
899 }
900 }
901 done:
902 if (z < MAXPAIR)
903 {
904 printf("Mix success %2d bytes %2d initvals ",i,m);
905 printf("required %d trials\n", z/2);
906 }
907 }
908 printf("\n");
909}
910
911/* Check for reading beyond the end of the buffer and alignment problems */
912void driver3()
913{
914 uint8_t buf[MAXLEN+20], *b;
915 uint32_t len;
916 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
917 uint32_t h;
918 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
919 uint32_t i;
920 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
921 uint32_t j;
922 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
923 uint32_t ref,x,y;
924 uint8_t *p;
925
926 printf("Endianness. These lines should all be the same (for values filled in):\n");
927 printf("%.8x %.8x %.8x\n",
928 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
929 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
930 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
931 p = q;
932 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
933 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
934 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
935 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
936 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
937 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
938 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
939 p = &qq[1];
940 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
941 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
942 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
943 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
944 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
945 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
946 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
947 p = &qqq[2];
948 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
949 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
950 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
951 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
952 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
953 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
954 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
955 p = &qqqq[3];
956 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
957 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
958 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
959 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
960 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
961 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
962 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
963 printf("\n");
964
965 /* check that hashlittle2 and hashlittle produce the same results */
966 i=47; j=0;
967 hashlittle2(q, sizeof(q), &i, &j);
968 if (hashlittle(q, sizeof(q), 47) != i)
969 printf("hashlittle2 and hashlittle mismatch\n");
970
971 /* check that hashword2 and hashword produce the same results */
972 len = 0xdeadbeef;
973 i=47, j=0;
974 hashword2(&len, 1, &i, &j);
975 if (hashword(&len, 1, 47) != i)
976 printf("hashword2 and hashword mismatch %x %x\n",
977 i, hashword(&len, 1, 47));
978
979 /* check hashlittle doesn't read before or after the ends of the string */
980 for (h=0, b=buf+1; h<8; ++h, ++b)
981 {
982 for (i=0; i<MAXLEN; ++i)
983 {
984 len = i;
985 for (j=0; j<i; ++j) *(b+j)=0;
986
987 /* these should all be equal */
988 ref = hashlittle(b, len, (uint32_t)1);
989 *(b+i)=(uint8_t)~0;
990 *(b-1)=(uint8_t)~0;
991 x = hashlittle(b, len, (uint32_t)1);
992 y = hashlittle(b, len, (uint32_t)1);
993 if ((ref != x) || (ref != y))
994 {
995 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
996 h, i);
997 }
998 }
999 }
1000}
1001
1002/* check for problems with nulls */
1003 void driver4()
1004{
1005 uint8_t buf[1];
1006 uint32_t h,i,state[HASHSTATE];
1007
1008
1009 buf[0] = ~0;
1010 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1011 printf("These should all be different\n");
1012 for (i=0, h=0; i<8; ++i)
1013 {
1014 h = hashlittle(buf, 0, h);
1015 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1016 }
1017}
1018
1019void driver5()
1020{
1021 uint32_t b,c;
1022 b=0, c=0, hashlittle2("", 0, &c, &b);
1023 printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
1024 b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
1025 printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
1026 b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
1027 printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
1028 b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1029 printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
1030 b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
1031 printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
1032 b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
1033 printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
1034 c = hashlittle("Four score and seven years ago", 30, 0);
1035 printf("hash is %.8lx\n", c); /* 17770551 */
1036 c = hashlittle("Four score and seven years ago", 30, 1);
1037 printf("hash is %.8lx\n", c); /* cd628161 */
1038}
1039
1040
1041int main()
1042{
1043 driver1(); /* test that the key is hashed: used for timings */
1044 driver2(); /* test that whole key is hashed thoroughly */
1045 driver3(); /* test that nothing but the key is hashed */
1046 driver4(); /* test hashing multiple buffers (all buffers are null) */
1047 driver5(); /* test the hash against known vectors */
1048 return 1;
1049}
1050
1051#endif /* SELF_TEST */
1052
1053
1054#if _MSC_VER
1055#pragma warning(pop)
1056#endif /* _MSC_VER */
1057
1058#ifdef CBMC
1059# pragma CPROVER check pop
1060#endif /* CBMC */
1061
1062/* clang-format on */
1063#endif /* AWS_COMMON_PRIVATE_LOOKUP3_INL */
1064