1 | /* |
2 | * Copyright (c) 2016, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #ifndef STREAM_LONG_LIT_HASH_H |
30 | #define STREAM_LONG_LIT_HASH_H |
31 | |
32 | #include "ue2common.h" |
33 | #include "util/bitutils.h" |
34 | #include "util/unaligned.h" |
35 | |
36 | /** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ |
37 | #define LONG_LIT_HASH_LEN 24 |
38 | |
39 | /** \brief Multiplier used by al the hash functions below. */ |
40 | #define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL |
41 | |
42 | /** \brief Hash function used for long literal table in streaming mode. */ |
43 | static really_inline |
44 | u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { |
45 | // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this |
46 | // hash are for strings longer than this. |
47 | assert(len >= 24); |
48 | |
49 | u64a v1 = unaligned_load_u64a(ptr); |
50 | u64a v2 = unaligned_load_u64a(ptr + 8); |
51 | u64a v3 = unaligned_load_u64a(ptr + 16); |
52 | if (nocase) { |
53 | v1 &= OCTO_CASE_CLEAR; |
54 | v2 &= OCTO_CASE_CLEAR; |
55 | v3 &= OCTO_CASE_CLEAR; |
56 | } |
57 | v1 *= HASH_MULTIPLIER; |
58 | v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER; |
59 | v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; |
60 | v1 >>= 32; |
61 | v2 >>= 32; |
62 | v3 >>= 32; |
63 | return v1 ^ v2 ^ v3; |
64 | } |
65 | |
66 | /** |
67 | * \brief Internal, used by the bloom filter hash functions below. Hashes 16 |
68 | * bytes beginning at (ptr + offset). |
69 | */ |
70 | static really_inline |
71 | u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) { |
72 | assert(offset + 16 <= LONG_LIT_HASH_LEN); |
73 | |
74 | u64a v = unaligned_load_u64a(ptr + offset); |
75 | if (nocase) { |
76 | v &= OCTO_CASE_CLEAR; |
77 | } |
78 | v *= multiplier; |
79 | return v >> 32; |
80 | } |
81 | |
82 | /* |
83 | * We ensure that we see every byte the first LONG_LIT_HASH_LEN bytes of input |
84 | * data (using at least one of the following functions). |
85 | */ |
86 | |
87 | static really_inline |
88 | u32 bloomHash_1(const u8 *ptr, char nocase) { |
89 | const u64a multiplier = HASH_MULTIPLIER; |
90 | return bloomHash_i(ptr, 0, multiplier, nocase); |
91 | } |
92 | |
93 | static really_inline |
94 | u32 bloomHash_2(const u8 *ptr, char nocase) { |
95 | const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER; |
96 | return bloomHash_i(ptr, 4, multiplier, nocase); |
97 | } |
98 | |
99 | static really_inline |
100 | u32 bloomHash_3(const u8 *ptr, char nocase) { |
101 | const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; |
102 | return bloomHash_i(ptr, 8, multiplier, nocase); |
103 | } |
104 | |
105 | #endif // STREAM_LONG_LIT_HASH_H |
106 | |