1 | // metrohash128crc.cpp |
2 | // |
3 | // Copyright 2015-2018 J. Andrew Rogers |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
6 | // you may not use this file except in compliance with the License. |
7 | // You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, software |
12 | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | // See the License for the specific language governing permissions and |
15 | // limitations under the License. |
16 | |
17 | |
18 | #include <nmmintrin.h> |
19 | #include <string.h> |
20 | #include "metrohash.h" |
21 | #include "platform.h" |
22 | |
23 | |
24 | void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) |
25 | { |
26 | static const uint64_t k0 = 0xC83A91E1; |
27 | static const uint64_t k1 = 0x8648DBDB; |
28 | static const uint64_t k2 = 0x7BDEC03B; |
29 | static const uint64_t k3 = 0x2F5870A5; |
30 | |
31 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key); |
32 | const uint8_t * const end = ptr + len; |
33 | |
34 | uint64_t v[4]; |
35 | |
36 | v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len; |
37 | v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len; |
38 | |
39 | if (len >= 32) |
40 | { |
41 | v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len; |
42 | v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len; |
43 | |
44 | do |
45 | { |
46 | v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; |
47 | v[1] ^= _mm_crc32_u64(v[1], read_u64(ptr)); ptr += 8; |
48 | v[2] ^= _mm_crc32_u64(v[2], read_u64(ptr)); ptr += 8; |
49 | v[3] ^= _mm_crc32_u64(v[3], read_u64(ptr)); ptr += 8; |
50 | } |
51 | while (ptr <= (end - 32)); |
52 | |
53 | v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 34) * k1; |
54 | v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 37) * k0; |
55 | v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 34) * k1; |
56 | v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 37) * k0; |
57 | } |
58 | |
59 | if ((end - ptr) >= 16) |
60 | { |
61 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],34) * k3; |
62 | v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],34) * k3; |
63 | v[0] ^= rotate_right((v[0] * k2) + v[1], 30) * k1; |
64 | v[1] ^= rotate_right((v[1] * k3) + v[0], 30) * k0; |
65 | } |
66 | |
67 | if ((end - ptr) >= 8) |
68 | { |
69 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],36) * k3; |
70 | v[0] ^= rotate_right((v[0] * k2) + v[1], 23) * k1; |
71 | } |
72 | |
73 | if ((end - ptr) >= 4) |
74 | { |
75 | v[1] ^= _mm_crc32_u64(v[0], read_u32(ptr)); ptr += 4; |
76 | v[1] ^= rotate_right((v[1] * k3) + v[0], 19) * k0; |
77 | } |
78 | |
79 | if ((end - ptr) >= 2) |
80 | { |
81 | v[0] ^= _mm_crc32_u64(v[1], read_u16(ptr)); ptr += 2; |
82 | v[0] ^= rotate_right((v[0] * k2) + v[1], 13) * k1; |
83 | } |
84 | |
85 | if ((end - ptr) >= 1) |
86 | { |
87 | v[1] ^= _mm_crc32_u64(v[0], read_u8 (ptr)); |
88 | v[1] ^= rotate_right((v[1] * k3) + v[0], 17) * k0; |
89 | } |
90 | |
91 | v[0] += rotate_right((v[0] * k0) + v[1], 11); |
92 | v[1] += rotate_right((v[1] * k1) + v[0], 26); |
93 | v[0] += rotate_right((v[0] * k0) + v[1], 11); |
94 | v[1] += rotate_right((v[1] * k1) + v[0], 26); |
95 | |
96 | memcpy(out, v, 16); |
97 | } |
98 | |
99 | |
100 | void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) |
101 | { |
102 | static const uint64_t k0 = 0xEE783E2F; |
103 | static const uint64_t k1 = 0xAD07C493; |
104 | static const uint64_t k2 = 0x797A90BB; |
105 | static const uint64_t k3 = 0x2E4B2E1B; |
106 | |
107 | const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key); |
108 | const uint8_t * const end = ptr + len; |
109 | |
110 | uint64_t v[4]; |
111 | |
112 | v[0] = ((static_cast<uint64_t>(seed) - k0) * k3) + len; |
113 | v[1] = ((static_cast<uint64_t>(seed) + k1) * k2) + len; |
114 | |
115 | if (len >= 32) |
116 | { |
117 | v[2] = ((static_cast<uint64_t>(seed) + k0) * k2) + len; |
118 | v[3] = ((static_cast<uint64_t>(seed) - k1) * k3) + len; |
119 | |
120 | do |
121 | { |
122 | v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; |
123 | v[1] ^= _mm_crc32_u64(v[1], read_u64(ptr)); ptr += 8; |
124 | v[2] ^= _mm_crc32_u64(v[2], read_u64(ptr)); ptr += 8; |
125 | v[3] ^= _mm_crc32_u64(v[3], read_u64(ptr)); ptr += 8; |
126 | } |
127 | while (ptr <= (end - 32)); |
128 | |
129 | v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 12) * k1; |
130 | v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 19) * k0; |
131 | v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 12) * k1; |
132 | v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 19) * k0; |
133 | } |
134 | |
135 | if ((end - ptr) >= 16) |
136 | { |
137 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],41) * k3; |
138 | v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],41) * k3; |
139 | v[0] ^= rotate_right((v[0] * k2) + v[1], 10) * k1; |
140 | v[1] ^= rotate_right((v[1] * k3) + v[0], 10) * k0; |
141 | } |
142 | |
143 | if ((end - ptr) >= 8) |
144 | { |
145 | v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],34) * k3; |
146 | v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1; |
147 | } |
148 | |
149 | if ((end - ptr) >= 4) |
150 | { |
151 | v[1] ^= _mm_crc32_u64(v[0], read_u32(ptr)); ptr += 4; |
152 | v[1] ^= rotate_right((v[1] * k3) + v[0], 14) * k0; |
153 | } |
154 | |
155 | if ((end - ptr) >= 2) |
156 | { |
157 | v[0] ^= _mm_crc32_u64(v[1], read_u16(ptr)); ptr += 2; |
158 | v[0] ^= rotate_right((v[0] * k2) + v[1], 15) * k1; |
159 | } |
160 | |
161 | if ((end - ptr) >= 1) |
162 | { |
163 | v[1] ^= _mm_crc32_u64(v[0], read_u8 (ptr)); |
164 | v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; |
165 | } |
166 | |
167 | v[0] += rotate_right((v[0] * k0) + v[1], 15); |
168 | v[1] += rotate_right((v[1] * k1) + v[0], 27); |
169 | v[0] += rotate_right((v[0] * k0) + v[1], 15); |
170 | v[1] += rotate_right((v[1] * k1) + v[0], 27); |
171 | |
172 | memcpy(out, v, 16); |
173 | } |
174 | |