1 | /* |
2 | * Copyright 2018-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/hash/detail/ChecksumDetail.h> |
18 | |
19 | #include <array> |
20 | |
21 | #include <folly/Bits.h> |
22 | #include <folly/ConstexprMath.h> |
23 | |
24 | namespace folly { |
25 | |
26 | // Standard galois-field multiply. The only modification is that a, |
27 | // b, m, and p are all bit-reflected. |
28 | // |
29 | // https://en.wikipedia.org/wiki/Finite_field_arithmetic |
30 | static constexpr uint32_t |
31 | gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) { |
32 | // clang-format off |
33 | return i == 32 ? p : gf_multiply_sw_1( |
34 | /* i = */ i + 1, |
35 | /* p = */ p ^ (-((b >> 31) & 1) & a), |
36 | /* a = */ (a >> 1) ^ (-(a & 1) & m), |
37 | /* b = */ b << 1, |
38 | /* m = */ m); |
39 | // clang-format on |
40 | } |
41 | static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) { |
42 | return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m); |
43 | } |
44 | |
45 | static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) { |
46 | return gf_multiply_sw(a, a, m); |
47 | } |
48 | |
49 | namespace { |
50 | |
51 | template <size_t i, uint32_t m> |
52 | struct gf_powers_memo { |
53 | static constexpr uint32_t value = |
54 | gf_square_sw(gf_powers_memo<i - 1, m>::value, m); |
55 | }; |
56 | template <uint32_t m> |
57 | struct gf_powers_memo<0, m> { |
58 | static constexpr uint32_t value = m; |
59 | }; |
60 | |
61 | template <uint32_t m> |
62 | struct gf_powers_make { |
63 | template <size_t... i> |
64 | constexpr auto operator()(index_sequence<i...>) const { |
65 | return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}}; |
66 | } |
67 | }; |
68 | |
69 | } // namespace |
70 | |
71 | #if FOLLY_SSE_PREREQ(4, 2) |
72 | |
73 | // Reduction taken from |
74 | // https://www.nicst.de/crc.pdf |
75 | // |
76 | // This is an intrinsics-based implementation of listing 3. |
77 | static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) { |
78 | const auto crc1_xmm = _mm_set_epi64x(0, crc1); |
79 | const auto crc2_xmm = _mm_set_epi64x(0, crc2); |
80 | const auto count = _mm_set_epi64x(0, 1); |
81 | const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00); |
82 | const auto res1 = _mm_sll_epi64(res0, count); |
83 | |
84 | // Use hardware crc32c to do reduction from 64 -> 32 bytes |
85 | const auto res2 = _mm_cvtsi128_si64(res1); |
86 | const auto res3 = _mm_crc32_u32(0, res2); |
87 | const auto res4 = _mm_extract_epi32(res1, 1); |
88 | return res3 ^ res4; |
89 | } |
90 | |
91 | static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) { |
92 | const auto crc1_xmm = _mm_set_epi64x(0, crc1); |
93 | const auto crc2_xmm = _mm_set_epi64x(0, crc2); |
94 | const auto count = _mm_set_epi64x(0, 1); |
95 | const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00); |
96 | const auto res1 = _mm_sll_epi64(res0, count); |
97 | |
98 | // Do barrett reduction of 64 -> 32 bytes |
99 | const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF); |
100 | const auto barrett_reduction_constants = |
101 | _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641); |
102 | const auto res2 = _mm_clmulepi64_si128( |
103 | _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00); |
104 | const auto res3 = _mm_clmulepi64_si128( |
105 | _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10); |
106 | return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4)); |
107 | } |
108 | |
109 | #else |
110 | |
111 | static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) { |
112 | return 0; |
113 | } |
114 | static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) { |
115 | return 0; |
116 | } |
117 | |
118 | #endif |
119 | |
120 | static constexpr uint32_t crc32c_m = 0x82f63b78; |
121 | static constexpr uint32_t crc32_m = 0xedb88320; |
122 | |
123 | /* |
124 | * Pre-calculated powers tables for crc32c and crc32. |
125 | */ |
126 | static constexpr std::array<uint32_t, 62> const crc32c_powers = |
127 | gf_powers_make<crc32c_m>{}(make_index_sequence<62>{}); |
128 | static constexpr std::array<uint32_t, 62> const crc32_powers = |
129 | gf_powers_make<crc32_m>{}(make_index_sequence<62>{}); |
130 | |
131 | template <typename F> |
132 | static uint32_t crc32_append_zeroes( |
133 | F mult, |
134 | uint32_t crc, |
135 | size_t len, |
136 | uint32_t polynomial, |
137 | std::array<uint32_t, 62> const& powers_array) { |
138 | auto powers = powers_array.data(); |
139 | |
140 | // Append by multiplying by consecutive powers of two of the zeroes |
141 | // array |
142 | len >>= 2; |
143 | |
144 | while (len) { |
145 | // Advance directly to next bit set. |
146 | auto r = findFirstSet(len) - 1; |
147 | len >>= r; |
148 | powers += r; |
149 | |
150 | crc = mult(crc, *powers, polynomial); |
151 | |
152 | len >>= 1; |
153 | powers++; |
154 | } |
155 | |
156 | return crc; |
157 | } |
158 | |
159 | namespace detail { |
160 | |
161 | uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
162 | return crc2 ^ |
163 | crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers); |
164 | } |
165 | |
166 | uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
167 | return crc2 ^ |
168 | crc32_append_zeroes( |
169 | gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers); |
170 | } |
171 | |
172 | uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
173 | return crc2 ^ |
174 | crc32_append_zeroes( |
175 | gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers); |
176 | } |
177 | |
178 | uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
179 | return crc2 ^ |
180 | crc32_append_zeroes( |
181 | gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers); |
182 | } |
183 | |
184 | } // namespace detail |
185 | |
186 | } // namespace folly |
187 | |