1/*
2 * Copyright 2018-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/hash/detail/ChecksumDetail.h>
18
19#include <array>
20
21#include <folly/Bits.h>
22#include <folly/ConstexprMath.h>
23
24namespace folly {
25
26// Standard galois-field multiply. The only modification is that a,
27// b, m, and p are all bit-reflected.
28//
29// https://en.wikipedia.org/wiki/Finite_field_arithmetic
30static constexpr uint32_t
31gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
32 // clang-format off
33 return i == 32 ? p : gf_multiply_sw_1(
34 /* i = */ i + 1,
35 /* p = */ p ^ (-((b >> 31) & 1) & a),
36 /* a = */ (a >> 1) ^ (-(a & 1) & m),
37 /* b = */ b << 1,
38 /* m = */ m);
39 // clang-format on
40}
41static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
42 return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
43}
44
45static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
46 return gf_multiply_sw(a, a, m);
47}
48
49namespace {
50
51template <size_t i, uint32_t m>
52struct gf_powers_memo {
53 static constexpr uint32_t value =
54 gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
55};
56template <uint32_t m>
57struct gf_powers_memo<0, m> {
58 static constexpr uint32_t value = m;
59};
60
61template <uint32_t m>
62struct gf_powers_make {
63 template <size_t... i>
64 constexpr auto operator()(index_sequence<i...>) const {
65 return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
66 }
67};
68
69} // namespace
70
71#if FOLLY_SSE_PREREQ(4, 2)
72
73// Reduction taken from
74// https://www.nicst.de/crc.pdf
75//
76// This is an intrinsics-based implementation of listing 3.
77static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
78 const auto crc1_xmm = _mm_set_epi64x(0, crc1);
79 const auto crc2_xmm = _mm_set_epi64x(0, crc2);
80 const auto count = _mm_set_epi64x(0, 1);
81 const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
82 const auto res1 = _mm_sll_epi64(res0, count);
83
84 // Use hardware crc32c to do reduction from 64 -> 32 bytes
85 const auto res2 = _mm_cvtsi128_si64(res1);
86 const auto res3 = _mm_crc32_u32(0, res2);
87 const auto res4 = _mm_extract_epi32(res1, 1);
88 return res3 ^ res4;
89}
90
91static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
92 const auto crc1_xmm = _mm_set_epi64x(0, crc1);
93 const auto crc2_xmm = _mm_set_epi64x(0, crc2);
94 const auto count = _mm_set_epi64x(0, 1);
95 const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
96 const auto res1 = _mm_sll_epi64(res0, count);
97
98 // Do barrett reduction of 64 -> 32 bytes
99 const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
100 const auto barrett_reduction_constants =
101 _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
102 const auto res2 = _mm_clmulepi64_si128(
103 _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
104 const auto res3 = _mm_clmulepi64_si128(
105 _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
106 return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
107}
108
109#else
110
111static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
112 return 0;
113}
114static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
115 return 0;
116}
117
118#endif
119
120static constexpr uint32_t crc32c_m = 0x82f63b78;
121static constexpr uint32_t crc32_m = 0xedb88320;
122
123/*
124 * Pre-calculated powers tables for crc32c and crc32.
125 */
126static constexpr std::array<uint32_t, 62> const crc32c_powers =
127 gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
128static constexpr std::array<uint32_t, 62> const crc32_powers =
129 gf_powers_make<crc32_m>{}(make_index_sequence<62>{});
130
131template <typename F>
132static uint32_t crc32_append_zeroes(
133 F mult,
134 uint32_t crc,
135 size_t len,
136 uint32_t polynomial,
137 std::array<uint32_t, 62> const& powers_array) {
138 auto powers = powers_array.data();
139
140 // Append by multiplying by consecutive powers of two of the zeroes
141 // array
142 len >>= 2;
143
144 while (len) {
145 // Advance directly to next bit set.
146 auto r = findFirstSet(len) - 1;
147 len >>= r;
148 powers += r;
149
150 crc = mult(crc, *powers, polynomial);
151
152 len >>= 1;
153 powers++;
154 }
155
156 return crc;
157}
158
159namespace detail {
160
161uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
162 return crc2 ^
163 crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
164}
165
166uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
167 return crc2 ^
168 crc32_append_zeroes(
169 gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
170}
171
172uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
173 return crc2 ^
174 crc32_append_zeroes(
175 gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
176}
177
178uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
179 return crc2 ^
180 crc32_append_zeroes(
181 gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
182}
183
184} // namespace detail
185
186} // namespace folly
187