Crc32CombineDetail.cpp source code [folly/hash/detail/Crc32CombineDetail.cpp]

1	/*
2	* Copyright 2018-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	#include <folly/hash/detail/ChecksumDetail.h>
18
19	#include <array>
20
21	#include <folly/Bits.h>
22	#include <folly/ConstexprMath.h>
23
24	namespace folly {
25
26	// Standard galois-field multiply. The only modification is that a,
27	// b, m, and p are all bit-reflected.
28	//
29	// https://en.wikipedia.org/wiki/Finite_field_arithmetic
30	static constexpr uint32_t
31	gf_multiply_sw_1(size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
32	// clang-format off
33	return i == `32` ? p : gf_multiply_sw_1(
34	/ i = / i + `1`,
35	/ p = / p ^ (-((b >> `31`) & `1`) & a),
36	/ a = / (a >> `1`) ^ (-(a & `1`) & m),
37	/ b = / b << `1`,
38	/ m = / m);
39	// clang-format on
40	}
41	static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
42	return gf_multiply_sw_1(/ i = / `0`, / p = / `0`, a, b, m);
43	}
44
45	static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
46	return gf_multiply_sw(a, a, m);
47	}
48
49	namespace {
50
51	template <size_t i, uint32_t m>
52	struct gf_powers_memo {
53	static constexpr uint32_t value =
54	gf_square_sw(gf_powers_memo<i - `1`, m>::value, m);
55	};
56	template <uint32_t m>
57	struct gf_powers_memo<`0`, m> {
58	static constexpr uint32_t value = m;
59	};
60
61	template <uint32_t m>
62	struct gf_powers_make {
63	template <size_t... i>
64	constexpr auto operator()(index_sequence<i...>) const {
65	return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
66	}
67	};
68
69	} // namespace
70
71	#if FOLLY_SSE_PREREQ(4, 2)
72
73	// Reduction taken from
74	// https://www.nicst.de/crc.pdf
75	//
76	// This is an intrinsics-based implementation of listing 3.
77	static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
78	const auto crc1_xmm = _mm_set_epi64x(`0`, crc1);
79	const auto crc2_xmm = _mm_set_epi64x(`0`, crc2);
80	const auto count = _mm_set_epi64x(`0`, `1`);
81	const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, `0x00`);
82	const auto res1 = _mm_sll_epi64(res0, count);
83
84	// Use hardware crc32c to do reduction from 64 -> 32 bytes
85	const auto res2 = _mm_cvtsi128_si64(res1);
86	const auto res3 = _mm_crc32_u32(`0`, res2);
87	const auto res4 = _mm_extract_epi32(res1, `1`);
88	return res3 ^ res4;
89	}
90
91	static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
92	const auto crc1_xmm = _mm_set_epi64x(`0`, crc1);
93	const auto crc2_xmm = _mm_set_epi64x(`0`, crc2);
94	const auto count = _mm_set_epi64x(`0`, `1`);
95	const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, `0x00`);
96	const auto res1 = _mm_sll_epi64(res0, count);
97
98	// Do barrett reduction of 64 -> 32 bytes
99	const auto mask32 = _mm_set_epi32(`0`, `0`, `0`, `0xFFFFFFFF`);
100	const auto barrett_reduction_constants =
101	_mm_set_epi32(`0x1`, `0xDB710641`, `0x1`, `0xF7011641`);
102	const auto res2 = _mm_clmulepi64_si128(
103	_mm_and_si128(res1, mask32), barrett_reduction_constants, `0x00`);
104	const auto res3 = _mm_clmulepi64_si128(
105	_mm_and_si128(res2, mask32), barrett_reduction_constants, `0x10`);
106	return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), `4`));
107	}
108
109	#else
110
111	static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
112	return `0`;
113	}
114	static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
115	return `0`;
116	}
117
118	#endif
119
120	static constexpr uint32_t crc32c_m = `0x82f63b78`;
121	static constexpr uint32_t crc32_m = `0xedb88320`;
122
123	/*
124	* Pre-calculated powers tables for crc32c and crc32.
125	*/
126	static constexpr std::array<uint32_t, `62`> const crc32c_powers =
127	gf_powers_make<crc32c_m>{}(make_index_sequence<`62`>{});
128	static constexpr std::array<uint32_t, `62`> const crc32_powers =
129	gf_powers_make<crc32_m>{}(make_index_sequence<`62`>{});
130
131	template <typename F>
132	static uint32_t crc32_append_zeroes(
133	F mult,
134	uint32_t crc,
135	size_t len,
136	uint32_t polynomial,
137	std::array<uint32_t, `62`> const& powers_array) {
138	auto powers = powers_array.data();
139
140	// Append by multiplying by consecutive powers of two of the zeroes
141	// array
142	len >>= `2`;
143
144	while (len) {
145	// Advance directly to next bit set.
146	auto r = findFirstSet(len) - `1`;
147	len >>= r;
148	powers += r;
149
150	crc = mult(crc, *powers, polynomial);
151
152	len >>= `1`;
153	powers++;
154	}
155
156	return crc;
157	}
158
159	namespace detail {
160
161	uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
162	return crc2 ^
163	crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
164	}
165
166	uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
167	return crc2 ^
168	crc32_append_zeroes(
169	gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
170	}
171
172	uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
173	return crc2 ^
174	crc32_append_zeroes(
175	gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
176	}
177
178	uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
179	return crc2 ^
180	crc32_append_zeroes(
181	gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
182	}
183
184	} // namespace detail
185
186	} // namespace folly
187

Browse the source code of folly/hash/detail/Crc32CombineDetail.cpp