1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/hash/Checksum.h> |
18 | #include <boost/crc.hpp> |
19 | #include <folly/CpuId.h> |
20 | #include <folly/hash/detail/ChecksumDetail.h> |
21 | #include <algorithm> |
22 | #include <stdexcept> |
23 | |
24 | #if FOLLY_SSE_PREREQ(4, 2) |
25 | #include <emmintrin.h> |
26 | #include <nmmintrin.h> |
27 | #endif |
28 | |
29 | namespace folly { |
30 | |
31 | namespace detail { |
32 | |
33 | uint32_t |
34 | crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); |
35 | #if FOLLY_SSE_PREREQ(4, 2) |
36 | |
37 | uint32_t |
38 | crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); |
39 | |
40 | // Fast SIMD implementation of CRC-32 for x86 with pclmul |
41 | uint32_t |
42 | crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
43 | uint32_t sum = startingChecksum; |
44 | size_t offset = 0; |
45 | |
46 | // Process unaligned bytes |
47 | if ((uintptr_t)data & 15) { |
48 | size_t limit = std::min(nbytes, -(uintptr_t)data & 15); |
49 | sum = crc32_sw(data, limit, sum); |
50 | offset += limit; |
51 | nbytes -= limit; |
52 | } |
53 | |
54 | if (nbytes >= 16) { |
55 | sum = crc32_hw_aligned(sum, (const __m128i*)(data + offset), nbytes / 16); |
56 | offset += nbytes & ~15; |
57 | nbytes &= 15; |
58 | } |
59 | |
60 | // Remaining unaligned bytes |
61 | return crc32_sw(data + offset, nbytes, sum); |
62 | } |
63 | |
64 | bool crc32c_hw_supported() { |
65 | static folly::CpuId id; |
66 | return id.sse42(); |
67 | } |
68 | |
69 | bool crc32_hw_supported() { |
70 | static folly::CpuId id; |
71 | return id.sse42(); |
72 | } |
73 | |
74 | #else |
75 | |
76 | uint32_t crc32_hw( |
77 | const uint8_t* /* data */, |
78 | size_t /* nbytes */, |
79 | uint32_t /* startingChecksum */) { |
80 | throw std::runtime_error("crc32_hw is not implemented on this platform" ); |
81 | } |
82 | |
83 | bool crc32c_hw_supported() { |
84 | return false; |
85 | } |
86 | |
87 | bool crc32_hw_supported() { |
88 | return false; |
89 | } |
90 | #endif |
91 | |
92 | template <uint32_t CRC_POLYNOMIAL> |
93 | uint32_t crc_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
94 | // Reverse the bits in the starting checksum so they'll be in the |
95 | // right internal format for Boost's CRC engine. |
96 | // O(1)-time, branchless bit reversal algorithm from |
97 | // http://graphics.stanford.edu/~seander/bithacks.html |
98 | startingChecksum = ((startingChecksum >> 1) & 0x55555555) | |
99 | ((startingChecksum & 0x55555555) << 1); |
100 | startingChecksum = ((startingChecksum >> 2) & 0x33333333) | |
101 | ((startingChecksum & 0x33333333) << 2); |
102 | startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) | |
103 | ((startingChecksum & 0x0f0f0f0f) << 4); |
104 | startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) | |
105 | ((startingChecksum & 0x00ff00ff) << 8); |
106 | startingChecksum = (startingChecksum >> 16) | (startingChecksum << 16); |
107 | |
108 | boost::crc_optimal<32, CRC_POLYNOMIAL, ~0U, 0, true, true> sum( |
109 | startingChecksum); |
110 | sum.process_bytes(data, nbytes); |
111 | return sum.checksum(); |
112 | } |
113 | |
114 | uint32_t |
115 | crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
116 | constexpr uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41; |
117 | return crc_sw<CRC32C_POLYNOMIAL>(data, nbytes, startingChecksum); |
118 | } |
119 | |
120 | uint32_t |
121 | crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
122 | constexpr uint32_t CRC32_POLYNOMIAL = 0x04C11DB7; |
123 | return crc_sw<CRC32_POLYNOMIAL>(data, nbytes, startingChecksum); |
124 | } |
125 | |
126 | } // namespace detail |
127 | |
128 | uint32_t crc32c(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
129 | if (detail::crc32c_hw_supported()) { |
130 | return detail::crc32c_hw(data, nbytes, startingChecksum); |
131 | } else { |
132 | return detail::crc32c_sw(data, nbytes, startingChecksum); |
133 | } |
134 | } |
135 | |
136 | uint32_t crc32(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
137 | if (detail::crc32_hw_supported()) { |
138 | return detail::crc32_hw(data, nbytes, startingChecksum); |
139 | } else { |
140 | return detail::crc32_sw(data, nbytes, startingChecksum); |
141 | } |
142 | } |
143 | |
144 | uint32_t |
145 | crc32_type(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) { |
146 | return ~crc32(data, nbytes, startingChecksum); |
147 | } |
148 | |
149 | uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
150 | // Append up to 32 bits of zeroes in the normal way |
151 | uint8_t data[4] = {0, 0, 0, 0}; |
152 | auto len = crc2len & 3; |
153 | if (len) { |
154 | crc1 = crc32(data, len, crc1); |
155 | } |
156 | |
157 | if (detail::crc32_hw_supported()) { |
158 | return detail::crc32_combine_hw(crc1, crc2, crc2len); |
159 | } else { |
160 | return detail::crc32_combine_sw(crc1, crc2, crc2len); |
161 | } |
162 | } |
163 | |
164 | uint32_t crc32c_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) { |
165 | // Append up to 32 bits of zeroes in the normal way |
166 | uint8_t data[4] = {0, 0, 0, 0}; |
167 | auto len = crc2len & 3; |
168 | if (len) { |
169 | crc1 = crc32c(data, len, crc1); |
170 | } |
171 | |
172 | if (detail::crc32_hw_supported()) { |
173 | return detail::crc32c_combine_hw(crc1, crc2, crc2len - len); |
174 | } else { |
175 | return detail::crc32c_combine_sw(crc1, crc2, crc2len - len); |
176 | } |
177 | } |
178 | |
179 | } // namespace folly |
180 | |