1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/hash/Checksum.h>
18#include <boost/crc.hpp>
19#include <folly/CpuId.h>
20#include <folly/hash/detail/ChecksumDetail.h>
21#include <algorithm>
22#include <stdexcept>
23
24#if FOLLY_SSE_PREREQ(4, 2)
25#include <emmintrin.h>
26#include <nmmintrin.h>
27#endif
28
29namespace folly {
30
31namespace detail {
32
33uint32_t
34crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum);
35#if FOLLY_SSE_PREREQ(4, 2)
36
37uint32_t
38crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum);
39
40// Fast SIMD implementation of CRC-32 for x86 with pclmul
41uint32_t
42crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
43 uint32_t sum = startingChecksum;
44 size_t offset = 0;
45
46 // Process unaligned bytes
47 if ((uintptr_t)data & 15) {
48 size_t limit = std::min(nbytes, -(uintptr_t)data & 15);
49 sum = crc32_sw(data, limit, sum);
50 offset += limit;
51 nbytes -= limit;
52 }
53
54 if (nbytes >= 16) {
55 sum = crc32_hw_aligned(sum, (const __m128i*)(data + offset), nbytes / 16);
56 offset += nbytes & ~15;
57 nbytes &= 15;
58 }
59
60 // Remaining unaligned bytes
61 return crc32_sw(data + offset, nbytes, sum);
62}
63
64bool crc32c_hw_supported() {
65 static folly::CpuId id;
66 return id.sse42();
67}
68
69bool crc32_hw_supported() {
70 static folly::CpuId id;
71 return id.sse42();
72}
73
74#else
75
76uint32_t crc32_hw(
77 const uint8_t* /* data */,
78 size_t /* nbytes */,
79 uint32_t /* startingChecksum */) {
80 throw std::runtime_error("crc32_hw is not implemented on this platform");
81}
82
83bool crc32c_hw_supported() {
84 return false;
85}
86
87bool crc32_hw_supported() {
88 return false;
89}
90#endif
91
92template <uint32_t CRC_POLYNOMIAL>
93uint32_t crc_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
94 // Reverse the bits in the starting checksum so they'll be in the
95 // right internal format for Boost's CRC engine.
96 // O(1)-time, branchless bit reversal algorithm from
97 // http://graphics.stanford.edu/~seander/bithacks.html
98 startingChecksum = ((startingChecksum >> 1) & 0x55555555) |
99 ((startingChecksum & 0x55555555) << 1);
100 startingChecksum = ((startingChecksum >> 2) & 0x33333333) |
101 ((startingChecksum & 0x33333333) << 2);
102 startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) |
103 ((startingChecksum & 0x0f0f0f0f) << 4);
104 startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) |
105 ((startingChecksum & 0x00ff00ff) << 8);
106 startingChecksum = (startingChecksum >> 16) | (startingChecksum << 16);
107
108 boost::crc_optimal<32, CRC_POLYNOMIAL, ~0U, 0, true, true> sum(
109 startingChecksum);
110 sum.process_bytes(data, nbytes);
111 return sum.checksum();
112}
113
114uint32_t
115crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
116 constexpr uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41;
117 return crc_sw<CRC32C_POLYNOMIAL>(data, nbytes, startingChecksum);
118}
119
120uint32_t
121crc32_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
122 constexpr uint32_t CRC32_POLYNOMIAL = 0x04C11DB7;
123 return crc_sw<CRC32_POLYNOMIAL>(data, nbytes, startingChecksum);
124}
125
126} // namespace detail
127
128uint32_t crc32c(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
129 if (detail::crc32c_hw_supported()) {
130 return detail::crc32c_hw(data, nbytes, startingChecksum);
131 } else {
132 return detail::crc32c_sw(data, nbytes, startingChecksum);
133 }
134}
135
136uint32_t crc32(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
137 if (detail::crc32_hw_supported()) {
138 return detail::crc32_hw(data, nbytes, startingChecksum);
139 } else {
140 return detail::crc32_sw(data, nbytes, startingChecksum);
141 }
142}
143
144uint32_t
145crc32_type(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
146 return ~crc32(data, nbytes, startingChecksum);
147}
148
149uint32_t crc32_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) {
150 // Append up to 32 bits of zeroes in the normal way
151 uint8_t data[4] = {0, 0, 0, 0};
152 auto len = crc2len & 3;
153 if (len) {
154 crc1 = crc32(data, len, crc1);
155 }
156
157 if (detail::crc32_hw_supported()) {
158 return detail::crc32_combine_hw(crc1, crc2, crc2len);
159 } else {
160 return detail::crc32_combine_sw(crc1, crc2, crc2len);
161 }
162}
163
164uint32_t crc32c_combine(uint32_t crc1, uint32_t crc2, size_t crc2len) {
165 // Append up to 32 bits of zeroes in the normal way
166 uint8_t data[4] = {0, 0, 0, 0};
167 auto len = crc2len & 3;
168 if (len) {
169 crc1 = crc32c(data, len, crc1);
170 }
171
172 if (detail::crc32_hw_supported()) {
173 return detail::crc32c_combine_hw(crc1, crc2, crc2len - len);
174 } else {
175 return detail::crc32c_combine_sw(crc1, crc2, crc2len - len);
176 }
177}
178
179} // namespace folly
180