/******************************************************
Copyright (c) 2017 Percona LLC and/or its affiliates.

CRC32 using Intel's PCLMUL instruction.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

*******************************************************/

/* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
 * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

# define U64_C(c) (c ## UL)

typedef uint32_t u32;
typedef uint16_t u16;
typedef uint64_t u64;
#ifndef byte
typedef uint8_t byte;
#endif

# define _gcry_bswap32 __builtin_bswap32

#if __GNUC__ >= 4 && defined(__x86_64__) && defined(HAVE_CLMUL_INSTRUCTION)

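/* _GCRY_GCC_VERSION is normally supplied by libgcrypt's g10lib.h, which this
 * standalone copy does not include; the fallback below (assumed to mirror the
 * g10lib.h definition) lets the "no-sse" pragma take effect here as well. */
#ifndef _GCRY_GCC_VERSION
# define _GCRY_GCC_VERSION \
    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
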
#if defined(_GCRY_GCC_VERSION) && _GCRY_GCC_VERSION >= 40400 /* 4.4 */
/* Prevent compiler from issuing SSE instructions between asm blocks. */
# pragma GCC target("no-sse")
#endif


#define ALIGNED_16 __attribute__ ((aligned (16)))


struct u16_unaligned_s
{
  u16 a;
} __attribute__((packed, aligned (1), may_alias));


/* Constants structure for generic reflected/non-reflected CRC32 CLMUL
 * functions. */
struct crc32_consts_s
{
  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
  u64 k[6];
  /* my_p: { floor(x^64 / P(x)), P(x) } */
  u64 my_p[2];
};


/* CLMUL constants for CRC32 and CRC32RFC1510. */
static const struct crc32_consts_s crc32_consts ALIGNED_16 =
{
  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
    U64_C(0x163cd6124), 0                   /* y = 2 */
  },
  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
    U64_C(0x1f7011641), U64_C(0x1db710641)
  }
};
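
/* These are the classic folding constants for the IEEE CRC-32 polynomial
 * P(x) = 0x04C11DB7 (bit-reflected form 0xEDB88320), cf. Intel's "Fast CRC
 * Computation for Generic Polynomials Using PCLMULQDQ Instruction" white
 * paper: k1/k2 fold the data 64 bytes at a time, k3/k4 fold 16 bytes, k5
 * drives the final 96->64 bit reduction, and my_p holds the Barrett
 * constants { floor(x^64 / P(x)), P(x) }. */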

/* Common constants for CRC32 algorithms. */
static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 =
{
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 =
{
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 =
{
  { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */
  { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) },
  { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) },
  { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) },
  { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) },
  { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) },
  { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */
};
static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 =
{
  { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */
  { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) },
  { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */
};
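
/* The tables above are consumed as pshufb control masks and byte masks:
 * crc32_refl_shuf_shift, indexed at [inlen] and [inlen + 16], shifts a
 * 16-byte vector so that a 1..15 byte tail lines up with the folded
 * accumulator (0xff entries produce zero bytes);
 * crc32_partial_fold_input_mask[inlen] keeps only the last inlen bytes of an
 * unaligned tail load; crc32_merge9to15_shuf and crc32_merge5to7_shuf pack
 * 9..15 and 5..7 byte inputs into a single 16-byte vector for the
 * short-input path. */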

/* PCLMUL functions for reflected CRC32. */
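/* Bulk path for inputs of at least 16 bytes.  The current CRC is xored into
 * the first input block and the running 128-bit remainder is folded with
 * carry-less multiplications: with at least 128 bytes of input, four
 * accumulators (XMM0..XMM3) are each advanced 64 bytes per iteration using
 * k1/k2 and then combined into one with k3/k4; any remaining whole 16-byte
 * blocks are folded one at a time; a 1..15 byte tail is merged in via the
 * shuffle tables above; finally the remainder is reduced to the 32-bit CRC
 * with k5 and a Barrett reduction by my_p. */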
static inline void
crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
                      const struct crc32_consts_s *consts)
{
  if (inlen >= 8 * 16)
    {
      asm volatile ("movd %[crc], %%xmm4\n\t"
                    "movdqu %[inbuf_0], %%xmm0\n\t"
                    "movdqu %[inbuf_1], %%xmm1\n\t"
                    "movdqu %[inbuf_2], %%xmm2\n\t"
                    "movdqu %[inbuf_3], %%xmm3\n\t"
                    "pxor %%xmm4, %%xmm0\n\t"
                    :
                    : [inbuf_0] "m" (inbuf[0 * 16]),
                      [inbuf_1] "m" (inbuf[1 * 16]),
                      [inbuf_2] "m" (inbuf[2 * 16]),
                      [inbuf_3] "m" (inbuf[3 * 16]),
                      [crc] "m" (*pcrc)
                    );

      inbuf += 4 * 16;
      inlen -= 4 * 16;

      asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
                    :
                    : [k1k2] "m" (consts->k[1 - 1])
                    );

      /* Fold by 4. */
      while (inlen >= 4 * 16)
        {
          asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
                        "movdqa %%xmm0, %%xmm6\n\t"
                        "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t"
                        "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
                        "pxor %%xmm5, %%xmm0\n\t"
                        "pxor %%xmm6, %%xmm0\n\t"

                        "movdqu %[inbuf_1], %%xmm5\n\t"
                        "movdqa %%xmm1, %%xmm6\n\t"
                        "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t"
                        "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
                        "pxor %%xmm5, %%xmm1\n\t"
                        "pxor %%xmm6, %%xmm1\n\t"

                        "movdqu %[inbuf_2], %%xmm5\n\t"
                        "movdqa %%xmm2, %%xmm6\n\t"
                        "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t"
                        "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
                        "pxor %%xmm5, %%xmm2\n\t"
                        "pxor %%xmm6, %%xmm2\n\t"

                        "movdqu %[inbuf_3], %%xmm5\n\t"
                        "movdqa %%xmm3, %%xmm6\n\t"
                        "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t"
                        "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
                        "pxor %%xmm5, %%xmm3\n\t"
                        "pxor %%xmm6, %%xmm3\n\t"
                        :
                        : [inbuf_0] "m" (inbuf[0 * 16]),
                          [inbuf_1] "m" (inbuf[1 * 16]),
                          [inbuf_2] "m" (inbuf[2 * 16]),
                          [inbuf_3] "m" (inbuf[3 * 16])
                        );

          inbuf += 4 * 16;
          inlen -= 4 * 16;
        }

      asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
                    "movdqa %[my_p], %%xmm5\n\t"
                    :
                    : [k3k4] "m" (consts->k[3 - 1]),
                      [my_p] "m" (consts->my_p[0])
                    );

      /* Fold 4 to 1. */

      asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
                    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
                    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
                    "pxor %%xmm1, %%xmm0\n\t"
                    "pxor %%xmm4, %%xmm0\n\t"

                    "movdqa %%xmm0, %%xmm4\n\t"
                    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
                    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
                    "pxor %%xmm2, %%xmm0\n\t"
                    "pxor %%xmm4, %%xmm0\n\t"

                    "movdqa %%xmm0, %%xmm4\n\t"
                    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
                    "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
                    "pxor %%xmm3, %%xmm0\n\t"
                    "pxor %%xmm4, %%xmm0\n\t"
                    :
                    :
                    );
    }
  else
    {
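      /* Fewer than eight 16-byte blocks: seed XMM0 with the first block
         xored with the current CRC, then fall through to the fold-by-1
         loop below. */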
      asm volatile ("movd %[crc], %%xmm1\n\t"
                    "movdqu %[inbuf], %%xmm0\n\t"
                    "movdqa %[k3k4], %%xmm6\n\t"
                    "pxor %%xmm1, %%xmm0\n\t"
                    "movdqa %[my_p], %%xmm5\n\t"
                    :
                    : [inbuf] "m" (*inbuf),
                      [crc] "m" (*pcrc),
                      [k3k4] "m" (consts->k[3 - 1]),
                      [my_p] "m" (consts->my_p[0])
                    );

      inbuf += 16;
      inlen -= 16;
    }

  /* Fold by 1. */
  if (inlen >= 16)
    {
      while (inlen >= 16)
        {
          /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
          asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
                        "movdqa %%xmm0, %%xmm1\n\t"
                        "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
                        "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
                        "pxor %%xmm2, %%xmm0\n\t"
                        "pxor %%xmm1, %%xmm0\n\t"
                        :
                        : [inbuf] "m" (*inbuf)
                        );

          inbuf += 16;
          inlen -= 16;
        }
    }

  /* Partial fold. */
  if (inlen)
    {
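      /* 1..15 trailing bytes remain: shift the accumulator and mask the
         unaligned tail load (using the tables above) so the leftover bytes
         can be folded as one final 128-bit block. */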
      /* Load last input and add padding zeros. */
      asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t"
                    "movdqu %[shl_shuf], %%xmm4\n\t"
                    "movdqu %[mask], %%xmm2\n\t"

                    "movdqa %%xmm0, %%xmm1\n\t"
                    "pshufb %%xmm4, %%xmm0\n\t"
                    "movdqu %[inbuf], %%xmm4\n\t"
                    "pshufb %%xmm3, %%xmm1\n\t"
                    "pand %%xmm4, %%xmm2\n\t"
                    "por %%xmm1, %%xmm2\n\t"

                    "movdqa %%xmm0, %%xmm1\n\t"
                    "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
                    "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
                    "pxor %%xmm2, %%xmm0\n\t"
                    "pxor %%xmm1, %%xmm0\n\t"
                    :
                    : [inbuf] "m" (*(inbuf - 16 + inlen)),
                      [mask] "m" (crc32_partial_fold_input_mask[inlen]),
                      [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]),
                      [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16])
                    );

      inbuf += inlen;
      inlen -= inlen;
    }

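  /* The 128-bit remainder in XMM0 is now reduced to the 32-bit CRC: first
     from 128 to 96 and then to 64 bits using k4 (high half of XMM6) and k5,
     followed by a Barrett reduction with my_p = { floor(x^64 / P(x)), P(x) };
     the result is extracted from the third dword of XMM0. */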
  /* Final fold. */
  asm volatile (/* reduce 128-bits to 96-bits */
                "movdqa %%xmm0, %%xmm1\n\t"
                "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
                "psrldq $8, %%xmm1\n\t"
                "pxor %%xmm1, %%xmm0\n\t"

                /* reduce 96-bits to 64-bits */
                "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
                "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
                "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
                "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */

                /* barrett reduction */
                "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
                "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
                "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
                "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
                "pxor %%xmm1, %%xmm0\n\t"

                /* store CRC */
                "pextrd $2, %%xmm0, %[out]\n\t"
                : [out] "=m" (*pcrc)
                : [k5] "m" (consts->k[5 - 1])
                );
}

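/* Short-input path for 1..15 bytes.  Inputs of fewer than 4 bytes are mixed
 * into the CRC with plain shifts and finished with a single Barrett
 * reduction; exactly 4 bytes get a direct Barrett reduction; 5..15 bytes are
 * packed into one 16-byte vector with the merge shuffle tables and then
 * reduced like the tail of the bulk path. */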
static inline void
crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
                              const struct crc32_consts_s *consts)
{
  if (inlen < 4)
    {
      u32 crc = *pcrc;
      u32 data;

      asm volatile ("movdqa %[my_p], %%xmm5\n\t"
                    :
                    : [my_p] "m" (consts->my_p[0])
                    );

      if (inlen == 1)
        {
          data = inbuf[0];
          data ^= crc;
          data <<= 24;
          crc >>= 8;
        }
      else if (inlen == 2)
        {
          data = ((const struct u16_unaligned_s *)inbuf)->a;
          data ^= crc;
          data <<= 16;
          crc >>= 16;
        }
      else
        {
          data = ((const struct u16_unaligned_s *)inbuf)->a;
          data |= inbuf[2] << 16;
          data ^= crc;
          data <<= 8;
          crc >>= 24;
        }

      /* Barrett reduction */
      asm volatile ("movd %[in], %%xmm0\n\t"
                    "movd %[crc], %%xmm1\n\t"

                    "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
                    "psllq $32, %%xmm1\n\t"
                    "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
                    "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
                    "pxor %%xmm1, %%xmm0\n\t"

                    "pextrd $1, %%xmm0, %[out]\n\t"
                    : [out] "=m" (*pcrc)
                    : [in] "rm" (data),
                      [crc] "rm" (crc)
                    );
    }
  else if (inlen == 4)
    {
      /* Barrett reduction */
      asm volatile ("movd %[crc], %%xmm1\n\t"
                    "movd %[in], %%xmm0\n\t"
                    "movdqa %[my_p], %%xmm5\n\t"
                    "pxor %%xmm1, %%xmm0\n\t"

                    "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
                    "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
                    "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */

                    "pextrd $1, %%xmm0, %[out]\n\t"
                    : [out] "=m" (*pcrc)
                    : [in] "m" (*inbuf),
                      [crc] "m" (*pcrc),
                      [my_p] "m" (consts->my_p[0])
                    );
    }
  else
    {
      asm volatile ("movdqu %[shuf], %%xmm4\n\t"
                    "movd %[crc], %%xmm1\n\t"
                    "movdqa %[my_p], %%xmm5\n\t"
                    "movdqa %[k3k4], %%xmm6\n\t"
                    :
                    : [shuf] "m" (crc32_refl_shuf_shift[inlen]),
                      [crc] "m" (*pcrc),
                      [my_p] "m" (consts->my_p[0]),
                      [k3k4] "m" (consts->k[3 - 1])
                    );

      if (inlen >= 8)
        {
          asm volatile ("movq %[inbuf], %%xmm0\n\t"
                        :
                        : [inbuf] "m" (*inbuf)
                        );
          if (inlen > 8)
            {
              asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
                            "movq %[inbuf_tail], %%xmm2\n\t"
                            "punpcklqdq %%xmm2, %%xmm0\n\t"
                            "pshufb %[merge_shuf], %%xmm0\n\t"
                            :
                            : [inbuf_tail] "m" (inbuf[inlen - 8]),
                              [merge_shuf] "m"
                              (*crc32_merge9to15_shuf[inlen - 9])
                            );
            }
        }
      else
        {
          asm volatile ("movd %[inbuf], %%xmm0\n\t"
                        "pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
                        "pshufb %[merge_shuf], %%xmm0\n\t"
                        :
                        : [inbuf] "m" (*inbuf),
                          [inbuf_tail] "m" (inbuf[inlen - 4]),
                          [merge_shuf] "m"
                          (*crc32_merge5to7_shuf[inlen - 5])
                        );
        }

      /* Final fold. */
      asm volatile ("pxor %%xmm1, %%xmm0\n\t"
                    "pshufb %%xmm4, %%xmm0\n\t"

                    /* reduce 128-bits to 96-bits */
                    "movdqa %%xmm0, %%xmm1\n\t"
                    "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
                    "psrldq $8, %%xmm1\n\t"
                    "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */

                    /* reduce 96-bits to 64-bits */
                    "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
                    "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
                    "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
                    "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */

                    /* barrett reduction */
                    "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
                    "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
                    "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
                    "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
                    "pxor %%xmm1, %%xmm0\n\t"

                    /* store CRC */
                    "pextrd $2, %%xmm0, %[out]\n\t"
                    : [out] "=m" (*pcrc)
                    : [k5] "m" (consts->k[5 - 1])
                    );
    }
}

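/* Entry point: updates the running CRC-32 state in *pcrc over inlen bytes of
 * inbuf.  The caller is responsible for verifying at runtime that the CPU
 * supports PCLMULQDQ together with SSE4.1/SSSE3 (pextrd/pinsrd, pshufb)
 * before calling this function; inlen == 0 leaves the CRC unchanged.  The
 * standard CRC-32 result is obtained by starting from 0xffffffff and
 * inverting the final state (see the usage sketch after this function). */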
void
crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
{
  const struct crc32_consts_s *consts = &crc32_consts;
#if defined(__x86_64__) && defined(__WIN64__)
  char win64tmp[2 * 16];

  /* XMM6-XMM7 need to be restored after use. */
  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
                "movdqu %%xmm7, 1*16(%0)\n\t"
                :
                : "r" (win64tmp)
                : "memory" );
#endif

  if (!inlen)
    return;

  if (inlen >= 16)
    crc32_reflected_bulk(pcrc, inbuf, inlen, consts);
  else
    crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts);

#if defined(__x86_64__) && defined(__WIN64__)
  /* Restore used registers. */
  asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
               "movdqu 1*16(%0), %%xmm7\n\t"
               :
               : "r" (win64tmp)
               : "memory" );
#endif
}
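
/* A minimal usage sketch: it assumes the conventional CRC-32 wrapping
 * (initial value 0xffffffff supplied by the caller, final bitwise inversion
 * of the state), which is how libgcrypt's CRC32 context drives this routine.
 * The CRC32_PCLMUL_DEMO guard is a hypothetical name used only for this
 * example; build it only on an x86-64 CPU with PCLMULQDQ/SSE4.1 support and
 * with HAVE_CLMUL_INSTRUCTION defined. */
#ifdef CRC32_PCLMUL_DEMO
int
main (void)
{
  static const byte msg[] = "123456789";
  u32 crc = 0xffffffffUL;        /* pre-inverted initial CRC-32 value */

  crc32_intel_pclmul (&crc, msg, sizeof (msg) - 1);
  crc ^= 0xffffffffUL;           /* final inversion */

  /* The standard CRC-32 check value for "123456789" is 0xcbf43926. */
  printf ("crc32(\"123456789\") = 0x%08x\n", (unsigned int) crc);
  return crc == 0xcbf43926UL ? 0 : 1;
}
#endif /* CRC32_PCLMUL_DEMO */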

#endif