1/* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
6 * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
7 * tables for updating the shift register in one step with three exclusive-ors
8 * instead of four steps with four exclusive-ors. This results in about a
9 * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
10 */
11
12/* @(#) $Id$ */
13
14/*
15 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
16 protection on the static variables used to control the first-use generation
17 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
18 first call get_crc_table() to initialize the tables before allowing more than
19 one thread to use crc32().
20
21 DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
22 */
23
24#ifdef MAKECRCH
25# include <stdio.h>
26# ifndef DYNAMIC_CRC_TABLE
27# define DYNAMIC_CRC_TABLE
28# endif /* !DYNAMIC_CRC_TABLE */
29#endif /* MAKECRCH */
30
31#include "deflate.h"
32#include "x86.h"
33#include "zutil.h" /* for STDC and FAR definitions */
34
35#if defined(CRC32_SIMD_SSE42_PCLMUL)
36#include "crc32_simd.h"
37#elif defined(CRC32_ARMV8_CRC32)
38#include "arm_features.h"
39#include "crc32_simd.h"
40#endif
41
42/* Definitions for doing the crc four data bytes at a time. */
43#if !defined(NOBYFOUR) && defined(Z_U4)
44# define BYFOUR
45#endif
46#ifdef BYFOUR
47 local unsigned long crc32_little OF((unsigned long,
48 const unsigned char FAR *, z_size_t));
49 local unsigned long crc32_big OF((unsigned long,
50 const unsigned char FAR *, z_size_t));
51# define TBLS 8
52#else
53# define TBLS 1
54#endif /* BYFOUR */
55
56/* Local functions for crc concatenation */
57local unsigned long gf2_matrix_times OF((unsigned long *mat,
58 unsigned long vec));
59local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
60local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
61
62
63#ifdef DYNAMIC_CRC_TABLE
64
65local volatile int crc_table_empty = 1;
66local z_crc_t FAR crc_table[TBLS][256];
67local void make_crc_table OF((void));
68#ifdef MAKECRCH
69 local void write_table OF((FILE *, const z_crc_t FAR *));
70#endif /* MAKECRCH */
71/*
72 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
73 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
74
75 Polynomials over GF(2) are represented in binary, one bit per coefficient,
76 with the lowest powers in the most significant bit. Then adding polynomials
77 is just exclusive-or, and multiplying a polynomial by x is a right shift by
78 one. If we call the above polynomial p, and represent a byte as the
79 polynomial q, also with the lowest power in the most significant bit (so the
80 byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
81 where a mod b means the remainder after dividing a by b.
82
83 This calculation is done using the shift-register method of multiplying and
84 taking the remainder. The register is initialized to zero, and for each
85 incoming bit, x^32 is added mod p to the register if the bit is a one (where
86 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
87 x (which is shifting right by one and adding x^32 mod p if the bit shifted
88 out is a one). We start with the highest power (least significant bit) of
89 q and repeat for all eight bits of q.
90
91 The first table is simply the CRC of all possible eight bit values. This is
92 all the information needed to generate CRCs on data a byte at a time for all
93 combinations of CRC register values and incoming bytes. The remaining tables
94 allow for word-at-a-time CRC calculation for both big-endian and little-
95 endian machines, where a word is four bytes.
96*/
97local void make_crc_table()
98{
99 z_crc_t c;
100 int n, k;
101 z_crc_t poly; /* polynomial exclusive-or pattern */
102 /* terms of polynomial defining this crc (except x^32): */
103 static volatile int first = 1; /* flag to limit concurrent making */
104 static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
105
106 /* See if another task is already doing this (not thread-safe, but better
107 than nothing -- significantly reduces duration of vulnerability in
108 case the advice about DYNAMIC_CRC_TABLE is ignored) */
109 if (first) {
110 first = 0;
111
112 /* make exclusive-or pattern from polynomial (0xedb88320UL) */
113 poly = 0;
114 for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
115 poly |= (z_crc_t)1 << (31 - p[n]);
116
117 /* generate a crc for every 8-bit value */
118 for (n = 0; n < 256; n++) {
119 c = (z_crc_t)n;
120 for (k = 0; k < 8; k++)
121 c = c & 1 ? poly ^ (c >> 1) : c >> 1;
122 crc_table[0][n] = c;
123 }
124
125#ifdef BYFOUR
126 /* generate crc for each value followed by one, two, and three zeros,
127 and then the byte reversal of those as well as the first table */
128 for (n = 0; n < 256; n++) {
129 c = crc_table[0][n];
130 crc_table[4][n] = ZSWAP32(c);
131 for (k = 1; k < 4; k++) {
132 c = crc_table[0][c & 0xff] ^ (c >> 8);
133 crc_table[k][n] = c;
134 crc_table[k + 4][n] = ZSWAP32(c);
135 }
136 }
137#endif /* BYFOUR */
138
139 crc_table_empty = 0;
140 }
141 else { /* not first */
142 /* wait for the other guy to finish (not efficient, but rare) */
143 while (crc_table_empty)
144 ;
145 }
146
147#ifdef MAKECRCH
148 /* write out CRC tables to crc32.h */
149 {
150 FILE *out;
151
152 out = fopen("crc32.h", "w");
153 if (out == NULL) return;
154 fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
155 fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
156 fprintf(out, "local const z_crc_t FAR ");
157 fprintf(out, "crc_table[TBLS][256] =\n{\n {\n");
158 write_table(out, crc_table[0]);
159# ifdef BYFOUR
160 fprintf(out, "#ifdef BYFOUR\n");
161 for (k = 1; k < 8; k++) {
162 fprintf(out, " },\n {\n");
163 write_table(out, crc_table[k]);
164 }
165 fprintf(out, "#endif\n");
166# endif /* BYFOUR */
167 fprintf(out, " }\n};\n");
168 fclose(out);
169 }
170#endif /* MAKECRCH */
171}
172
173#ifdef MAKECRCH
174local void write_table(out, table)
175 FILE *out;
176 const z_crc_t FAR *table;
177{
178 int n;
179
180 for (n = 0; n < 256; n++)
181 fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ",
182 (unsigned long)(table[n]),
183 n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
184}
185#endif /* MAKECRCH */
186
187#else /* !DYNAMIC_CRC_TABLE */
188/* ========================================================================
189 * Tables of CRC-32s of all single-byte values, made by make_crc_table().
190 */
191#include "crc32.h"
192#endif /* DYNAMIC_CRC_TABLE */
193
194/* =========================================================================
195 * This function can be used by asm versions of crc32()
196 */
197const z_crc_t FAR * ZEXPORT get_crc_table()
198{
199#ifdef DYNAMIC_CRC_TABLE
200 if (crc_table_empty)
201 make_crc_table();
202#endif /* DYNAMIC_CRC_TABLE */
203 return (const z_crc_t FAR *)crc_table;
204}
205
206/* ========================================================================= */
207#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
208#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
209
210/* ========================================================================= */
211unsigned long ZEXPORT crc32_z(crc, buf, len)
212 unsigned long crc;
213 const unsigned char FAR *buf;
214 z_size_t len;
215{
216 /*
217 * zlib convention is to call crc32(0, NULL, 0); before making
218 * calls to crc32(). So this is a good, early (and infrequent)
219 * place to cache CPU features if needed for those later, more
220 * interesting crc32() calls.
221 */
222#if defined(CRC32_SIMD_SSE42_PCLMUL)
223 /*
224 * Use x86 sse4.2+pclmul SIMD to compute the crc32. Since this
225 * routine can be freely used, check CPU features here.
226 */
227 if (buf == Z_NULL) {
228 if (!len) /* Assume user is calling crc32(0, NULL, 0); */
229 x86_check_features();
230 return 0UL;
231 }
232
233 if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
234 /* crc32 16-byte chunks */
235 z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;
236 crc = ~crc32_sse42_simd_(buf, chunk_size, ~(uint32_t)crc);
237 /* check remaining data */
238 len -= chunk_size;
239 if (!len)
240 return crc;
241 /* Fall into the default crc32 for the remaining data. */
242 buf += chunk_size;
243 }
244#else
245 if (buf == Z_NULL) {
246 return 0UL;
247 }
248#endif /* CRC32_SIMD_SSE42_PCLMUL */
249
250#ifdef DYNAMIC_CRC_TABLE
251 if (crc_table_empty)
252 make_crc_table();
253#endif /* DYNAMIC_CRC_TABLE */
254
255#ifdef BYFOUR
256 if (sizeof(void *) == sizeof(ptrdiff_t)) {
257 z_crc_t endian;
258
259 endian = 1;
260 if (*((unsigned char *)(&endian)))
261 return crc32_little(crc, buf, len);
262 else
263 return crc32_big(crc, buf, len);
264 }
265#endif /* BYFOUR */
266 crc = crc ^ 0xffffffffUL;
267 while (len >= 8) {
268 DO8;
269 len -= 8;
270 }
271 if (len) do {
272 DO1;
273 } while (--len);
274 return crc ^ 0xffffffffUL;
275}
276
277/* ========================================================================= */
278unsigned long ZEXPORT crc32(crc, buf, len)
279 unsigned long crc;
280 const unsigned char FAR *buf;
281 uInt len;
282{
283#if defined(CRC32_ARMV8_CRC32)
284 /* We got to verify ARM CPU features, so exploit the common usage pattern
285 * of calling this function with Z_NULL for an initial valid crc value.
286 * This allows to cache the result of the feature check and avoid extraneous
287 * function calls.
288 * TODO: try to move this to crc32_z if we don't loose performance on ARM.
289 */
290 if (buf == Z_NULL) {
291 if (!len) /* Assume user is calling crc32(0, NULL, 0); */
292 arm_check_features();
293 return 0UL;
294 }
295
296 if (arm_cpu_enable_crc32)
297 return armv8_crc32_little(crc, buf, len);
298#endif
299 return crc32_z(crc, buf, len);
300}
301
302#ifdef BYFOUR
303
304/*
305 This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
306 integer pointer type. This violates the strict aliasing rule, where a
307 compiler can assume, for optimization purposes, that two pointers to
308 fundamentally different types won't ever point to the same memory. This can
309 manifest as a problem only if one of the pointers is written to. This code
310 only reads from those pointers. So long as this code remains isolated in
311 this compilation unit, there won't be a problem. For this reason, this code
312 should not be copied and pasted into a compilation unit in which other code
313 writes to the buffer that is passed to these routines.
314 */
315
316/* ========================================================================= */
317#define DOLIT4 c ^= *buf4++; \
318 c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
319 crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
320#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
321
322/* ========================================================================= */
323local unsigned long crc32_little(crc, buf, len)
324 unsigned long crc;
325 const unsigned char FAR *buf;
326 z_size_t len;
327{
328 register z_crc_t c;
329 register const z_crc_t FAR *buf4;
330
331 c = (z_crc_t)crc;
332 c = ~c;
333 while (len && ((ptrdiff_t)buf & 3)) {
334 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
335 len--;
336 }
337
338 buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
339 while (len >= 32) {
340 DOLIT32;
341 len -= 32;
342 }
343 while (len >= 4) {
344 DOLIT4;
345 len -= 4;
346 }
347 buf = (const unsigned char FAR *)buf4;
348
349 if (len) do {
350 c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
351 } while (--len);
352 c = ~c;
353 return (unsigned long)c;
354}
355
356/* ========================================================================= */
357#define DOBIG4 c ^= *buf4++; \
358 c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
359 crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
360#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
361
362/* ========================================================================= */
363local unsigned long crc32_big(crc, buf, len)
364 unsigned long crc;
365 const unsigned char FAR *buf;
366 z_size_t len;
367{
368 register z_crc_t c;
369 register const z_crc_t FAR *buf4;
370
371 c = ZSWAP32((z_crc_t)crc);
372 c = ~c;
373 while (len && ((ptrdiff_t)buf & 3)) {
374 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
375 len--;
376 }
377
378 buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
379 while (len >= 32) {
380 DOBIG32;
381 len -= 32;
382 }
383 while (len >= 4) {
384 DOBIG4;
385 len -= 4;
386 }
387 buf = (const unsigned char FAR *)buf4;
388
389 if (len) do {
390 c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
391 } while (--len);
392 c = ~c;
393 return (unsigned long)(ZSWAP32(c));
394}
395
396#endif /* BYFOUR */
397
398#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
399
400/* ========================================================================= */
401local unsigned long gf2_matrix_times(mat, vec)
402 unsigned long *mat;
403 unsigned long vec;
404{
405 unsigned long sum;
406
407 sum = 0;
408 while (vec) {
409 if (vec & 1)
410 sum ^= *mat;
411 vec >>= 1;
412 mat++;
413 }
414 return sum;
415}
416
417/* ========================================================================= */
418local void gf2_matrix_square(square, mat)
419 unsigned long *square;
420 unsigned long *mat;
421{
422 int n;
423
424 for (n = 0; n < GF2_DIM; n++)
425 square[n] = gf2_matrix_times(mat, mat[n]);
426}
427
428/* ========================================================================= */
429local uLong crc32_combine_(crc1, crc2, len2)
430 uLong crc1;
431 uLong crc2;
432 z_off64_t len2;
433{
434 int n;
435 unsigned long row;
436 unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */
437 unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */
438
439 /* degenerate case (also disallow negative lengths) */
440 if (len2 <= 0)
441 return crc1;
442
443 /* put operator for one zero bit in odd */
444 odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
445 row = 1;
446 for (n = 1; n < GF2_DIM; n++) {
447 odd[n] = row;
448 row <<= 1;
449 }
450
451 /* put operator for two zero bits in even */
452 gf2_matrix_square(even, odd);
453
454 /* put operator for four zero bits in odd */
455 gf2_matrix_square(odd, even);
456
457 /* apply len2 zeros to crc1 (first square will put the operator for one
458 zero byte, eight zero bits, in even) */
459 do {
460 /* apply zeros operator for this bit of len2 */
461 gf2_matrix_square(even, odd);
462 if (len2 & 1)
463 crc1 = gf2_matrix_times(even, crc1);
464 len2 >>= 1;
465
466 /* if no more bits set, then done */
467 if (len2 == 0)
468 break;
469
470 /* another iteration of the loop with odd and even swapped */
471 gf2_matrix_square(odd, even);
472 if (len2 & 1)
473 crc1 = gf2_matrix_times(odd, crc1);
474 len2 >>= 1;
475
476 /* if no more bits set, then done */
477 } while (len2 != 0);
478
479 /* return combined crc */
480 crc1 ^= crc2;
481 return crc1;
482}
483
484/* ========================================================================= */
485uLong ZEXPORT crc32_combine(crc1, crc2, len2)
486 uLong crc1;
487 uLong crc2;
488 z_off_t len2;
489{
490 return crc32_combine_(crc1, crc2, len2);
491}
492
493uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
494 uLong crc1;
495 uLong crc2;
496 z_off64_t len2;
497{
498 return crc32_combine_(crc1, crc2, len2);
499}
500
501ZLIB_INTERNAL void crc_reset(deflate_state *const s)
502{
503 if (x86_cpu_enable_simd) {
504 crc_fold_init(s);
505 return;
506 }
507 s->strm->adler = crc32(0L, Z_NULL, 0);
508}
509
510ZLIB_INTERNAL void crc_finalize(deflate_state *const s)
511{
512 if (x86_cpu_enable_simd)
513 s->strm->adler = crc_fold_512to32(s);
514}
515
516ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size)
517{
518 if (x86_cpu_enable_simd) {
519 crc_fold_copy(strm->state, dst, strm->next_in, size);
520 return;
521 }
522 zmemcpy(dst, strm->next_in, size);
523 strm->adler = crc32(strm->adler, dst, size);
524}
525