1 | /* adler32.c -- compute the Adler-32 checksum of a data stream |
2 | * Copyright (C) 1995-2011, 2016 Mark Adler |
3 | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | */ |
5 | |
6 | /* @(#) $Id$ */ |
7 | |
8 | #include "zutil.h" |
9 | |
/* Forward declaration of the file-local worker that adler32_combine() and
   adler32_combine64() both delegate to; the definition appears further down
   in this file. */
local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
11 | |
#define BASE 65521U     /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/*
 * Unrolled accumulation steps.  DOn(buf,i) folds the n consecutive bytes
 * starting at (buf)[i] into two variables that must already be in scope at
 * the expansion site: `adler` (running byte sum) and `sum2` (running sum of
 * `adler`).  No modulo reduction is performed here.
 *
 * Each macro expands to exactly one do { } while (0) statement, so it can be
 * used safely as the body of an unbraced if/else, like the MOD family below.
 * Note that `buf` is evaluated once per byte, so pass an expression without
 * side effects.
 */
#define DO1(buf,i)  do { adler += (buf)[i]; sum2 += adler; } while (0)
#define DO2(buf,i)  do { DO1(buf,i); DO1(buf,(i)+1); } while (0)
#define DO4(buf,i)  do { DO2(buf,i); DO2(buf,(i)+2); } while (0)
#define DO8(buf,i)  do { DO4(buf,i); DO4(buf,(i)+4); } while (0)
#define DO16(buf)   do { DO8(buf,0); DO8(buf,8); } while (0)

/* use NO_DIVIDE if your processor does not do division in hardware --
   try it both ways to see which is faster */
/*
 * Reduction macros.  Every variant takes a modifiable lvalue, reduces it in
 * place modulo BASE, and expands to a single statement in both build
 * configurations.  Arguments are parenthesized and the NO_DIVIDE temporaries
 * carry macro-specific names so they cannot capture the caller's variable.
 */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
   (thank you to John Reiser for pointing this out) */
/* CHOP: fold the bits above bit 15 back in as 15 * (a >> 16) */
#  define CHOP(a) \
    do { \
        unsigned long chop_hi_ = (a) >> 16; \
        (a) &= 0xffffUL; \
        (a) += (chop_hi_ << 4) - chop_hi_; \
    } while (0)
/* MOD28: one fold plus one conditional subtract */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
/* MOD: two folds plus one conditional subtract */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)
/* MOD63: first fold the bits above bit 31 using 2^32 % BASE == 225
   (256 - 32 + 1), then two 16-bit folds and a conditional subtract */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t mod63_hi_ = (a) >> 32; \
        (a) &= 0xffffffffL; \
        (a) += (mod63_hi_ << 8) - (mod63_hi_ << 5) + mod63_hi_; \
        mod63_hi_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (mod63_hi_ << 4) - mod63_hi_; \
        mod63_hi_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (mod63_hi_ << 4) - mod63_hi_; \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
#else
#  define MOD(a)   do { (a) %= BASE; } while (0)
#  define MOD28(a) do { (a) %= BASE; } while (0)
#  define MOD63(a) do { (a) %= BASE; } while (0)
#endif
61 | |
62 | #if defined(ADLER32_SIMD_SSSE3) |
63 | #include "adler32_simd.h" |
64 | #include "x86.h" |
65 | #elif defined(ADLER32_SIMD_NEON) |
66 | #include "adler32_simd.h" |
67 | #endif |
68 | |
69 | /* ========================================================================= */ |
70 | uLong ZEXPORT adler32_z(adler, buf, len) |
71 | uLong adler; |
72 | const Bytef *buf; |
73 | z_size_t len; |
74 | { |
75 | unsigned long sum2; |
76 | unsigned n; |
77 | |
78 | #if defined(ADLER32_SIMD_SSSE3) |
79 | if (x86_cpu_enable_ssse3 && buf && len >= 64) |
80 | return adler32_simd_(adler, buf, len); |
81 | #elif defined(ADLER32_SIMD_NEON) |
82 | if (buf && len >= 64) |
83 | return adler32_simd_(adler, buf, len); |
84 | #endif |
85 | |
86 | /* split Adler-32 into component sums */ |
87 | sum2 = (adler >> 16) & 0xffff; |
88 | adler &= 0xffff; |
89 | |
90 | /* in case user likes doing a byte at a time, keep it fast */ |
91 | if (len == 1) { |
92 | adler += buf[0]; |
93 | if (adler >= BASE) |
94 | adler -= BASE; |
95 | sum2 += adler; |
96 | if (sum2 >= BASE) |
97 | sum2 -= BASE; |
98 | return adler | (sum2 << 16); |
99 | } |
100 | |
101 | #if defined(ADLER32_SIMD_SSSE3) |
102 | /* |
103 | * Use SSSE3 to compute the adler32. Since this routine can be |
104 | * freely used, check CPU features here. zlib convention is to |
105 | * call adler32(0, NULL, 0), before making calls to adler32(). |
106 | * So this is a good early (and infrequent) place to cache CPU |
107 | * features for those later, more interesting adler32() calls. |
108 | */ |
109 | if (buf == Z_NULL) { |
110 | if (!len) /* Assume user is calling adler32(0, NULL, 0); */ |
111 | x86_check_features(); |
112 | return 1L; |
113 | } |
114 | #else |
115 | /* initial Adler-32 value (deferred check for len == 1 speed) */ |
116 | if (buf == Z_NULL) |
117 | return 1L; |
118 | #endif |
119 | |
120 | /* in case short lengths are provided, keep it somewhat fast */ |
121 | if (len < 16) { |
122 | while (len--) { |
123 | adler += *buf++; |
124 | sum2 += adler; |
125 | } |
126 | if (adler >= BASE) |
127 | adler -= BASE; |
128 | MOD28(sum2); /* only added so many BASE's */ |
129 | return adler | (sum2 << 16); |
130 | } |
131 | |
132 | /* do length NMAX blocks -- requires just one modulo operation */ |
133 | while (len >= NMAX) { |
134 | len -= NMAX; |
135 | n = NMAX / 16; /* NMAX is divisible by 16 */ |
136 | do { |
137 | DO16(buf); /* 16 sums unrolled */ |
138 | buf += 16; |
139 | } while (--n); |
140 | MOD(adler); |
141 | MOD(sum2); |
142 | } |
143 | |
144 | /* do remaining bytes (less than NMAX, still just one modulo) */ |
145 | if (len) { /* avoid modulos if none remaining */ |
146 | while (len >= 16) { |
147 | len -= 16; |
148 | DO16(buf); |
149 | buf += 16; |
150 | } |
151 | while (len--) { |
152 | adler += *buf++; |
153 | sum2 += adler; |
154 | } |
155 | MOD(adler); |
156 | MOD(sum2); |
157 | } |
158 | |
159 | /* return recombined sums */ |
160 | return adler | (sum2 << 16); |
161 | } |
162 | |
163 | /* ========================================================================= */ |
164 | uLong ZEXPORT adler32(adler, buf, len) |
165 | uLong adler; |
166 | const Bytef *buf; |
167 | uInt len; |
168 | { |
169 | return adler32_z(adler, buf, len); |
170 | } |
171 | |
172 | /* ========================================================================= */ |
173 | local uLong adler32_combine_(adler1, adler2, len2) |
174 | uLong adler1; |
175 | uLong adler2; |
176 | z_off64_t len2; |
177 | { |
178 | unsigned long sum1; |
179 | unsigned long sum2; |
180 | unsigned rem; |
181 | |
182 | /* for negative len, return invalid adler32 as a clue for debugging */ |
183 | if (len2 < 0) |
184 | return 0xffffffffUL; |
185 | |
186 | /* the derivation of this formula is left as an exercise for the reader */ |
187 | MOD63(len2); /* assumes len2 >= 0 */ |
188 | rem = (unsigned)len2; |
189 | sum1 = adler1 & 0xffff; |
190 | sum2 = rem * sum1; |
191 | MOD(sum2); |
192 | sum1 += (adler2 & 0xffff) + BASE - 1; |
193 | sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; |
194 | if (sum1 >= BASE) sum1 -= BASE; |
195 | if (sum1 >= BASE) sum1 -= BASE; |
196 | if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); |
197 | if (sum2 >= BASE) sum2 -= BASE; |
198 | return sum1 | (sum2 << 16); |
199 | } |
200 | |
201 | /* ========================================================================= */ |
202 | uLong ZEXPORT adler32_combine(adler1, adler2, len2) |
203 | uLong adler1; |
204 | uLong adler2; |
205 | z_off_t len2; |
206 | { |
207 | return adler32_combine_(adler1, adler2, len2); |
208 | } |
209 | |
210 | uLong ZEXPORT adler32_combine64(adler1, adler2, len2) |
211 | uLong adler1; |
212 | uLong adler2; |
213 | z_off64_t len2; |
214 | { |
215 | return adler32_combine_(adler1, adler2, len2); |
216 | } |
217 | |