1/* adler32.c -- compute the Adler-32 checksum of a data stream
2 * Copyright (C) 1995-2011, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/* @(#) $Id$ */
7
8#include "zutil.h"
9
/* Forward declaration of the file-local helper that adler32_combine() and
   adler32_combine64() both call; it takes the length as z_off64_t. */
local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
11
/* Modulus applied to both running sums of the checksum. */
#define BASE 65521U /* largest prime smaller than 65536 */
/* Number of bytes adler32_z() accumulates between modulo reductions. */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/*
 * Unrolled accumulation steps.  DO1 adds byte (buf)[i] to adler and then adds
 * the updated adler to sum2.  DO2, DO4 and DO8 repeat that step for 2, 4 and
 * 8 consecutive bytes starting at offset i, and DO16 covers offsets 0..15.
 * The expansions refer to variables named adler and sum2, which must exist in
 * the scope where the macro is used.  The expansions are statement sequences,
 * not single statements, so use them only as full statements inside a braced
 * block.
 */
#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
21
/* use NO_DIVIDE if your processor does not do division in hardware --
   try it both ways to see which is faster */
/*
 * Reduction macros.  Each one modifies its argument in place, so the argument
 * must be a modifiable lvalue.
 *
 *   MOD(a)    reduce a modulo BASE
 *   MOD28(a)  reduce a modulo BASE when a is known to be small (see below)
 *   MOD63(a)  reduce a non-negative z_off64_t value modulo BASE
 *
 * The NO_DIVIDE versions declare a local named tmp inside their own block, so
 * the argument must not itself be a variable called tmp.
 */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
   (thank you to John Reiser for pointing this out) */
/* CHOP folds the bits above the low 16 back into the low 16: the high part
   tmp stands for tmp * 65536, which is congruent to tmp * 15 modulo BASE, and
   (tmp << 4) - tmp is tmp * 15.  The result is congruent to the input but is
   not necessarily below BASE yet. */
#  define CHOP(a) \
    do { \
        unsigned long tmp = a >> 16; \
        a &= 0xffffUL; \
        a += (tmp << 4) - tmp; \
    } while (0)
/* One fold plus one conditional subtraction.  That is a complete reduction
   only while the folded value stays below 2 * BASE, which holds for inputs
   below 2^28 (0xffff + 15 * 0xfff < 2 * BASE). */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if (a >= BASE) a -= BASE; \
    } while (0)
/* Two folds plus one conditional subtraction; enough to fully reduce any
   value that fits in 32 bits. */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)
/* First folds the bits above the low 32 using (tmp << 8) - (tmp << 5) + tmp,
   which is tmp * 225 (2^32 is congruent to 15 * 15 == 225 modulo BASE), then
   applies the 16-bit fold twice and finishes with a conditional subtraction. */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t tmp = a >> 32; \
        a &= 0xffffffffL; \
        a += (tmp << 8) - (tmp << 5) + tmp; \
        tmp = a >> 16; \
        a &= 0xffffL; \
        a += (tmp << 4) - tmp; \
        tmp = a >> 16; \
        a &= 0xffffL; \
        a += (tmp << 4) - tmp; \
        if (a >= BASE) a -= BASE; \
    } while (0)
#else
/* With hardware division, all three are a plain remainder by BASE. */
#  define MOD(a) a %= BASE
#  define MOD28(a) a %= BASE
#  define MOD63(a) a %= BASE
#endif
61
62#if defined(ADLER32_SIMD_SSSE3)
63#include "adler32_simd.h"
64#include "x86.h"
65#elif defined(ADLER32_SIMD_NEON)
66#include "adler32_simd.h"
67#endif
68
69/* ========================================================================= */
70uLong ZEXPORT adler32_z(adler, buf, len)
71 uLong adler;
72 const Bytef *buf;
73 z_size_t len;
74{
75 unsigned long sum2;
76 unsigned n;
77
78#if defined(ADLER32_SIMD_SSSE3)
79 if (x86_cpu_enable_ssse3 && buf && len >= 64)
80 return adler32_simd_(adler, buf, len);
81#elif defined(ADLER32_SIMD_NEON)
82 if (buf && len >= 64)
83 return adler32_simd_(adler, buf, len);
84#endif
85
86 /* split Adler-32 into component sums */
87 sum2 = (adler >> 16) & 0xffff;
88 adler &= 0xffff;
89
90 /* in case user likes doing a byte at a time, keep it fast */
91 if (len == 1) {
92 adler += buf[0];
93 if (adler >= BASE)
94 adler -= BASE;
95 sum2 += adler;
96 if (sum2 >= BASE)
97 sum2 -= BASE;
98 return adler | (sum2 << 16);
99 }
100
101#if defined(ADLER32_SIMD_SSSE3)
102 /*
103 * Use SSSE3 to compute the adler32. Since this routine can be
104 * freely used, check CPU features here. zlib convention is to
105 * call adler32(0, NULL, 0), before making calls to adler32().
106 * So this is a good early (and infrequent) place to cache CPU
107 * features for those later, more interesting adler32() calls.
108 */
109 if (buf == Z_NULL) {
110 if (!len) /* Assume user is calling adler32(0, NULL, 0); */
111 x86_check_features();
112 return 1L;
113 }
114#else
115 /* initial Adler-32 value (deferred check for len == 1 speed) */
116 if (buf == Z_NULL)
117 return 1L;
118#endif
119
120 /* in case short lengths are provided, keep it somewhat fast */
121 if (len < 16) {
122 while (len--) {
123 adler += *buf++;
124 sum2 += adler;
125 }
126 if (adler >= BASE)
127 adler -= BASE;
128 MOD28(sum2); /* only added so many BASE's */
129 return adler | (sum2 << 16);
130 }
131
132 /* do length NMAX blocks -- requires just one modulo operation */
133 while (len >= NMAX) {
134 len -= NMAX;
135 n = NMAX / 16; /* NMAX is divisible by 16 */
136 do {
137 DO16(buf); /* 16 sums unrolled */
138 buf += 16;
139 } while (--n);
140 MOD(adler);
141 MOD(sum2);
142 }
143
144 /* do remaining bytes (less than NMAX, still just one modulo) */
145 if (len) { /* avoid modulos if none remaining */
146 while (len >= 16) {
147 len -= 16;
148 DO16(buf);
149 buf += 16;
150 }
151 while (len--) {
152 adler += *buf++;
153 sum2 += adler;
154 }
155 MOD(adler);
156 MOD(sum2);
157 }
158
159 /* return recombined sums */
160 return adler | (sum2 << 16);
161}
162
163/* ========================================================================= */
164uLong ZEXPORT adler32(adler, buf, len)
165 uLong adler;
166 const Bytef *buf;
167 uInt len;
168{
169 return adler32_z(adler, buf, len);
170}
171
172/* ========================================================================= */
173local uLong adler32_combine_(adler1, adler2, len2)
174 uLong adler1;
175 uLong adler2;
176 z_off64_t len2;
177{
178 unsigned long sum1;
179 unsigned long sum2;
180 unsigned rem;
181
182 /* for negative len, return invalid adler32 as a clue for debugging */
183 if (len2 < 0)
184 return 0xffffffffUL;
185
186 /* the derivation of this formula is left as an exercise for the reader */
187 MOD63(len2); /* assumes len2 >= 0 */
188 rem = (unsigned)len2;
189 sum1 = adler1 & 0xffff;
190 sum2 = rem * sum1;
191 MOD(sum2);
192 sum1 += (adler2 & 0xffff) + BASE - 1;
193 sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
194 if (sum1 >= BASE) sum1 -= BASE;
195 if (sum1 >= BASE) sum1 -= BASE;
196 if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
197 if (sum2 >= BASE) sum2 -= BASE;
198 return sum1 | (sum2 << 16);
199}
200
201/* ========================================================================= */
202uLong ZEXPORT adler32_combine(adler1, adler2, len2)
203 uLong adler1;
204 uLong adler2;
205 z_off_t len2;
206{
207 return adler32_combine_(adler1, adler2, len2);
208}
209
210uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
211 uLong adler1;
212 uLong adler2;
213 z_off64_t len2;
214{
215 return adler32_combine_(adler1, adler2, len2);
216}
217