1/* adler32.c -- compute the Adler-32 checksum of a data stream
2 * Copyright (C) 1995-2011, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/* @(#) $Id$ */
7
8#include "zutil.h"
9
10local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
11
/* Modulus for both running sums: the largest prime smaller than 65536. */
#define BASE 65521U

/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1,
   i.e. the most bytes that can be accumulated between reductions. */
#define NMAX 5552

/*
 * Unrolled accumulation steps.  DOn(buf,i) folds the n bytes buf[i] ..
 * buf[i+n-1] into the variables `adler` and `sum2` of the enclosing scope;
 * DO16(buf) does the same for buf[0] .. buf[15].
 *
 * Every macro expands to exactly one statement (do { } while (0)), so it
 * stays correct as the body of an unbraced if/else or loop.  Index
 * arguments are parenthesized before being offset.
 */
#define DO1(buf, i)  do { adler += (buf)[i]; sum2 += adler; } while (0)
#define DO2(buf, i)  do { DO1(buf, i); DO1(buf, (i) + 1); } while (0)
#define DO4(buf, i)  do { DO2(buf, i); DO2(buf, (i) + 2); } while (0)
#define DO8(buf, i)  do { DO4(buf, i); DO4(buf, (i) + 4); } while (0)
#define DO16(buf)    do { DO8(buf, 0); DO8(buf, 8); } while (0)
21
/* Reduction helpers: each one reduces its lvalue argument modulo BASE in
   place.  Use NO_DIVIDE if your processor does not do division in
   hardware -- try it both ways to see which is faster. */
#ifdef NO_DIVIDE
/* Note that this assumes BASE is 65521, where 65536 % 65521 == 15, so the
   bits above bit 15 can be folded back in as 15 times their value, written
   (x << 4) - x (thank you to John Reiser for pointing this out). */

/* One folding pass: the result is congruent to a modulo BASE but is not
   necessarily below BASE yet. */
#  define CHOP(a) \
    do { \
        unsigned long hi_bits_ = (a) >> 16; \
        (a) &= 0xffffUL; \
        (a) += (hi_bits_ << 4) - hi_bits_; \
    } while (0)

/* One folding pass followed by a single conditional subtraction. */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)

/* Two folding passes followed by a single conditional subtraction. */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)

/* Reduction for a z_off64_t value.  The bits above bit 31 are folded in as
   225 times their value, (x << 8) - (x << 5) + x, then two 16-bit folding
   passes and one conditional subtraction follow. */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t hi_bits_ = (a) >> 32; \
        (a) &= 0xffffffffL; \
        (a) += (hi_bits_ << 8) - (hi_bits_ << 5) + hi_bits_; \
        hi_bits_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (hi_bits_ << 4) - hi_bits_; \
        hi_bits_ = (a) >> 16; \
        (a) &= 0xffffL; \
        (a) += (hi_bits_ << 4) - hi_bits_; \
        if ((a) >= BASE) (a) -= BASE; \
    } while (0)
#else
/* Hardware division available: every variant is a plain remainder. */
#  define MOD(a)   (a) %= BASE
#  define MOD28(a) (a) %= BASE
#  define MOD63(a) (a) %= BASE
#endif
61
62#include "cpu_features.h"
63#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
64#include "adler32_simd.h"
65#endif
66
67/* ========================================================================= */
68uLong ZEXPORT adler32_z(adler, buf, len)
69 uLong adler;
70 const Bytef *buf;
71 z_size_t len;
72{
73 unsigned long sum2;
74 unsigned n;
75
76#if defined(ADLER32_SIMD_SSSE3)
77 if (x86_cpu_enable_ssse3 && buf && len >= 64)
78 return adler32_simd_(adler, buf, len);
79#elif defined(ADLER32_SIMD_NEON)
80 if (buf && len >= 64)
81 return adler32_simd_(adler, buf, len);
82#endif
83
84 /* split Adler-32 into component sums */
85 sum2 = (adler >> 16) & 0xffff;
86 adler &= 0xffff;
87
88 /* in case user likes doing a byte at a time, keep it fast */
89 if (len == 1) {
90 adler += buf[0];
91 if (adler >= BASE)
92 adler -= BASE;
93 sum2 += adler;
94 if (sum2 >= BASE)
95 sum2 -= BASE;
96 return adler | (sum2 << 16);
97 }
98
99#if defined(ADLER32_SIMD_SSSE3)
100 /*
101 * Use SSSE3 to compute the adler32. Since this routine can be
102 * freely used, check CPU features here. zlib convention is to
103 * call adler32(0, NULL, 0), before making calls to adler32().
104 * So this is a good early (and infrequent) place to cache CPU
105 * features for those later, more interesting adler32() calls.
106 */
107 if (buf == Z_NULL) {
108 if (!len) /* Assume user is calling adler32(0, NULL, 0); */
109 cpu_check_features();
110 return 1L;
111 }
112#else
113 /* initial Adler-32 value (deferred check for len == 1 speed) */
114 if (buf == Z_NULL)
115 return 1L;
116#endif
117
118 /* in case short lengths are provided, keep it somewhat fast */
119 if (len < 16) {
120 while (len--) {
121 adler += *buf++;
122 sum2 += adler;
123 }
124 if (adler >= BASE)
125 adler -= BASE;
126 MOD28(sum2); /* only added so many BASE's */
127 return adler | (sum2 << 16);
128 }
129
130 /* do length NMAX blocks -- requires just one modulo operation */
131 while (len >= NMAX) {
132 len -= NMAX;
133 n = NMAX / 16; /* NMAX is divisible by 16 */
134 do {
135 DO16(buf); /* 16 sums unrolled */
136 buf += 16;
137 } while (--n);
138 MOD(adler);
139 MOD(sum2);
140 }
141
142 /* do remaining bytes (less than NMAX, still just one modulo) */
143 if (len) { /* avoid modulos if none remaining */
144 while (len >= 16) {
145 len -= 16;
146 DO16(buf);
147 buf += 16;
148 }
149 while (len--) {
150 adler += *buf++;
151 sum2 += adler;
152 }
153 MOD(adler);
154 MOD(sum2);
155 }
156
157 /* return recombined sums */
158 return adler | (sum2 << 16);
159}
160
161/* ========================================================================= */
162uLong ZEXPORT adler32(adler, buf, len)
163 uLong adler;
164 const Bytef *buf;
165 uInt len;
166{
167 return adler32_z(adler, buf, len);
168}
169
170/* ========================================================================= */
171local uLong adler32_combine_(adler1, adler2, len2)
172 uLong adler1;
173 uLong adler2;
174 z_off64_t len2;
175{
176 unsigned long sum1;
177 unsigned long sum2;
178 unsigned rem;
179
180 /* for negative len, return invalid adler32 as a clue for debugging */
181 if (len2 < 0)
182 return 0xffffffffUL;
183
184 /* the derivation of this formula is left as an exercise for the reader */
185 MOD63(len2); /* assumes len2 >= 0 */
186 rem = (unsigned)len2;
187 sum1 = adler1 & 0xffff;
188 sum2 = rem * sum1;
189 MOD(sum2);
190 sum1 += (adler2 & 0xffff) + BASE - 1;
191 sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
192 if (sum1 >= BASE) sum1 -= BASE;
193 if (sum1 >= BASE) sum1 -= BASE;
194 if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
195 if (sum2 >= BASE) sum2 -= BASE;
196 return sum1 | (sum2 << 16);
197}
198
199/* ========================================================================= */
200uLong ZEXPORT adler32_combine(adler1, adler2, len2)
201 uLong adler1;
202 uLong adler2;
203 z_off_t len2;
204{
205 return adler32_combine_(adler1, adler2, len2);
206}
207
208uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
209 uLong adler1;
210 uLong adler2;
211 z_off64_t len2;
212{
213 return adler32_combine_(adler1, adler2, len2);
214}
215