1/* chunkset_tpl.h -- inline functions to copy small data chunks.
2 * For conditions of distribution and use, see copyright notice in zlib.h
3 */
4
5/* Returns the chunk size */
6Z_INTERNAL uint32_t CHUNKSIZE(void) {
7 return sizeof(chunk_t);
8}
9
10/* Behave like memcpy, but assume that it's OK to overwrite at least
11 chunk_t bytes of output even if the length is shorter than this,
12 that the length is non-zero, and that `from` lags `out` by at least
13 sizeof chunk_t bytes (or that they don't overlap at all or simply that
14 the distance is less than the length of the copy).
15
16 Aside from better memory bus utilisation, this means that short copies
17 (chunk_t bytes or fewer) will fall straight through the loop
18 without iteration, which will hopefully make the branch prediction more
19 reliable. */
20Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
21 Assert(len > 0, "chunkcopy should never have a length 0");
22 chunk_t chunk;
23 int32_t align = (--len % sizeof(chunk_t)) + 1;
24 loadchunk(s: from, chunk: &chunk);
25 storechunk(out, chunk: &chunk);
26 out += align;
27 from += align;
28 len /= sizeof(chunk_t);
29 while (len > 0) {
30 loadchunk(s: from, chunk: &chunk);
31 storechunk(out, chunk: &chunk);
32 out += sizeof(chunk_t);
33 from += sizeof(chunk_t);
34 --len;
35 }
36 return out;
37}
38
39/* Behave like chunkcopy, but avoid writing beyond of legal output. */
40Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
41 unsigned safelen = (unsigned)((safe - out) + 1);
42 len = MIN(len, safelen);
43#if CHUNK_SIZE >= 32
44 while (len >= 32) {
45 memcpy(out, from, 32);
46 out += 32;
47 from += 32;
48 len -= 32;
49 }
50#endif
51#if CHUNK_SIZE >= 16
52 while (len >= 16) {
53 memcpy(dest: out, src: from, n: 16);
54 out += 16;
55 from += 16;
56 len -= 16;
57 }
58#endif
59#if CHUNK_SIZE >= 8
60 while (len >= 8) {
61 memcpy(dest: out, src: from, n: 8);
62 out += 8;
63 from += 8;
64 len -= 8;
65 }
66#endif
67 if (len >= 4) {
68 memcpy(dest: out, src: from, n: 4);
69 out += 4;
70 from += 4;
71 len -= 4;
72 }
73 if (len >= 2) {
74 memcpy(dest: out, src: from, n: 2);
75 out += 2;
76 from += 2;
77 len -= 2;
78 }
79 if (len == 1) {
80 *out++ = *from++;
81 }
82 return out;
83}
84
85/* Perform short copies until distance can be rewritten as being at least
86 sizeof chunk_t.
87
88 This assumes that it's OK to overwrite at least the first
89 2*sizeof(chunk_t) bytes of output even if the copy is shorter than this.
90 This assumption holds because inflate_fast() starts every iteration with at
91 least 258 bytes of output space available (258 being the maximum length
92 output from a single token; see inflate_fast()'s assumptions below). */
93Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
94 unsigned char const *from = out - *dist;
95 chunk_t chunk;
96 while (*dist < *len && *dist < sizeof(chunk_t)) {
97 loadchunk(s: from, chunk: &chunk);
98 storechunk(out, chunk: &chunk);
99 out += *dist;
100 *len -= *dist;
101 *dist += *dist;
102 }
103 return out;
104}
105
106/* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
107 Return OUT + LEN. */
108Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
109 /* Debug performance related issues when len < sizeof(uint64_t):
110 Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
111 Assert(dist > 0, "chunkmemset cannot have a distance 0");
112
113 unsigned char *from = out - dist;
114 chunk_t chunk;
115 unsigned sz = sizeof(chunk);
116 if (len < sz) {
117 while (len != 0) {
118 *out++ = *from++;
119 --len;
120 }
121 return out;
122 }
123
124#ifdef HAVE_CHUNKMEMSET_1
125 if (dist == 1) {
126 chunkmemset_1(from, chunk: &chunk);
127 } else
128#endif
129#ifdef HAVE_CHUNKMEMSET_2
130 if (dist == 2) {
131 chunkmemset_2(from, chunk: &chunk);
132 } else
133#endif
134#ifdef HAVE_CHUNKMEMSET_4
135 if (dist == 4) {
136 chunkmemset_4(from, chunk: &chunk);
137 } else
138#endif
139#ifdef HAVE_CHUNKMEMSET_8
140 if (dist == 8) {
141 chunkmemset_8(from, chunk: &chunk);
142 } else
143#endif
144 if (dist == sz) {
145 loadchunk(s: from, chunk: &chunk);
146 } else if (dist < sz) {
147 unsigned char *end = out + len - 1;
148 while (len > dist) {
149 out = CHUNKCOPY_SAFE(out, from, len: dist, safe: end);
150 len -= dist;
151 }
152 if (len > 0) {
153 out = CHUNKCOPY_SAFE(out, from, len, safe: end);
154 }
155 return out;
156 } else {
157 out = CHUNKUNROLL(out, dist: &dist, len: &len);
158 return CHUNKCOPY(out, from: out - dist, len);
159 }
160
161 unsigned rem = len % sz;
162 len -= rem;
163 while (len) {
164 storechunk(out, chunk: &chunk);
165 out += sz;
166 len -= sz;
167 }
168
169 /* Last, deal with the case when LEN is not a multiple of SZ. */
170 if (rem) {
171 memcpy(dest: out, src: from, n: rem);
172 out += rem;
173 }
174
175 return out;
176}
177
178Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
179#if !defined(UNALIGNED64_OK)
180# if !defined(UNALIGNED_OK)
181 static const uint32_t align_mask = 7;
182# else
183 static const uint32_t align_mask = 3;
184# endif
185#endif
186
187 len = MIN(len, left);
188 uint8_t *from = out - dist;
189#if !defined(UNALIGNED64_OK)
190 while (((uintptr_t)out & align_mask) && (len > 0)) {
191 *out++ = *from++;
192 --len;
193 --left;
194 }
195#endif
196 if (left < (unsigned)(3 * sizeof(chunk_t))) {
197 while (len > 0) {
198 *out++ = *from++;
199 --len;
200 }
201 return out;
202 }
203 return CHUNKMEMSET(out, dist, len);
204}
205