/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

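/* Note on the two table-fill variants below : the ForCDict variant packs a
 * short hash "tag" (ZSTD_SHORT_CACHE_TAG_BITS bits) into the low bits of every
 * entry via ZSTD_writeTaggedIndex(), so that later dictMatchState searches can
 * reject most non-matching candidates by comparing tags, without dereferencing
 * the candidate position. The ForCCtx variant stores plain indices. */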
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


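/* ZSTD_compressBlock_doubleFast_noDict_generic() :
 * search with two hash tables : hashLong indexes 8-byte sequences, hashSmall
 * indexes mls-byte sequences. At each position, the search tries, in order :
 * the repcode at ip+1, then a long match at ip, then a short match at ip;
 * a short match triggers one last attempt at a longer match at ip+1 before
 * being accepted. */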
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
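    /* the very first position of the window has no prior data to match,
     * so skip it when this block starts exactly at prefixLowest */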
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }
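    /* note : repcodes inherited from the previous block may be out of range at
     * the start of this block; they are stashed in offsetSaved1/2 above, and
     * restored into rep[] at _cleanup if no valid replacement was found */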

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

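            /* no match at ip : after kStepIncr positions searched without
             * success, widen the stride by 1, so that barely-compressible
             * regions are skipped progressively faster */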
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hash table when ip1 is
             * greater than or equal to the new ip we will have after processing
             * this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we use a
             * simpler, more predictable test : the minimum match length is 4 bytes
             * even for a short match, so as long as step (the initial distance
             * between ip and ip1) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* loop back immediately : empirically faster when repcodes are present (?) */
            }
        }
    }
}


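/* ZSTD_compressBlock_doubleFast_dictMatchState_generic() :
 * same search as the noDict variant, except that candidates whose index falls
 * below prefixLowestIndex are looked up in the attached dictionary's tables.
 * Dictionary entries carry packed tags (see ZSTD_fillDoubleHashTableForCDict()),
 * which are compared first to filter out most false candidates cheaply. */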
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

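    /* the dictionary's tables are likely cold when a CDict is shared across
     * many small compressions : prefetching them up front hides part of that
     * memory latency behind the first search iterations */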
    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes)
        PREFETCH_AREA(dictHashSmall, chainTableBytes)
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

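        /* a short match was found at ip : before accepting it, try to upgrade
         * to a longer (8-byte) match at ip+1, in the prefix or the dictionary;
         * a long match found there supersedes the short one */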
_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional underflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

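/* ZSTD_GEN_DFAST_FN() :
 * instantiates the FORCE_INLINE_TEMPLATE generic body above for a fixed mls,
 * so each dispatcher below calls a version fully specialized by the compiler.
 * For instance, ZSTD_GEN_DFAST_FN(noDict, 4) defines
 * ZSTD_compressBlock_doubleFast_noDict_4(), which forwards to
 * ZSTD_compressBlock_doubleFast_noDict_generic() with mls == 4. */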
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


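/* ZSTD_compressBlock_doubleFast_extDict_generic() :
 * here, older content lives in a separate segment (dictBase..dictEnd) which is
 * not contiguous with the current prefix, so candidates with an index below
 * prefixStartIndex must be re-based onto dictBase, and match lengths counted
 * across the segment boundary with ZSTD_count_2segments(). */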
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional underflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}