1/*
2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#include "zstd_compress_internal.h"
12#include "zstd_opt.h"
13
14
15#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
16#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17#define ZSTD_MAX_PRICE (1<<30)
18
19
20/*-*************************************
21* Price functions for optimal parser
22***************************************/
23static void ZSTD_setLog2Prices(optState_t* optPtr)
24{
25 optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
26 optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
27 optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
28 optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
29}
30
31
32static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33 const BYTE* const src, size_t const srcSize)
34{
35 optPtr->staticPrices = 0;
36
37 if (optPtr->litLengthSum == 0) { /* first init */
38 unsigned u;
39 if (srcSize <= 1024) optPtr->staticPrices = 1;
40
41 assert(optPtr->litFreq!=NULL);
42 for (u=0; u<=MaxLit; u++)
43 optPtr->litFreq[u] = 0;
44 for (u=0; u<srcSize; u++)
45 optPtr->litFreq[src[u]]++;
46 optPtr->litSum = 0;
47 for (u=0; u<=MaxLit; u++) {
48 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
49 optPtr->litSum += optPtr->litFreq[u];
50 }
51
52 for (u=0; u<=MaxLL; u++)
53 optPtr->litLengthFreq[u] = 1;
54 optPtr->litLengthSum = MaxLL+1;
55 for (u=0; u<=MaxML; u++)
56 optPtr->matchLengthFreq[u] = 1;
57 optPtr->matchLengthSum = MaxML+1;
58 for (u=0; u<=MaxOff; u++)
59 optPtr->offCodeFreq[u] = 1;
60 optPtr->offCodeSum = (MaxOff+1);
61
62 } else {
63 unsigned u;
64
65 optPtr->litSum = 0;
66 for (u=0; u<=MaxLit; u++) {
67 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
68 optPtr->litSum += optPtr->litFreq[u];
69 }
70 optPtr->litLengthSum = 0;
71 for (u=0; u<=MaxLL; u++) {
72 optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
73 optPtr->litLengthSum += optPtr->litLengthFreq[u];
74 }
75 optPtr->matchLengthSum = 0;
76 for (u=0; u<=MaxML; u++) {
77 optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
78 optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
79 }
80 optPtr->offCodeSum = 0;
81 for (u=0; u<=MaxOff; u++) {
82 optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
83 optPtr->offCodeSum += optPtr->offCodeFreq[u];
84 }
85 }
86
87 ZSTD_setLog2Prices(optPtr);
88}
89
90
91/* ZSTD_rawLiteralsCost() :
92 * cost of literals (only) in given segment (which length can be null)
93 * does not include cost of literalLength symbol */
94static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95 const optState_t* const optPtr)
96{
97 if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
98 if (litLength == 0) return 0;
99
100 /* literals */
101 { U32 u;
102 U32 cost = litLength * optPtr->log2litSum;
103 for (u=0; u < litLength; u++)
104 cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
105 return cost;
106 }
107}
108
109/* ZSTD_litLengthPrice() :
110 * cost of literalLength symbol */
111static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
112{
113 if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
114
115 /* literal Length */
116 { U32 const llCode = ZSTD_LLcode(litLength);
117 U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
118 return price;
119 }
120}
121
122/* ZSTD_litLengthPrice() :
123 * cost of the literal part of a sequence,
124 * including literals themselves, and literalLength symbol */
125static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126 const optState_t* const optPtr)
127{
128 return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129 + ZSTD_litLengthPrice(litLength, optPtr);
130}
131
132/* ZSTD_litLengthContribution() :
133 * @return ( cost(litlength) - cost(0) )
134 * this value can then be added to rawLiteralsCost()
135 * to provide a cost which is directly comparable to a match ending at same position */
136static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
137{
138 if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
139
140 /* literal Length */
141 { U32 const llCode = ZSTD_LLcode(litLength);
142 int const contribution = LL_bits[llCode]
143 + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
144 - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
145#if 1
146 return contribution;
147#else
148 return MAX(0, contribution); /* sometimes better, sometimes not ... */
149#endif
150 }
151}
152
153/* ZSTD_literalsContribution() :
154 * creates a fake cost for the literals part of a sequence
155 * which can be compared to the ending cost of a match
156 * should a new match start at this position */
157static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158 const optState_t* const optPtr)
159{
160 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
161 + ZSTD_litLengthContribution(litLength, optPtr);
162 return contribution;
163}
164
165/* ZSTD_getMatchPrice() :
166 * Provides the cost of the match part (offset + matchLength) of a sequence
167 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
170 U32 const offset, U32 const matchLength,
171 const optState_t* const optPtr,
172 int const optLevel)
173{
174 U32 price;
175 U32 const offCode = ZSTD_highbit32(offset+1);
176 U32 const mlBase = matchLength - MINMATCH;
177 assert(matchLength >= MINMATCH);
178
179 if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
180 return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
181
182 price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
183 if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
184
185 /* match Length */
186 { U32 const mlCode = ZSTD_MLcode(mlBase);
187 price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
188 }
189
190 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191 return price;
192}
193
194static void ZSTD_updateStats(optState_t* const optPtr,
195 U32 litLength, const BYTE* literals,
196 U32 offsetCode, U32 matchLength)
197{
198 /* literals */
199 { U32 u;
200 for (u=0; u < litLength; u++)
201 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
202 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
203 }
204
205 /* literal Length */
206 { U32 const llCode = ZSTD_LLcode(litLength);
207 optPtr->litLengthFreq[llCode]++;
208 optPtr->litLengthSum++;
209 }
210
211 /* match offset code (0-2=>repCode; 3+=>offset+2) */
212 { U32 const offCode = ZSTD_highbit32(offsetCode+1);
213 assert(offCode <= MaxOff);
214 optPtr->offCodeFreq[offCode]++;
215 optPtr->offCodeSum++;
216 }
217
218 /* match Length */
219 { U32 const mlBase = matchLength - MINMATCH;
220 U32 const mlCode = ZSTD_MLcode(mlBase);
221 optPtr->matchLengthFreq[mlCode]++;
222 optPtr->matchLengthSum++;
223 }
224}
225
226
227/* ZSTD_readMINMATCH() :
228 * function safe only for comparisons
229 * assumption : memPtr must be at least 4 bytes before end of buffer */
230MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
231{
232 switch (length)
233 {
234 default :
235 case 4 : return MEM_read32(memPtr);
236 case 3 : if (MEM_isLittleEndian())
237 return MEM_read32(memPtr)<<8;
238 else
239 return MEM_read32(memPtr)>>8;
240 }
241}
242
243
244/* Update hashTable3 up to ip (excluded)
245 Assumption : always within prefix (i.e. not within extDict) */
246static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
247{
248 U32* const hashTable3 = ms->hashTable3;
249 U32 const hashLog3 = ms->hashLog3;
250 const BYTE* const base = ms->window.base;
251 U32 idx = ms->nextToUpdate3;
252 U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
253 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
254 assert(hashLog3 > 0);
255
256 while(idx < target) {
257 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
258 idx++;
259 }
260
261 return hashTable3[hash3];
262}
263
264
265/*-*************************************
266* Binary Tree search
267***************************************/
268/** ZSTD_insertBt1() : add one or multiple positions to tree.
269 * ip : assumed <= iend-8 .
270 * @return : nb of positions added */
271static U32 ZSTD_insertBt1(
272 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
273 const BYTE* const ip, const BYTE* const iend,
274 U32 const mls, U32 const extDict)
275{
276 U32* const hashTable = ms->hashTable;
277 U32 const hashLog = cParams->hashLog;
278 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
279 U32* const bt = ms->chainTable;
280 U32 const btLog = cParams->chainLog - 1;
281 U32 const btMask = (1 << btLog) - 1;
282 U32 matchIndex = hashTable[h];
283 size_t commonLengthSmaller=0, commonLengthLarger=0;
284 const BYTE* const base = ms->window.base;
285 const BYTE* const dictBase = ms->window.dictBase;
286 const U32 dictLimit = ms->window.dictLimit;
287 const BYTE* const dictEnd = dictBase + dictLimit;
288 const BYTE* const prefixStart = base + dictLimit;
289 const BYTE* match;
290 const U32 current = (U32)(ip-base);
291 const U32 btLow = btMask >= current ? 0 : current - btMask;
292 U32* smallerPtr = bt + 2*(current&btMask);
293 U32* largerPtr = smallerPtr + 1;
294 U32 dummy32; /* to be nullified at the end */
295 U32 const windowLow = ms->window.lowLimit;
296 U32 matchEndIdx = current+8+1;
297 size_t bestLength = 8;
298 U32 nbCompares = 1U << cParams->searchLog;
299#ifdef ZSTD_C_PREDICT
300 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
301 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
302 predictedSmall += (predictedSmall>0);
303 predictedLarge += (predictedLarge>0);
304#endif /* ZSTD_C_PREDICT */
305
306 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
307
308 assert(ip <= iend-8); /* required for h calculation */
309 hashTable[h] = current; /* Update Hash Table */
310
311 while (nbCompares-- && (matchIndex > windowLow)) {
312 U32* const nextPtr = bt + 2*(matchIndex & btMask);
313 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314 assert(matchIndex < current);
315
316#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
317 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
318 if (matchIndex == predictedSmall) {
319 /* no need to check length, result known */
320 *smallerPtr = matchIndex;
321 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
322 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
323 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
324 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
325 continue;
326 }
327 if (matchIndex == predictedLarge) {
328 *largerPtr = matchIndex;
329 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
330 largerPtr = nextPtr;
331 matchIndex = nextPtr[0];
332 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
333 continue;
334 }
335#endif
336
337 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
338 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
339 match = base + matchIndex;
340 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341 } else {
342 match = dictBase + matchIndex;
343 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
344 if (matchIndex+matchLength >= dictLimit)
345 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
346 }
347
348 if (matchLength > bestLength) {
349 bestLength = matchLength;
350 if (matchLength > matchEndIdx - matchIndex)
351 matchEndIdx = matchIndex + (U32)matchLength;
352 }
353
354 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
355 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
356 }
357
358 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
359 /* match is smaller than current */
360 *smallerPtr = matchIndex; /* update smaller idx */
361 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
362 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
363 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
364 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
365 } else {
366 /* match is larger than current */
367 *largerPtr = matchIndex;
368 commonLengthLarger = matchLength;
369 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
370 largerPtr = nextPtr;
371 matchIndex = nextPtr[0];
372 } }
373
374 *smallerPtr = *largerPtr = 0;
375 if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
376 assert(matchEndIdx > current + 8);
377 return matchEndIdx - (current + 8);
378}
379
380FORCE_INLINE_TEMPLATE
381void ZSTD_updateTree_internal(
382 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
383 const BYTE* const ip, const BYTE* const iend,
384 const U32 mls, const U32 extDict)
385{
386 const BYTE* const base = ms->window.base;
387 U32 const target = (U32)(ip - base);
388 U32 idx = ms->nextToUpdate;
389 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
390 idx, target, extDict);
391
392 while(idx < target)
393 idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
394 ms->nextToUpdate = target;
395}
396
397void ZSTD_updateTree(
398 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
399 const BYTE* ip, const BYTE* iend)
400{
401 ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
402}
403
404FORCE_INLINE_TEMPLATE
405U32 ZSTD_insertBtAndGetAllMatches (
406 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
407 const BYTE* const ip, const BYTE* const iLimit, int const extDict,
408 U32 rep[ZSTD_REP_NUM], U32 const ll0,
409 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410{
411 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
412 const BYTE* const base = ms->window.base;
413 U32 const current = (U32)(ip-base);
414 U32 const hashLog = cParams->hashLog;
415 U32 const minMatch = (mls==3) ? 3 : 4;
416 U32* const hashTable = ms->hashTable;
417 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
418 U32 matchIndex = hashTable[h];
419 U32* const bt = ms->chainTable;
420 U32 const btLog = cParams->chainLog - 1;
421 U32 const btMask= (1U << btLog) - 1;
422 size_t commonLengthSmaller=0, commonLengthLarger=0;
423 const BYTE* const dictBase = ms->window.dictBase;
424 U32 const dictLimit = ms->window.dictLimit;
425 const BYTE* const dictEnd = dictBase + dictLimit;
426 const BYTE* const prefixStart = base + dictLimit;
427 U32 const btLow = btMask >= current ? 0 : current - btMask;
428 U32 const windowLow = ms->window.lowLimit;
429 U32* smallerPtr = bt + 2*(current&btMask);
430 U32* largerPtr = bt + 2*(current&btMask) + 1;
431 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
432 U32 dummy32; /* to be nullified at the end */
433 U32 mnum = 0;
434 U32 nbCompares = 1U << cParams->searchLog;
435
436 size_t bestLength = lengthToBeat-1;
437 DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
438
439 /* check repCode */
440 { U32 const lastR = ZSTD_REP_NUM + ll0;
441 U32 repCode;
442 for (repCode = ll0; repCode < lastR; repCode++) {
443 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
444 U32 const repIndex = current - repOffset;
445 U32 repLen = 0;
446 assert(current >= dictLimit);
447 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
448 if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
449 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450 }
451 } else { /* repIndex < dictLimit || repIndex >= current */
452 const BYTE* const repMatch = dictBase + repIndex;
453 assert(current >= windowLow);
454 if ( extDict /* this case only valid in extDict mode */
455 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
456 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
459 } }
460 /* save longer solution */
461 if (repLen > bestLength) {
462 DEBUGLOG(8, "found rep-match %u of length %u",
463 repCode - ll0, (U32)repLen);
464 bestLength = repLen;
465 matches[mnum].off = repCode - ll0;
466 matches[mnum].len = (U32)repLen;
467 mnum++;
468 if ( (repLen > sufficient_len)
469 | (ip+repLen == iLimit) ) { /* best possible */
470 return mnum;
471 } } } }
472
473 /* HC3 match finder */
474 if ((mls == 3) /*static*/ && (bestLength < mls)) {
475 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476 if ((matchIndex3 > windowLow)
477 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478 size_t mlen;
479 if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
480 const BYTE* const match = base + matchIndex3;
481 mlen = ZSTD_count(ip, match, iLimit);
482 } else {
483 const BYTE* const match = dictBase + matchIndex3;
484 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
485 }
486
487 /* save best solution */
488 if (mlen >= mls /* == 3 > bestLength */) {
489 DEBUGLOG(8, "found small match with hlog3, of length %u",
490 (U32)mlen);
491 bestLength = mlen;
492 assert(current > matchIndex3);
493 assert(mnum==0); /* no prior solution */
494 matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
495 matches[0].len = (U32)mlen;
496 mnum = 1;
497 if ( (mlen > sufficient_len) |
498 (ip+mlen == iLimit) ) { /* best possible length */
499 ms->nextToUpdate = current+1; /* skip insertion */
500 return 1;
501 } } } }
502
503 hashTable[h] = current; /* Update Hash Table */
504
505 while (nbCompares-- && (matchIndex > windowLow)) {
506 U32* const nextPtr = bt + 2*(matchIndex & btMask);
507 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
508 const BYTE* match;
509 assert(current > matchIndex);
510
511 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
512 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
513 match = base + matchIndex;
514 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
515 } else {
516 match = dictBase + matchIndex;
517 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
518 if (matchIndex+matchLength >= dictLimit)
519 match = base + matchIndex; /* prepare for match[matchLength] */
520 }
521
522 if (matchLength > bestLength) {
523 DEBUGLOG(8, "found match of length %u at distance %u",
524 (U32)matchLength, current - matchIndex);
525 assert(matchEndIdx > matchIndex);
526 if (matchLength > matchEndIdx - matchIndex)
527 matchEndIdx = matchIndex + (U32)matchLength;
528 bestLength = matchLength;
529 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530 matches[mnum].len = (U32)matchLength;
531 mnum++;
532 if (matchLength > ZSTD_OPT_NUM) break;
533 if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
534 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
535 }
536 }
537
538 if (match[matchLength] < ip[matchLength]) {
539 /* match smaller than current */
540 *smallerPtr = matchIndex; /* update smaller idx */
541 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
542 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
543 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
544 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
545 } else {
546 *largerPtr = matchIndex;
547 commonLengthLarger = matchLength;
548 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
549 largerPtr = nextPtr;
550 matchIndex = nextPtr[0];
551 } }
552
553 *smallerPtr = *largerPtr = 0;
554
555 assert(matchEndIdx > current+8);
556 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
557 return mnum;
558}
559
560
561FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
563 const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
564 U32 rep[ZSTD_REP_NUM], U32 const ll0,
565 ZSTD_match_t* matches, U32 const lengthToBeat)
566{
567 U32 const matchLengthSearch = cParams->searchLength;
568 DEBUGLOG(7, "ZSTD_BtGetAllMatches");
569 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
570 ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
571 switch(matchLengthSearch)
572 {
573 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
574 default :
575 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
576 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
577 case 7 :
578 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
579 }
580}
581
582
583/*-*******************************
584* Optimal parser
585*********************************/
586typedef struct repcodes_s {
587 U32 rep[3];
588} repcodes_t;
589
590repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
591{
592 repcodes_t newReps;
593 if (offset >= ZSTD_REP_NUM) { /* full offset */
594 newReps.rep[2] = rep[1];
595 newReps.rep[1] = rep[0];
596 newReps.rep[0] = offset - ZSTD_REP_MOVE;
597 } else { /* repcode */
598 U32 const repCode = offset + ll0;
599 if (repCode > 0) { /* note : if repCode==0, no change */
600 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
601 newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
602 newReps.rep[1] = rep[0];
603 newReps.rep[0] = currentOffset;
604 } else { /* repCode == 0 */
605 memcpy(&newReps, rep, sizeof(newReps));
606 }
607 }
608 return newReps;
609}
610
611
612typedef struct {
613 const BYTE* anchor;
614 U32 litlen;
615 U32 rawLitCost;
616} cachedLiteralPrice_t;
617
618static U32 ZSTD_rawLiteralsCost_cached(
619 cachedLiteralPrice_t* const cachedLitPrice,
620 const BYTE* const anchor, U32 const litlen,
621 const optState_t* const optStatePtr)
622{
623 U32 startCost;
624 U32 remainingLength;
625 const BYTE* startPosition;
626
627 if (anchor == cachedLitPrice->anchor) {
628 startCost = cachedLitPrice->rawLitCost;
629 startPosition = anchor + cachedLitPrice->litlen;
630 assert(litlen >= cachedLitPrice->litlen);
631 remainingLength = litlen - cachedLitPrice->litlen;
632 } else {
633 startCost = 0;
634 startPosition = anchor;
635 remainingLength = litlen;
636 }
637
638 { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639 cachedLitPrice->anchor = anchor;
640 cachedLitPrice->litlen = litlen;
641 cachedLitPrice->rawLitCost = rawLitCost;
642 return rawLitCost;
643 }
644}
645
646static U32 ZSTD_fullLiteralsCost_cached(
647 cachedLiteralPrice_t* const cachedLitPrice,
648 const BYTE* const anchor, U32 const litlen,
649 const optState_t* const optStatePtr)
650{
651 return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
652 + ZSTD_litLengthPrice(litlen, optStatePtr);
653}
654
655static int ZSTD_literalsContribution_cached(
656 cachedLiteralPrice_t* const cachedLitPrice,
657 const BYTE* const anchor, U32 const litlen,
658 const optState_t* const optStatePtr)
659{
660 int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661 + ZSTD_litLengthContribution(litlen, optStatePtr);
662 return contribution;
663}
664
665FORCE_INLINE_TEMPLATE
666size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667 U32 rep[ZSTD_REP_NUM],
668 ZSTD_compressionParameters const* cParams,
669 const void* src, size_t srcSize,
670 const int optLevel, const int extDict)
671{
672 optState_t* const optStatePtr = &ms->opt;
673 const BYTE* const istart = (const BYTE*)src;
674 const BYTE* ip = istart;
675 const BYTE* anchor = istart;
676 const BYTE* const iend = istart + srcSize;
677 const BYTE* const ilimit = iend - 8;
678 const BYTE* const base = ms->window.base;
679 const BYTE* const prefixStart = base + ms->window.dictLimit;
680
681 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
682 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
683
684 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685 ZSTD_match_t* const matches = optStatePtr->matchTable;
686 cachedLiteralPrice_t cachedLitPrice;
687
688 /* init */
689 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
690 ms->nextToUpdate3 = ms->nextToUpdate;
691 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
692 ip += (ip==prefixStart);
693 memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694
695 /* Match Loop */
696 while (ip < ilimit) {
697 U32 cur, last_pos = 0;
698 U32 best_mlen, best_off;
699
700 /* find first match */
701 { U32 const litlen = (U32)(ip - anchor);
702 U32 const ll0 = !litlen;
703 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
704 if (!nbMatches) { ip++; continue; }
705
706 /* initialize opt[0] */
707 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708 opt[0].mlen = 1;
709 opt[0].litlen = litlen;
710
711 /* large match -> immediate encoding */
712 { U32 const maxML = matches[nbMatches-1].len;
713 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
714 nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
715
716 if (maxML > sufficient_len) {
717 best_mlen = maxML;
718 best_off = matches[nbMatches-1].off;
719 DEBUGLOG(7, "large match (%u>%u), immediate encoding",
720 best_mlen, sufficient_len);
721 cur = 0;
722 last_pos = 1;
723 goto _shortestPath;
724 } }
725
726 /* set prices for first matches starting position == 0 */
727 { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
728 U32 pos;
729 U32 matchNb;
730 for (pos = 0; pos < minMatch; pos++) {
731 opt[pos].mlen = 1;
732 opt[pos].price = ZSTD_MAX_PRICE;
733 }
734 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735 U32 const offset = matches[matchNb].off;
736 U32 const end = matches[matchNb].len;
737 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738 for ( ; pos <= end ; pos++ ) {
739 U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740 DEBUGLOG(7, "rPos:%u => set initial price : %u",
741 pos, matchPrice);
742 opt[pos].mlen = pos;
743 opt[pos].off = offset;
744 opt[pos].litlen = litlen;
745 opt[pos].price = matchPrice;
746 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747 } }
748 last_pos = pos-1;
749 }
750 }
751
752 /* check further positions */
753 for (cur = 1; cur <= last_pos; cur++) {
754 const BYTE* const inr = ip + cur;
755 assert(cur < ZSTD_OPT_NUM);
756
757 /* Fix current position with one literal if cheaper */
758 { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
759 int price; /* note : contribution can be negative */
760 if (cur > litlen) {
761 price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
762 } else {
763 price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764 }
765 assert(price < 1000000000); /* overflow check */
766 if (price <= opt[cur].price) {
767 DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
768 cur, price, opt[cur].price);
769 opt[cur].mlen = 1;
770 opt[cur].off = 0;
771 opt[cur].litlen = litlen;
772 opt[cur].price = price;
773 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774 } }
775
776 /* last match must start at a minimum distance of 8 from oend */
777 if (inr > ilimit) continue;
778
779 if (cur == last_pos) break;
780
781 if ( (optLevel==0) /*static*/
782 && (opt[cur+1].price <= opt[cur].price) )
783 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
784
785 { U32 const ll0 = (opt[cur].mlen != 1);
786 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
787 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
788 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
789 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
790 U32 matchNb;
791 if (!nbMatches) continue;
792
793 { U32 const maxML = matches[nbMatches-1].len;
794 DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
795 cur, nbMatches, maxML);
796
797 if ( (maxML > sufficient_len)
798 | (cur + maxML >= ZSTD_OPT_NUM) ) {
799 best_mlen = maxML;
800 best_off = matches[nbMatches-1].off;
801 last_pos = cur + 1;
802 goto _shortestPath;
803 }
804 }
805
806 /* set prices using matches found at position == cur */
807 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
808 U32 const offset = matches[matchNb].off;
809 repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
810 U32 const lastML = matches[matchNb].len;
811 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812 U32 mlen;
813
814 DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
815 matchNb, matches[matchNb].off, lastML, litlen);
816
817 for (mlen = lastML; mlen >= startML; mlen--) {
818 U32 const pos = cur + mlen;
819 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820
821 if ((pos > last_pos) || (price < opt[pos].price)) {
822 DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
823 pos, price, opt[pos].price);
824 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
825 opt[pos].mlen = mlen;
826 opt[pos].off = offset;
827 opt[pos].litlen = litlen;
828 opt[pos].price = price;
829 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830 } else {
831 if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
832 }
833 } } }
834 } /* for (cur = 1; cur <= last_pos; cur++) */
835
836 best_mlen = opt[last_pos].mlen;
837 best_off = opt[last_pos].off;
838 cur = last_pos - best_mlen;
839
840_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
841 assert(opt[0].mlen == 1);
842
843 /* reverse traversal */
844 DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
845 last_pos, cur);
846 { U32 selectedMatchLength = best_mlen;
847 U32 selectedOffset = best_off;
848 U32 pos = cur;
849 while (1) {
850 U32 const mlen = opt[pos].mlen;
851 U32 const off = opt[pos].off;
852 opt[pos].mlen = selectedMatchLength;
853 opt[pos].off = selectedOffset;
854 selectedMatchLength = mlen;
855 selectedOffset = off;
856 if (mlen > pos) break;
857 pos -= mlen;
858 } }
859
860 /* save sequences */
861 { U32 pos;
862 for (pos=0; pos < last_pos; ) {
863 U32 const llen = (U32)(ip - anchor);
864 U32 const mlen = opt[pos].mlen;
865 U32 const offset = opt[pos].off;
866 if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
867 pos += mlen; ip += mlen;
868
869 /* repcodes update : like ZSTD_updateRep(), but update in place */
870 if (offset >= ZSTD_REP_NUM) { /* full offset */
871 rep[2] = rep[1];
872 rep[1] = rep[0];
873 rep[0] = offset - ZSTD_REP_MOVE;
874 } else { /* repcode */
875 U32 const repCode = offset + (llen==0);
876 if (repCode) { /* note : if repCode==0, no change */
877 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
878 if (repCode >= 2) rep[2] = rep[1];
879 rep[1] = rep[0];
880 rep[0] = currentOffset;
881 }
882 }
883
884 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
885 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
886 anchor = ip;
887 } }
888 ZSTD_setLog2Prices(optStatePtr);
889 } /* while (ip < ilimit) */
890
891 /* Return the last literals size */
892 return iend - anchor;
893}
894
895
896size_t ZSTD_compressBlock_btopt(
897 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
899{
900 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
902}
903
904size_t ZSTD_compressBlock_btultra(
905 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
907{
908 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
909}
910
911size_t ZSTD_compressBlock_btopt_extDict(
912 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
914{
915 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
916}
917
918size_t ZSTD_compressBlock_btultra_extDict(
919 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
921{
922 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
923}
924