/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* FWeight was meant for "Fractional weight",
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
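
/* Example (illustrative) : a worked instance of ZSTD_fracWeight()
 * under the default BITCOST_ACCURACY==8.
 * rawStat=5 : stat=6, hb=2, BWeight=2*256=512, FWeight=(6<<8)>>2=384,
 * so the weight is 896, i.e. 3.5 "bits" in fixed point,
 * approximating log2(6)+1 ~= 3.58.
 * The constant +1 offset cancels out in price computations,
 * which always subtract one WEIGHT() from another (see ZSTD_setBasePrices()).
 * The helper below is hypothetical, kept disabled like the other test code in this file. */
#if 0   /* compile-checkable sketch */
MEM_STATIC void ZSTD_fracWeight_example(void)
{
    assert(ZSTD_fracWeight(5) == 896);                   /* 3.5 bits at 8-bit accuracy */
    assert(ZSTD_bitWeight(5) == 2*BITCOST_MULTIPLIER);   /* full-bit approximation */
}
#endif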

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}
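
/* Example (illustrative) : downscaling a small table by shift=2.
 * With base_1guaranteed, {0, 7, 16} becomes {1, 2, 5} (every elt >= 1, sum 8) ;
 * with base_0possible, the same input becomes {0, 2, 5} (zeroes stay zero, sum 7).
 * The guaranteed base of 1 keeps every symbol priceable.
 * The helper below is hypothetical, kept disabled like the other test code in this file. */
#if 0   /* compile-checkable sketch */
MEM_STATIC void ZSTD_downscaleStats_example(void)
{
    unsigned table[3] = { 0, 7, 16 };
    U32 const sum = ZSTD_downscaleStats(table, 2 /*lastEltIndex*/, 2 /*shift*/, base_1guaranteed);
    assert(sum == 8);
    assert(table[0] == 1 && table[1] == 2 && table[2] == 5);
}
#endif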

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
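
/* Example (illustrative) : with logTarget=11, scaling triggers once the sum
 * reaches 2 * 2^11 = 4096. A sum of 9000 gives factor = 9000>>11 = 4,
 * hence shift = ZSTD_highbit32(4) = 2 : every frequency is divided by 4
 * (plus the guaranteed base of 1), bringing the sum back near the 2K target. */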

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (whose length may be 0) ;
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = optPtr->litSumBasePrice * litLength;
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
            price -= litPrice;
        }
        return price;
    }
}
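
/* Example (illustrative) : in the dynamic branch above, each byte c costs
 * WEIGHT(litSum) - WEIGHT(litFreq[c]) ~= log2(litSum / litFreq[c]) bits.
 * For litSum=2047 and litFreq[c]=255, WEIGHT() (fractional accuracy) yields
 * 12.0 - 9.0 = 3.0 bits, matching log2(2048/256) = 3 ;
 * litPriceMax caps the price of very rare bytes at one bit below the base price. */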

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);

    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX,
     * because such lengths aren't representable in the zstd format.
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
     * In such a case, the block would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with the literals price (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice())
 * to get the full cost of a sequence.
 * @offBase : sumtype, representing an offset or a repcode, and using the numeric representation of ZSTD_storeSeq()
 * @optLevel : when <2, favors small offsets for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offBase);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
        return WEIGHT(mlBase, optLevel)
             + ((16 + offCode) * BITCOST_MULTIPLIER);  /* emulated offset cost */

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER;  /* handicap for long-distance offsets, favors decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly, to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
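
/* Example (illustrative) : pricing the offset part.
 * For a real offset, offBase = OFFSET_TO_OFFBASE(distance), so a distance
 * around 1 MB maps to offCode = ZSTD_highbit32(offBase) ~= 20, paying ~20 bits
 * of raw offset bits plus the statistical cost of the offCode symbol.
 * At optLevel<2, offCode 24 takes an extra (24-19)*2 = 10-bit handicap,
 * nudging the parser toward shorter distances that stay cache-resident. */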

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offBase, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : follows storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * result is only valid for equality comparisons ;
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
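
/* Example (illustrative) : why the shift keeps 3-byte reads comparable.
 * For length==3, a full 4-byte word is read, then the byte which does not
 * belong to the match is shifted out : on little-endian, bytes {b0,b1,b2,b3}
 * load as 0xb3b2b1b0 and <<8 discards b3 ; on big-endian, they load as
 * 0xb0b1b2b3 and >>8 discards b3 symmetrically. Two such reads are therefore
 * equal exactly when their first 3 bytes are equal, which is all callers need. */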


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;    /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hashLog is small (<= 11) */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency ; misses a bit of compression, but other solutions can corrupt the tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}
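
/* Layout note (illustrative) : the binary tree lives inside chainTable.
 * Each position idx owns two adjacent cells : bt[2*(idx&btMask)] points to the
 * root of the subtree of smaller suffixes, bt[2*(idx&btMask)+1] to the larger ones.
 * commonLengthSmaller/commonLengthLarger carry the prefix lengths already proven
 * on each side of the descent, so early match bytes are never re-compared. */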

FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) )  /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];       /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }   /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}

typedef U32 (*ZSTD_getAllMatchesFn)(
        ZSTD_match_t*,
        ZSTD_matchState_t*,
        U32*,
        const BYTE*,
        const BYTE*,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}
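
/* Example (illustrative) : skipping through a raw sequence store.
 * With seq[0] = {litLength=3, matchLength=5} and seq[1] = {litLength=2, matchLength=4},
 * skipping 10 bytes consumes all 8 bytes of seq[0] (pos advances to 1),
 * then stops with posInSequence = 2, i.e. 2 bytes into the literals of seq[1]. */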

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough, based on its 'startPosInBlock' and
 * 'endPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * beyond 'endPosInBlock'. As such, we need to correct for these "overshoots".
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
}

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif
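
/* Overview (illustrative) : ZSTD_compressBlock_opt_generic() below runs a
 * shortest-path style search over positions of the block :
 * opt[pos].price is the cheapest known cost to reach relative position pos ;
 * the forward pass relaxes each position with either one literal or one of the
 * candidate matches, and the _shortestPath epilogue walks backward from the
 * selected endpoint to emit the chosen sequences in order. */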

FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const int optLevel,
                         const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastSequence;
    ZSTD_optLdm_t optLdm;

    ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend - ip));
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
            opt[0].mlen = 0;  /* means is_a_literal */
            opt[0].litlen = litlen;
            /* We don't need to include the actual price of the literals because
             * it is static for the duration of the forward pass, and is included
             * in every price. We include the literal length to avoid negative
             * prices when we subtract the previous literal length.
             */
            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffBase = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffBase;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offBase = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost((int)sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offBase;
                        opt[pos].litlen = litlen;
                        opt[pos].price = (int)sequencePrice;
                }   }
                last_pos = pos-1;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                int const price = opt[cur-1].price
                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
                }
            }

            /* Set the repcodes of the current position. We must do it here
             * because we rely on the repcodes of the 2nd to last sequence being
             * correct to set the next chunk's repcodes during the backward
             * traversal.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].mlen != 0) {
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            } else {
                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from iend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = (U32)opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                                inr-istart, cur, nbMatches, maxML);

                    if ( (maxML > sufficient_len)
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches-1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);  /* check overflow */

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);

        /* Set the next chunk's repcodes based on the repcodes of the beginning
         * of the last match, and the last sequence. This avoids us having to
         * update them while traversing the sequences.
         */
        if (lastSequence.mlen != 0) {
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(reps));
        } else {
            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
        }

        {   U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
            opt[storeEnd] = lastSequence;
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offBase = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}

static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}

size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}


/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error out ; its narrow contract must be respected.
 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                     seqStore_t* seqStore,
                     U32 rep[ZSTD_REP_NUM],
               const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history, only keep entropy stats */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
}

size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-passes strategy:
     * this strategy makes a first pass over the first block, to collect statistics
     * in order to seed the next round's statistics with them.
     * After the 1st pass, the function forgets the history, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded in tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)                          /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
      && (curr == ms->window.dictLimit)                     /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)                  /* input large enough to not employ default stats */
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is only specific for the first block (no prefix) */