/*
    LZ4 HC - High Compression Mode of LZ4
    Copyright (C) 2011-2017, Yann Collet.

    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:

    * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following disclaimer
    in the documentation and/or other materials provided with the
    distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    You can contact the author at :
       - LZ4 source repository : https://github.com/lz4/lz4
       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */


/* *************************************
*  Tuning Parameter
***************************************/

/*! HEAPMODE :
 *  Select how the default compression function will allocate its workspace memory :
 *  on the stack (0:fastest), or on the heap (1:requires malloc()).
 *  Since the workspace is rather large, heap mode is recommended.
 */
#ifndef LZ4HC_HEAPMODE
#  define LZ4HC_HEAPMODE 1
#endif
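
/* A minimal build sketch (illustrative, not part of the library) :
 * the workspace is an LZ4_streamHC_t of roughly LZ4_STREAMHCSIZE bytes (~256 KB),
 * which is why stack allocation is only advisable on threads with large stacks.
 *
 *   cc -DLZ4HC_HEAPMODE=0 -c lz4.c lz4hc.c     (workspace on the stack)
 *   cc -DLZ4HC_HEAPMODE=1 -c lz4.c lz4hc.c     (workspace malloc()'ed, default)
 */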

/*===   Dependency   ===*/
#define LZ4_HC_STATIC_LINKING_ONLY
#include "lz4hc.h"


/*===   Common LZ4 definitions   ===*/
#if defined(__GNUC__)
#  pragma GCC diagnostic ignored "-Wunused-function"
#endif
#if defined (__clang__)
#  pragma clang diagnostic ignored "-Wunused-function"
#endif

#define LZ4_COMMONDEFS_ONLY
#include "lz4.c"   /* LZ4_count, constants, mem */


/*===   Constants   ===*/
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)


/*===   Macros   ===*/
#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
#define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
#define HASH_FUNCTION(i)         (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
#define DELTANEXTMAXD(p)         chainTable[(p) & LZ4HC_MAXD_MASK]    /* flexible, LZ4HC_MAXD dependent */
#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */

static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
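
/* Illustration (not part of the library) : LZ4HC_hashPtr() reads the first
 * MINMATCH (4) bytes at ptr, multiplies by 2654435761 (a prime close to
 * 2^32 / golden ratio, Knuth's multiplicative hashing), and keeps the top
 * LZ4HC_HASH_LOG bits. All positions starting with the same 4 bytes land in
 * the same hashTable bucket; chainTable then links successive occurrences. */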


/**************************************
*  HC Compression
**************************************/
static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
{
    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
    hc4->nextToUpdate = 64 KB;
    hc4->base = start - 64 KB;
    hc4->end = start;
    hc4->dictBase = start - 64 KB;
    hc4->dictLimit = 64 KB;
    hc4->lowLimit = 64 KB;
}


/* Update chains up to ip (excluded) */
LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
{
    U16* const chainTable = hc4->chainTable;
    U32* const hashTable  = hc4->hashTable;
    const BYTE* const base = hc4->base;
    U32 const target = (U32)(ip - base);
    U32 idx = hc4->nextToUpdate;

    while (idx < target) {
        U32 const h = LZ4HC_hashPtr(base+idx);
        size_t delta = idx - hashTable[h];
        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
        DELTANEXTU16(chainTable, idx) = (U16)delta;
        hashTable[h] = idx;
        idx++;
    }

    hc4->nextToUpdate = target;
}

/** LZ4HC_countBack() :
 * @return : negative value, the number of bytes common to ip[] and match[]
 *           immediately before these positions (0 if none).
 *           Never reads below iMin / mMin. */
LZ4_FORCE_INLINE
int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
                    const BYTE* const iMin, const BYTE* const mMin)
{
    int back = 0;
    while ( (ip+back > iMin)
         && (match+back > mMin)
         && (ip[back-1] == match[back-1]))
            back--;
    return back;
}
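
/* Worked example (illustrative) : if the 3 bytes preceding both ip and match
 * are identical and the 4th differs, LZ4HC_countBack() returns -3, allowing
 * the caller to extend a match backwards by 3 bytes. */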

/* LZ4HC_countPattern() :
 * pattern32 must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!) */
static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
{
    const BYTE* const iStart = ip;
    reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;

    while (likely(ip < iEnd-(sizeof(pattern)-1))) {
        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
        if (!diff) { ip+=sizeof(pattern); continue; }
        ip += LZ4_NbCommonBytes(diff);
        return (unsigned)(ip - iStart);
    }

    if (LZ4_isLittleEndian()) {
        reg_t patternByte = pattern;
        while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
            ip++; patternByte >>= 8;
        }
    } else {  /* big endian */
        U32 bitOffset = (sizeof(pattern)*8) - 8;
        while (ip < iEnd) {
            BYTE const byte = (BYTE)(pattern >> bitOffset);
            if (*ip != byte) break;
            ip ++; bitOffset -= 8;
        }
    }

    return (unsigned)(ip - iStart);
}
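
/* Worked example (illustrative) : with pattern32 = 0x61616161 ("aaaa"),
 * LZ4HC_countPattern(ip, iEnd, pattern32) returns the length of the run of
 * 'a' bytes starting at ip. A 2-byte pattern "ab" is passed as "abab";
 * length-3 patterns do not tile a 32-bit word, hence the exclusion. */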

/* LZ4HC_reverseCountPattern() :
 * pattern must be a sample of a repetitive pattern of length 1, 2 or 4 (but not 3!)
 * read using natural platform endianness */
static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
{
    const BYTE* const iStart = ip;

    while (likely(ip >= iLow+4)) {
        if (LZ4_read32(ip-4) != pattern) break;
        ip -= 4;
    }
    {   const BYTE* bytePtr = (const BYTE*)(&pattern) + 3;  /* works for any endianness */
        while (likely(ip>iLow)) {
            if (ip[-1] != *bytePtr) break;
            ip--; bytePtr--;
    }   }
    return (unsigned)(iStart - ip);
}

typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;

LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
    LZ4HC_CCtx_internal* hc4,
    const BYTE* const ip,
    const BYTE* const iLowLimit,
    const BYTE* const iHighLimit,
    int longest,
    const BYTE** matchpos,
    const BYTE** startpos,
    const int maxNbAttempts)
{
    U16* const chainTable = hc4->chainTable;
    U32* const HashTable = hc4->hashTable;
    const BYTE* const base = hc4->base;
    const U32 dictLimit = hc4->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
    const BYTE* const dictBase = hc4->dictBase;
    int const delta = (int)(ip-iLowLimit);
    int nbAttempts = maxNbAttempts;
    U32 const pattern = LZ4_read32(ip);
    U32 matchIndex;
    repeat_state_e repeat = rep_untested;
    size_t srcPatternLength = 0;

    DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
    /* First Match */
    LZ4HC_Insert(hc4, ip);
    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
    DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
                matchIndex, lowLimit);

    while ((matchIndex>=lowLimit) && (nbAttempts)) {
        DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
        nbAttempts--;
        if (matchIndex >= dictLimit) {
            const BYTE* const matchPtr = base + matchIndex;
            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
                if (LZ4_read32(matchPtr) == pattern) {
                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
#if 0
                    /* more generic but unfortunately slower on clang */
                    int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
#else
                    int back = 0;
                    while ( (ip+back > iLowLimit)
                         && (matchPtr+back > lowPrefixPtr)
                         && (ip[back-1] == matchPtr[back-1])) {
                            back--;
                    }
#endif
                    mlt -= back;

                    if (mlt > longest) {
                        longest = mlt;
                        *matchpos = matchPtr+back;
                        *startpos = ip+back;
            }   }   }
        } else {   /* matchIndex < dictLimit */
            const BYTE* const matchPtr = dictBase + matchIndex;
            if (LZ4_read32(matchPtr) == pattern) {
                int mlt;
                int back = 0;
                const BYTE* vLimit = ip + (dictLimit - matchIndex);
                if (vLimit > iHighLimit) vLimit = iHighLimit;
                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
                while ( (ip+back > iLowLimit)
                     && (matchIndex+back > lowLimit)
                     && (ip[back-1] == matchPtr[back-1]))
                        back--;
                mlt -= back;
                if (mlt > longest) {
                    longest = mlt;
                    *matchpos = base + matchIndex + back;
                    *startpos = ip + back;
        }   }   }

        {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
            matchIndex -= nextOffset;
            if (nextOffset==1) {
                /* may be a repeated pattern */
                if (repeat == rep_untested) {
                    if ( ((pattern & 0xFFFF) == (pattern >> 16))
                      &  ((pattern & 0xFF)   == (pattern >> 24)) ) {
                        repeat = rep_confirmed;
                        srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
                    } else {
                        repeat = rep_not;
                }   }
                if ( (repeat == rep_confirmed)
                  && (matchIndex >= dictLimit) ) {   /* same segment only */
                    const BYTE* const matchPtr = base + matchIndex;
                    if (LZ4_read32(matchPtr) == pattern) {   /* good candidate */
                        size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                        const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
                        size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
                        size_t const currentSegmentLength = backLength + forwardPatternLength;

                        if ( (currentSegmentLength >= srcPatternLength)   /* current pattern segment large enough to contain full srcPatternLength */
                          && (forwardPatternLength <= srcPatternLength) ) {   /* haven't reached this position yet */
                            matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength;   /* best position, full pattern, might be followed by more match */
                        } else {
                            matchIndex -= (U32)backLength;   /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
                        }
        }   }   }   }
    }   /* while ((matchIndex>=lowLimit) && (nbAttempts)) */

    return longest;
}

LZ4_FORCE_INLINE
int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index table will be updated */
                                 const BYTE* const ip, const BYTE* const iLimit,
                                 const BYTE** matchpos,
                                 const int maxNbAttempts)
{
    const BYTE* uselessPtr = ip;
    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
     * but this won't be the case here, as we define iLowLimit==ip,
     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts);
}



typedef enum {
    noLimit = 0,           /* dst buffer assumed large enough, no bound checking */
    limitedOutput = 1,     /* fail (return 0) if dst buffer would overflow */
    limitedDestSize = 2,   /* fill dst buffer completely, truncating src if necessary */
} limitedOutput_directive;

/* LZ4HC_encodeSequence() :
 * @return : 0 if ok,
 *           1 if buffer issue detected */
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
    const BYTE** ip,
    BYTE** op,
    const BYTE** anchor,
    int matchLength,
    const BYTE* const match,
    limitedOutput_directive limit,
    BYTE* oend)
{
    size_t length;
    BYTE* const token = (*op)++;

#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
    static const BYTE* start = NULL;
    static U32 totalCost = 0;
    U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
    U32 const ll = (U32)(*ip - *anchor);
    U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
    U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
    if (start==NULL) start = *anchor;   /* only works for single segment */
    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
                pos,
                (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
                cost, totalCost);
    totalCost += cost;
#endif

    /* Encode Literal length */
    length = (size_t)(*ip - *anchor);
    if ((limit) && ((*op + (length >> 8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
    if (length >= RUN_MASK) {
        size_t len = length - RUN_MASK;
        *token = (RUN_MASK << ML_BITS);
        for(; len >= 255 ; len -= 255) *(*op)++ = 255;
        *(*op)++ = (BYTE)len;
    } else {
        *token = (BYTE)(length << ML_BITS);
    }

    /* Copy Literals */
    LZ4_wildCopy(*op, *anchor, (*op) + length);
    *op += length;

    /* Encode Offset */
    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;

    /* Encode MatchLength */
    assert(matchLength >= MINMATCH);
    length = (size_t)(matchLength - MINMATCH);
    if ((limit) && (*op + (length >> 8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
    if (length >= ML_MASK) {
        *token += ML_MASK;
        length -= ML_MASK;
        for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
        if (length >= 255) { length -= 255; *(*op)++ = 255; }
        *(*op)++ = (BYTE)length;
    } else {
        *token += (BYTE)(length);
    }

    /* Prepare next loop */
    *ip += matchLength;
    *anchor = *ip;

    return 0;
}
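
/* Worked example (illustrative) : encoding 5 literals followed by a 20-byte
 * match at offset 100 emits :
 *   token    = (5 << ML_BITS) | ML_MASK  ->  0x5F   (matchLength-MINMATCH = 16 > 15)
 *   literals = 5 raw bytes
 *   offset   = 0x64 0x00                            (little-endian 16-bit)
 *   extra ML = 0x01                                 (16 - ML_MASK = 1)
 * for a total cost of 1 + 5 + 2 + 1 = 9 bytes. */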

/* btopt */
#include "lz4opt.h"


static int LZ4HC_compress_hashChain (
    LZ4HC_CCtx_internal* const ctx,
    const char* const source,
    char* const dest,
    int* srcSizePtr,
    int const maxOutputSize,
    unsigned maxNbAttempts,
    limitedOutput_directive limit
    )
{
    const int inputSize = *srcSizePtr;

    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = (iend - LASTLITERALS);

    BYTE* optr = (BYTE*) dest;
    BYTE* op = (BYTE*) dest;
    BYTE* oend = op + maxOutputSize;

    int ml, ml2, ml3, ml0;
    const BYTE* ref = NULL;
    const BYTE* start2 = NULL;
    const BYTE* ref2 = NULL;
    const BYTE* start3 = NULL;
    const BYTE* ref3 = NULL;
    const BYTE* start0;
    const BYTE* ref0;

    /* init */
    *srcSizePtr = 0;
    if (limit == limitedDestSize && maxOutputSize < 1) return 0;   /* Impossible to store anything */
    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;        /* Unsupported input size (too large or negative) */

    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack to support the LZ4 decompressor's requirement that the block ends with literals */
    if (inputSize < LZ4_minLength) goto _last_literals;   /* Input too small, no compression (all literals) */

    /* Main Loop */
    while (ip < mflimit) {
        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts);
        if (ml<MINMATCH) { ip++; continue; }

        /* saved, in case we would skip too much */
        start0 = ip;
        ref0 = ref;
        ml0 = ml;

_Search2:
        if (ip+ml < mflimit)
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
        else
            ml2 = ml;

        if (ml2 == ml) {   /* No better match */
            optr = op;
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
            continue;
        }

        if (start0 < ip) {
            if (start2 < ip + ml0) {   /* empirical */
                ip = start0;
                ref = ref0;
                ml = ml0;
            }
        }

        /* Here, start0==ip */
        if ((start2 - ip) < 3) {   /* First Match too small : removed */
            ml = ml2;
            ip = start2;
            ref = ref2;
            goto _Search2;
        }

_Search3:
        /* At this stage, we have :
         *  ml2 > ml1, and
         *  ip1+3 <= ip2 (usually < ip1+ml1) */
        if ((start2 - ip) < OPTIMAL_ML) {
            int correction;
            int new_ml = ml;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0) {
                start2 += correction;
                ref2 += correction;
                ml2 -= correction;
            }
        }
        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */

        if (start2 + ml2 < mflimit)
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
        else
            ml3 = ml2;

        if (ml3 == ml2) {   /* No better match : 2 sequences to encode */
            /* ip & ref are known; Now for ml */
            if (start2 < ip+ml) ml = (int)(start2 - ip);
            /* Now, encode 2 sequences */
            optr = op;
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
            ip = start2;
            optr = op;
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) goto _dest_overflow;
            continue;
        }

        if (start3 < ip+ml+3) {   /* Not enough space for match 2 : remove it */
            if (start3 >= (ip+ml)) {   /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
                if (start2 < ip+ml) {
                    int correction = (int)(ip+ml - start2);
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                    if (ml2 < MINMATCH) {
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
                    }
                }

                optr = op;
                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
                ip = start3;
                ref = ref3;
                ml = ml3;

                start0 = start2;
                ref0 = ref2;
                ml0 = ml2;
                goto _Search2;
            }

            start2 = start3;
            ref2 = ref3;
            ml2 = ml3;
            goto _Search3;
        }

        /*
         * OK, now we have 3 ascending matches; let's write at least the first one
         * ip & ref are known; Now for ml
         */
        if (start2 < ip+ml) {
            if ((start2 - ip) < (int)ML_MASK) {
                int correction;
                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                correction = ml - (int)(start2 - ip);
                if (correction > 0) {
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                }
            } else {
                ml = (int)(start2 - ip);
            }
        }
        optr = op;
        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;

        ip = start2;
        ref = ref2;
        ml = ml2;

        start2 = start3;
        ref2 = ref3;
        ml2 = ml3;

        goto _Search3;
    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRunSize = (size_t)(iend - anchor);   /* literals */
        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
        size_t const totalSize = 1 + litLength + lastRunSize;
        if (limit == limitedDestSize) oend += LASTLITERALS;   /* restore correct value */
        if (limit && (op + totalSize > oend)) {
            if (limit == limitedOutput) return 0;   /* Check output limit */
            /* adapt lastRunSize to fill 'dest' */
            lastRunSize = (size_t)(oend - op) - 1;
            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
            lastRunSize -= litLength;
        }
        ip = anchor + lastRunSize;

        if (lastRunSize >= RUN_MASK) {
            size_t accumulator = lastRunSize - RUN_MASK;
            *op++ = (RUN_MASK << ML_BITS);
            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRunSize << ML_BITS);
        }
        memcpy(op, anchor, lastRunSize);
        op += lastRunSize;
    }

    /* End */
    *srcSizePtr = (int) (((const char*)ip) - source);
    return (int) (((char*)op)-dest);

_dest_overflow:
    if (limit == limitedDestSize) {
        op = optr;   /* restore correct out pointer */
        goto _last_literals;
    }
    return 0;
}


static int LZ4HC_compress_generic (
    LZ4HC_CCtx_internal* const ctx,
    const char* const src,
    char* const dst,
    int* const srcSizePtr,
    int const dstCapacity,
    int cLevel,
    limitedOutput_directive limit
    )
{
    ctx->end += *srcSizePtr;
    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
    if (cLevel > 9) {
        if (limit == limitedDestSize) cLevel = 10;
        switch (cLevel) {
            case 10:
                return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1<<12, limit);
            case 11:
                return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 512, 128, 0);
            default:
                /* fall-through */
            case 12:
                return LZ4HC_compress_optimal(ctx, src, dst, *srcSizePtr, dstCapacity, limit, 1<<13, LZ4_OPT_NUM, 1);
        }
    }
    return LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, 1 << (cLevel-1), limit);   /* levels 1-9 */
}
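
/* Summary of the level dispatch above (derived from the code, for reference) :
 *   levels 1-9   : hash-chain search with 1<<(cLevel-1) attempts per position
 *                  (1 attempt at level 1, 256 at level 9)
 *   level  10    : hash-chain search, 4096 attempts
 *   levels 11-12 : optimal parser (lz4opt.h), with increasing search depth */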


int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }

int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
{
    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned on a pointer-sized boundary */
    LZ4HC_init (ctx, (const BYTE*)src);
    if (dstCapacity < LZ4_compressBound(srcSize))
        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
    else
        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit);
}

int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
{
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
#else
    LZ4_streamHC_t state;
    LZ4_streamHC_t* const statePtr = &state;
#endif
    int cSize;
    if (statePtr == NULL) return 0;   /* defensive : malloc() may fail in heap mode; report as compression failure */
    cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
    free(statePtr);
#endif
    return cSize;
}
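
/* A minimal usage sketch (illustrative, not part of the library) :
 *
 *   #include "lz4.h"
 *   #include "lz4hc.h"
 *
 *   int compress_block(const char* src, int srcSize, char* dst)
 *   {
 *       int const dstCapacity = LZ4_compressBound(srcSize);
 *       return LZ4_compress_HC(src, dst, srcSize, dstCapacity, LZ4HC_CLEVEL_DEFAULT);
 *       (returns the compressed size, or 0 if compression failed)
 *   }
 *
 * assuming dst is at least LZ4_compressBound(srcSize) bytes. */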

/* LZ4_compress_HC_destSize() :
 * only compatible with the Hash Chain match finder */
int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
{
    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
    LZ4HC_init(ctx, (const BYTE*) source);
    return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize);
}



/**************************************
*  Streaming Functions
**************************************/
/* allocation */
LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {
    if (!LZ4_streamHCPtr) return 0;   /* support free on NULL */
    free(LZ4_streamHCPtr);
    return 0;
}


/* initialization */
void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
{
    LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
    LZ4_streamHCPtr->internal_donotuse.base = NULL;
    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
}

void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
{
    if (compressionLevel < 1) compressionLevel = 1;
    if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
    LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
}

int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
{
    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
    if (dictSize > 64 KB) {
        dictionary += dictSize - 64 KB;
        dictSize = 64 KB;
    }
    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
    ctxPtr->end = (const BYTE*)dictionary + dictSize;
    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
    return dictSize;
}


/* compression */

static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
{
    if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */

    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
    ctxPtr->lowLimit  = ctxPtr->dictLimit;
    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
    ctxPtr->dictBase  = ctxPtr->base;
    ctxPtr->base = newBlock - ctxPtr->dictLimit;
    ctxPtr->end  = newBlock;
    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
}

static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
                                            const char* src, char* dst,
                                            int* srcSizePtr, int dstCapacity,
                                            limitedOutput_directive limit)
{
    LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
    /* auto-init if forgotten */
    if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src);

    /* Check overflow */
    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
        if (dictSize > 64 KB) dictSize = 64 KB;
        LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
    }

    /* Check if blocks follow each other */
    if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);

    /* Check overlapping input/dictionary space */
    {   const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
        const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
        const BYTE* const dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
        if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
        }
    }

    return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
}

int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
{
    if (dstCapacity < LZ4_compressBound(srcSize))
        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
    else
        return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, noLimit);
}

int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
{
    return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, limitedDestSize);
}



/* dictionary saving */

int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
{
    LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
    int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
    if (dictSize > 64 KB) dictSize = 64 KB;
    if (dictSize < 4) dictSize = 0;
    if (dictSize > prefixSize) dictSize = prefixSize;
    memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
    {   U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
        streamPtr->base = streamPtr->end - endIndex;
        streamPtr->dictLimit = endIndex - dictSize;
        streamPtr->lowLimit = endIndex - dictSize;
        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
    }
    return dictSize;
}
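
/* A minimal streaming sketch (illustrative, not part of the library) :
 * compress a sequence of dependent blocks, assuming each block's source
 * buffer remains readable until the next call :
 *
 *   LZ4_streamHC_t* const stream = LZ4_createStreamHC();
 *   LZ4_resetStreamHC(stream, LZ4HC_CLEVEL_DEFAULT);
 *   for (each block b) {
 *       int const cSize = LZ4_compress_HC_continue(stream, b.src, b.dst, b.srcSize, b.dstCapacity);
 *       if (cSize == 0) handle_error();
 *   }
 *   LZ4_freeStreamHC(stream);
 *
 * LZ4_saveDictHC() can be used to move the last 64 KB of history into a
 * stable buffer when source buffers are about to be recycled. */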


/***********************************
*  Deprecated Functions
***********************************/
/* These functions currently generate deprecation warnings */
/* Deprecated compression functions */
int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }


/* Deprecated streaming functions */
int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }

int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
{
    LZ4HC_CCtx_internal* ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : state is not aligned on a pointer-sized boundary */
    LZ4HC_init(ctx, (const BYTE*)inputBuffer);
    ctx->inputBuffer = (BYTE*)inputBuffer;
    return 0;
}

void* LZ4_createHC (char* inputBuffer)
{
    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
    if (hc4 == NULL) return NULL;   /* not enough memory */
    LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
    hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
    return hc4;
}

int LZ4_freeHC (void* LZ4HC_Data) {
    if (!LZ4HC_Data) return 0;   /* support free on NULL */
    FREEMEM(LZ4HC_Data);
    return 0;
}

int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
{
    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, noLimit);
}

int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
{
    return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
}

char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
    LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
    int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
    return (char*)(hc4->inputBuffer + dictSize);
}