/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-present, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/

/*-************************************
*  Tuning parameters
**************************************/
/*
 * LZ4_HEAPMODE :
 * Select how default compression functions will allocate memory for their hash table:
 * on the stack (0: default, fastest), or on the heap (1: requires malloc()).
 */
#ifndef LZ4_HEAPMODE
#  define LZ4_HEAPMODE 0
#endif

/*
 * ACCELERATION_DEFAULT :
 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
 */
#define ACCELERATION_DEFAULT 1


/*-************************************
*  CPU Feature Detection
**************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (e.g., GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
     ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif
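
/* A minimal usage sketch (illustrative, not part of upstream builds) :
 * since the macro "can be defined externally", it is meant to be set from
 * the build line, for example
 *     cc -c -DLZ4_FORCE_MEMORY_ACCESS=0 lz4.c
 * which forces the safe memcpy()-based access even on targets where the
 * auto-detection above would have picked method 1 or 2. */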

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
#  define LZ4_FORCE_SW_BITCOUNT
#endif



/*-************************************
*  Dependency
**************************************/
/*
 * LZ4_SRC_INCLUDED:
 * Amalgamation flag, whether lz4.c is included
 */
#ifndef LZ4_SRC_INCLUDED
#  define LZ4_SRC_INCLUDED 1
#endif

#ifndef LZ4_STATIC_LINKING_ONLY
#define LZ4_STATIC_LINKING_ONLY
#endif

#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
#endif

#define LZ4_STATIC_LINKING_ONLY  /* LZ4_DISTANCE_MAX */
#include "lz4.h"
/* see also "memory routines" below */


/*-************************************
*  Compiler Options
**************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>
#  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4293)  /* disable: C4293: too large shift (32-bits) */
#endif  /* _MSC_VER */

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
#else
#  define LZ4_FORCE_O2_GCC_PPC64LE
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
#endif

#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif
#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif


/*-************************************
*  Memory routines
**************************************/
#include <stdlib.h>   /* malloc, calloc, free */
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)
#include <string.h>   /* memset, memcpy */
#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
*  Common Constants
**************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64
static const int LZ4_minLength = (MFLIMIT+1);

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)


/*-************************************
*  Error detection
**************************************/
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
#  include <assert.h>
#else
#  ifndef assert
#    define assert(condition) ((void)0)
#  endif
#endif

#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */

#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
#  include <stdio.h>
static int g_debuglog_enable = 1;
#  define DEBUGLOG(l, ...) {                          \
        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
            fprintf(stderr, __FILE__ ": ");           \
            fprintf(stderr, __VA_ARGS__);             \
            fprintf(stderr, " \n");                   \
        } }
#else
#  define DEBUGLOG(l, ...) {}    /* disabled */
#endif


/*-************************************
*  Types
**************************************/
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#  include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/*-************************************
*  Reading and writing into memory
**************************************/
static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }

static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U32 LZ4_read32(const void* memPtr)
{
    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static reg_t LZ4_read_ARCH(const void* memPtr)
{
    reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write16(void* memPtr, U16 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

static void LZ4_write32(void* memPtr, U32 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */


static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}

static void LZ4_writeLE16(void* memPtr, U16 value)
{
    if (LZ4_isLittleEndian()) {
        LZ4_write16(memPtr, value);
    } else {
        BYTE* p = (BYTE*)memPtr;
        p[0] = (BYTE) value;
        p[1] = (BYTE)(value>>8);
    }
}
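
/* Illustrative example : LZ4 match offsets are stored little-endian on all
 * platforms, so LZ4_writeLE16(op, 0x1234) always emits the bytes
 * { 0x34, 0x12 }, regardless of the host's native byte order. */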

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
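
/* A worked example of how these tables are used (see
 * LZ4_memcpy_using_offset_base() below) : for a match with offset 2, the
 * first 4 bytes are copied one at a time, producing s0 s1 s0 s1; srcPtr is
 * then advanced by inc32table[2]==2 so the next 4-byte memcpy replicates
 * that pattern, and dec64table[2]==0 leaves srcPtr a full 8 bytes behind
 * dstPtr, making the remaining wildCopy8 free of overlap hazards. */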


#ifndef LZ4_FAST_DEC_LOOP
#  if defined(__i386__) || defined(__x86_64__)
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, this optimization is disabled for clang because,
      * on certain mobile chipsets, it reduces performance. For more
      * information refer to https://github.com/lz4/lz4/pull/707. */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    if (offset < 8) {
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);
    LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */

    switch(offset) {
    case 1:
        memset(v, *srcPtr, 8);
        break;
    case 2:
        memcpy(v, srcPtr, 2);
        memcpy(&v[2], srcPtr, 2);
        memcpy(&v[4], &v[0], 4);
        break;
    case 4:
        memcpy(v, srcPtr, 4);
        memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif
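
/* Illustrative example : for offset==2 above, v is filled with the 2-byte
 * pattern repeated four times (s0 s1 s0 s1 s0 s1 s0 s1), so the copy loop
 * can expand the overlapping match with plain non-overlapping 8-byte
 * stores, overwriting up to 8 bytes beyond dstEnd as documented. */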


/*-************************************
*  Common functions
**************************************/
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    if (LZ4_isLittleEndian()) {
        if (sizeof(val)==8) {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctzll((U64)val) >> 3;
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctz((U32)val) >> 3;
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {   /* 64-bits */
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clzll((U64)val) >> 3;
#       else
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
                Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
                Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clz((U32)val) >> 3;
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
        }
    }
}
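
/* Worked example (little-endian, 64-bit) : LZ4_NbCommonBytes() receives the
 * XOR of two 8-byte loads. If the first two bytes match and the third
 * differs, val has exactly 16..23 trailing zero bits, so
 * ctz(val) >> 3 == 2, i.e. two common bytes. */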

#define STEPSIZE sizeof(reg_t)
LZ4_FORCE_INLINE
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) {
            pIn+=STEPSIZE; pMatch+=STEPSIZE;
        } else {
            return LZ4_NbCommonBytes(diff);
    }   }

    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
        return (unsigned)(pIn - pStart);
    }

    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}
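
/* Illustrative example : with pIn pointing at "abcdefXY" and pMatch at
 * "abcdefZW", LZ4_count() returns 6, the number of leading bytes the two
 * sequences share before pInLimit. The word-at-a-time loop makes this
 * roughly one comparison per sizeof(reg_t) bytes instead of one per byte. */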


#ifndef LZ4_COMMONDEFS_ONLY
/*-************************************
*  Local Constants
**************************************/
static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression runs slower on incompressible data */


/*-************************************
*  Local Structures and types
**************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
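
/* A minimal usage sketch of reaching these dictionary modes through the
 * public streaming API (illustrative; buffer names are hypothetical) :
 *
 *     LZ4_stream_t ls;
 *     LZ4_initStream(&ls, sizeof(ls));
 *     LZ4_loadDict(&ls, dictBuf, dictSize);   // register preceding content
 *     LZ4_compress_fast_continue(&ls, src, dst, srcSize, dstCapacity, 1);
 *
 * Internally, the first _continue() call after LZ4_loadDict() compresses
 * in usingExtDict mode; later blocks that directly follow the previous
 * one in memory can take the withPrefix64k fast path instead. */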


/*-************************************
*  Local Utils
**************************************/
int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }


/*-************************************
*  Internal Definitions used in Tests
**************************************/
#if defined (__cplusplus)
extern "C" {
#endif

int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);

int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
                                     int compressedSize, int maxOutputSize,
                                     const void* dictStart, size_t dictSize);

#if defined (__cplusplus)
}
#endif

/*-******************************
*  Compression functions
********************************/
static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
{
    if (tableType == byU16)
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    else
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
}

static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
{
    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
    if (LZ4_isLittleEndian()) {
        const U64 prime5bytes = 889523592379ULL;
        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
    } else {
        const U64 prime8bytes = 11400714785074694791ULL;
        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
    }
}

LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
{
    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
    return LZ4_hash4(LZ4_read32(p), tableType);
}
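
/* Worked example (assuming the default LZ4_MEMORY_USAGE of 14, hence
 * LZ4_HASHLOG == 12) : LZ4_hash4() is Fibonacci-style hashing. For byU32
 * it computes (sequence * 2654435761U) >> (32 - 12), keeping the top 12
 * bits of the multiply, i.e. a slot in a 4096-entry table; byU16 entries
 * are half-width, so that mode gets one extra bit (8192 slots). */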

static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
{
    switch (tableType)
    {
    default: /* fallthrough */
    case clearedTable: { /* illegal! */ assert(0); return; }
    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
    }
}

static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
{
    switch (tableType)
    {
    default: /* fallthrough */
    case clearedTable: /* fallthrough */
    case byPtr: { /* illegal! */ assert(0); return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
    }
}

static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
                                  void* tableBase, tableType_t const tableType,
                                  const BYTE* srcBase)
{
    switch (tableType)
    {
    case clearedTable: { /* illegal! */ assert(0); return; }
    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
    }
}

LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    U32 const h = LZ4_hashPosition(p, tableType);
    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
}

/* LZ4_getIndexOnHash() :
 * Returns the index of a match position registered in the hash table.
 * The match position itself must be reconstructed as base+index, or dictBase+index.
 * Assumption 1 : only valid if tableType == byU32 or byU16.
 * Assumption 2 : h is presumed valid (within limits of hash table)
 */
static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
{
    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    if (tableType == byU32) {
        const U32* const hashTable = (const U32*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
        return hashTable[h];
    }
    if (tableType == byU16) {
        const U16* const hashTable = (const U16*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
        return hashTable[h];
    }
    assert(0); return 0;  /* forbidden case */
}

static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
}

LZ4_FORCE_INLINE const BYTE*
LZ4_getPosition(const BYTE* p,
                const void* tableBase, tableType_t tableType,
                const BYTE* srcBase)
{
    U32 const h = LZ4_hashPosition(p, tableType);
    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
}

LZ4_FORCE_INLINE void
LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
                 const int inputSize,
                 const tableType_t tableType) {
    /* If compression failed during the previous step, then the context
     * is marked as dirty and therefore has to be fully reset.
     */
    if (cctx->dirty) {
        DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
        MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
        return;
    }

    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
     * therefore safe to use no matter what mode we're in. Otherwise, we figure
     * out if it's safe to leave as is or whether it needs to be reset.
     */
    if (cctx->tableType != clearedTable) {
        assert(inputSize >= 0);
        if (cctx->tableType != tableType
          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
          || tableType == byPtr
          || inputSize >= 4 KB)
        {
            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
            cctx->currentOffset = 0;
            cctx->tableType = clearedTable;
        } else {
            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
        }
    }

    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
     * than compressing without a gap. However, compressing with
     * currentOffset == 0 is faster still, so we preserve that case.
     */
    if (cctx->currentOffset != 0 && tableType == byU32) {
        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
        cctx->currentOffset += 64 KB;
    }

    /* Finally, clear history */
    cctx->dictCtx = NULL;
    cctx->dictionary = NULL;
    cctx->dictSize = 0;
}
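
/* Worked example of the gap trick above : if currentOffset is 1000 when a
 * new block starts, adding 64 KB makes the first new position index 66536.
 * Every stale table entry has index <= 1000, i.e. at least 65536 bytes
 * away, which is > LZ4_DISTANCE_MAX (65535), so stale entries can never be
 * mistaken for valid matches and the table needs no wipe. */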

/** LZ4_compress_generic() :
    inlined, to ensure branches are decided at compilation time */
LZ4_FORCE_INLINE int LZ4_compress_generic(
                 LZ4_stream_t_internal* const cctx,
                 const char* const source,
                 char* const dest,
                 const int inputSize,
                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                 const int maxOutputSize,
                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const int acceleration)
{
    int result;
    const BYTE* ip = (const BYTE*) source;

    U32 const startIndex = cctx->currentOffset;
    const BYTE* base = (const BYTE*) source - startIndex;
    const BYTE* lowLimit;

    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    const BYTE* const dictionary =
        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    const U32 dictSize =
        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with indexes in current context */

    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    const BYTE* const dictEnd = dictionary + dictSize;
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    /* the dictCtx currentOffset is indexed on the start of the dictionary,
     * while a dictionary in the current context precedes the currentOffset */
    const BYTE* dictBase = (dictDirective == usingDictCtx) ?
                            dictionary + dictSize - dictCtx->currentOffset :
                            dictionary + dictSize - startIndex;

    BYTE* op = (BYTE*) dest;
    BYTE* const olimit = op + maxOutputSize;

    U32 offset = 0;
    U32 forwardH;

    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
    /* If init conditions are not met, we don't have to mark stream
     * as having dirty context, since no action was taken yet */
    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; }   /* Impossible to store anything */
    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }             /* Unsupported inputSize, too large (or negative) */
    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; }    /* Size too large (not within 64K limit) */
    if (tableType==byPtr) assert(dictDirective==noDict);                    /* only supported use case with byPtr */
    assert(acceleration >= 1);

    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);

    /* Update context state */
    if (dictDirective == usingDictCtx) {
        /* Subsequent linked blocks can't use the dictionary. */
        /* Instead, they use the block we just compressed. */
        cctx->dictCtx = NULL;
        cctx->dictSize = (U32)inputSize;
    } else {
        cctx->dictSize += (U32)inputSize;
    }
    cctx->currentOffset += (U32)inputSize;
    cctx->tableType = (U16)tableType;

    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */

    /* First Byte */
    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        BYTE* token;
        const BYTE* filledIp;

        /* Find a match */
        if (tableType == byPtr) {
            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);

            } while ( (match+LZ4_DISTANCE_MAX < ip)
                   || (LZ4_read32(match) != LZ4_read32(ip)) );

        } else {   /* byU32, byU16 */

            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                U32 const current = (U32)(forwardIp - base);
                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
                assert(matchIndex <= current);
                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                if (dictDirective == usingDictCtx) {
                    if (matchIndex < startIndex) {
                        /* there was no match, try the dictionary */
                        assert(tableType == byU32);
                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                        match = dictBase + matchIndex;
                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else if (dictDirective==usingExtDict) {
                    if (matchIndex < startIndex) {
                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
                        assert(startIndex - matchIndex >= MINMATCH);
                        match = dictBase + matchIndex;
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else {   /* single continuous memory segment */
                    match = base + matchIndex;
                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);

                DEBUGLOG(7, "candidate at pos=%u (offset=%u)", matchIndex, current - matchIndex);
                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }   /* match outside of valid area */
                assert(matchIndex < current);
                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
                    continue;
                }   /* too far */
                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);   /* match now expected within distance */

                if (LZ4_read32(match) == LZ4_read32(ip)) {
                    if (maybe_extMem) offset = current - matchIndex;
                    break;   /* match found */
                }

            } while(1);
        }

        /* Catch up */
        filledIp = ip;
        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
            if ((outputDirective == limitedOutput) &&   /* Check output buffer overflow */
                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
            if ((outputDirective == fillOutput) &&
                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                op--;
                goto _last_literals;
            }
            if (litLength >= RUN_MASK) {
                int len = (int)(litLength - RUN_MASK);
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
            }
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
            LZ4_wildCopy8(op, anchor, op+litLength);
            op+=litLength;
            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }
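
        /* Worked example of the literal-length encoding above : for
         * litLength == 300, the token's high nibble is RUN_MASK (15) and
         * len == 285 spills into extra bytes as 255 then 30, since
         * 15 + 255 + 30 == 300. A decoder keeps adding length bytes until
         * it reads one that is < 255. */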

_next_match:
        /* at this stage, the following variables must be correctly set :
         * - ip : at start of LZ operation
         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
         * - offset : only set if maybe_extMem==1 (constant)
         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
         * - token and *token : position to write the 4 low bits of match length; the 4 high bits for literal length are supposed already written
         */

        if ((outputDirective == fillOutput) &&
            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
            /* the match was too close to the end, rewind and go to last literals */
            op = token;
            goto _last_literals;
        }

        /* Encode Offset */
        if (maybe_extMem) {   /* static test */
            DEBUGLOG(6, "            with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
            LZ4_writeLE16(op, (U16)offset); op+=2;
        } else {
            DEBUGLOG(6, "            with offset=%u  (same segment)", (U32)(ip - match));
            assert(ip-match <= LZ4_DISTANCE_MAX);
            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
        }

        /* Encode MatchLength */
        {   unsigned matchCode;

            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
              && (lowLimit==dictionary) /* match within extDict */ ) {
                const BYTE* limit = ip + (dictEnd-match);
                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                ip += (size_t)matchCode + MINMATCH;
                if (ip==limit) {
                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
                DEBUGLOG(6, "            with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                ip += (size_t)matchCode + MINMATCH;
                DEBUGLOG(6, "            with matchLength=%u", matchCode+MINMATCH);
            }

            if ((outputDirective) &&    /* Check output buffer overflow */
                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                if (outputDirective == fillOutput) {
                    /* Match description too long : reduce it */
                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                    ip -= matchCode - newMatchCode;
                    assert(newMatchCode < matchCode);
                    matchCode = newMatchCode;
                    if (unlikely(ip <= filledIp)) {
                        /* We have already filled up to filledIp so if ip ends up less than filledIp
                         * we have positions in the hash table beyond the current position. This is
                         * a problem if we reuse the hash table. So we have to remove these positions
                         * from the hash table.
                         */
                        const BYTE* ptr;
                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
                        for (ptr = ip; ptr <= filledIp; ++ptr) {
                            U32 const h = LZ4_hashPosition(ptr, tableType);
                            LZ4_clearHash(h, cctx->hashTable, tableType);
                        }
                    }
                } else {
                    assert(outputDirective == limitedOutput);
                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                }
            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
                LZ4_write32(op, 0xFFFFFFFF);
                while (matchCode >= 4*255) {
                    op+=4;
                    LZ4_write32(op, 0xFFFFFFFF);
                    matchCode -= 4*255;
                }
                op += matchCode / 255;
                *op++ = (BYTE)(matchCode % 255);
            } else
                *token += (BYTE)(matchCode);
        }
        /* Ensure we have enough space for the last literals. */
        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));

        anchor = ip;

        /* Test end of chunk */
        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);

        /* Test next position */
        if (tableType == byPtr) {

            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
            if ( (match+LZ4_DISTANCE_MAX >= ip)
              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

        } else {   /* byU32, byU16 */

            U32 const h = LZ4_hashPosition(ip, tableType);
            U32 const current = (U32)(ip-base);
            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if (dictDirective == usingDictCtx) {
                if (matchIndex < startIndex) {
                    /* there was no match, try the dictionary */
                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                    matchIndex += dictDelta;
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else if (dictDirective==usingExtDict) {
                if (matchIndex < startIndex) {
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else {   /* single memory segment */
                match = base + matchIndex;
            }
            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                token=op++;
                *token=0;
                if (maybe_extMem) offset = current - matchIndex;
                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
                goto _next_match;
            }
        }

        /* Prepare next loop */
        forwardH = LZ4_hashPosition(++ip, tableType);

    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRun = (size_t)(iend - anchor);
        if ( (outputDirective) &&   /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
            if (outputDirective == fillOutput) {
                /* adapt lastRun to fill 'dst' */
                assert(olimit >= op);
                lastRun  = (size_t)(olimit-op) - 1;
                lastRun -= (lastRun+240)/255;
            } else {
                assert(outputDirective == limitedOutput);
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
        }
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
        memcpy(op, anchor, lastRun);
        ip = anchor + lastRun;
        op += lastRun;
    }

    if (outputDirective == fillOutput) {
        *inputConsumed = (int) (((const char*)ip)-source);
    }
    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
    result = (int)(((char*)op) - dest);
    assert(result > 0);
    return result;
}


int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
    assert(ctx != NULL);
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
        }
    } else {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
        }
    }
}
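
/* A minimal usage sketch for LZ4_compress_fast_extState() (illustrative;
 * buffer names are hypothetical). The caller provides the working state,
 * which avoids any hidden allocation :
 *
 *     LZ4_stream_t state;                    // or malloc(LZ4_sizeofState())
 *     int const n = LZ4_compress_fast_extState(&state, src, dst,
 *                                              srcSize, dstCapacity, 1);
 *     if (n == 0) { ... dst too small ... }
 */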

/**
 * LZ4_compress_fast_extState_fastReset() :
 * A variant of LZ4_compress_fast_extState().
 *
 * Using this variant avoids an expensive initialization step. It is only safe
 * to call if the state buffer is known to be correctly initialized already
 * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
 * "correctly initialized").
 */
int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
{
    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;

    if (dstCapacity >= LZ4_compressBound(srcSize)) {
        if (srcSize < LZ4_64Klimit) {
            const tableType_t tableType = byU16;
            LZ4_prepareTable(ctx, srcSize, tableType);
            if (ctx->currentOffset) {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
            } else {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
            }
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            LZ4_prepareTable(ctx, srcSize, tableType);
            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
        }
    } else {
        if (srcSize < LZ4_64Klimit) {
            const tableType_t tableType = byU16;
            LZ4_prepareTable(ctx, srcSize, tableType);
            if (ctx->currentOffset) {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
            } else {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
            }
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            LZ4_prepareTable(ctx, srcSize, tableType);
            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
        }
    }
}


int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    int result;
#if (LZ4_HEAPMODE)
    LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
    if (ctxPtr == NULL) return 0;
#else
    LZ4_stream_t ctx;
    LZ4_stream_t* const ctxPtr = &ctx;
#endif
    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);

#if (LZ4_HEAPMODE)
    FREEMEM(ctxPtr);
#endif
    return result;
}


int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
{
    return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
}
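
/* A minimal usage sketch for the one-shot entry point (illustrative;
 * names are hypothetical). Sizing dst with LZ4_compressBound() guarantees
 * compression cannot fail for lack of space :
 *
 *     int const bound = LZ4_compressBound(srcSize);
 *     char* const dst = (char*)malloc((size_t)bound);
 *     int const cSize = LZ4_compress_default(src, dst, srcSize, bound);
 *     // cSize > 0 on success; 0 means an error (e.g. dst too small)
 */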
| 1271 | |
| 1272 | |
| 1273 | /* hidden debug function */ |
| 1274 | /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ |
| 1275 | int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) |
| 1276 | { |
| 1277 | LZ4_stream_t ctx; |
| 1278 | LZ4_initStream(&ctx, sizeof(ctx)); |
| 1279 | |
| 1280 | if (srcSize < LZ4_64Klimit) { |
| 1281 | return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration); |
| 1282 | } else { |
| 1283 | tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr; |
| 1284 | return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration); |
| 1285 | } |
| 1286 | } |
| 1287 | |
| 1288 | |
| 1289 | /* Note!: This function leaves the stream in an unclean/broken state! |
| 1290 | * It is not safe to subsequently use the same state with a _fastReset() or |
| 1291 | * _continue() call without resetting it. */ |
| 1292 | static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) |
| 1293 | { |
| 1294 | void* const s = LZ4_initStream(state, sizeof (*state)); |
| 1295 | assert(s != NULL); (void)s; |
| 1296 | |
| 1297 | if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ |
| 1298 | return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); |
| 1299 | } else { |
| 1300 | if (*srcSizePtr < LZ4_64Klimit) { |
| 1301 | return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); |
| 1302 | } else { |
| 1303 | tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; |
| 1304 | return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); |
| 1305 | } } |
| 1306 | } |
| 1307 | |
| 1308 | |
| 1309 | int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) |
| 1310 | { |
| 1311 | #if (LZ4_HEAPMODE) |
    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc/calloc always return properly aligned memory */
| 1313 | if (ctx == NULL) return 0; |
| 1314 | #else |
| 1315 | LZ4_stream_t ctxBody; |
| 1316 | LZ4_stream_t* ctx = &ctxBody; |
| 1317 | #endif |
| 1318 | |
| 1319 | int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); |
| 1320 | |
| 1321 | #if (LZ4_HEAPMODE) |
| 1322 | FREEMEM(ctx); |
| 1323 | #endif |
| 1324 | return result; |
| 1325 | } |
| 1326 | |
| 1327 | |
| 1328 | |
| 1329 | /*-****************************** |
| 1330 | * Streaming functions |
| 1331 | ********************************/ |
| 1332 | |
| 1333 | LZ4_stream_t* LZ4_createStream(void) |
| 1334 | { |
| 1335 | LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); |
| 1336 | LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ |
| 1337 | DEBUGLOG(4, "LZ4_createStream %p" , lz4s); |
| 1338 | if (lz4s == NULL) return NULL; |
| 1339 | LZ4_initStream(lz4s, sizeof(*lz4s)); |
| 1340 | return lz4s; |
| 1341 | } |
| 1342 | |
#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
                     it reports an alignment of 8 bytes,
                     while actually aligning LZ4_stream_t on 4 bytes. */
| 1346 | static size_t LZ4_stream_t_alignment(void) |
| 1347 | { |
| 1348 | struct { char c; LZ4_stream_t t; } t_a; |
| 1349 | return sizeof(t_a) - sizeof(t_a.t); |
| 1350 | } |
| 1351 | #endif |
| 1352 | |
| 1353 | LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) |
| 1354 | { |
| 1355 | DEBUGLOG(5, "LZ4_initStream" ); |
| 1356 | if (buffer == NULL) { return NULL; } |
| 1357 | if (size < sizeof(LZ4_stream_t)) { return NULL; } |
#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
                     it reports an alignment of 8 bytes,
                     while actually aligning LZ4_stream_t on 4 bytes. */
| 1361 | if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */ |
| 1362 | #endif |
| 1363 | MEM_INIT(buffer, 0, sizeof(LZ4_stream_t)); |
| 1364 | return (LZ4_stream_t*)buffer; |
| 1365 | } |
| 1366 | |
/* LZ4_resetStream() is now deprecated,
 * prefer LZ4_initStream(), which is more general */
| 1369 | void LZ4_resetStream (LZ4_stream_t* LZ4_stream) |
| 1370 | { |
| 1371 | DEBUGLOG(5, "LZ4_resetStream (ctx:%p)" , LZ4_stream); |
| 1372 | MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); |
| 1373 | } |
| 1374 | |
void LZ4_resetStream_fast(LZ4_stream_t* ctx)
{
    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
}
| 1378 | |
| 1379 | int LZ4_freeStream (LZ4_stream_t* LZ4_stream) |
| 1380 | { |
| 1381 | if (!LZ4_stream) return 0; /* support free on NULL */ |
| 1382 | DEBUGLOG(5, "LZ4_freeStream %p" , LZ4_stream); |
| 1383 | FREEMEM(LZ4_stream); |
| 1384 | return (0); |
| 1385 | } |
| 1386 | |
| 1387 | |
| 1388 | #define HASH_UNIT sizeof(reg_t) |
| 1389 | int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) |
| 1390 | { |
| 1391 | LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; |
| 1392 | const tableType_t tableType = byU32; |
| 1393 | const BYTE* p = (const BYTE*)dictionary; |
| 1394 | const BYTE* const dictEnd = p + dictSize; |
| 1395 | const BYTE* base; |
| 1396 | |
| 1397 | DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)" , dictSize, dictionary, LZ4_dict); |
| 1398 | |
| 1399 | /* It's necessary to reset the context, |
| 1400 | * and not just continue it with prepareTable() |
| 1401 | * to avoid any risk of generating overflowing matchIndex |
| 1402 | * when compressing using this dictionary */ |
| 1403 | LZ4_resetStream(LZ4_dict); |
| 1404 | |
| 1405 | /* We always increment the offset by 64 KB, since, if the dict is longer, |
| 1406 | * we truncate it to the last 64k, and if it's shorter, we still want to |
| 1407 | * advance by a whole window length so we can provide the guarantee that |
| 1408 | * there are only valid offsets in the window, which allows an optimization |
| 1409 | * in LZ4_compress_fast_continue() where it uses noDictIssue even when the |
| 1410 | * dictionary isn't a full 64k. */ |
| 1411 | dict->currentOffset += 64 KB; |
| 1412 | |
| 1413 | if (dictSize < (int)HASH_UNIT) { |
| 1414 | return 0; |
| 1415 | } |
| 1416 | |
| 1417 | if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; |
| 1418 | base = dictEnd - dict->currentOffset; |
| 1419 | dict->dictionary = p; |
| 1420 | dict->dictSize = (U32)(dictEnd - p); |
| 1421 | dict->tableType = tableType; |
| 1422 | |
| 1423 | while (p <= dictEnd-HASH_UNIT) { |
| 1424 | LZ4_putPosition(p, dict->hashTable, tableType, base); |
| 1425 | p+=3; |
| 1426 | } |
| 1427 | |
| 1428 | return (int)dict->dictSize; |
| 1429 | } |
| 1430 | |
void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
{
| 1432 | const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : |
| 1433 | &(dictionaryStream->internal_donotuse); |
| 1434 | |
| 1435 | DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)" , |
| 1436 | workingStream, dictionaryStream, |
| 1437 | dictCtx != NULL ? dictCtx->dictSize : 0); |
| 1438 | |
    /* Calling LZ4_resetStream_fast() here makes sure that the changes below
     * will not be erased by a subsequent call to LZ4_resetStream_fast()
     * in case the stream was marked as having a dirty context,
     * i.e. as requiring a full reset.
     */
| 1443 | LZ4_resetStream_fast(workingStream); |
| 1444 | |
| 1445 | if (dictCtx != NULL) { |
| 1446 | /* If the current offset is zero, we will never look in the |
| 1447 | * external dictionary context, since there is no value a table |
         * entry can take that indicates a miss. In that case, we need
| 1449 | * to bump the offset to something non-zero. |
| 1450 | */ |
| 1451 | if (workingStream->internal_donotuse.currentOffset == 0) { |
| 1452 | workingStream->internal_donotuse.currentOffset = 64 KB; |
| 1453 | } |
| 1454 | |
| 1455 | /* Don't actually attach an empty dictionary. |
| 1456 | */ |
| 1457 | if (dictCtx->dictSize == 0) { |
| 1458 | dictCtx = NULL; |
| 1459 | } |
| 1460 | } |
| 1461 | workingStream->internal_donotuse.dictCtx = dictCtx; |
| 1462 | } |
| 1463 | |
| 1464 | |
| 1465 | static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) |
| 1466 | { |
| 1467 | assert(nextSize >= 0); |
| 1468 | if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ |
| 1469 | /* rescale hash table */ |
| 1470 | U32 const delta = LZ4_dict->currentOffset - 64 KB; |
| 1471 | const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; |
| 1472 | int i; |
| 1473 | DEBUGLOG(4, "LZ4_renormDictT" ); |
| 1474 | for (i=0; i<LZ4_HASH_SIZE_U32; i++) { |
| 1475 | if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0; |
| 1476 | else LZ4_dict->hashTable[i] -= delta; |
| 1477 | } |
| 1478 | LZ4_dict->currentOffset = 64 KB; |
| 1479 | if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; |
| 1480 | LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; |
| 1481 | } |
| 1482 | } |
| 1483 | |
| 1484 | |
| 1485 | int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, |
| 1486 | const char* source, char* dest, |
| 1487 | int inputSize, int maxOutputSize, |
| 1488 | int acceleration) |
| 1489 | { |
| 1490 | const tableType_t tableType = byU32; |
| 1491 | LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; |
| 1492 | const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; |
| 1493 | |
| 1494 | DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)" , inputSize); |
| 1495 | |
| 1496 | if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */ |
| 1497 | LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ |
| 1498 | if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; |
| 1499 | |
| 1500 | /* invalidate tiny dictionaries */ |
| 1501 | if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ |
| 1502 | && (dictEnd != (const BYTE*)source) ) { |
| 1503 | DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small" , streamPtr->dictSize, streamPtr->dictionary); |
| 1504 | streamPtr->dictSize = 0; |
| 1505 | streamPtr->dictionary = (const BYTE*)source; |
| 1506 | dictEnd = (const BYTE*)source; |
| 1507 | } |
| 1508 | |
| 1509 | /* Check overlapping input/dictionary space */ |
| 1510 | { const BYTE* sourceEnd = (const BYTE*) source + inputSize; |
| 1511 | if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { |
| 1512 | streamPtr->dictSize = (U32)(dictEnd - sourceEnd); |
| 1513 | if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; |
| 1514 | if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; |
| 1515 | streamPtr->dictionary = dictEnd - streamPtr->dictSize; |
| 1516 | } |
| 1517 | } |
| 1518 | |
| 1519 | /* prefix mode : source data follows dictionary */ |
| 1520 | if (dictEnd == (const BYTE*)source) { |
| 1521 | if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) |
| 1522 | return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); |
| 1523 | else |
| 1524 | return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); |
| 1525 | } |
| 1526 | |
| 1527 | /* external dictionary mode */ |
| 1528 | { int result; |
| 1529 | if (streamPtr->dictCtx) { |
| 1530 | /* We depend here on the fact that dictCtx'es (produced by |
| 1531 | * LZ4_loadDict) guarantee that their tables contain no references |
| 1532 | * to offsets between dictCtx->currentOffset - 64 KB and |
| 1533 | * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe |
| 1534 | * to use noDictIssue even when the dict isn't a full 64 KB. |
| 1535 | */ |
| 1536 | if (inputSize > 4 KB) { |
| 1537 | /* For compressing large blobs, it is faster to pay the setup |
| 1538 | * cost to copy the dictionary's tables into the active context, |
| 1539 | * so that the compression loop is only looking into one table. |
| 1540 | */ |
                memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));   /* copy just the internal state, not the whole enclosing union */
| 1542 | result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); |
| 1543 | } else { |
| 1544 | result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); |
| 1545 | } |
| 1546 | } else { |
| 1547 | if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { |
| 1548 | result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); |
| 1549 | } else { |
| 1550 | result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); |
| 1551 | } |
| 1552 | } |
| 1553 | streamPtr->dictionary = (const BYTE*)source; |
| 1554 | streamPtr->dictSize = (U32)inputSize; |
| 1555 | return result; |
| 1556 | } |
| 1557 | } |
| 1558 | |
| 1559 | |
| 1560 | /* Hidden debug function, to force-test external dictionary mode */ |
| 1561 | int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) |
| 1562 | { |
| 1563 | LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; |
| 1564 | int result; |
| 1565 | |
| 1566 | LZ4_renormDictT(streamPtr, srcSize); |
| 1567 | |
| 1568 | if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { |
| 1569 | result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); |
| 1570 | } else { |
| 1571 | result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); |
| 1572 | } |
| 1573 | |
| 1574 | streamPtr->dictionary = (const BYTE*)source; |
| 1575 | streamPtr->dictSize = (U32)srcSize; |
| 1576 | |
| 1577 | return result; |
| 1578 | } |
| 1579 | |
| 1580 | |
| 1581 | /*! LZ4_saveDict() : |
 *  If the previously compressed data block is not guaranteed to remain available at its memory location,
 *  save it into a safer place (char* safeBuffer).
 *  Note : you don't need to call LZ4_loadDict() afterwards ;
 *         the dictionary is immediately usable, so you can call LZ4_compress_fast_continue() right away.
| 1586 | * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. |
| 1587 | */ |
| 1588 | int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) |
| 1589 | { |
| 1590 | LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; |
| 1591 | const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; |
| 1592 | |
| 1593 | if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ |
| 1594 | if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } |
| 1595 | |
| 1596 | memmove(safeBuffer, previousDictEnd - dictSize, dictSize); |
| 1597 | |
| 1598 | dict->dictionary = (const BYTE*)safeBuffer; |
| 1599 | dict->dictSize = (U32)dictSize; |
| 1600 | |
| 1601 | return dictSize; |
| 1602 | } |
| 1603 | |
| 1604 | |
| 1605 | |
| 1606 | /*-******************************* |
| 1607 | * Decompression functions |
| 1608 | ********************************/ |
| 1609 | |
| 1610 | typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; |
| 1611 | typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; |
| 1612 | |
| 1613 | #undef MIN |
| 1614 | #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) |
| 1615 | |
| 1616 | /* Read the variable-length literal or match length. |
| 1617 | * |
| 1618 | * ip - pointer to use as input. |
| 1619 | * lencheck - end ip. Return an error if ip advances >= lencheck. |
| 1620 | * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. |
| 1621 | * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. |
| 1622 | * error (output) - error code. Should be set to 0 before call. |
| 1623 | */ |
| 1624 | typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; |
| 1625 | LZ4_FORCE_INLINE unsigned |
| 1626 | read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error) |
| 1627 | { |
| 1628 | unsigned length = 0; |
| 1629 | unsigned s; |
| 1630 | if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ |
| 1631 | *error = initial_error; |
| 1632 | return length; |
| 1633 | } |
| 1634 | do { |
| 1635 | s = **ip; |
| 1636 | (*ip)++; |
| 1637 | length += s; |
| 1638 | if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ |
| 1639 | *error = loop_error; |
| 1640 | return length; |
| 1641 | } |
| 1642 | } while (s==255); |
| 1643 | |
| 1644 | return length; |
| 1645 | } |
| 1646 | |
| 1647 | /*! LZ4_decompress_generic() : |
| 1648 | * This generic decompression function covers all use cases. |
| 1649 | * It shall be instantiated several times, using different sets of directives. |
 *  Note that it is important for performance that this function really gets inlined,
| 1651 | * in order to remove useless branches during compilation optimization. |
| 1652 | */ |
| 1653 | LZ4_FORCE_INLINE int |
| 1654 | LZ4_decompress_generic( |
| 1655 | const char* const src, |
| 1656 | char* const dst, |
| 1657 | int srcSize, |
| 1658 | int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ |
| 1659 | |
| 1660 | endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ |
| 1661 | earlyEnd_directive partialDecoding, /* full, partial */ |
| 1662 | dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ |
| 1663 | const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ |
| 1664 | const BYTE* const dictStart, /* only if dict==usingExtDict */ |
| 1665 | const size_t dictSize /* note : = 0 if noDict */ |
| 1666 | ) |
| 1667 | { |
| 1668 | if (src == NULL) { return -1; } |
| 1669 | |
| 1670 | { const BYTE* ip = (const BYTE*) src; |
| 1671 | const BYTE* const iend = ip + srcSize; |
| 1672 | |
| 1673 | BYTE* op = (BYTE*) dst; |
| 1674 | BYTE* const oend = op + outputSize; |
| 1675 | BYTE* cpy; |
| 1676 | |
| 1677 | const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; |
| 1678 | |
| 1679 | const int safeDecode = (endOnInput==endOnInputSize); |
| 1680 | const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); |
| 1681 | |
| 1682 | |
| 1683 | /* Set up the "end" pointers for the shortcut. */ |
| 1684 | const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; |
| 1685 | const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; |
| 1686 | |
| 1687 | const BYTE* match; |
| 1688 | size_t offset; |
| 1689 | unsigned token; |
| 1690 | size_t length; |
| 1691 | |
| 1692 | |
| 1693 | DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)" , srcSize, outputSize); |
| 1694 | |
| 1695 | /* Special cases */ |
| 1696 | assert(lowPrefix <= op); |
| 1697 | if ((endOnInput) && (unlikely(outputSize==0))) { |
| 1698 | /* Empty output buffer */ |
| 1699 | if (partialDecoding) return 0; |
| 1700 | return ((srcSize==1) && (*ip==0)) ? 0 : -1; |
| 1701 | } |
| 1702 | if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } |
| 1703 | if ((endOnInput) && unlikely(srcSize==0)) { return -1; } |
| 1704 | |
        /* Currently the fast loop shows a regression on Qualcomm ARM chips. */
| 1706 | #if LZ4_FAST_DEC_LOOP |
| 1707 | if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { |
| 1708 | DEBUGLOG(6, "skip fast decode loop" ); |
| 1709 | goto safe_decode; |
| 1710 | } |
| 1711 | |
| 1712 | /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ |
| 1713 | while (1) { |
| 1714 | /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ |
| 1715 | assert(oend - op >= FASTLOOP_SAFE_DISTANCE); |
| 1716 | if (endOnInput) { assert(ip < iend); } |
| 1717 | token = *ip++; |
| 1718 | length = token >> ML_BITS; /* literal length */ |
| 1719 | |
| 1720 | assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ |
| 1721 | |
| 1722 | /* decode literal length */ |
| 1723 | if (length == RUN_MASK) { |
| 1724 | variable_length_error error = ok; |
| 1725 | length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); |
| 1726 | if (error == initial_error) { goto _output_error; } |
| 1727 | if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ |
| 1728 | if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ |
| 1729 | |
| 1730 | /* copy literals */ |
| 1731 | cpy = op+length; |
| 1732 | LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); |
| 1733 | if (endOnInput) { /* LZ4_decompress_safe() */ |
| 1734 | if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } |
| 1735 | LZ4_wildCopy32(op, ip, cpy); |
| 1736 | } else { /* LZ4_decompress_fast() */ |
| 1737 | if (cpy>oend-8) { goto safe_literal_copy; } |
| 1738 | LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : |
| 1739 | * it doesn't know input length, and only relies on end-of-block properties */ |
| 1740 | } |
| 1741 | ip += length; op = cpy; |
| 1742 | } else { |
| 1743 | cpy = op+length; |
| 1744 | if (endOnInput) { /* LZ4_decompress_safe() */ |
| 1745 | DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe" , (unsigned)length); |
| 1746 | /* We don't need to check oend, since we check it once for each loop below */ |
| 1747 | if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } |
                /* Literals can only be <= 14 here, but copying 16 bytes lets compilers optimize with a register-sized move */
| 1749 | memcpy(op, ip, 16); |
| 1750 | } else { /* LZ4_decompress_fast() */ |
| 1751 | /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : |
| 1752 | * it doesn't know input length, and relies on end-of-block properties */ |
| 1753 | memcpy(op, ip, 8); |
| 1754 | if (length > 8) { memcpy(op+8, ip+8, 8); } |
| 1755 | } |
| 1756 | ip += length; op = cpy; |
| 1757 | } |
| 1758 | |
| 1759 | /* get offset */ |
| 1760 | offset = LZ4_readLE16(ip); ip+=2; |
| 1761 | match = op - offset; |
| 1762 | assert(match <= op); |
| 1763 | |
| 1764 | /* get matchlength */ |
| 1765 | length = token & ML_MASK; |
| 1766 | |
| 1767 | if (length == ML_MASK) { |
| 1768 | variable_length_error error = ok; |
| 1769 | if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ |
| 1770 | length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); |
| 1771 | if (error != ok) { goto _output_error; } |
| 1772 | if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ |
| 1773 | length += MINMATCH; |
| 1774 | if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { |
| 1775 | goto safe_match_copy; |
| 1776 | } |
| 1777 | } else { |
| 1778 | length += MINMATCH; |
| 1779 | if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { |
| 1780 | goto safe_match_copy; |
| 1781 | } |
| 1782 | |
| 1783 | /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ |
| 1784 | if ((dict == withPrefix64k) || (match >= lowPrefix)) { |
| 1785 | if (offset >= 8) { |
| 1786 | assert(match >= lowPrefix); |
| 1787 | assert(match <= op); |
| 1788 | assert(op + 18 <= oend); |
| 1789 | |
| 1790 | memcpy(op, match, 8); |
| 1791 | memcpy(op+8, match+8, 8); |
| 1792 | memcpy(op+16, match+16, 2); |
| 1793 | op += length; |
| 1794 | continue; |
| 1795 | } } } |
| 1796 | |
| 1797 | if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ |
| 1798 | /* match starting within external dictionary */ |
| 1799 | if ((dict==usingExtDict) && (match < lowPrefix)) { |
| 1800 | if (unlikely(op+length > oend-LASTLITERALS)) { |
| 1801 | if (partialDecoding) { |
| 1802 | length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */ |
| 1803 | } else { |
| 1804 | goto _output_error; /* end-of-block condition violated */ |
| 1805 | } } |
| 1806 | |
| 1807 | if (length <= (size_t)(lowPrefix-match)) { |
| 1808 | /* match fits entirely within external dictionary : just copy */ |
| 1809 | memmove(op, dictEnd - (lowPrefix-match), length); |
| 1810 | op += length; |
| 1811 | } else { |
| 1812 | /* match stretches into both external dictionary and current block */ |
| 1813 | size_t const copySize = (size_t)(lowPrefix - match); |
| 1814 | size_t const restSize = length - copySize; |
| 1815 | memcpy(op, dictEnd - copySize, copySize); |
| 1816 | op += copySize; |
| 1817 | if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ |
| 1818 | BYTE* const endOfMatch = op + restSize; |
| 1819 | const BYTE* copyFrom = lowPrefix; |
| 1820 | while (op < endOfMatch) { *op++ = *copyFrom++; } |
| 1821 | } else { |
| 1822 | memcpy(op, lowPrefix, restSize); |
| 1823 | op += restSize; |
| 1824 | } } |
| 1825 | continue; |
| 1826 | } |
| 1827 | |
| 1828 | /* copy match within block */ |
| 1829 | cpy = op + length; |
| 1830 | |
| 1831 | assert((op <= oend) && (oend-op >= 32)); |
| 1832 | if (unlikely(offset<16)) { |
| 1833 | LZ4_memcpy_using_offset(op, match, cpy, offset); |
| 1834 | } else { |
| 1835 | LZ4_wildCopy32(op, match, cpy); |
| 1836 | } |
| 1837 | |
| 1838 | op = cpy; /* wildcopy correction */ |
| 1839 | } |
| 1840 | safe_decode: |
| 1841 | #endif |
| 1842 | |
| 1843 | /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ |
| 1844 | while (1) { |
| 1845 | token = *ip++; |
| 1846 | length = token >> ML_BITS; /* literal length */ |
| 1847 | |
| 1848 | assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ |
| 1849 | |
| 1850 | /* A two-stage shortcut for the most common case: |
| 1851 | * 1) If the literal length is 0..14, and there is enough space, |
| 1852 | * enter the shortcut and copy 16 bytes on behalf of the literals |
| 1853 | * (in the fast mode, only 8 bytes can be safely copied this way). |
| 1854 | * 2) Further if the match length is 4..18, copy 18 bytes in a similar |
| 1855 | * manner; but we ensure that there's enough space in the output for |
| 1856 | * those 18 bytes earlier, upon entering the shortcut (in other words, |
| 1857 | * there is a combined check for both stages). |
| 1858 | */ |
| 1859 | if ( (endOnInput ? length != RUN_MASK : length <= 8) |
| 1860 | /* strictly "less than" on input, to re-enter the loop with at least one byte */ |
| 1861 | && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { |
| 1862 | /* Copy the literals */ |
| 1863 | memcpy(op, ip, endOnInput ? 16 : 8); |
| 1864 | op += length; ip += length; |
| 1865 | |
| 1866 | /* The second stage: prepare for match copying, decode full info. |
| 1867 | * If it doesn't work out, the info won't be wasted. */ |
| 1868 | length = token & ML_MASK; /* match length */ |
| 1869 | offset = LZ4_readLE16(ip); ip += 2; |
| 1870 | match = op - offset; |
| 1871 | assert(match <= op); /* check overflow */ |
| 1872 | |
| 1873 | /* Do not deal with overlapping matches. */ |
| 1874 | if ( (length != ML_MASK) |
| 1875 | && (offset >= 8) |
| 1876 | && (dict==withPrefix64k || match >= lowPrefix) ) { |
| 1877 | /* Copy the match. */ |
| 1878 | memcpy(op + 0, match + 0, 8); |
| 1879 | memcpy(op + 8, match + 8, 8); |
| 1880 | memcpy(op +16, match +16, 2); |
| 1881 | op += length + MINMATCH; |
| 1882 | /* Both stages worked, load the next token. */ |
| 1883 | continue; |
| 1884 | } |
| 1885 | |
| 1886 | /* The second stage didn't work out, but the info is ready. |
| 1887 | * Propel it right to the point of match copying. */ |
| 1888 | goto _copy_match; |
| 1889 | } |
| 1890 | |
| 1891 | /* decode literal length */ |
| 1892 | if (length == RUN_MASK) { |
| 1893 | variable_length_error error = ok; |
| 1894 | length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); |
| 1895 | if (error == initial_error) { goto _output_error; } |
| 1896 | if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ |
| 1897 | if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ |
| 1898 | } |
| 1899 | |
| 1900 | /* copy literals */ |
| 1901 | cpy = op+length; |
| 1902 | #if LZ4_FAST_DEC_LOOP |
| 1903 | safe_literal_copy: |
| 1904 | #endif |
| 1905 | LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); |
| 1906 | if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) |
| 1907 | || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) |
| 1908 | { |
            /* We've either hit the input parsing restriction or the output parsing restriction.
             * If we've hit the input parsing condition then this must be the last sequence.
             * If we've hit the output parsing condition then we must be using partialDecoding
             * (otherwise the input is malformed).
             */
| 1914 | if (partialDecoding) { |
                /* Since we are doing partial decoding, we may be in this block purely
                 * because of the output parsing restriction, which is not an error :
                 * the output buffer is allowed to be undersized.
                 */
| 1918 | assert(endOnInput); |
| 1919 | /* If we're in this block because of the input parsing condition, then we must be on the |
| 1920 | * last sequence (or invalid), so we must check that we exactly consume the input. |
| 1921 | */ |
| 1922 | if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; } |
| 1923 | assert(ip+length <= iend); |
| 1924 | /* We are finishing in the middle of a literals segment. |
| 1925 | * Break after the copy. |
| 1926 | */ |
| 1927 | if (cpy > oend) { |
| 1928 | cpy = oend; |
| 1929 | assert(op<=oend); |
| 1930 | length = (size_t)(oend-op); |
| 1931 | } |
| 1932 | assert(ip+length <= iend); |
| 1933 | } else { |
| 1934 | /* We must be on the last sequence because of the parsing limitations so check |
| 1935 | * that we exactly regenerate the original size (must be exact when !endOnInput). |
| 1936 | */ |
| 1937 | if ((!endOnInput) && (cpy != oend)) { goto _output_error; } |
| 1938 | /* We must be on the last sequence (or invalid) because of the parsing limitations |
| 1939 | * so check that we exactly consume the input and don't overrun the output buffer. |
| 1940 | */ |
| 1941 | if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } |
| 1942 | } |
| 1943 | memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */ |
| 1944 | ip += length; |
| 1945 | op += length; |
| 1946 | /* Necessarily EOF when !partialDecoding. When partialDecoding |
| 1947 | * it is EOF if we've either filled the output buffer or hit |
| 1948 | * the input parsing restriction. |
| 1949 | */ |
| 1950 | if (!partialDecoding || (cpy == oend) || (ip == iend)) { |
| 1951 | break; |
| 1952 | } |
| 1953 | } else { |
| 1954 | LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ |
| 1955 | ip += length; op = cpy; |
| 1956 | } |
| 1957 | |
| 1958 | /* get offset */ |
| 1959 | offset = LZ4_readLE16(ip); ip+=2; |
| 1960 | match = op - offset; |
| 1961 | |
| 1962 | /* get matchlength */ |
| 1963 | length = token & ML_MASK; |
| 1964 | |
| 1965 | _copy_match: |
| 1966 | if (length == ML_MASK) { |
| 1967 | variable_length_error error = ok; |
| 1968 | length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); |
| 1969 | if (error != ok) goto _output_error; |
| 1970 | if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ |
| 1971 | } |
| 1972 | length += MINMATCH; |
| 1973 | |
| 1974 | #if LZ4_FAST_DEC_LOOP |
| 1975 | safe_match_copy: |
| 1976 | #endif |
| 1977 | if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ |
| 1978 | /* match starting within external dictionary */ |
| 1979 | if ((dict==usingExtDict) && (match < lowPrefix)) { |
| 1980 | if (unlikely(op+length > oend-LASTLITERALS)) { |
| 1981 | if (partialDecoding) length = MIN(length, (size_t)(oend-op)); |
| 1982 | else goto _output_error; /* doesn't respect parsing restriction */ |
| 1983 | } |
| 1984 | |
| 1985 | if (length <= (size_t)(lowPrefix-match)) { |
| 1986 | /* match fits entirely within external dictionary : just copy */ |
| 1987 | memmove(op, dictEnd - (lowPrefix-match), length); |
| 1988 | op += length; |
| 1989 | } else { |
| 1990 | /* match stretches into both external dictionary and current block */ |
| 1991 | size_t const copySize = (size_t)(lowPrefix - match); |
| 1992 | size_t const restSize = length - copySize; |
| 1993 | memcpy(op, dictEnd - copySize, copySize); |
| 1994 | op += copySize; |
| 1995 | if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ |
| 1996 | BYTE* const endOfMatch = op + restSize; |
| 1997 | const BYTE* copyFrom = lowPrefix; |
| 1998 | while (op < endOfMatch) *op++ = *copyFrom++; |
| 1999 | } else { |
| 2000 | memcpy(op, lowPrefix, restSize); |
| 2001 | op += restSize; |
| 2002 | } } |
| 2003 | continue; |
| 2004 | } |
| 2005 | assert(match >= lowPrefix); |
| 2006 | |
| 2007 | /* copy match within block */ |
| 2008 | cpy = op + length; |
| 2009 | |
| 2010 | /* partialDecoding : may end anywhere within the block */ |
| 2011 | assert(op<=oend); |
| 2012 | if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { |
| 2013 | size_t const mlen = MIN(length, (size_t)(oend-op)); |
| 2014 | const BYTE* const matchEnd = match + mlen; |
| 2015 | BYTE* const copyEnd = op + mlen; |
| 2016 | if (matchEnd > op) { /* overlap copy */ |
| 2017 | while (op < copyEnd) { *op++ = *match++; } |
| 2018 | } else { |
| 2019 | memcpy(op, match, mlen); |
| 2020 | } |
| 2021 | op = copyEnd; |
| 2022 | if (op == oend) { break; } |
| 2023 | continue; |
| 2024 | } |
| 2025 | |
| 2026 | if (unlikely(offset<8)) { |
| 2027 | LZ4_write32(op, 0); /* silence msan warning when offset==0 */ |
| 2028 | op[0] = match[0]; |
| 2029 | op[1] = match[1]; |
| 2030 | op[2] = match[2]; |
| 2031 | op[3] = match[3]; |
| 2032 | match += inc32table[offset]; |
| 2033 | memcpy(op+4, match, 4); |
| 2034 | match -= dec64table[offset]; |
| 2035 | } else { |
| 2036 | memcpy(op, match, 8); |
| 2037 | match += 8; |
| 2038 | } |
| 2039 | op += 8; |
| 2040 | |
| 2041 | if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { |
| 2042 | BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); |
| 2043 | if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ |
| 2044 | if (op < oCopyLimit) { |
| 2045 | LZ4_wildCopy8(op, match, oCopyLimit); |
| 2046 | match += oCopyLimit - op; |
| 2047 | op = oCopyLimit; |
| 2048 | } |
| 2049 | while (op < cpy) { *op++ = *match++; } |
| 2050 | } else { |
| 2051 | memcpy(op, match, 8); |
| 2052 | if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } |
| 2053 | } |
| 2054 | op = cpy; /* wildcopy correction */ |
| 2055 | } |
| 2056 | |
| 2057 | /* end of decoding */ |
| 2058 | if (endOnInput) { |
| 2059 | return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ |
| 2060 | } else { |
| 2061 | return (int) (((const char*)ip)-src); /* Nb of input bytes read */ |
| 2062 | } |
| 2063 | |
        /* Error detected : corrupted input, or parsing restrictions violated */
| 2065 | _output_error: |
| 2066 | return (int) (-(((const char*)ip)-src))-1; |
| 2067 | } |
| 2068 | } |
| 2069 | |
| 2070 | |
| 2071 | /*===== Instantiate the API decoding functions. =====*/ |
| 2072 | |
| 2073 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2074 | int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) |
| 2075 | { |
| 2076 | return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, |
| 2077 | endOnInputSize, decode_full_block, noDict, |
| 2078 | (BYTE*)dest, NULL, 0); |
| 2079 | } |
| 2080 | |
| 2081 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2082 | int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) |
| 2083 | { |
| 2084 | dstCapacity = MIN(targetOutputSize, dstCapacity); |
| 2085 | return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, |
| 2086 | endOnInputSize, partial_decode, |
| 2087 | noDict, (BYTE*)dst, NULL, 0); |
| 2088 | } |
| 2089 | |
| 2090 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2091 | int LZ4_decompress_fast(const char* source, char* dest, int originalSize) |
| 2092 | { |
| 2093 | return LZ4_decompress_generic(source, dest, 0, originalSize, |
| 2094 | endOnOutputSize, decode_full_block, withPrefix64k, |
| 2095 | (BYTE*)dest - 64 KB, NULL, 0); |
| 2096 | } |
| 2097 | |
| 2098 | /*===== Instantiate a few more decoding cases, used more than once. =====*/ |
| 2099 | |
| 2100 | LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ |
| 2101 | int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) |
| 2102 | { |
| 2103 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, |
| 2104 | endOnInputSize, decode_full_block, withPrefix64k, |
| 2105 | (BYTE*)dest - 64 KB, NULL, 0); |
| 2106 | } |
| 2107 | |
| 2108 | /* Another obsolete API function, paired with the previous one. */ |
| 2109 | int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) |
| 2110 | { |
| 2111 | /* LZ4_decompress_fast doesn't validate match offsets, |
| 2112 | * and thus serves well with any prefixed dictionary. */ |
| 2113 | return LZ4_decompress_fast(source, dest, originalSize); |
| 2114 | } |
| 2115 | |
| 2116 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2117 | static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, |
| 2118 | size_t prefixSize) |
| 2119 | { |
| 2120 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, |
| 2121 | endOnInputSize, decode_full_block, noDict, |
| 2122 | (BYTE*)dest-prefixSize, NULL, 0); |
| 2123 | } |
| 2124 | |
| 2125 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2126 | int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, |
| 2127 | int compressedSize, int maxOutputSize, |
| 2128 | const void* dictStart, size_t dictSize) |
| 2129 | { |
| 2130 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, |
| 2131 | endOnInputSize, decode_full_block, usingExtDict, |
| 2132 | (BYTE*)dest, (const BYTE*)dictStart, dictSize); |
| 2133 | } |
| 2134 | |
| 2135 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2136 | static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, |
| 2137 | const void* dictStart, size_t dictSize) |
| 2138 | { |
| 2139 | return LZ4_decompress_generic(source, dest, 0, originalSize, |
| 2140 | endOnOutputSize, decode_full_block, usingExtDict, |
| 2141 | (BYTE*)dest, (const BYTE*)dictStart, dictSize); |
| 2142 | } |
| 2143 | |
| 2144 | /* The "double dictionary" mode, for use with e.g. ring buffers: the first part |
| 2145 | * of the dictionary is passed as prefix, and the second via dictStart + dictSize. |
| 2146 | * These routines are used only once, in LZ4_decompress_*_continue(). |
| 2147 | */ |
| 2148 | LZ4_FORCE_INLINE |
| 2149 | int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, |
| 2150 | size_t prefixSize, const void* dictStart, size_t dictSize) |
| 2151 | { |
| 2152 | return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, |
| 2153 | endOnInputSize, decode_full_block, usingExtDict, |
| 2154 | (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); |
| 2155 | } |
| 2156 | |
| 2157 | LZ4_FORCE_INLINE |
| 2158 | int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, |
| 2159 | size_t prefixSize, const void* dictStart, size_t dictSize) |
| 2160 | { |
| 2161 | return LZ4_decompress_generic(source, dest, 0, originalSize, |
| 2162 | endOnOutputSize, decode_full_block, usingExtDict, |
| 2163 | (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); |
| 2164 | } |
| 2165 | |
| 2166 | /*===== streaming decompression functions =====*/ |
| 2167 | |
| 2168 | LZ4_streamDecode_t* LZ4_createStreamDecode(void) |
| 2169 | { |
| 2170 | LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); |
| 2171 | LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ |
| 2172 | return lz4s; |
| 2173 | } |
| 2174 | |
| 2175 | int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) |
| 2176 | { |
| 2177 | if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ |
| 2178 | FREEMEM(LZ4_stream); |
| 2179 | return 0; |
| 2180 | } |
| 2181 | |
| 2182 | /*! LZ4_setStreamDecode() : |
 *  Use this function to tell the decoder where to find the dictionary.
| 2184 | * This function is not necessary if previous data is still available where it was decoded. |
| 2185 | * Loading a size of 0 is allowed (same effect as no dictionary). |
| 2186 | * @return : 1 if OK, 0 if error |
| 2187 | */ |
| 2188 | int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) |
| 2189 | { |
| 2190 | LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; |
| 2191 | lz4sd->prefixSize = (size_t) dictSize; |
| 2192 | lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; |
| 2193 | lz4sd->externalDict = NULL; |
| 2194 | lz4sd->extDictSize = 0; |
| 2195 | return 1; |
| 2196 | } |
| 2197 | |
| 2198 | /*! LZ4_decoderRingBufferSize() : |
| 2199 | * when setting a ring buffer for streaming decompression (optional scenario), |
| 2200 | * provides the minimum size of this ring buffer |
| 2201 | * to be compatible with any source respecting maxBlockSize condition. |
| 2202 | * Note : in a ring buffer scenario, |
| 2203 | * blocks are presumed decompressed next to each other. |
| 2204 | * When not enough space remains for next block (remainingSize < maxBlockSize), |
| 2205 | * decoding resumes from beginning of ring buffer. |
| 2206 | * @return : minimum ring buffer size, |
| 2207 | * or 0 if there is an error (invalid maxBlockSize). |
| 2208 | */ |
| 2209 | int LZ4_decoderRingBufferSize(int maxBlockSize) |
| 2210 | { |
| 2211 | if (maxBlockSize < 0) return 0; |
| 2212 | if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; |
| 2213 | if (maxBlockSize < 16) maxBlockSize = 16; |
| 2214 | return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); |
| 2215 | } |
| 2216 | |
| 2217 | /* |
| 2218 | *_continue() : |
| 2219 | These decoding functions allow decompression of multiple blocks in "streaming" mode. |
| 2220 | Previously decoded blocks must still be available at the memory position where they were decoded. |
    If that's not possible, save the relevant part of the decoded data into a safe buffer,
    and indicate its position using LZ4_setStreamDecode().
| 2223 | */ |
| 2224 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2225 | int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) |
| 2226 | { |
| 2227 | LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; |
| 2228 | int result; |
| 2229 | |
| 2230 | if (lz4sd->prefixSize == 0) { |
| 2231 | /* The first call, no dictionary yet. */ |
| 2232 | assert(lz4sd->extDictSize == 0); |
| 2233 | result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); |
| 2234 | if (result <= 0) return result; |
| 2235 | lz4sd->prefixSize = (size_t)result; |
| 2236 | lz4sd->prefixEnd = (BYTE*)dest + result; |
| 2237 | } else if (lz4sd->prefixEnd == (BYTE*)dest) { |
| 2238 | /* They're rolling the current segment. */ |
| 2239 | if (lz4sd->prefixSize >= 64 KB - 1) |
| 2240 | result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); |
| 2241 | else if (lz4sd->extDictSize == 0) |
| 2242 | result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, |
| 2243 | lz4sd->prefixSize); |
| 2244 | else |
| 2245 | result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, |
| 2246 | lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); |
| 2247 | if (result <= 0) return result; |
| 2248 | lz4sd->prefixSize += (size_t)result; |
| 2249 | lz4sd->prefixEnd += result; |
| 2250 | } else { |
| 2251 | /* The buffer wraps around, or they're switching to another buffer. */ |
| 2252 | lz4sd->extDictSize = lz4sd->prefixSize; |
| 2253 | lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; |
| 2254 | result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, |
| 2255 | lz4sd->externalDict, lz4sd->extDictSize); |
| 2256 | if (result <= 0) return result; |
| 2257 | lz4sd->prefixSize = (size_t)result; |
| 2258 | lz4sd->prefixEnd = (BYTE*)dest + result; |
| 2259 | } |
| 2260 | |
| 2261 | return result; |
| 2262 | } |
| 2263 | |
| 2264 | LZ4_FORCE_O2_GCC_PPC64LE |
| 2265 | int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) |
| 2266 | { |
| 2267 | LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; |
| 2268 | int result; |
| 2269 | assert(originalSize >= 0); |
| 2270 | |
| 2271 | if (lz4sd->prefixSize == 0) { |
| 2272 | assert(lz4sd->extDictSize == 0); |
| 2273 | result = LZ4_decompress_fast(source, dest, originalSize); |
| 2274 | if (result <= 0) return result; |
| 2275 | lz4sd->prefixSize = (size_t)originalSize; |
| 2276 | lz4sd->prefixEnd = (BYTE*)dest + originalSize; |
| 2277 | } else if (lz4sd->prefixEnd == (BYTE*)dest) { |
| 2278 | if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) |
| 2279 | result = LZ4_decompress_fast(source, dest, originalSize); |
| 2280 | else |
| 2281 | result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, |
| 2282 | lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); |
| 2283 | if (result <= 0) return result; |
| 2284 | lz4sd->prefixSize += (size_t)originalSize; |
| 2285 | lz4sd->prefixEnd += originalSize; |
| 2286 | } else { |
| 2287 | lz4sd->extDictSize = lz4sd->prefixSize; |
| 2288 | lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; |
| 2289 | result = LZ4_decompress_fast_extDict(source, dest, originalSize, |
| 2290 | lz4sd->externalDict, lz4sd->extDictSize); |
| 2291 | if (result <= 0) return result; |
| 2292 | lz4sd->prefixSize = (size_t)originalSize; |
| 2293 | lz4sd->prefixEnd = (BYTE*)dest + originalSize; |
| 2294 | } |
| 2295 | |
| 2296 | return result; |
| 2297 | } |
| 2298 | |
| 2299 | |
| 2300 | /* |
| 2301 | Advanced decoding functions : |
| 2302 | *_usingDict() : |
    These decoding functions work the same as the "_continue" ones,
    except that the dictionary must be explicitly provided as a parameter.
| 2305 | */ |
| 2306 | |
| 2307 | int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) |
| 2308 | { |
| 2309 | if (dictSize==0) |
| 2310 | return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); |
| 2311 | if (dictStart+dictSize == dest) { |
| 2312 | if (dictSize >= 64 KB - 1) { |
| 2313 | return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); |
| 2314 | } |
| 2315 | assert(dictSize >= 0); |
| 2316 | return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); |
| 2317 | } |
| 2318 | assert(dictSize >= 0); |
| 2319 | return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); |
| 2320 | } |
| 2321 | |
| 2322 | int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) |
| 2323 | { |
| 2324 | if (dictSize==0 || dictStart+dictSize == dest) |
| 2325 | return LZ4_decompress_fast(source, dest, originalSize); |
| 2326 | assert(dictSize >= 0); |
| 2327 | return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); |
| 2328 | } |
| 2329 | |
| 2330 | |
| 2331 | /*=************************************************* |
| 2332 | * Obsolete Functions |
| 2333 | ***************************************************/ |
| 2334 | /* obsolete compression functions */ |
| 2335 | int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) |
| 2336 | { |
| 2337 | return LZ4_compress_default(source, dest, inputSize, maxOutputSize); |
| 2338 | } |
| 2339 | int LZ4_compress(const char* src, char* dest, int srcSize) |
| 2340 | { |
| 2341 | return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); |
| 2342 | } |
| 2343 | int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) |
| 2344 | { |
| 2345 | return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); |
| 2346 | } |
| 2347 | int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) |
| 2348 | { |
| 2349 | return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); |
| 2350 | } |
| 2351 | int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) |
| 2352 | { |
| 2353 | return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); |
| 2354 | } |
| 2355 | int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) |
| 2356 | { |
| 2357 | return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); |
| 2358 | } |
| 2359 | |
| 2360 | /* |
| 2361 | These decompression functions are deprecated and should no longer be used. |
| 2362 | They are only provided here for compatibility with older user programs. |
| 2363 | - LZ4_uncompress is totally equivalent to LZ4_decompress_fast |
| 2364 | - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe |
| 2365 | */ |
| 2366 | int LZ4_uncompress (const char* source, char* dest, int outputSize) |
| 2367 | { |
| 2368 | return LZ4_decompress_fast(source, dest, outputSize); |
| 2369 | } |
| 2370 | int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) |
| 2371 | { |
| 2372 | return LZ4_decompress_safe(source, dest, isize, maxOutputSize); |
| 2373 | } |
| 2374 | |
| 2375 | /* Obsolete Streaming functions */ |
| 2376 | |
int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
| 2378 | |
| 2379 | int LZ4_resetStreamState(void* state, char* inputBuffer) |
| 2380 | { |
| 2381 | (void)inputBuffer; |
| 2382 | LZ4_resetStream((LZ4_stream_t*)state); |
| 2383 | return 0; |
| 2384 | } |
| 2385 | |
| 2386 | void* LZ4_create (char* inputBuffer) |
| 2387 | { |
| 2388 | (void)inputBuffer; |
| 2389 | return LZ4_createStream(); |
| 2390 | } |
| 2391 | |
| 2392 | char* LZ4_slideInputBuffer (void* state) |
| 2393 | { |
| 2394 | /* avoid const char * -> char * conversion warning */ |
| 2395 | return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; |
| 2396 | } |
| 2397 | |
| 2398 | #endif /* LZ4_COMMONDEFS_ONLY */ |
| 2399 | |