zstd_decompress_block.c source code [Godot/thirdparty/zstd/decompress/zstd_decompress_block.c]

1	/*
2	* Copyright (c) Meta Platforms, Inc. and affiliates.
3	* All rights reserved.
4	*
5	* This source code is licensed under both the BSD-style license (found in the
6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7	* in the COPYING file in the root directory of this source tree).
8	* You may select, at your option, one of the above-listed licenses.
9	*/
10
11	/ zstd_decompress_block :*
12	* this module takes care of decompressing _compressed_ block */
13
14	/-******************************************************
15	* Dependencies
16	*********************************************************/
17	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18	#include "../common/compiler.h" /* prefetch */
19	#include "../common/cpu.h" /* bmi2 */
20	#include "../common/mem.h" /* low level memory routines */
21	#define FSE_STATIC_LINKING_ONLY
22	#include "../common/fse.h"
23	#include "../common/huf.h"
24	#include "../common/zstd_internal.h"
25	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
26	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
27	#include "zstd_decompress_block.h"
28	#include "../common/bits.h" /* ZSTD_highbit32 */
29
30	/_******************************************************
31	* Macros
32	**********************************************************/
33
34	/ These two optional macros force the use one way or another of the two*
35	* ZSTD_decompressSequences implementations. You can't force in both directions
36	* at the same time.
37	*/
38	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41	#endif
42
43
44	/_******************************************************
45	* Memory operations
46	**********************************************************/
47	static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, `4`); }
48
49
50	/-************************************************************
51	* Block decoding
52	***************************************************************/
53
54	/! ZSTD_getcBlockSize() :*
55	* Provides the size of compressed block from block header `src` */
56	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57	blockProperties_t* bpPtr)
58	{
59	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
61	{ U32 const cBlockHeader = MEM_readLE24(src);
62	U32 const cSize = cBlockHeader >> `3`;
63	bpPtr->lastBlock = cBlockHeader & `1`;
64	bpPtr->blockType = (blockType_e)((cBlockHeader >> `1`) & `3`);
65	bpPtr->origSize = cSize; / only useful for RLE /
66	if (bpPtr->blockType == bt_rle) return `1`;
67	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68	return cSize;
69	}
70	}
71
72	/ Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer /
73	static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74	const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75	{
76	if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77	{
78	/ room for litbuffer to fit without read faulting /
79	dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80	dctx->litBufferEnd = dctx->litBuffer + litSize;
81	dctx->litBufferLocation = ZSTD_in_dst;
82	}
83	else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84	{
85	/ won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer /
86	if (splitImmediately) {
87	/ won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer /
88	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89	dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90	}
91	else {
92	/ initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after /
93	dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94	dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95	}
96	dctx->litBufferLocation = ZSTD_split;
97	}
98	else
99	{
100	/ fits entirely within litExtraBuffer, so no split is necessary /
101	dctx->litBuffer = dctx->litExtraBuffer;
102	dctx->litBufferEnd = dctx->litBuffer + litSize;
103	dctx->litBufferLocation = ZSTD_not_in_dst;
104	}
105	}
106
107	/ Hidden declaration for fullbench /
108	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109	const void* src, size_t srcSize,
110	void* dst, size_t dstCapacity, const streaming_operation streaming);
111	/! ZSTD_decodeLiteralsBlock() :*
112	* Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113	* in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
114	* block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115	* stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116	*
117	* @return : nb of bytes read from src (< srcSize )
118	* note : symbol not declared but exposed for fullbench */
119	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120	const void* src, size_t srcSize, / note : srcSize < BLOCKSIZE /
121	void* dst, size_t dstCapacity, const streaming_operation streaming)
122	{
123	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock");
124	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125
126	{ const BYTE* const istart = (const BYTE*) src;
127	symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[`0`] & `3`);
128
129	switch(litEncType)
130	{
131	case set_repeat:
132	DEBUGLOG(`5`, "set_repeat flag : re-using stats from previous compressed literals block");
133	RETURN_ERROR_IF(dctx->litEntropy==`0`, dictionary_corrupted, "");
134	ZSTD_FALLTHROUGH;
135
136	case set_compressed:
137	RETURN_ERROR_IF(srcSize < `5`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
138	{ size_t lhSize, litSize, litCSize;
139	U32 singleStream=`0`;
140	U32 const lhlCode = (istart[`0`] >> `2`) & `3`;
141	U32 const lhc = MEM_readLE32(istart);
142	size_t hufSuccess;
143	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144	int const flags = `0`
145	\| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : `0`)
146	\| (dctx->disableHufAsm ? HUF_flags_disableAsm : `0`);
147	switch(lhlCode)
148	{
149	case `0`: case `1`: default: / note : default is impossible, since lhlCode into [0..3] /
150	/ 2 - 2 - 10 - 10 /
151	singleStream = !lhlCode;
152	lhSize = `3`;
153	litSize = (lhc >> `4`) & `0x3FF`;
154	litCSize = (lhc >> `14`) & `0x3FF`;
155	break;
156	case `2`:
157	/ 2 - 2 - 14 - 14 /
158	lhSize = `4`;
159	litSize = (lhc >> `4`) & `0x3FFF`;
160	litCSize = lhc >> `18`;
161	break;
162	case `3`:
163	/ 2 - 2 - 18 - 18 /
164	lhSize = `5`;
165	litSize = (lhc >> `4`) & `0x3FFFF`;
166	litCSize = (lhc >> `22`) + ((size_t)istart[`4`] << `10`);
167	break;
168	}
169	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
170	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
171	if (!singleStream)
172	RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
173	"Not enough literals (%zu) for the 4-streams mode (min %u)",
174	litSize, MIN_LITERALS_FOR_4_STREAMS);
175	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
176	RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
177	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, `0`);
178
179	/ prefetch huffman table if cold /
180	if (dctx->ddictIsCold && (litSize > `768` / heuristic /)) {
181	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
182	}
183
184	if (litEncType==set_repeat) {
185	if (singleStream) {
186	hufSuccess = HUF_decompress1X_usingDTable(
187	dctx->litBuffer, litSize, istart+lhSize, litCSize,
188	dctx->HUFptr, flags);
189	} else {
190	assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
191	hufSuccess = HUF_decompress4X_usingDTable(
192	dctx->litBuffer, litSize, istart+lhSize, litCSize,
193	dctx->HUFptr, flags);
194	}
195	} else {
196	if (singleStream) {
197	#if defined(HUF_FORCE_DECOMPRESS_X2)
198	hufSuccess = HUF_decompress1X_DCtx_wksp(
199	dctx->entropy.hufTable, dctx->litBuffer, litSize,
200	istart+lhSize, litCSize, dctx->workspace,
201	sizeof(dctx->workspace), flags);
202	#else
203	hufSuccess = HUF_decompress1X1_DCtx_wksp(
204	dctx->entropy.hufTable, dctx->litBuffer, litSize,
205	istart+lhSize, litCSize, dctx->workspace,
206	sizeof(dctx->workspace), flags);
207	#endif
208	} else {
209	hufSuccess = HUF_decompress4X_hufOnly_wksp(
210	dctx->entropy.hufTable, dctx->litBuffer, litSize,
211	istart+lhSize, litCSize, dctx->workspace,
212	sizeof(dctx->workspace), flags);
213	}
214	}
215	if (dctx->litBufferLocation == ZSTD_split)
216	{
217	ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
218	ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
219	dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
220	dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
221	}
222
223	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
224
225	dctx->litPtr = dctx->litBuffer;
226	dctx->litSize = litSize;
227	dctx->litEntropy = `1`;
228	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
229	return litCSize + lhSize;
230	}
231
232	case set_basic:
233	{ size_t litSize, lhSize;
234	U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
235	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
236	switch(lhlCode)
237	{
238	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
239	lhSize = `1`;
240	litSize = istart[`0`] >> `3`;
241	break;
242	case `1`:
243	lhSize = `2`;
244	litSize = MEM_readLE16(istart) >> `4`;
245	break;
246	case `3`:
247	lhSize = `3`;
248	RETURN_ERROR_IF(srcSize<`3`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
249	litSize = MEM_readLE24(istart) >> `4`;
250	break;
251	}
252
253	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
254	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
255	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, `1`);
256	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { / risk reading beyond src buffer with wildcopy /
257	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
258	if (dctx->litBufferLocation == ZSTD_split)
259	{
260	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
261	ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
262	}
263	else
264	{
265	ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
266	}
267	dctx->litPtr = dctx->litBuffer;
268	dctx->litSize = litSize;
269	return lhSize+litSize;
270	}
271	/ direct reference into compressed stream /
272	dctx->litPtr = istart+lhSize;
273	dctx->litSize = litSize;
274	dctx->litBufferEnd = dctx->litPtr + litSize;
275	dctx->litBufferLocation = ZSTD_not_in_dst;
276	return lhSize+litSize;
277	}
278
279	case set_rle:
280	{ U32 const lhlCode = ((istart[`0`]) >> `2`) & `3`;
281	size_t litSize, lhSize;
282	size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
283	switch(lhlCode)
284	{
285	case `0`: case `2`: default: / note : default is impossible, since lhlCode into [0..3] /
286	lhSize = `1`;
287	litSize = istart[`0`] >> `3`;
288	break;
289	case `1`:
290	lhSize = `2`;
291	RETURN_ERROR_IF(srcSize<`3`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
292	litSize = MEM_readLE16(istart) >> `4`;
293	break;
294	case `3`:
295	lhSize = `3`;
296	RETURN_ERROR_IF(srcSize<`4`, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
297	litSize = MEM_readLE24(istart) >> `4`;
298	break;
299	}
300	RETURN_ERROR_IF(litSize > `0` && dst == NULL, dstSize_tooSmall, "NULL not handled");
301	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
302	RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
303	ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, `1`);
304	if (dctx->litBufferLocation == ZSTD_split)
305	{
306	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
307	ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
308	}
309	else
310	{
311	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
312	}
313	dctx->litPtr = dctx->litBuffer;
314	dctx->litSize = litSize;
315	return lhSize+`1`;
316	}
317	default:
318	RETURN_ERROR(corruption_detected, "impossible");
319	}
320	}
321	}
322
323	/ Default FSE distribution tables.*
324	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
325	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
326	* They were generated programmatically with following method :
327	* - start from default distributions, present in /lib/common/zstd_internal.h
328	* - generate tables normally, using ZSTD_buildFSETable()
329	* - printout the content of tables
330	* - pretify output, report below, test with fuzzer to ensure it's correct */
331
332	/ Default FSE distribution table for Literal Lengths /
333	static const ZSTD_seqSymbol LL_defaultDTable[(`1`<<LL_DEFAULTNORMLOG)+`1`] = {
334	{ `1`, `1`, `1`, LL_DEFAULTNORMLOG}, / header : fastMode, tableLog /
335	/ nextState, nbAddBits, nbBits, baseVal /
336	{ `0`, `0`, `4`, `0`}, { `16`, `0`, `4`, `0`},
337	{ `32`, `0`, `5`, `1`}, { `0`, `0`, `5`, `3`},
338	{ `0`, `0`, `5`, `4`}, { `0`, `0`, `5`, `6`},
339	{ `0`, `0`, `5`, `7`}, { `0`, `0`, `5`, `9`},
340	{ `0`, `0`, `5`, `10`}, { `0`, `0`, `5`, `12`},
341	{ `0`, `0`, `6`, `14`}, { `0`, `1`, `5`, `16`},
342	{ `0`, `1`, `5`, `20`}, { `0`, `1`, `5`, `22`},
343	{ `0`, `2`, `5`, `28`}, { `0`, `3`, `5`, `32`},
344	{ `0`, `4`, `5`, `48`}, { `32`, `6`, `5`, `64`},
345	{ `0`, `7`, `5`, `128`}, { `0`, `8`, `6`, `256`},
346	{ `0`, `10`, `6`, `1024`}, { `0`, `12`, `6`, `4096`},
347	{ `32`, `0`, `4`, `0`}, { `0`, `0`, `4`, `1`},
348	{ `0`, `0`, `5`, `2`}, { `32`, `0`, `5`, `4`},
349	{ `0`, `0`, `5`, `5`}, { `32`, `0`, `5`, `7`},
350	{ `0`, `0`, `5`, `8`}, { `32`, `0`, `5`, `10`},
351	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
352	{ `32`, `1`, `5`, `16`}, { `0`, `1`, `5`, `18`},
353	{ `32`, `1`, `5`, `22`}, { `0`, `2`, `5`, `24`},
354	{ `32`, `3`, `5`, `32`}, { `0`, `3`, `5`, `40`},
355	{ `0`, `6`, `4`, `64`}, { `16`, `6`, `4`, `64`},
356	{ `32`, `7`, `5`, `128`}, { `0`, `9`, `6`, `512`},
357	{ `0`, `11`, `6`, `2048`}, { `48`, `0`, `4`, `0`},
358	{ `16`, `0`, `4`, `1`}, { `32`, `0`, `5`, `2`},
359	{ `32`, `0`, `5`, `3`}, { `32`, `0`, `5`, `5`},
360	{ `32`, `0`, `5`, `6`}, { `32`, `0`, `5`, `8`},
361	{ `32`, `0`, `5`, `9`}, { `32`, `0`, `5`, `11`},
362	{ `32`, `0`, `5`, `12`}, { `0`, `0`, `6`, `15`},
363	{ `32`, `1`, `5`, `18`}, { `32`, `1`, `5`, `20`},
364	{ `32`, `2`, `5`, `24`}, { `32`, `2`, `5`, `28`},
365	{ `32`, `3`, `5`, `40`}, { `32`, `4`, `5`, `48`},
366	{ `0`, `16`, `6`,`65536`}, { `0`, `15`, `6`,`32768`},
367	{ `0`, `14`, `6`,`16384`}, { `0`, `13`, `6`, `8192`},
368	}; / LL_defaultDTable /
369
370	/ Default FSE distribution table for Offset Codes /
371	static const ZSTD_seqSymbol OF_defaultDTable[(`1`<<OF_DEFAULTNORMLOG)+`1`] = {
372	{ `1`, `1`, `1`, OF_DEFAULTNORMLOG}, / header : fastMode, tableLog /
373	/ nextState, nbAddBits, nbBits, baseVal /
374	{ `0`, `0`, `5`, `0`}, { `0`, `6`, `4`, `61`},
375	{ `0`, `9`, `5`, `509`}, { `0`, `15`, `5`,`32765`},
376	{ `0`, `21`, `5`,`2097149`}, { `0`, `3`, `5`, `5`},
377	{ `0`, `7`, `4`, `125`}, { `0`, `12`, `5`, `4093`},
378	{ `0`, `18`, `5`,`262141`}, { `0`, `23`, `5`,`8388605`},
379	{ `0`, `5`, `5`, `29`}, { `0`, `8`, `4`, `253`},
380	{ `0`, `14`, `5`,`16381`}, { `0`, `20`, `5`,`1048573`},
381	{ `0`, `2`, `5`, `1`}, { `16`, `7`, `4`, `125`},
382	{ `0`, `11`, `5`, `2045`}, { `0`, `17`, `5`,`131069`},
383	{ `0`, `22`, `5`,`4194301`}, { `0`, `4`, `5`, `13`},
384	{ `16`, `8`, `4`, `253`}, { `0`, `13`, `5`, `8189`},
385	{ `0`, `19`, `5`,`524285`}, { `0`, `1`, `5`, `1`},
386	{ `16`, `6`, `4`, `61`}, { `0`, `10`, `5`, `1021`},
387	{ `0`, `16`, `5`,`65533`}, { `0`, `28`, `5`,`268435453`},
388	{ `0`, `27`, `5`,`134217725`}, { `0`, `26`, `5`,`67108861`},
389	{ `0`, `25`, `5`,`33554429`}, { `0`, `24`, `5`,`16777213`},
390	}; / OF_defaultDTable /
391
392
393	/ Default FSE distribution table for Match Lengths /
394	static const ZSTD_seqSymbol ML_defaultDTable[(`1`<<ML_DEFAULTNORMLOG)+`1`] = {
395	{ `1`, `1`, `1`, ML_DEFAULTNORMLOG}, / header : fastMode, tableLog /
396	/ nextState, nbAddBits, nbBits, baseVal /
397	{ `0`, `0`, `6`, `3`}, { `0`, `0`, `4`, `4`},
398	{ `32`, `0`, `5`, `5`}, { `0`, `0`, `5`, `6`},
399	{ `0`, `0`, `5`, `8`}, { `0`, `0`, `5`, `9`},
400	{ `0`, `0`, `5`, `11`}, { `0`, `0`, `6`, `13`},
401	{ `0`, `0`, `6`, `16`}, { `0`, `0`, `6`, `19`},
402	{ `0`, `0`, `6`, `22`}, { `0`, `0`, `6`, `25`},
403	{ `0`, `0`, `6`, `28`}, { `0`, `0`, `6`, `31`},
404	{ `0`, `0`, `6`, `34`}, { `0`, `1`, `6`, `37`},
405	{ `0`, `1`, `6`, `41`}, { `0`, `2`, `6`, `47`},
406	{ `0`, `3`, `6`, `59`}, { `0`, `4`, `6`, `83`},
407	{ `0`, `7`, `6`, `131`}, { `0`, `9`, `6`, `515`},
408	{ `16`, `0`, `4`, `4`}, { `0`, `0`, `4`, `5`},
409	{ `32`, `0`, `5`, `6`}, { `0`, `0`, `5`, `7`},
410	{ `32`, `0`, `5`, `9`}, { `0`, `0`, `5`, `10`},
411	{ `0`, `0`, `6`, `12`}, { `0`, `0`, `6`, `15`},
412	{ `0`, `0`, `6`, `18`}, { `0`, `0`, `6`, `21`},
413	{ `0`, `0`, `6`, `24`}, { `0`, `0`, `6`, `27`},
414	{ `0`, `0`, `6`, `30`}, { `0`, `0`, `6`, `33`},
415	{ `0`, `1`, `6`, `35`}, { `0`, `1`, `6`, `39`},
416	{ `0`, `2`, `6`, `43`}, { `0`, `3`, `6`, `51`},
417	{ `0`, `4`, `6`, `67`}, { `0`, `5`, `6`, `99`},
418	{ `0`, `8`, `6`, `259`}, { `32`, `0`, `4`, `4`},
419	{ `48`, `0`, `4`, `4`}, { `16`, `0`, `4`, `5`},
420	{ `32`, `0`, `5`, `7`}, { `32`, `0`, `5`, `8`},
421	{ `32`, `0`, `5`, `10`}, { `32`, `0`, `5`, `11`},
422	{ `0`, `0`, `6`, `14`}, { `0`, `0`, `6`, `17`},
423	{ `0`, `0`, `6`, `20`}, { `0`, `0`, `6`, `23`},
424	{ `0`, `0`, `6`, `26`}, { `0`, `0`, `6`, `29`},
425	{ `0`, `0`, `6`, `32`}, { `0`, `16`, `6`,`65539`},
426	{ `0`, `15`, `6`,`32771`}, { `0`, `14`, `6`,`16387`},
427	{ `0`, `13`, `6`, `8195`}, { `0`, `12`, `6`, `4099`},
428	{ `0`, `11`, `6`, `2051`}, { `0`, `10`, `6`, `1027`},
429	}; / ML_defaultDTable /
430
431
432	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
433	{
434	void* ptr = dt;
435	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
436	ZSTD_seqSymbol* const cell = dt + `1`;
437
438	DTableH->tableLog = `0`;
439	DTableH->fastMode = `0`;
440
441	cell->nbBits = `0`;
442	cell->nextState = `0`;
443	assert(nbAddBits < `255`);
444	cell->nbAdditionalBits = nbAddBits;
445	cell->baseValue = baseValue;
446	}
447
448
449	/ ZSTD_buildFSETable() :*
450	* generate FSE decoding table for one symbol (ll, ml or off)
451	* cannot fail if input is valid =>
452	* all inputs are presumed validated at this stage */
453	FORCE_INLINE_TEMPLATE
454	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
455	const short* normalizedCounter, unsigned maxSymbolValue,
456	const U32* baseValue, const U8* nbAdditionalBits,
457	unsigned tableLog, void* wksp, size_t wkspSize)
458	{
459	ZSTD_seqSymbol* const tableDecode = dt+`1`;
460	U32 const maxSV1 = maxSymbolValue + `1`;
461	U32 const tableSize = `1` << tableLog;
462
463	U16* symbolNext = (U16*)wksp;
464	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + `1`);
465	U32 highThreshold = tableSize - `1`;
466
467
468	/ Sanity Checks /
469	assert(maxSymbolValue <= MaxSeq);
470	assert(tableLog <= MaxFSELog);
471	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
472	(void)wkspSize;
473	/ Init, lay down lowprob symbols /
474	{ ZSTD_seqSymbol_header DTableH;
475	DTableH.tableLog = tableLog;
476	DTableH.fastMode = `1`;
477	{ S16 const largeLimit= (S16)(`1` << (tableLog-`1`));
478	U32 s;
479	for (s=`0`; s<maxSV1; s++) {
480	if (normalizedCounter[s]==-`1`) {
481	tableDecode[highThreshold--].baseValue = s;
482	symbolNext[s] = `1`;
483	} else {
484	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=`0`;
485	assert(normalizedCounter[s]>=`0`);
486	symbolNext[s] = (U16)normalizedCounter[s];
487	} } }
488	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
489	}
490
491	/ Spread symbols /
492	assert(tableSize <= `512`);
493	/ Specialized symbol spreading for the case when there are*
494	* no low probability (-1 count) symbols. When compressing
495	* small blocks we avoid low probability symbols to hit this
496	* case, since header decoding speed matters more.
497	*/
498	if (highThreshold == tableSize - `1`) {
499	size_t const tableMask = tableSize-`1`;
500	size_t const step = FSE_TABLESTEP(tableSize);
501	/ First lay down the symbols in order.*
502	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
503	* misses since small blocks generally have small table logs, so nearly
504	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
505	* our buffer to handle the over-write.
506	*/
507	{
508	U64 const add = `0x0101010101010101ull`;
509	size_t pos = `0`;
510	U64 sv = `0`;
511	U32 s;
512	for (s=`0`; s<maxSV1; ++s, sv += add) {
513	int i;
514	int const n = normalizedCounter[s];
515	MEM_write64(spread + pos, sv);
516	for (i = `8`; i < n; i += `8`) {
517	MEM_write64(spread + pos + i, sv);
518	}
519	assert(n>=`0`);
520	pos += (size_t)n;
521	}
522	}
523	/ Now we spread those positions across the table.*
524	* The benefit of doing it in two stages is that we avoid the
525	* variable size inner loop, which caused lots of branch misses.
526	* Now we can run through all the positions without any branch misses.
527	* We unroll the loop twice, since that is what empirically worked best.
528	*/
529	{
530	size_t position = `0`;
531	size_t s;
532	size_t const unroll = `2`;
533	assert(tableSize % unroll == `0`); / FSE_MIN_TABLELOG is 5 /
534	for (s = `0`; s < (size_t)tableSize; s += unroll) {
535	size_t u;
536	for (u = `0`; u < unroll; ++u) {
537	size_t const uPosition = (position + (u * step)) & tableMask;
538	tableDecode[uPosition].baseValue = spread[s + u];
539	}
540	position = (position + (unroll * step)) & tableMask;
541	}
542	assert(position == `0`);
543	}
544	} else {
545	U32 const tableMask = tableSize-`1`;
546	U32 const step = FSE_TABLESTEP(tableSize);
547	U32 s, position = `0`;
548	for (s=`0`; s<maxSV1; s++) {
549	int i;
550	int const n = normalizedCounter[s];
551	for (i=`0`; i<n; i++) {
552	tableDecode[position].baseValue = s;
553	position = (position + step) & tableMask;
554	while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; / lowprob area /
555	} }
556	assert(position == `0`); / position must reach all cells once, otherwise normalizedCounter is incorrect /
557	}
558
559	/ Build Decoding table /
560	{
561	U32 u;
562	for (u=`0`; u<tableSize; u++) {
563	U32 const symbol = tableDecode[u].baseValue;
564	U32 const nextState = symbolNext[symbol]++;
565	tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
566	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
567	assert(nbAdditionalBits[symbol] < `255`);
568	tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
569	tableDecode[u].baseValue = baseValue[symbol];
570	}
571	}
572	}
573
574	/ Avoids the FORCE_INLINE of the _body() function. /
575	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
576	const short* normalizedCounter, unsigned maxSymbolValue,
577	const U32* baseValue, const U8* nbAdditionalBits,
578	unsigned tableLog, void* wksp, size_t wkspSize)
579	{
580	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
581	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
582	}
583
584	#if DYNAMIC_BMI2
585	BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
586	const short* normalizedCounter, unsigned maxSymbolValue,
587	const U32* baseValue, const U8* nbAdditionalBits,
588	unsigned tableLog, void* wksp, size_t wkspSize)
589	{
590	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
591	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
592	}
593	#endif
594
595	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
596	const short* normalizedCounter, unsigned maxSymbolValue,
597	const U32* baseValue, const U8* nbAdditionalBits,
598	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
599	{
600	#if DYNAMIC_BMI2
601	if (bmi2) {
602	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
603	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
604	return;
605	}
606	#endif
607	(void)bmi2;
608	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
609	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
610	}
611
612
613	/! ZSTD_buildSeqTable() :*
614	* @return : nb bytes read from src,
615	* or an error code if it fails */
616	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
617	symbolEncodingType_e type, unsigned max, U32 maxLog,
618	const void* src, size_t srcSize,
619	const U32* baseValue, const U8* nbAdditionalBits,
620	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
621	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
622	int bmi2)
623	{
624	switch(type)
625	{
626	case set_rle :
627	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
628	RETURN_ERROR_IF(((const* BYTE*)src) > max, corruption_detected, "");
629	{ U32 const symbol = (const* BYTE*)src;
630	U32 const baseline = baseValue[symbol];
631	U8 const nbBits = nbAdditionalBits[symbol];
632	ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
633	}
634	*DTablePtr = DTableSpace;
635	return `1`;
636	case set_basic :
637	*DTablePtr = defaultTable;
638	return `0`;
639	case set_repeat:
640	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
641	/ prefetch FSE table if used /
642	if (ddictIsCold && (nbSeq > `24` / heuristic /)) {
643	const void* const pStart = *DTablePtr;
644	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
645	PREFETCH_AREA(pStart, pSize);
646	}
647	return `0`;
648	case set_compressed :
649	{ unsigned tableLog;
650	S16 norm[MaxSeq+`1`];
651	size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
652	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
653	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
654	ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
655	*DTablePtr = DTableSpace;
656	return headerSize;
657	}
658	default :
659	assert(`0`);
660	RETURN_ERROR(GENERIC, "impossible");
661	}
662	}
663
664	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
665	const void* src, size_t srcSize)
666	{
667	const BYTE* const istart = (const BYTE*)src;
668	const BYTE* const iend = istart + srcSize;
669	const BYTE* ip = istart;
670	int nbSeq;
671	DEBUGLOG(`5`, "ZSTD_decodeSeqHeaders");
672
673	/ check /
674	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
675
676	/ SeqHead /
677	nbSeq = *ip++;
678	if (!nbSeq) {
679	*nbSeqPtr=`0`;
680	RETURN_ERROR_IF(srcSize != `1`, srcSize_wrong, "");
681	return `1`;
682	}
683	if (nbSeq > `0x7F`) {
684	if (nbSeq == `0xFF`) {
685	RETURN_ERROR_IF(ip+`2` > iend, srcSize_wrong, "");
686	nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
687	ip+=`2`;
688	} else {
689	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
690	nbSeq = ((nbSeq-`0x80`)<<`8`) + *ip++;
691	}
692	}
693	*nbSeqPtr = nbSeq;
694
695	/ FSE table descriptors /
696	RETURN_ERROR_IF(ip+`1` > iend, srcSize_wrong, ""); / minimum possible size: 1 byte for symbol encoding types /
697	{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> `6`);
698	symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> `4`) & `3`);
699	symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> `2`) & `3`);
700	ip++;
701
702	/ Build DTables /
703	{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
704	LLtype, MaxLL, LLFSELog,
705	ip, iend-ip,
706	LL_base, LL_bits,
707	LL_defaultDTable, dctx->fseEntropy,
708	dctx->ddictIsCold, nbSeq,
709	dctx->workspace, sizeof(dctx->workspace),
710	ZSTD_DCtx_get_bmi2(dctx));
711	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
712	ip += llhSize;
713	}
714
715	{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
716	OFtype, MaxOff, OffFSELog,
717	ip, iend-ip,
718	OF_base, OF_bits,
719	OF_defaultDTable, dctx->fseEntropy,
720	dctx->ddictIsCold, nbSeq,
721	dctx->workspace, sizeof(dctx->workspace),
722	ZSTD_DCtx_get_bmi2(dctx));
723	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
724	ip += ofhSize;
725	}
726
727	{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
728	MLtype, MaxML, MLFSELog,
729	ip, iend-ip,
730	ML_base, ML_bits,
731	ML_defaultDTable, dctx->fseEntropy,
732	dctx->ddictIsCold, nbSeq,
733	dctx->workspace, sizeof(dctx->workspace),
734	ZSTD_DCtx_get_bmi2(dctx));
735	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
736	ip += mlhSize;
737	}
738	}
739
740	return ip-istart;
741	}
742
743
744	typedef struct {
745	size_t litLength;
746	size_t matchLength;
747	size_t offset;
748	} seq_t;
749
750	typedef struct {
751	size_t state;
752	const ZSTD_seqSymbol* table;
753	} ZSTD_fseState;
754
755	typedef struct {
756	BIT_DStream_t DStream;
757	ZSTD_fseState stateLL;
758	ZSTD_fseState stateOffb;
759	ZSTD_fseState stateML;
760	size_t prevOffset[ZSTD_REP_NUM];
761	} seqState_t;
762
763	/! ZSTD_overlapCopy8() :*
764	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
765	* If the offset is < 8 then the offset is spread to at least 8 bytes.
766	*
767	* Precondition: ip <= op
768	* Postcondition: op - op >= 8
769	*/
770	HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
771	assert(ip <= op);
772	if (offset < `8`) {
773	/ close range match, overlap /
774	static const U32 dec32table[] = { `0`, `1`, `2`, `1`, `4`, `4`, `4`, `4` }; / added /
775	static const int dec64table[] = { `8`, `8`, `8`, `7`, `8`, `9`,`10`,`11` }; / subtracted /
776	int const sub2 = dec64table[offset];
777	(op)[`0`] = (ip)[`0`];
778	(op)[`1`] = (ip)[`1`];
779	(op)[`2`] = (ip)[`2`];
780	(op)[`3`] = (ip)[`3`];
781	*ip += dec32table[offset];
782	ZSTD_copy4(op+`4`, ip);
783	*ip -= sub2;
784	} else {
785	ZSTD_copy8(op, ip);
786	}
787	*ip += `8`;
788	*op += `8`;
789	assert(op - ip >= `8`);
790	}
791
792	/! ZSTD_safecopy() :*
793	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
794	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
795	* This function is only called in the uncommon case where the sequence is near the end of the block. It
796	* should be fast for a single long sequence, but can be slow for several short sequences.
797	*
798	* @param ovtype controls the overlap detection
799	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
800	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
801	* The src buffer must be before the dst buffer.
802	*/
803	static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
804	ptrdiff_t const diff = op - ip;
805	BYTE* const oend = op + length;
806
807	assert((ovtype == ZSTD_no_overlap && (diff <= -`8` \|\| diff >= `8` \|\| op >= oend_w)) \|\|
808	(ovtype == ZSTD_overlap_src_before_dst && diff >= `0`));
809
810	if (length < `8`) {
811	/ Handle short lengths. /
812	while (op < oend) op++ = ip++;
813	return;
814	}
815	if (ovtype == ZSTD_overlap_src_before_dst) {
816	/ Copy 8 bytes and ensure the offset >= 8 when there can be overlap. /
817	assert(length >= `8`);
818	ZSTD_overlapCopy8(&op, &ip, diff);
819	length -= `8`;
820	assert(op - ip >= `8`);
821	assert(op <= oend);
822	}
823
824	if (oend <= oend_w) {
825	/ No risk of overwrite. /
826	ZSTD_wildcopy(op, ip, length, ovtype);
827	return;
828	}
829	if (op <= oend_w) {
830	/ Wildcopy until we get close to the end. /
831	assert(oend > oend_w);
832	ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
833	ip += oend_w - op;
834	op += oend_w - op;
835	}
836	/ Handle the leftovers. /
837	while (op < oend) op++ = ip++;
838	}
839
840	/ ZSTD_safecopyDstBeforeSrc():*
841	* This version allows overlap with dst before src, or handles the non-overlap case with dst after src
842	* Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
843	static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
844	ptrdiff_t const diff = op - ip;
845	BYTE* const oend = op + length;
846
847	if (length < `8` \|\| diff > -`8`) {
848	/ Handle short lengths, close overlaps, and dst not before src. /
849	while (op < oend) op++ = ip++;
850	return;
851	}
852
853	if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
854	ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
855	ip += oend - WILDCOPY_OVERLENGTH - op;
856	op += oend - WILDCOPY_OVERLENGTH - op;
857	}
858
859	/ Handle the leftovers. /
860	while (op < oend) op++ = ip++;
861	}
862
863	/ ZSTD_execSequenceEnd():*
864	* This version handles cases that are near the end of the output buffer. It requires
865	* more careful checks to make sure there is no overflow. By separating out these hard
866	* and unlikely cases, we can speed up the common cases.
867	*
868	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
869	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
870	*/
871	FORCE_NOINLINE
872	size_t ZSTD_execSequenceEnd(BYTE* op,
873	BYTE* const oend, seq_t sequence,
874	const BYTE** litPtr, const BYTE* const litLimit,
875	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
876	{
877	BYTE* const oLitEnd = op + sequence.litLength;
878	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
879	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
880	const BYTE* match = oLitEnd - sequence.offset;
881	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
882
883	/ bounds checks : careful of address space overflow in 32-bit mode /
884	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
885	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
886	assert(op < op + sequenceLength);
887	assert(oLitEnd < op + sequenceLength);
888
889	/ copy literals /
890	ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
891	op = oLitEnd;
892	*litPtr = iLitEnd;
893
894	/ copy Match /
895	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
896	/ offset beyond prefix /
897	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
898	match = dictEnd - (prefixStart - match);
899	if (match + sequence.matchLength <= dictEnd) {
900	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
901	return sequenceLength;
902	}
903	/ span extDict & currentPrefixSegment /
904	{ size_t const length1 = dictEnd - match;
905	ZSTD_memmove(oLitEnd, match, length1);
906	op = oLitEnd + length1;
907	sequence.matchLength -= length1;
908	match = prefixStart;
909	}
910	}
911	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
912	return sequenceLength;
913	}
914
915	/ ZSTD_execSequenceEndSplitLitBuffer():*
916	* This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
917	*/
918	FORCE_NOINLINE
919	size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
920	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
921	const BYTE** litPtr, const BYTE* const litLimit,
922	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
923	{
924	BYTE* const oLitEnd = op + sequence.litLength;
925	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
926	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
927	const BYTE* match = oLitEnd - sequence.offset;
928
929
930	/ bounds checks : careful of address space overflow in 32-bit mode /
931	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
932	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
933	assert(op < op + sequenceLength);
934	assert(oLitEnd < op + sequenceLength);
935
936	/ copy literals /
937	RETURN_ERROR_IF(op > litPtr && op < litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
938	ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
939	op = oLitEnd;
940	*litPtr = iLitEnd;
941
942	/ copy Match /
943	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
944	/ offset beyond prefix /
945	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
946	match = dictEnd - (prefixStart - match);
947	if (match + sequence.matchLength <= dictEnd) {
948	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
949	return sequenceLength;
950	}
951	/ span extDict & currentPrefixSegment /
952	{ size_t const length1 = dictEnd - match;
953	ZSTD_memmove(oLitEnd, match, length1);
954	op = oLitEnd + length1;
955	sequence.matchLength -= length1;
956	match = prefixStart;
957	}
958	}
959	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
960	return sequenceLength;
961	}
962
963	HINT_INLINE
964	size_t ZSTD_execSequence(BYTE* op,
965	BYTE* const oend, seq_t sequence,
966	const BYTE** litPtr, const BYTE* const litLimit,
967	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
968	{
969	BYTE* const oLitEnd = op + sequence.litLength;
970	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
971	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
972	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; / risk : address space underflow on oend=NULL /
973	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
974	const BYTE* match = oLitEnd - sequence.offset;
975
976	assert(op != NULL / Precondition /);
977	assert(oend_w < oend / No underflow /);
978
979	#if defined(__aarch64__)
980	/ prefetch sequence starting from match that will be used for copy later /
981	PREFETCH_L1(match);
982	#endif
983	/ Handle edge cases in a slow path:*
984	* - Read beyond end of literals
985	* - Match end is within WILDCOPY_OVERLIMIT of oend
986	* - 32-bit mode and the match length overflows
987	*/
988	if (UNLIKELY(
989	iLitEnd > litLimit \|\|
990	oMatchEnd > oend_w \|\|
991	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
992	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
993
994	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
995	assert(op <= oLitEnd / No overflow /);
996	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
997	assert(oMatchEnd <= oend / No underflow /);
998	assert(iLitEnd <= litLimit / Literal length is in bounds /);
999	assert(oLitEnd <= oend_w / Can wildcopy literals /);
1000	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
1001
1002	/ Copy Literals:*
1003	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1004	* We likely don't need the full 32-byte wildcopy.
1005	*/
1006	assert(WILDCOPY_OVERLENGTH >= `16`);
1007	ZSTD_copy16(op, (*litPtr));
1008	if (UNLIKELY(sequence.litLength > `16`)) {
1009	ZSTD_wildcopy(op + `16`, (*litPtr) + `16`, sequence.litLength - `16`, ZSTD_no_overlap);
1010	}
1011	op = oLitEnd;
1012	litPtr = iLitEnd; /* update for next sequence /
1013
1014	/ Copy Match /
1015	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1016	/ offset beyond prefix -> go into extDict /
1017	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1018	match = dictEnd + (match - prefixStart);
1019	if (match + sequence.matchLength <= dictEnd) {
1020	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1021	return sequenceLength;
1022	}
1023	/ span extDict & currentPrefixSegment /
1024	{ size_t const length1 = dictEnd - match;
1025	ZSTD_memmove(oLitEnd, match, length1);
1026	op = oLitEnd + length1;
1027	sequence.matchLength -= length1;
1028	match = prefixStart;
1029	}
1030	}
1031	/ Match within prefix of 1 or more bytes /
1032	assert(op <= oMatchEnd);
1033	assert(oMatchEnd <= oend_w);
1034	assert(match >= prefixStart);
1035	assert(sequence.matchLength >= `1`);
1036
1037	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1038	* without overlap checking.
1039	*/
1040	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1041	/ We bet on a full wildcopy for matches, since we expect matches to be*
1042	* longer than literals (in general). In silesia, ~10% of matches are longer
1043	* than 16 bytes.
1044	*/
1045	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1046	return sequenceLength;
1047	}
1048	assert(sequence.offset < WILDCOPY_VECLEN);
1049
1050	/ Copy 8 bytes and spread the offset to be >= 8. /
1051	ZSTD_overlapCopy8(&op, &match, sequence.offset);
1052
1053	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1054	if (sequence.matchLength > `8`) {
1055	assert(op < oMatchEnd);
1056	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - `8`, ZSTD_overlap_src_before_dst);
1057	}
1058	return sequenceLength;
1059	}
1060
1061	HINT_INLINE
1062	size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1063	BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1064	const BYTE** litPtr, const BYTE* const litLimit,
1065	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1066	{
1067	BYTE* const oLitEnd = op + sequence.litLength;
1068	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1069	BYTE* const oMatchEnd = op + sequenceLength; / risk : address space overflow (32-bits) /
1070	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1071	const BYTE* match = oLitEnd - sequence.offset;
1072
1073	assert(op != NULL / Precondition /);
1074	assert(oend_w < oend / No underflow /);
1075	/ Handle edge cases in a slow path:*
1076	* - Read beyond end of literals
1077	* - Match end is within WILDCOPY_OVERLIMIT of oend
1078	* - 32-bit mode and the match length overflows
1079	*/
1080	if (UNLIKELY(
1081	iLitEnd > litLimit \|\|
1082	oMatchEnd > oend_w \|\|
1083	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1084	return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1085
1086	/ Assumptions (everything else goes into ZSTD_execSequenceEnd()) /
1087	assert(op <= oLitEnd / No overflow /);
1088	assert(oLitEnd < oMatchEnd / Non-zero match & no overflow /);
1089	assert(oMatchEnd <= oend / No underflow /);
1090	assert(iLitEnd <= litLimit / Literal length is in bounds /);
1091	assert(oLitEnd <= oend_w / Can wildcopy literals /);
1092	assert(oMatchEnd <= oend_w / Can wildcopy matches /);
1093
1094	/ Copy Literals:*
1095	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1096	* We likely don't need the full 32-byte wildcopy.
1097	*/
1098	assert(WILDCOPY_OVERLENGTH >= `16`);
1099	ZSTD_copy16(op, (*litPtr));
1100	if (UNLIKELY(sequence.litLength > `16`)) {
1101	ZSTD_wildcopy(op+`16`, (*litPtr)+`16`, sequence.litLength-`16`, ZSTD_no_overlap);
1102	}
1103	op = oLitEnd;
1104	litPtr = iLitEnd; /* update for next sequence /
1105
1106	/ Copy Match /
1107	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1108	/ offset beyond prefix -> go into extDict /
1109	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1110	match = dictEnd + (match - prefixStart);
1111	if (match + sequence.matchLength <= dictEnd) {
1112	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1113	return sequenceLength;
1114	}
1115	/ span extDict & currentPrefixSegment /
1116	{ size_t const length1 = dictEnd - match;
1117	ZSTD_memmove(oLitEnd, match, length1);
1118	op = oLitEnd + length1;
1119	sequence.matchLength -= length1;
1120	match = prefixStart;
1121	} }
1122	/ Match within prefix of 1 or more bytes /
1123	assert(op <= oMatchEnd);
1124	assert(oMatchEnd <= oend_w);
1125	assert(match >= prefixStart);
1126	assert(sequence.matchLength >= `1`);
1127
1128	/ Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy*
1129	* without overlap checking.
1130	*/
1131	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1132	/ We bet on a full wildcopy for matches, since we expect matches to be*
1133	* longer than literals (in general). In silesia, ~10% of matches are longer
1134	* than 16 bytes.
1135	*/
1136	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1137	return sequenceLength;
1138	}
1139	assert(sequence.offset < WILDCOPY_VECLEN);
1140
1141	/ Copy 8 bytes and spread the offset to be >= 8. /
1142	ZSTD_overlapCopy8(&op, &match, sequence.offset);
1143
1144	/ If the match length is > 8 bytes, then continue with the wildcopy. /
1145	if (sequence.matchLength > `8`) {
1146	assert(op < oMatchEnd);
1147	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-`8`, ZSTD_overlap_src_before_dst);
1148	}
1149	return sequenceLength;
1150	}
1151
1152
1153	static void
1154	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1155	{
1156	const void* ptr = dt;
1157	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1158	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1159	DEBUGLOG(`6`, "ZSTD_initFseState : val=%u using %u bits",
1160	(U32)DStatePtr->state, DTableH->tableLog);
1161	BIT_reloadDStream(bitD);
1162	DStatePtr->table = dt + `1`;
1163	}
1164
1165	FORCE_INLINE_TEMPLATE void
1166	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1167	{
1168	size_t const lowBits = BIT_readBits(bitD, nbBits);
1169	DStatePtr->state = nextState + lowBits;
1170	}
1171
1172	/ We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum*
1173	* offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1174	* bits before reloading. This value is the maximum number of bytes we read
1175	* after reloading when we are decoding long offsets.
1176	*/
1177	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1178	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1179	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1180	: 0)
1181
1182	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=`1` } ZSTD_longOffset_e;
1183
1184	FORCE_INLINE_TEMPLATE seq_t
1185	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1186	{
1187	seq_t seq;
1188	/*
1189	* ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
1190	* loaded in one operation and extracted its fields by simply shifting or
1191	* bit-extracting on aarch64.
1192	* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1193	* operations that cause performance drop. This can be avoided by using this
1194	* ZSTD_memcpy hack.
1195	*/
1196	#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1197	ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1198	ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1199	ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1200	ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1201	ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1202	ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1203	ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1204	#else
1205	const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1206	const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1207	const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1208	#endif
1209	seq.matchLength = mlDInfo->baseValue;
1210	seq.litLength = llDInfo->baseValue;
1211	{ U32 const ofBase = ofDInfo->baseValue;
1212	BYTE const llBits = llDInfo->nbAdditionalBits;
1213	BYTE const mlBits = mlDInfo->nbAdditionalBits;
1214	BYTE const ofBits = ofDInfo->nbAdditionalBits;
1215	BYTE const totalBits = llBits+mlBits+ofBits;
1216
1217	U16 const llNext = llDInfo->nextState;
1218	U16 const mlNext = mlDInfo->nextState;
1219	U16 const ofNext = ofDInfo->nextState;
1220	U32 const llnbBits = llDInfo->nbBits;
1221	U32 const mlnbBits = mlDInfo->nbBits;
1222	U32 const ofnbBits = ofDInfo->nbBits;
1223
1224	assert(llBits <= MaxLLBits);
1225	assert(mlBits <= MaxMLBits);
1226	assert(ofBits <= MaxOff);
1227	/*
1228	* As gcc has better branch and block analyzers, sometimes it is only
1229	* valuable to mark likeliness for clang, it gives around 3-4% of
1230	* performance.
1231	*/
1232
1233	/ sequence /
1234	{ size_t offset;
1235	if (ofBits > `1`) {
1236	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == `1`);
1237	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == `5`);
1238	ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1239	ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1240	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1241	/ Always read extra bits, this keeps the logic simple,*
1242	* avoids branches, and avoids accidentally reading 0 bits.
1243	*/
1244	U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1245	offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1246	BIT_reloadDStream(&seqState->DStream);
1247	offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1248	} else {
1249	offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/>0/); / <= (ZSTD_WINDOWLOG_MAX-1) bits /
1250	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1251	}
1252	seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1253	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1254	seqState->prevOffset[`0`] = offset;
1255	} else {
1256	U32 const ll0 = (llDInfo->baseValue == `0`);
1257	if (LIKELY((ofBits == `0`))) {
1258	offset = seqState->prevOffset[ll0];
1259	seqState->prevOffset[`1`] = seqState->prevOffset[!ll0];
1260	seqState->prevOffset[`0`] = offset;
1261	} else {
1262	offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, `1`);
1263	{ size_t temp = (offset==`3`) ? seqState->prevOffset[`0`] - `1` : seqState->prevOffset[offset];
1264	temp += !temp; / 0 is not valid; input is corrupted; force offset to 1 /
1265	if (offset != `1`) seqState->prevOffset[`2`] = seqState->prevOffset[`1`];
1266	seqState->prevOffset[`1`] = seqState->prevOffset[`0`];
1267	seqState->prevOffset[`0`] = offset = temp;
1268	} } }
1269	seq.offset = offset;
1270	}
1271
1272	if (mlBits > `0`)
1273	seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/>0/);
1274
1275	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1276	BIT_reloadDStream(&seqState->DStream);
1277	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1278	BIT_reloadDStream(&seqState->DStream);
1279	/ Ensure there are enough bits to read the rest of data in 64-bit mode. /
1280	ZSTD_STATIC_ASSERT(`16`+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1281
1282	if (llBits > `0`)
1283	seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/>0/);
1284
1285	if (MEM_32bits())
1286	BIT_reloadDStream(&seqState->DStream);
1287
1288	DEBUGLOG(`6`, "seq: litL=%u, matchL=%u, offset=%u",
1289	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1290
1291	ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); / <= 9 bits /
1292	ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); / <= 9 bits /
1293	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); / <= 18 bits /
1294	ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); / <= 8 bits /
1295	}
1296
1297	return seq;
1298	}
1299
1300	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1301	MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1302	{
1303	size_t const windowSize = dctx->fParams.windowSize;
1304	/ No dictionary used. /
1305	if (dctx->dictContentEndForFuzzing == NULL) return `0`;
1306	/ Dictionary is our prefix. /
1307	if (prefixStart == dctx->dictContentBeginForFuzzing) return `1`;
1308	/ Dictionary is not our ext-dict. /
1309	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return `0`;
1310	/ Dictionary is not within our window size. /
1311	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return `0`;
1312	/ Dictionary is active. /
1313	return `1`;
1314	}
1315
1316	MEM_STATIC void ZSTD_assertValidSequence(
1317	ZSTD_DCtx const* dctx,
1318	BYTE const* op, BYTE const* oend,
1319	seq_t const seq,
1320	BYTE const* prefixStart, BYTE const* virtualStart)
1321	{
1322	#if DEBUGLEVEL >= 1
1323	size_t const windowSize = dctx->fParams.windowSize;
1324	size_t const sequenceSize = seq.litLength + seq.matchLength;
1325	BYTE const* const oLitEnd = op + seq.litLength;
1326	DEBUGLOG(`6`, "Checking sequence: litL=%u matchL=%u offset=%u",
1327	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1328	assert(op <= oend);
1329	assert((size_t)(oend - op) >= sequenceSize);
1330	assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1331	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1332	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char* const*)dctx->dictContentBeginForFuzzing);
1333	/ Offset must be within the dictionary. /
1334	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1335	assert(seq.offset <= windowSize + dictSize);
1336	} else {
1337	/ Offset must be within our window. /
1338	assert(seq.offset <= windowSize);
1339	}
1340	#else
1341	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1342	#endif
1343	}
1344	#endif
1345
1346	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1347
1348
1349	FORCE_INLINE_TEMPLATE size_t
1350	DONT_VECTORIZE
1351	ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1352	void* dst, size_t maxDstSize,
1353	const void* seqStart, size_t seqSize, int nbSeq,
1354	const ZSTD_longOffset_e isLongOffset,
1355	const int frame)
1356	{
1357	const BYTE* ip = (const BYTE*)seqStart;
1358	const BYTE* const iend = ip + seqSize;
1359	BYTE* const ostart = (BYTE*)dst;
1360	BYTE* const oend = ostart + maxDstSize;
1361	BYTE* op = ostart;
1362	const BYTE* litPtr = dctx->litPtr;
1363	const BYTE* litBufferEnd = dctx->litBufferEnd;
1364	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1365	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1366	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1367	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer");
1368	(void)frame;
1369
1370	/ Regen sequences /
1371	if (nbSeq) {
1372	seqState_t seqState;
1373	dctx->fseEntropy = `1`;
1374	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1375	RETURN_ERROR_IF(
1376	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1377	corruption_detected, "");
1378	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1379	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1380	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1381	assert(dst != NULL);
1382
1383	ZSTD_STATIC_ASSERT(
1384	BIT_DStream_unfinished < BIT_DStream_completed &&
1385	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1386	BIT_DStream_completed < BIT_DStream_overflow);
1387
1388	/ decompress without overrunning litPtr begins /
1389	{
1390	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1391	/ Align the decompression loop to 32 + 16 bytes.*
1392	*
1393	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1394	* speed swings based on the alignment of the decompression loop. This
1395	* performance swing is caused by parts of the decompression loop falling
1396	* out of the DSB. The entire decompression loop should fit in the DSB,
1397	* when it can't we get much worse performance. You can measure if you've
1398	* hit the good case or the bad case with this perf command for some
1399	* compressed file test.zst:
1400	*
1401	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1402	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1403	*
1404	* If you see most cycles served out of the MITE you've hit the bad case.
1405	* If you see most cycles served out of the DSB you've hit the good case.
1406	* If it is pretty even then you may be in an okay case.
1407	*
1408	* This issue has been reproduced on the following CPUs:
1409	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1410	* Use Instruments->Counters to get DSB/MITE cycles.
1411	* I never got performance swings, but I was able to
1412	* go from the good case of mostly DSB to half of the
1413	* cycles served from MITE.
1414	* - Coffeelake: Intel i9-9900k
1415	* - Coffeelake: Intel i7-9700k
1416	*
1417	* I haven't been able to reproduce the instability or DSB misses on any
1418	* of the following CPUS:
1419	* - Haswell
1420	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1421	* - Skylake
1422	*
1423	* Alignment is done for each of the three major decompression loops:
1424	* - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1425	* - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1426	* - ZSTD_decompressSequences_body
1427	* Alignment choices are made to minimize large swings on bad cases and influence on performance
1428	* from changes external to this code, rather than to overoptimize on the current commit.
1429	*
1430	* If you are seeing performance stability this script can help test.
1431	* It tests on 4 commits in zstd where I saw performance change.
1432	*
1433	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1434	*/
1435	#if defined(__GNUC__) && defined(__x86_64__)
1436	__asm__(".p2align 6");
1437	# if __GNUC__ >= 7
1438	/ good for gcc-7, gcc-9, and gcc-11 /
1439	__asm__("nop");
1440	__asm__(".p2align 5");
1441	__asm__("nop");
1442	__asm__(".p2align 4");
1443	# if __GNUC__ == 8 \|\| __GNUC__ == 10
1444	/ good for gcc-8 and gcc-10 /
1445	__asm__("nop");
1446	__asm__(".p2align 3");
1447	# endif
1448	# endif
1449	#endif
1450
1451	/ Handle the initial state where litBuffer is currently split between dst and litExtraBuffer /
1452	for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1453	size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1454	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455	assert(!ZSTD_isError(oneSeqSize));
1456	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457	#endif
1458	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459	return oneSeqSize;
1460	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1461	op += oneSeqSize;
1462	if (UNLIKELY(!--nbSeq))
1463	break;
1464	BIT_reloadDStream(&(seqState.DStream));
1465	sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1466	}
1467
1468	/ If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd /
1469	if (nbSeq > `0`) {
1470	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1471	if (leftoverLit)
1472	{
1473	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1474	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1475	sequence.litLength -= leftoverLit;
1476	op += leftoverLit;
1477	}
1478	litPtr = dctx->litExtraBuffer;
1479	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1480	dctx->litBufferLocation = ZSTD_not_in_dst;
1481	{
1482	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1483	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1484	assert(!ZSTD_isError(oneSeqSize));
1485	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1486	#endif
1487	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1488	return oneSeqSize;
1489	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1490	op += oneSeqSize;
1491	if (--nbSeq)
1492	BIT_reloadDStream(&(seqState.DStream));
1493	}
1494	}
1495	}
1496
1497	if (nbSeq > `0`) / there is remaining lit from extra buffer /
1498	{
1499
1500	#if defined(__GNUC__) && defined(__x86_64__)
1501	__asm__(".p2align 6");
1502	__asm__("nop");
1503	# if __GNUC__ != 7
1504	/ worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang /
1505	__asm__(".p2align 4");
1506	__asm__("nop");
1507	__asm__(".p2align 3");
1508	# elif __GNUC__ >= 11
1509	__asm__(".p2align 3");
1510	# else
1511	__asm__(".p2align 5");
1512	__asm__("nop");
1513	__asm__(".p2align 3");
1514	# endif
1515	#endif
1516
1517	for (; ; ) {
1518	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1519	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1520	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1521	assert(!ZSTD_isError(oneSeqSize));
1522	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1523	#endif
1524	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1525	return oneSeqSize;
1526	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1527	op += oneSeqSize;
1528	if (UNLIKELY(!--nbSeq))
1529	break;
1530	BIT_reloadDStream(&(seqState.DStream));
1531	}
1532	}
1533
1534	/ check if reached exact end /
1535	DEBUGLOG(`5`, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1536	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1537	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1538	/ save reps for next block /
1539	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1540	}
1541
1542	/ last literal segment /
1543	if (dctx->litBufferLocation == ZSTD_split) / split hasn't been reached yet, first get dst then copy litExtraBuffer /
1544	{
1545	size_t const lastLLSize = litBufferEnd - litPtr;
1546	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1547	if (op != NULL) {
1548	ZSTD_memmove(op, litPtr, lastLLSize);
1549	op += lastLLSize;
1550	}
1551	litPtr = dctx->litExtraBuffer;
1552	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1553	dctx->litBufferLocation = ZSTD_not_in_dst;
1554	}
1555	{ size_t const lastLLSize = litBufferEnd - litPtr;
1556	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1557	if (op != NULL) {
1558	ZSTD_memcpy(op, litPtr, lastLLSize);
1559	op += lastLLSize;
1560	}
1561	}
1562
1563	return op-ostart;
1564	}
1565
1566	FORCE_INLINE_TEMPLATE size_t
1567	DONT_VECTORIZE
1568	ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1569	void* dst, size_t maxDstSize,
1570	const void* seqStart, size_t seqSize, int nbSeq,
1571	const ZSTD_longOffset_e isLongOffset,
1572	const int frame)
1573	{
1574	const BYTE* ip = (const BYTE*)seqStart;
1575	const BYTE* const iend = ip + seqSize;
1576	BYTE* const ostart = (BYTE*)dst;
1577	BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1578	BYTE* op = ostart;
1579	const BYTE* litPtr = dctx->litPtr;
1580	const BYTE* const litEnd = litPtr + dctx->litSize;
1581	const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1582	const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1583	const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1584	DEBUGLOG(`5`, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1585	(void)frame;
1586
1587	/ Regen sequences /
1588	if (nbSeq) {
1589	seqState_t seqState;
1590	dctx->fseEntropy = `1`;
1591	{ U32 i; for (i = `0`; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1592	RETURN_ERROR_IF(
1593	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1594	corruption_detected, "");
1595	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1596	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1597	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1598	assert(dst != NULL);
1599
1600	ZSTD_STATIC_ASSERT(
1601	BIT_DStream_unfinished < BIT_DStream_completed &&
1602	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1603	BIT_DStream_completed < BIT_DStream_overflow);
1604
1605	#if defined(__GNUC__) && defined(__x86_64__)
1606	__asm__(".p2align 6");
1607	__asm__("nop");
1608	# if __GNUC__ >= 7
1609	__asm__(".p2align 5");
1610	__asm__("nop");
1611	__asm__(".p2align 3");
1612	# else
1613	__asm__(".p2align 4");
1614	__asm__("nop");
1615	__asm__(".p2align 3");
1616	# endif
1617	#endif
1618
1619	for ( ; ; ) {
1620	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1621	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1622	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1623	assert(!ZSTD_isError(oneSeqSize));
1624	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1625	#endif
1626	if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1627	return oneSeqSize;
1628	DEBUGLOG(`6`, "regenerated sequence size : %u", (U32)oneSeqSize);
1629	op += oneSeqSize;
1630	if (UNLIKELY(!--nbSeq))
1631	break;
1632	BIT_reloadDStream(&(seqState.DStream));
1633	}
1634
1635	/ check if reached exact end /
1636	DEBUGLOG(`5`, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1637	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1638	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1639	/ save reps for next block /
1640	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1641	}
1642
1643	/ last literal segment /
1644	{ size_t const lastLLSize = litEnd - litPtr;
1645	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1646	if (op != NULL) {
1647	ZSTD_memcpy(op, litPtr, lastLLSize);
1648	op += lastLLSize;
1649	}
1650	}
1651
1652	return op-ostart;
1653	}
1654
1655	static size_t
1656	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1657	void* dst, size_t maxDstSize,
1658	const void* seqStart, size_t seqSize, int nbSeq,
1659	const ZSTD_longOffset_e isLongOffset,
1660	const int frame)
1661	{
1662	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1663	}
1664
1665	static size_t
1666	ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1667	void* dst, size_t maxDstSize,
1668	const void* seqStart, size_t seqSize, int nbSeq,
1669	const ZSTD_longOffset_e isLongOffset,
1670	const int frame)
1671	{
1672	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1673	}
1674	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1675
1676	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1677
1678	FORCE_INLINE_TEMPLATE size_t
1679	ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1680	const BYTE* const prefixStart, const BYTE* const dictEnd)
1681	{
1682	prefetchPos += sequence.litLength;
1683	{ const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1684	const BYTE* const match = matchBase + prefetchPos - sequence.offset; / note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.*
1685	* No consequence though : memory address is only used for prefetching, not for dereferencing */
1686	PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); / note : it's safe to invoke PREFETCH() on any memory address, including invalid ones /
1687	}
1688	return prefetchPos + sequence.matchLength;
1689	}
1690
1691	/ This decoding function employs prefetching*
1692	* to reduce latency impact of cache misses.
1693	* It's generally employed when block contains a significant portion of long-distance matches
1694	* or when coupled with a "cold" dictionary */
1695	FORCE_INLINE_TEMPLATE size_t
1696	ZSTD_decompressSequencesLong_body(
1697	ZSTD_DCtx* dctx,
1698	void* dst, size_t maxDstSize,
1699	const void* seqStart, size_t seqSize, int nbSeq,
1700	const ZSTD_longOffset_e isLongOffset,
1701	const int frame)
1702	{
1703	const BYTE* ip = (const BYTE*)seqStart;
1704	const BYTE* const iend = ip + seqSize;
1705	BYTE* const ostart = (BYTE*)dst;
1706	BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1707	BYTE* op = ostart;
1708	const BYTE* litPtr = dctx->litPtr;
1709	const BYTE* litBufferEnd = dctx->litBufferEnd;
1710	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1711	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1712	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1713	(void)frame;
1714
1715	/ Regen sequences /
1716	if (nbSeq) {
1717	#define STORED_SEQS 8
1718	#define STORED_SEQS_MASK (STORED_SEQS-1)
1719	#define ADVANCED_SEQS STORED_SEQS
1720	seq_t sequences[STORED_SEQS];
1721	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1722	seqState_t seqState;
1723	int seqNb;
1724	size_t prefetchPos = (size_t)(op-prefixStart); / track position relative to prefixStart /
1725
1726	dctx->fseEntropy = `1`;
1727	{ int i; for (i=`0`; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1728	assert(dst != NULL);
1729	assert(iend >= ip);
1730	RETURN_ERROR_IF(
1731	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1732	corruption_detected, "");
1733	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1734	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1735	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1736
1737	/ prepare in advance /
1738	for (seqNb=`0`; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1739	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1740	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1741	sequences[seqNb] = sequence;
1742	}
1743	RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1744
1745	/ decompress without stomping litBuffer /
1746	for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1747	seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1748	size_t oneSeqSize;
1749
1750	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1751	{
1752	/ lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer /
1753	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1754	if (leftoverLit)
1755	{
1756	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1757	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1758	sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1759	op += leftoverLit;
1760	}
1761	litPtr = dctx->litExtraBuffer;
1762	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1763	dctx->litBufferLocation = ZSTD_not_in_dst;
1764	oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1765	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1766	assert(!ZSTD_isError(oneSeqSize));
1767	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1768	#endif
1769	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1770
1771	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1772	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1773	op += oneSeqSize;
1774	}
1775	else
1776	{
1777	/ lit buffer is either wholly contained in first or second split, or not split at all/
1778	oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1779	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1780	ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1781	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1782	assert(!ZSTD_isError(oneSeqSize));
1783	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1784	#endif
1785	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1786
1787	prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1788	sequences[seqNb & STORED_SEQS_MASK] = sequence;
1789	op += oneSeqSize;
1790	}
1791	}
1792	RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1793
1794	/ finish queue /
1795	seqNb -= seqAdvance;
1796	for ( ; seqNb<nbSeq ; seqNb++) {
1797	seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1798	if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1799	{
1800	const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1801	if (leftoverLit)
1802	{
1803	RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1804	ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1805	sequence->litLength -= leftoverLit;
1806	op += leftoverLit;
1807	}
1808	litPtr = dctx->litExtraBuffer;
1809	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1810	dctx->litBufferLocation = ZSTD_not_in_dst;
1811	{
1812	size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1813	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1814	assert(!ZSTD_isError(oneSeqSize));
1815	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1816	#endif
1817	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1818	op += oneSeqSize;
1819	}
1820	}
1821	else
1822	{
1823	size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1824	ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1825	ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1826	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1827	assert(!ZSTD_isError(oneSeqSize));
1828	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1829	#endif
1830	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1831	op += oneSeqSize;
1832	}
1833	}
1834
1835	/ save reps for next block /
1836	{ U32 i; for (i=`0`; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1837	}
1838
1839	/ last literal segment /
1840	if (dctx->litBufferLocation == ZSTD_split) / first deplete literal buffer in dst, then copy litExtraBuffer /
1841	{
1842	size_t const lastLLSize = litBufferEnd - litPtr;
1843	RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1844	if (op != NULL) {
1845	ZSTD_memmove(op, litPtr, lastLLSize);
1846	op += lastLLSize;
1847	}
1848	litPtr = dctx->litExtraBuffer;
1849	litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1850	}
1851	{ size_t const lastLLSize = litBufferEnd - litPtr;
1852	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1853	if (op != NULL) {
1854	ZSTD_memmove(op, litPtr, lastLLSize);
1855	op += lastLLSize;
1856	}
1857	}
1858
1859	return op-ostart;
1860	}
1861
1862	static size_t
1863	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1864	void* dst, size_t maxDstSize,
1865	const void* seqStart, size_t seqSize, int nbSeq,
1866	const ZSTD_longOffset_e isLongOffset,
1867	const int frame)
1868	{
1869	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1870	}
1871	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1872
1873
1874
1875	#if DYNAMIC_BMI2
1876
1877	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1878	static BMI2_TARGET_ATTRIBUTE size_t
1879	DONT_VECTORIZE
1880	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1881	void* dst, size_t maxDstSize,
1882	const void* seqStart, size_t seqSize, int nbSeq,
1883	const ZSTD_longOffset_e isLongOffset,
1884	const int frame)
1885	{
1886	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1887	}
1888	static BMI2_TARGET_ATTRIBUTE size_t
1889	DONT_VECTORIZE
1890	ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1891	void* dst, size_t maxDstSize,
1892	const void* seqStart, size_t seqSize, int nbSeq,
1893	const ZSTD_longOffset_e isLongOffset,
1894	const int frame)
1895	{
1896	return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1897	}
1898	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1899
1900	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1901	static BMI2_TARGET_ATTRIBUTE size_t
1902	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1903	void* dst, size_t maxDstSize,
1904	const void* seqStart, size_t seqSize, int nbSeq,
1905	const ZSTD_longOffset_e isLongOffset,
1906	const int frame)
1907	{
1908	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1909	}
1910	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1911
1912	#endif /* DYNAMIC_BMI2 */
1913
1914	typedef size_t (*ZSTD_decompressSequences_t)(
1915	ZSTD_DCtx* dctx,
1916	void* dst, size_t maxDstSize,
1917	const void* seqStart, size_t seqSize, int nbSeq,
1918	const ZSTD_longOffset_e isLongOffset,
1919	const int frame);
1920
1921	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1922	static size_t
1923	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1924	const void* seqStart, size_t seqSize, int nbSeq,
1925	const ZSTD_longOffset_e isLongOffset,
1926	const int frame)
1927	{
1928	DEBUGLOG(`5`, "ZSTD_decompressSequences");
1929	#if DYNAMIC_BMI2
1930	if (ZSTD_DCtx_get_bmi2(dctx)) {
1931	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1932	}
1933	#endif
1934	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1935	}
1936	static size_t
1937	ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1938	const void* seqStart, size_t seqSize, int nbSeq,
1939	const ZSTD_longOffset_e isLongOffset,
1940	const int frame)
1941	{
1942	DEBUGLOG(`5`, "ZSTD_decompressSequencesSplitLitBuffer");
1943	#if DYNAMIC_BMI2
1944	if (ZSTD_DCtx_get_bmi2(dctx)) {
1945	return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1946	}
1947	#endif
1948	return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1949	}
1950	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1951
1952
1953	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1954	/ ZSTD_decompressSequencesLong() :*
1955	* decompression function triggered when a minimum share of offsets is considered "long",
1956	* aka out of cache.
1957	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1958	* This function will try to mitigate main memory latency through the use of prefetching */
1959	static size_t
1960	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1961	void* dst, size_t maxDstSize,
1962	const void* seqStart, size_t seqSize, int nbSeq,
1963	const ZSTD_longOffset_e isLongOffset,
1964	const int frame)
1965	{
1966	DEBUGLOG(`5`, "ZSTD_decompressSequencesLong");
1967	#if DYNAMIC_BMI2
1968	if (ZSTD_DCtx_get_bmi2(dctx)) {
1969	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1970	}
1971	#endif
1972	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1973	}
1974	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1975
1976
1977	/**
1978	* @returns The total size of the history referenceable by zstd, including
1979	* both the prefix and the extDict. At @p op any offset larger than this
1980	* is invalid.
1981	*/
1982	static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1983	{
1984	return (size_t)(op - virtualStart);
1985	}
1986
1987	typedef struct {
1988	unsigned longOffsetShare;
1989	unsigned maxNbAdditionalBits;
1990	} ZSTD_OffsetInfo;
1991
1992	/ ZSTD_getOffsetInfo() :*
1993	* condition : offTable must be valid
1994	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1995	* compared to maximum possible of (1<<OffFSELog),
1996	* as well as the maximum number additional bits required.
1997	*/
1998	static ZSTD_OffsetInfo
1999	ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2000	{
2001	ZSTD_OffsetInfo info = {`0`, `0`};
2002	/ If nbSeq == 0, then the offTable is uninitialized, but we have*
2003	* no sequences, so both values should be 0.
2004	*/
2005	if (nbSeq != `0`) {
2006	const void* ptr = offTable;
2007	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[`0`].tableLog;
2008	const ZSTD_seqSymbol* table = offTable + `1`;
2009	U32 const max = `1` << tableLog;
2010	U32 u;
2011	DEBUGLOG(`5`, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2012
2013	assert(max <= (`1` << OffFSELog)); / max not too large /
2014	for (u=`0`; u<max; u++) {
2015	info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2016	if (table[u].nbAdditionalBits > `22`) info.longOffsetShare += `1`;
2017	}
2018
2019	assert(tableLog <= OffFSELog);
2020	info.longOffsetShare <<= (OffFSELog - tableLog); / scale to OffFSELog /
2021	}
2022
2023	return info;
2024	}
2025
2026	/**
2027	* @returns The maximum offset we can decode in one read of our bitstream, without
2028	* reloading more bits in the middle of the offset bits read. Any offsets larger
2029	* than this must use the long offset decoder.
2030	*/
2031	static size_t ZSTD_maxShortOffset(void)
2032	{
2033	if (MEM_64bits()) {
2034	/ We can decode any offset without reloading bits.*
2035	* This might change if the max window size grows.
2036	*/
2037	ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= `31`);
2038	return (size_t)-`1`;
2039	} else {
2040	/ The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.*
2041	* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2042	* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2043	*/
2044	size_t const maxOffbase = ((size_t)`1` << (STREAM_ACCUMULATOR_MIN + `1`)) - `1`;
2045	size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2046	assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2047	return maxOffset;
2048	}
2049	}
2050
2051	size_t
2052	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2053	void* dst, size_t dstCapacity,
2054	const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
2055	{ / blockType == blockCompressed /
2056	const BYTE* ip = (const BYTE*)src;
2057	DEBUGLOG(`5`, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
2058
2059	/ Note : the wording of the specification*
2060	* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
2061	* This generally does not happen, as it makes little sense,
2062	* since an uncompressed block would feature same size and have no decompression cost.
2063	* Also, note that decoder from reference libzstd before < v1.5.4
2064	* would consider this edge case as an error.
2065	* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
2066	* for broader compatibility with the deployed ecosystem of zstd decoders */
2067	RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
2068
2069	/ Decode literals section /
2070	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2071	DEBUGLOG(`5`, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
2072	if (ZSTD_isError(litCSize)) return litCSize;
2073	ip += litCSize;
2074	srcSize -= litCSize;
2075	}
2076
2077	/ Build Decoding Tables /
2078	{
2079	/ Compute the maximum block size, which must also work when !frame and fParams are unset.*
2080	* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2081	*/
2082	size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
2083	size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE)dst + blockSizeMax, (BYTE const**)dctx->virtualStart);
2084	/ isLongOffset must be true if there are long offsets.*
2085	* Offsets are long if they are larger than ZSTD_maxShortOffset().
2086	* We don't expect that to be the case in 64-bit mode.
2087	*
2088	* We check here to see if our history is large enough to allow long offsets.
2089	* If it isn't, then we can't possible have (valid) long offsets. If the offset
2090	* is invalid, then it is okay to read it incorrectly.
2091	*
2092	* If isLongOffsets is true, then we will later check our decoding table to see
2093	* if it is even possible to generate long offsets.
2094	*/
2095	ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2096	/ These macros control at build-time which decompressor implementation*
2097	* we use. If neither is defined, we do some inspection and dispatch at
2098	* runtime.
2099	*/
2100	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2101	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2102	int usePrefetchDecoder = dctx->ddictIsCold;
2103	#else
2104	/ Set to 1 to avoid computing offset info if we don't need to.*
2105	* Otherwise this value is ignored.
2106	*/
2107	int usePrefetchDecoder = `1`;
2108	#endif
2109	int nbSeq;
2110	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2111	if (ZSTD_isError(seqHSize)) return seqHSize;
2112	ip += seqHSize;
2113	srcSize -= seqHSize;
2114
2115	RETURN_ERROR_IF((dst == NULL \|\| dstCapacity == `0`) && nbSeq > `0`, dstSize_tooSmall, "NULL not handled");
2116	RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-`1`) - (size_t)dst < (size_t)(`1` << `20`), dstSize_tooSmall,
2117	"invalid dst");
2118
2119	/ If we could potentially have long offsets, or we might want to use the prefetch decoder,*
2120	* compute information about the share of long offsets, and the maximum nbAdditionalBits.
2121	* NOTE: could probably use a larger nbSeq limit
2122	*/
2123	if (isLongOffset \|\| (!usePrefetchDecoder && (totalHistorySize > (`1u` << `24`)) && (nbSeq > `8`))) {
2124	ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2125	if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2126	/ If isLongOffset, but the maximum number of additional bits that we see in our table is small*
2127	* enough, then we know it is impossible to have too long an offset in this block, so we can
2128	* use the regular offset decoder.
2129	*/
2130	isLongOffset = ZSTD_lo_isRegularOffset;
2131	}
2132	if (!usePrefetchDecoder) {
2133	U32 const minShare = MEM_64bits() ? `7` : `20`; / heuristic values, correspond to 2.73% and 7.81% /
2134	usePrefetchDecoder = (info.longOffsetShare >= minShare);
2135	}
2136	}
2137
2138	dctx->ddictIsCold = `0`;
2139
2140	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2141	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2142	if (usePrefetchDecoder) {
2143	#else
2144	(void)usePrefetchDecoder;
2145	{
2146	#endif
2147	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2148	return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2149	#endif
2150	}
2151
2152	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2153	/ else /
2154	if (dctx->litBufferLocation == ZSTD_split)
2155	return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2156	else
2157	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2158	#endif
2159	}
2160	}
2161
2162
2163	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2164	{
2165	if (dst != dctx->previousDstEnd && dstSize > `0`) { / not contiguous /
2166	dctx->dictEnd = dctx->previousDstEnd;
2167	dctx->virtualStart = (const char)dst - ((const* char)(dctx->previousDstEnd) - (const* char*)(dctx->prefixStart));
2168	dctx->prefixStart = dst;
2169	dctx->previousDstEnd = dst;
2170	}
2171	}
2172
2173
2174	size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2175	void* dst, size_t dstCapacity,
2176	const void* src, size_t srcSize)
2177	{
2178	size_t dSize;
2179	ZSTD_checkContinuity(dctx, dst, dstCapacity);
2180	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, / frame / `0`, not_streaming);
2181	dctx->previousDstEnd = (char*)dst + dSize;
2182	return dSize;
2183	}
2184
2185
2186	/ NOTE: Must just wrap ZSTD_decompressBlock_deprecated() /
2187	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2188	void* dst, size_t dstCapacity,
2189	const void* src, size_t srcSize)
2190	{
2191	return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2192	}
2193

Browse the source code of Godot/thirdparty/zstd/decompress/zstd_decompress_block.c