/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

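/* Note on the two table-fill variants below : the ForCDict variant packs a
 * short hash "tag" (ZSTD_SHORT_CACHE_TAG_BITS bits) into the low bits of every
 * entry via ZSTD_writeTaggedIndex(), so that later dictMatchState searches can
 * reject most non-matching candidates by comparing tags, without dereferencing
 * the candidate position. The ForCCtx variant stores plain indices. */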
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty.
     */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}


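/* ZSTD_compressBlock_doubleFast_noDict_generic() :
 * search with two hash tables : hashLong indexes 8-byte sequences, hashSmall
 * indexes mls-byte sequences. At each position, the search tries, in order :
 * the repcode at ip+1, then a long match at ip, then a short match at ip;
 * a short match triggers one last attempt at a longer match at ip+1 before
 * being accepted. */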
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
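    /* the very first position of the window has no prior data to match,
     * so skip it when this block starts exactly at prefixLowest */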
    ip += ((ip - prefixLowest) == 0);
    {
        U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }
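    /* note : repcodes inherited from the previous block may be out of range at
     * the start of this block; they are stashed in offsetSaved1/2 above, and
     * restored into rep[] at _cleanup if no valid replacement was found */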

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            if (idxl0 > prefixLowestIndex) {
                /* check prefix long match */
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
                }
            }

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            if (idxs0 > prefixLowestIndex) {
                /* check prefix short match */
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }

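            /* no match at ip : after kStepIncr positions searched without
             * success, widen the stride by 1, so that barely-compressible
             * regions are skipped progressively faster */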
            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* check prefix long +1 match */
        if (idxl1 > prefixLowestIndex) {
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
                ip = ip1;
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
                offset = (U32)(ip-matchl1);
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
                goto _match_found;
            }
        }

        /* if no long +1 match, explore the short match we found */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hash table when ip1 is
             * greater than or equal to the new ip we will have after processing
             * this match. Rather than perform that test directly
             * (ip1 >= ip + mLength), which costs speed in practice, we use a
             * simpler, more predictable test : the minimum match length is 4 bytes
             * even for a short match, so as long as step (the initial distance
             * between ip and ip1) is less than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* loop back immediately : empirically faster when repcodes are present (?) */
            }
        }
    }
}


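/* ZSTD_compressBlock_doubleFast_dictMatchState_generic() :
 * same search as the noDict variant, except that candidates whose index falls
 * below prefixLowestIndex are looked up in the attached dictionary's tables.
 * Dictionary entries carry packed tags (see ZSTD_fillDoubleHashTableForCDict()),
 * which are compared first to filter out most false candidates cheaply. */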
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

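    /* the dictionary's tables are likely cold when a CDict is shared across
     * many small compressions : prefetching them up front hides part of that
     * memory latency behind the first search iterations */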
    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes)
        PREFETCH_AREA(dictHashSmall, chainTableBytes)
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if (matchIndexL > prefixLowestIndex) {
            /* check prefix long match */
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                goto _match_found;
            }
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;

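        /* a short match was found at ip : before accepting it, try to upgrade
         * to a longer (8-byte) match at ip+1, in the prefix or the dictionary;
         * a long match found there supersedes the short one */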
_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if (matchIndexL3 > prefixLowestIndex) {
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-matchL3);
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                    goto _match_found;
                }
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional underflow */)
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

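/* ZSTD_GEN_DFAST_FN() :
 * instantiates the FORCE_INLINE_TEMPLATE generic body above for a fixed mls,
 * so each dispatcher below calls a version fully specialized by the compiler.
 * For instance, ZSTD_GEN_DFAST_FN(noDict, 4) defines
 * ZSTD_compressBlock_doubleFast_noDict_4(), which forwards to
 * ZSTD_compressBlock_doubleFast_noDict_generic() with mls == 4. */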
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}


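/* ZSTD_compressBlock_doubleFast_extDict_generic() :
 * here, older content lives in a separate segment (dictBase..dictEnd) which is
 * not contiguous with the current prefix, so candidates with an index below
 * prefixStartIndex must be re-based onto dictBase, and match lengths counted
 * across the segment boundary with ZSTD_count_2segments(). */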
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional underflow : ensure repIndex2 doesn't overlap dict + prefix */
                    & (offset_2 <= current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}