// this software is distributed under the MIT License (http://www.opensource.org/licenses/MIT):
//
// Copyright 2018-2020, CWI, TU Munich, FSU Jena
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files
// (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
// merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// You can contact the authors via the FSST source repository: https://github.com/cwida/fsst
#include <algorithm>
#include <cassert>
#include <cstring>
#include <fstream>
#include <iostream>
#include <numeric>
#include <memory>
#include <queue>
#include <string>
#include <unordered_set>
#include <vector>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stddef.h>

using namespace std;

#include "fsst.h" // the official FSST API -- also usable by C mortals

/* unsigned integers */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

inline uint64_t fsst_unaligned_load(u8 const* V) {
   uint64_t Ret;
   memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible)
   return Ret;
}
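// Illustrative note (a sketch under my assumption of a little-endian target): the first input byte lands in
// the low byte of the returned word, which is also how first()/first2() below read val.num.
//
//    u8 buf[8] = { 'a','b','c','d','e','f','g','h' };
//    u64 w = fsst_unaligned_load(buf);   // on little-endian: (w & 0xFF) == 'a', ((w >> 8) & 0xFF) == 'b', ...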

#define FSST_ENDIAN_MARKER ((u64) 1)
#define FSST_VERSION_20190218 20190218
#define FSST_VERSION ((u64) FSST_VERSION_20190218)

// "symbols" are character sequences (up to 8 bytes)
// A symbol is compressed into a "code" of, in principle, one byte. But, we added an exception mechanism:
// byte 255 followed by byte X represents the single-byte symbol X. Its code is 256+X.

// we represent codes in u16 (not u8). 12 bits code (of which 10 are used), 4 bits length
#define FSST_LEN_BITS 12
#define FSST_CODE_BITS 9
#define FSST_CODE_BASE 256UL /* first 256 codes [0,255] are pseudo codes: escaped bytes */
#define FSST_CODE_MAX (1UL<<FSST_CODE_BITS) /* 1<<9: one beyond the highest code; used as the code of a symbol that has not been assigned one yet */
#define FSST_CODE_MASK (FSST_CODE_MAX-1UL) /* all 9 code bits set: masks the code out of a u16 code+length entry */
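// Illustrative sketch (my reading of the layout, not part of the API): a u16 entry in byteCodes[] or
// shortCodes[] packs the symbol length in its top 4 bits and the code in its low 12 bits, e.g.:
//
//    u16 entry  = (2u << FSST_LEN_BITS) | 257;  // a 2-byte symbol that was assigned code 257
//    u16 code   = entry & FSST_CODE_MASK;       // -> 257
//    u32 length = entry >> FSST_LEN_BITS;       // -> 2
//
// pseudo codes for escaped bytes are stored as (1<<FSST_LEN_BITS)|byte: length 1, code < FSST_CODE_BASE.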

struct Symbol {
   static const unsigned maxLength = 8;

   // the byte sequence that this symbol stands for
   union { char str[maxLength]; u64 num; } val; // usually we process it as a num(ber), as this is fast

   // icl = u64 ignoredBits:16,code:12,length:4,unused:32 -- but we avoid exposing this bit-field notation
   u64 icl; // use a single u64 to be sure "code" is accessed with one load and can be compared with one comparison

   Symbol() : icl(0) { val.num = 0; }

   explicit Symbol(u8 c, u16 code) : icl((1<<28)|(code<<16)|56) { val.num = c; } // single-char symbol
   explicit Symbol(const char* begin, const char* end) : Symbol(begin, (u32) (end-begin)) {}
   explicit Symbol(u8* begin, u8* end) : Symbol((const char*)begin, (u32) (end-begin)) {}
   explicit Symbol(const char* input, u32 len) {
      val.num = 0;
      if (len>=8) {
         len = 8;
         memcpy(val.str, input, 8);
      } else {
         memcpy(val.str, input, len);
      }
      set_code_len(FSST_CODE_MAX, len);
   }
   void set_code_len(u32 code, u32 len) { icl = (len<<28)|(code<<16)|((8-len)*8); }

   u32 length() const { return (u32) (icl >> 28); }
   u16 code() const { return (icl >> 16) & FSST_CODE_MASK; }
   u32 ignoredBits() const { return (u32) icl; }

   u8 first() const { assert( length() >= 1); return 0xFF & val.num; }
   u16 first2() const { assert( length() >= 2); return 0xFFFF & val.num; }

#define FSST_HASH_LOG2SIZE 10
#define FSST_HASH_PRIME 2971215073LL
#define FSST_SHIFT 15
#define FSST_HASH(w) (((w)*FSST_HASH_PRIME)^(((w)*FSST_HASH_PRIME)>>FSST_SHIFT))
   size_t hash() const { size_t v = 0xFFFFFF & val.num; return FSST_HASH(v); } // hash on the next 3 bytes
};
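// Illustrative usage sketch (variable names are mine; this is not part of the library itself):
//
//    const char *txt = "http";
//    Symbol s(txt, (u32) strlen(txt)); // copies the (up to 8) bytes into val.num, code still unassigned
//    u32 len = s.length();             // 4, stored in the top 4 bits of icl
//    u8  ign = (u8) s.icl;             // 32 = (8-4)*8: number of high bits to ignore when comparing val.num
//    u16 f2  = s.first2();             // the first two bytes 'h','t' (as a little-endian u16)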

// Symbol that can be put in a queue, ordered on gain
struct QSymbol{
   Symbol symbol;
   mutable u32 gain; // mutable because gain value should be ignored in find() on unordered_set of QSymbols
   bool operator==(const QSymbol& other) const { return symbol.val.num == other.symbol.val.num && symbol.length() == other.symbol.length(); }
};

// we construct FSST symbol tables using a random sample of about 16KB (1<<14)
#define FSST_SAMPLETARGET (1<<14)
#define FSST_SAMPLEMAXSZ ((long) 2*FSST_SAMPLETARGET)

// two phases of compression, before and after optimize():
//
// (1) to encode values we probe (and maintain) three datastructures:
// - u16 byteCodes[256] array at the position of the next byte (s.length==1)
// - u16 shortCodes[65536] array at the position of the next twobyte pattern (s.length==2)
// - Symbol hashtable[1024] (keyed by the next three bytes, i.e. for s.length>2),
//   this search will yield a u16 code that points into Symbol symbols[]. You always find a hit, because the first 256 codes are
//   pseudo codes representing a single byte (these will become escapes)
//
// (2) when we finished looking for the best symbol table we call optimize() to reshape it:
// - it renumbers the codes by length (first symbols of length 2,3,4,5,6,7,8; then length 1: starting from byteLim are the symbols of length 1)
//   length 2 codes for which no longer suffix symbol exists (< suffixLim) come first among the 2-byte codes
//   (allows shortcut during compression)
// - for each two-byte combination, in all unused slots of shortCodes[], it enters the byteCodes[] entry of the symbol corresponding
//   to the first byte (if such a single-byte symbol exists). This allows us to just probe the next two bytes (if there is only one
//   byte left in the string, there is still a terminator-byte added during compression) in shortCodes[]. That is, byteCodes[]
//   and its codepath is no longer required. This makes compression faster. The reason we use byteCodes[] during symbolTable construction
//   is that adding a new code/symbol is expensive (you have to touch shortCodes[] in 256 places). This optimization was
//   hence added to make symbolTable construction faster.
//
// this final layout allows for the fastest compression code, currently only present in compressBulk

// in the hash table, the icl field contains (low-to-high) ignoredBits:16,code:12,length:4
#define FSST_ICL_FREE ((15<<28)|(((u32)FSST_CODE_MASK)<<16)) // high bits of icl (len=15,code=FSST_CODE_MASK) indicate a free bucket

// ignoredBits is (8-length)*8, which is the amount of high bits to zero in the input word before comparing with the hashtable key
// ..it could of course be computed from len during lookup, but storing it precomputed in some loose bits is faster
//
// the gain field is only used in the symbol queue that sorts symbols on gain
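
// Illustrative sketch of the masked comparison that ignoredBits enables (variable names are mine; the
// real lookup is findLongestSymbol() below):
//
//    Symbol probe(cur, end);                        // temporary symbol holding the next (up to 8) input bytes
//    size_t idx = probe.hash() & (hashTabSize-1);   // bucket keyed on the first 3 bytes
//    bool hit = hashTab[idx].icl <= probe.icl &&    // free buckets (icl = FSST_ICL_FREE) can never match
//               hashTab[idx].val.num == (probe.val.num & (0xFFFFFFFFFFFFFFFF >> (u8) hashTab[idx].icl));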

struct SymbolTable {
   static const u32 hashTabSize = 1<<FSST_HASH_LOG2SIZE; // smallest size that incurs no precision loss

   // lookup table using the next two bytes (65536 codes), or just the next single byte
   u16 shortCodes[65536]; // contains code for 2-byte symbol, otherwise code for pseudo byte (escaped byte)

   // lookup table (only used during symbolTable construction, not during normal text compression)
   u16 byteCodes[256]; // contains code for every 1-byte symbol, otherwise code for pseudo byte (escaped byte)

   // 'symbols' is the current symbol table: symbols[code] holds the (max 8-byte) symbol for the single-byte 'code'
   Symbol symbols[FSST_CODE_MAX]; // x in [0,255]: pseudo symbols representing escaped byte x; x in [FSST_CODE_BASE=256,256+nSymbols]: real symbols

   // replicate long symbols in hashTab (avoid indirection).
   Symbol hashTab[hashTabSize]; // used for all symbols of 3 and more bytes

   u16 nSymbols; // amount of symbols in the map (max 255)
   u16 suffixLim; // codes higher than this do not have a longer suffix
   u16 terminator; // code of 1-byte symbol, that can be used as a terminator during compression
   bool zeroTerminated; // whether we are expecting zero-terminated strings (we then also produce zero-terminated compressed strings)
   u16 lenHisto[FSST_CODE_BITS]; // lenHisto[x] is the amount of symbols of byte-length (x+1) in this SymbolTable

   SymbolTable() : nSymbols(0), suffixLim(FSST_CODE_MAX), terminator(0), zeroTerminated(false) {
      // stuff done once at startup
      for (u32 i=0; i<256; i++) {
         symbols[i] = Symbol(i,i|(1<<FSST_LEN_BITS)); // pseudo symbols
      }
      Symbol unused = Symbol((u8) 0,FSST_CODE_MASK); // single-char symbol, exception code
      for (u32 i=256; i<FSST_CODE_MAX; i++) {
         symbols[i] = unused; // we start with all symbols unused
      }
      // empty hash table
      Symbol s;
      s.val.num = 0;
      s.icl = FSST_ICL_FREE; // marks empty in hashtab
      for(u32 i=0; i<hashTabSize; i++)
         hashTab[i] = s;

      // fill byteCodes[] with the pseudo code for all bytes (escaped bytes)
      for(u32 i=0; i<256; i++)
         byteCodes[i] = (1<<FSST_LEN_BITS) | i;

      // fill shortCodes[] with the pseudo code for the first byte of each two-byte pattern
      for(u32 i=0; i<65536; i++)
         shortCodes[i] = (1<<FSST_LEN_BITS) | (i&255);

      memset(lenHisto, 0, sizeof(lenHisto)); // all unused
   }

   void clear() {
      // clear a symbolTable with minimal effort (only erase the used positions in it)
      memset(lenHisto, 0, sizeof(lenHisto)); // all unused
      for(u32 i=FSST_CODE_BASE; i<FSST_CODE_BASE+nSymbols; i++) {
         if (symbols[i].length() == 1) {
            u16 val = symbols[i].first();
            byteCodes[val] = (1<<FSST_LEN_BITS) | val;
         } else if (symbols[i].length() == 2) {
            u16 val = symbols[i].first2();
            shortCodes[val] = (1<<FSST_LEN_BITS) | (val&255);
         } else {
            u32 idx = symbols[i].hash() & (hashTabSize-1);
            hashTab[idx].val.num = 0;
            hashTab[idx].icl = FSST_ICL_FREE; // marks empty in hashtab
         }
      }
      nSymbols = 0; // no need to clean symbols[] as no symbols are used
   }
   bool hashInsert(Symbol s) {
      u32 idx = s.hash() & (hashTabSize-1);
      bool taken = (hashTab[idx].icl < FSST_ICL_FREE);
      if (taken) return false; // collision in hash table
      hashTab[idx].icl = s.icl;
      hashTab[idx].val.num = s.val.num & (0xFFFFFFFFFFFFFFFF >> (u8) s.icl);
      return true;
   }
   bool add(Symbol s) {
      assert(FSST_CODE_BASE + nSymbols < FSST_CODE_MAX);
      u32 len = s.length();
      s.set_code_len(FSST_CODE_BASE + nSymbols, len);
      if (len == 1) {
         byteCodes[s.first()] = FSST_CODE_BASE + nSymbols + (1<<FSST_LEN_BITS); // len=1 (<<FSST_LEN_BITS)
      } else if (len == 2) {
         shortCodes[s.first2()] = FSST_CODE_BASE + nSymbols + (2<<FSST_LEN_BITS); // len=2 (<<FSST_LEN_BITS)
      } else if (!hashInsert(s)) {
         return false;
      }
      symbols[FSST_CODE_BASE + nSymbols++] = s;
      lenHisto[len-1]++;
      return true;
   }
   /// Find longest expansion, return code (= position in symbol table)
   u16 findLongestSymbol(Symbol s) const {
      size_t idx = s.hash() & (hashTabSize-1);
      if (hashTab[idx].icl <= s.icl && hashTab[idx].val.num == (s.val.num & (0xFFFFFFFFFFFFFFFF >> ((u8) hashTab[idx].icl)))) {
         return (hashTab[idx].icl>>16) & FSST_CODE_MASK; // matched a long symbol
      }
      if (s.length() >= 2) {
         u16 code = shortCodes[s.first2()] & FSST_CODE_MASK;
         if (code >= FSST_CODE_BASE) return code;
      }
      return byteCodes[s.first()] & FSST_CODE_MASK;
   }
   u16 findLongestSymbol(u8* cur, u8* end) const {
      return findLongestSymbol(Symbol(cur,end)); // represent the string as a temporary symbol
   }
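
   // Illustrative encoding-step sketch (variable names are mine; the real compression loops live in
   // compressBulk and the table-construction code, not here):
   //
   //    SymbolTable st; /* ...filled via st.add(...) while building the table... */
   //    u8 *cur = in, *end = in + len;
   //    while (cur < end) {
   //       u16 code = st.findLongestSymbol(cur, end); // >= FSST_CODE_BASE: real symbol; < 256: escaped byte
   //       /* emit code (or escape byte 255 plus the literal byte, if code < FSST_CODE_BASE) */
   //       cur += st.symbols[code].length();          // advance by the number of input bytes covered
   //    }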

   // rationale for finalize:
   // - during symbol table construction, we may create more than 256 codes, but bring it down to max 255 in the last makeTable()
   //   consequently we needed more than 8 bits during symbol table construction, but can simplify the codes to single bytes in finalize()
   //   (this feature is in fact no longer used, but could still be exploited: symbol construction creates no more than 255 symbols in each pass)
   // - we not only reduce the number of codes to <255, but also *reorder* the symbols and renumber their codes, for higher compression perf.
   //   we renumber codes so they are grouped by length, to allow optimized scalar string compression (byteLim and suffixLim optimizations).
   // - we make the use of byteCodes[] no longer necessary by inserting single-byte codes in the free spots of shortCodes[]
   //   Using shortCodes[] only makes compression faster. When creating the symbolTable, however, using shortCodes[] for the single-byte
   //   symbols is slow, as each insert touches 256 positions in it. This optimization was added when optimizing symbolTable construction time.
   //
   // In all, we change the layout and coding, as follows..
   //
   // before finalize():
   // - The real symbols are symbols[256..256+nSymbols>, and we may have nSymbols > 255
   // - The first 256 codes are pseudo symbols (all escaped bytes)
   //
   // after finalize():
   // - table layout is symbols[0..nSymbols>, with nSymbols < 256.
   // - Real codes are [0,nSymbols>. 8-th bit not set.
   // - Escapes in shortCodes have the 8th bit set (value: 256+255=511), 255 because the code to be emitted is the escape byte 255
   // - symbols are grouped by length: 2,3,4,5,6,7,8, then 1 (single-byte codes last)
   //   the two-byte codes are split in two sections:
   //   - the first section contains codes for symbols for which there is no longer symbol (no suffix). It allows an early-out during compression
   //
   // finally, shortCodes[] is modified to also encode all single-byte symbols (hence byteCodes[] is not required on a critical path anymore).
   //
   void finalize(u8 zeroTerminated) {
      assert(nSymbols <= 255);
      u8 newCode[256], rsum[8], byteLim = nSymbols - (lenHisto[0] - zeroTerminated);

      // compute running sum of code lengths (starting offsets for each length)
      rsum[0] = byteLim; // 1-byte codes are highest
      rsum[1] = zeroTerminated;
      for(u32 i=1; i<7; i++)
         rsum[i+1] = rsum[i] + lenHisto[i];

      // determine the new code for each symbol, ordered by length (and splitting 2byte symbols into two classes around suffixLim)
      suffixLim = rsum[1];
      symbols[newCode[0] = 0] = symbols[256]; // keep symbol 0 in place (for zeroTerminated cases only)

      for(u32 i=zeroTerminated, j=rsum[2]; i<nSymbols; i++) {
         Symbol s1 = symbols[FSST_CODE_BASE+i];
         u32 len = s1.length(), opt = (len == 2)*nSymbols;
         if (opt) {
            u16 first2 = s1.first2();
            for(u32 k=0; k<opt; k++) {
               Symbol s2 = symbols[FSST_CODE_BASE+k];
               if (k != i && s2.length() > 1 && first2 == s2.first2()) // test if symbol k is a suffix of s
                  opt = 0;
            }
            newCode[i] = opt?suffixLim++:--j; // symbols without a longer suffix have a code < suffixLim
         } else
            newCode[i] = rsum[len-1]++;
         s1.set_code_len(newCode[i],len);
         symbols[newCode[i]] = s1;
      }
      // renumber the codes in byteCodes[]
      for(u32 i=0; i<256; i++)
         if ((byteCodes[i] & FSST_CODE_MASK) >= FSST_CODE_BASE)
            byteCodes[i] = newCode[(u8) byteCodes[i]] + (1 << FSST_LEN_BITS);
         else
            byteCodes[i] = 511 + (1 << FSST_LEN_BITS);

      // renumber the codes in shortCodes[]
      for(u32 i=0; i<65536; i++)
         if ((shortCodes[i] & FSST_CODE_MASK) >= FSST_CODE_BASE)
            shortCodes[i] = newCode[(u8) shortCodes[i]] + (shortCodes[i] & (15 << FSST_LEN_BITS));
         else
            shortCodes[i] = byteCodes[i&0xFF];

      // replace the symbols in the hash table
      for(u32 i=0; i<hashTabSize; i++)
         if (hashTab[i].icl < FSST_ICL_FREE)
            hashTab[i] = symbols[newCode[(u8) hashTab[i].code()]];
   }
};
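
// Illustrative example of the code space after finalize(), for a hypothetical table with nSymbols=100,
// of which 10 are single-byte symbols and 20 are two-byte symbols (zeroTerminated==0):
//
//    codes [0, suffixLim)   : 2-byte symbols for which no longer symbol shares their first two bytes (early-out in compression)
//    codes [suffixLim, 20)  : the remaining 2-byte symbols
//    codes [20, byteLim=90) : symbols of length 3,4,5,6,7,8, grouped by length
//    codes [90, 100)        : the 10 single-byte symbols
//
// escape entries in byteCodes[]/shortCodes[] carry code 511 (escape byte 255 followed by the literal byte).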

#ifdef NONOPT_FSST
struct Counters {
   u16 count1[FSST_CODE_MAX];   // array to count frequency of symbols as they occur in the sample
   u16 count2[FSST_CODE_MAX][FSST_CODE_MAX]; // array to count subsequent combinations of two symbols in the sample

   void count1Set(u32 pos1, u16 val) {
      count1[pos1] = val;
   }
   void count1Inc(u32 pos1) {
      count1[pos1]++;
   }
   void count2Inc(u32 pos1, u32 pos2) {
      count2[pos1][pos2]++;
   }
   u32 count1GetNext(u32 &pos1) {
      return count1[pos1];
   }
   u32 count2GetNext(u32 pos1, u32 &pos2) {
      return count2[pos1][pos2];
   }
   void backup1(u8 *buf) {
      memcpy(buf, count1, FSST_CODE_MAX*sizeof(u16));
   }
   void restore1(u8 *buf) {
      memcpy(count1, buf, FSST_CODE_MAX*sizeof(u16));
   }
};
#else
// we keep two counters count1[pos] and count2[pos1][pos2] of resp 16 and 12-bits. Both are split into two columns for performance reasons
// first reason is to make the column we update the most during symbolTable construction (the low bits) thinner, thus reducing CPU cache pressure.
// second reason is that when scanning the array, after seeing a 64-bits 0 in the high bits column, we can quickly skip over many codes (15 or 7)
struct Counters {
   // high arrays come before low arrays, because our GetNext() methods may overrun their 64-bits reads a few bytes
   u8 count1High[FSST_CODE_MAX];   // array to count frequency of symbols as they occur in the sample (16-bits)
   u8 count1Low[FSST_CODE_MAX];    // it is split in a low and high byte: cnt = count1High*256 + count1Low
   u8 count2High[FSST_CODE_MAX][FSST_CODE_MAX/2]; // array to count subsequent combinations of two symbols in the sample (12-bits: 8-bits low, 4-bits high)
   u8 count2Low[FSST_CODE_MAX][FSST_CODE_MAX];    // its value is (count2High*256+count2Low) -- but high is 4-bits (we put two numbers in one, hence /2)
   // 385KB -- but hot area likely just 10 + 30*4 = 130 cache lines (=8KB)

   void count1Set(u32 pos1, u16 val) {
      count1Low[pos1] = val&255;
      count1High[pos1] = val>>8;
   }
   void count1Inc(u32 pos1) {
      if (!count1Low[pos1]++) // increment high early (when low==0, not when low==255). This means (high > 0) <=> (cnt > 0)
         count1High[pos1]++;  // (0,0)->(1,1)->..->(255,1)->(0,1)->(1,2)->(2,2)->(3,2)..(255,2)->(0,2)->(1,3)->(2,3)...
   }
   void count2Inc(u32 pos1, u32 pos2) {
      if (!count2Low[pos1][pos2]++) // increment high early (when low==0, not when low==255). This means (high > 0) <=> (cnt > 0)
         // inc 4-bits high counter with 1<<0 (1) or 1<<4 (16) -- depending on whether pos2 is even or odd, respectively
         count2High[pos1][(pos2)>>1] += 1 << (((pos2)&1)<<2); // we take our chances with overflow.. (4K maxval, on a 8K sample)
   }
   u32 count1GetNext(u32 &pos1) { // note: we will advance pos1 to the next nonzero counter in register range
      // read 16-bits single symbol counter, split into two 8-bits numbers (count1Low, count1High), while skipping over zeros
      u64 high = fsst_unaligned_load(&count1High[pos1]);

      u32 zero = high?(__builtin_ctzl(high)>>3):7UL; // number of zero bytes
      high = (high >> (zero << 3)) & 255; // advance to nonzero counter
      if (((pos1 += zero) >= FSST_CODE_MAX) || !high) // SKIP! advance pos1
         return 0; // all zero

      u32 low = count1Low[pos1];
      if (low) high--; // high is incremented early and low late, so decrement high (unless low==0)
      return (u32) ((high << 8) + low);
   }
   u32 count2GetNext(u32 pos1, u32 &pos2) { // note: we will advance pos2 to the next nonzero counter in register range
      // read 12-bits pairwise symbol counter, split into low 8-bits and high 4-bits number while skipping over zeros
      u64 high = fsst_unaligned_load(&count2High[pos1][pos2>>1]);
      high >>= ((pos2&1) << 2); // odd pos2: ignore the lowest 4 bits & we see only 15 counters

      u32 zero = high?(__builtin_ctzl(high)>>2):(15UL-(pos2&1UL)); // number of zero 4-bits counters
      high = (high >> (zero << 2)) & 15; // advance to nonzero counter
      if (((pos2 += zero) >= FSST_CODE_MAX) || !high) // SKIP! advance pos2
         return 0UL; // all zero

      u32 low = count2Low[pos1][pos2];
      if (low) high--; // high is incremented early and low late, so decrement high (unless low==0)
      return (u32) ((high << 8) + low);
   }
   void backup1(u8 *buf) {
      memcpy(buf, count1High, FSST_CODE_MAX);
      memcpy(buf+FSST_CODE_MAX, count1Low, FSST_CODE_MAX);
   }
   void restore1(u8 *buf) {
      memcpy(count1High, buf, FSST_CODE_MAX);
      memcpy(count1Low, buf+FSST_CODE_MAX, FSST_CODE_MAX);
   }
};
#endif
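
// Illustrative walk-through of the early-increment encoding above (my worked example, derived from
// count1Inc()/count1GetNext()): high is bumped on the increment that takes low from 0 to 1, so after
// N increments starting from (low=0,high=0):
//
//    N=1   -> (low=1, high=1)   decoded as ((high-1)<<8) + low = 1
//    N=256 -> (low=0, high=1)   low==0, so high is not decremented: (1<<8) + 0 = 256
//    N=257 -> (low=1, high=2)   ((2-1)<<8) + 1 = 257
//
// hence (high > 0) <=> (count > 0), which is what lets the GetNext() scans skip zero counters byte-wise.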


#define FSST_BUFSZ (3<<19) // 768KB

// an encoder is a symbolmap plus some bufferspace, needed during map construction as well as compression
struct Encoder {
   shared_ptr<SymbolTable> symbolTable; // symbols, plus metadata and data structures for quick compression (shortCode,hashTab, etc)
   union {
      Counters counters; // for counting symbol occurrences during map construction
      u8 simdbuf[FSST_BUFSZ]; // for compression: SIMD string staging area 768KB = 256KB in + 512KB out (worst case for 256KB in)
   };
};

// job control integer representable in one 64-bits SIMD lane: cur/end=input, out=output, pos=which string (2^9=512 per call)
struct SIMDjob {
   u64 out:19,pos:9,end:18,cur:18; // cur/end is input offsets (2^18=256KB), out is output offset (2^19=512KB)
};
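
// Illustrative sketch (the values are hypothetical): one SIMDjob packs a whole unit of work into the
// 19+9+18+18 = 64 bits of a single SIMD lane, e.g. for string #3 starting at input offset 1000:
//
//    SIMDjob job;
//    job.cur = 1000;  // input offset where encoding should (re)start
//    job.end = 1015;  // input offset one past the last byte of this string
//    job.pos = 3;     // which string this is, so results can be scattered back in order
//    job.out = 0;     // output offset where the codes for this string are written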

extern bool
duckdb_fsst_hasAVX512(); // runtime check for avx512 capability

extern size_t
duckdb_fsst_compressAVX512(
   SymbolTable &symbolTable,
   u8* codeBase,    // IN: base address for codes, i.e. compression output (points to simdbuf+256KB)
   u8* symbolBase,  // IN: base address for string bytes, i.e. compression input (points to simdbuf)
   SIMDjob* input,  // IN: input array (size n) with job information: what to encode, where to store it.
   SIMDjob* output, // OUT: output array (size n) with job information: how much got encoded, end output pointer.
   size_t n,        // IN: size of arrays input and output (should be max 512)
   size_t unroll);  // IN: degree of SIMD unrolling

// C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree)
size_t compressImpl(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd);
size_t compressAuto(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], int simd);
451 | |