1 | /* ****************************************************************** |
2 | Huffman encoder, part of New Generation Entropy library |
3 | Copyright (C) 2013-2016, Yann Collet. |
4 | |
5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
6 | |
7 | Redistribution and use in source and binary forms, with or without |
8 | modification, are permitted provided that the following conditions are |
9 | met: |
10 | |
11 | * Redistributions of source code must retain the above copyright |
12 | notice, this list of conditions and the following disclaimer. |
13 | * Redistributions in binary form must reproduce the above |
14 | copyright notice, this list of conditions and the following disclaimer |
15 | in the documentation and/or other materials provided with the |
16 | distribution. |
17 | |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | You can contact the author at : |
31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
33 | ****************************************************************** */ |
34 | |
35 | /* ************************************************************** |
36 | * Compiler specifics |
37 | ****************************************************************/ |
38 | #ifdef _MSC_VER /* Visual Studio */ |
39 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ |
40 | #endif |
41 | |
42 | |
43 | /* ************************************************************** |
44 | * Includes |
45 | ****************************************************************/ |
46 | #include <string.h> /* memcpy, memset */ |
47 | #include <stdio.h> /* printf (debug) */ |
48 | #include "bitstream.h" |
49 | #include "compiler.h" |
50 | #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ |
51 | #include "fse.h" /* header compression */ |
52 | #define HUF_STATIC_LINKING_ONLY |
53 | #include "huf.h" |
54 | #include "error_private.h" |
55 | |
56 | |
57 | /* ************************************************************** |
58 | * Error Management |
59 | ****************************************************************/ |
60 | #define HUF_isError ERR_isError |
61 | #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ |
62 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e |
63 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } |
64 | |
65 | |
66 | /* ************************************************************** |
67 | * Utils |
68 | ****************************************************************/ |
69 | unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) |
70 | { |
71 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); |
72 | } |
73 | |
74 | |
75 | /* ******************************************************* |
76 | * HUF : Huffman block compression |
77 | *********************************************************/ |
78 | /* HUF_compressWeights() : |
79 | * Same as FSE_compress(), but dedicated to huff0's weights compression. |
80 | * The use case needs much less stack memory. |
81 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. |
82 | */ |
83 | #define 6 |
84 | size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) |
85 | { |
86 | BYTE* const ostart = (BYTE*) dst; |
87 | BYTE* op = ostart; |
88 | BYTE* const oend = ostart + dstSize; |
89 | |
90 | U32 maxSymbolValue = HUF_TABLELOG_MAX; |
91 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
92 | |
93 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; |
94 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; |
95 | |
96 | U32 count[HUF_TABLELOG_MAX+1]; |
97 | S16 norm[HUF_TABLELOG_MAX+1]; |
98 | |
99 | /* init conditions */ |
100 | if (wtSize <= 1) return 0; /* Not compressible */ |
101 | |
102 | /* Scan input and build symbol stats */ |
103 | { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); |
104 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ |
105 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
106 | } |
107 | |
108 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); |
109 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); |
110 | |
111 | /* Write table description header */ |
112 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
113 | op += hSize; |
114 | } |
115 | |
116 | /* Compress */ |
117 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); |
118 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); |
119 | if (cSize == 0) return 0; /* not enough space for compressed data */ |
120 | op += cSize; |
121 | } |
122 | |
123 | return op-ostart; |
124 | } |
125 | |
126 | |
/* One entry of a huffman compression table :
 * `val` is the code value for the symbol, `nbBits` its code length in bits. */
struct HUF_CElt_s {
    U16  val;      /* code value (right-aligned) */
    BYTE nbBits;   /* code length, in bits */
};   /* typedef'd to HUF_CElt within "huf.h" */
131 | |
/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    First attempts FSE compression of the weights; if that doesn't pay off,
    falls back to raw weights packed as 4-bit nibbles.
    @return : size of saved CTable, or an error code */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

    /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight : weight = huffLog + 1 - nbBits (0 means "symbol absent") */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;   /* header byte < 128 signals FSE-compressed weights */
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));   /* header byte >= 128 signals raw 4-bit weights */
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}
169 | |
170 | |
171 | size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize) |
172 | { |
173 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ |
174 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ |
175 | U32 tableLog = 0; |
176 | U32 nbSymbols = 0; |
177 | |
178 | /* get symbol weights */ |
179 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); |
180 | |
181 | /* check result */ |
182 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
183 | if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); |
184 | |
185 | /* Prepare base value per rank */ |
186 | { U32 n, = 0; |
187 | for (n=1; n<=tableLog; n++) { |
188 | U32 current = nextRankStart; |
189 | nextRankStart += (rankVal[n] << (n-1)); |
190 | rankVal[n] = current; |
191 | } } |
192 | |
193 | /* fill nbBits */ |
194 | { U32 n; for (n=0; n<nbSymbols; n++) { |
195 | const U32 w = huffWeight[n]; |
196 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); |
197 | } } |
198 | |
199 | /* fill val */ |
200 | { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ |
201 | U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; |
202 | { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } |
203 | /* determine stating value per rank */ |
204 | valPerRank[tableLog+1] = 0; /* for w==0 */ |
205 | { U16 min = 0; |
206 | U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ |
207 | valPerRank[n] = min; /* get starting value within each rank */ |
208 | min += nbPerRank[n]; |
209 | min >>= 1; |
210 | } } |
211 | /* assign value within rank, symbol order */ |
212 | { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } |
213 | } |
214 | |
215 | *maxSymbolValuePtr = nbSymbols - 1; |
216 | return readSize; |
217 | } |
218 | |
219 | |
/* Node of the huffman tree under construction.
 * Leaves carry the original symbol in `byte`; internal nodes only use
 * `count`, `parent` and `nbBits`. */
typedef struct nodeElt_s {
    U32 count;    /* occurrence count (or sum of children for internal nodes) */
    U16 parent;   /* index of parent node within huffNode[] */
    BYTE byte;    /* symbol value (leaves only) */
    BYTE nbBits;  /* resulting code length */
} nodeElt;
226 | |
/** HUF_setMaxHeight() :
 *  Enforce `maxNbBits` as the maximum code length over the tree described by
 *  huffNode[0..lastNonNull] (sorted by decreasing count). Over-long codes are
 *  flattened to maxNbBits, and the resulting deficit is repaid by lengthening
 *  codes of cheaper symbols until the code is complete again.
 * @return : effective max code length after correction. */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        U32 n = lastNonNull;

        /* flatten all codes longer than maxNbBits, accumulating the cost incurred */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n --;
        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */

        /* renorm totalCost */
        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;   /* sentinel : rank has no symbol */
            U32 rankLast[HUF_TABLELOG_MAX+2];  /* rankLast[r] = pos of last symbol using (maxNbBits-r) bits */
            int pos;

            /* Get pos of last (smallest) symbol per rank */
            memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = pos;
            }   }

            while (totalCost > 0) {
                /* repay the largest affordable power of two first */
                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 highPos = rankLast[nBitsToDecrease];
                    U32 lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
            }   }   /* while (totalCost > 0) */

            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    rankLast[1] = n+1;
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
    }   }   }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
303 | |
304 | |
/* One bucket of the rank-based sort in HUF_sort() :
 * `base` is the bucket's starting offset, `current` the next free slot. */
typedef struct {
    U32 base;
    U32 current;
} rankPos;
309 | |
/** HUF_sort() :
 *  Sort symbols 0..maxSymbolValue by decreasing count into huffNode[],
 *  using log2-sized rank buckets plus an insertion pass within each bucket. */
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
{
    rankPos rank[32];
    U32 n;

    memset(rank, 0, sizeof(rank));
    /* histogram symbols per rank (rank = highbit of count+1) */
    for (n=0; n<=maxSymbolValue; n++) {
        U32 r = BIT_highbit32(count[n] + 1);
        rank[r].base ++;
    }
    /* suffix-sum so higher-count ranks get lower (front) positions */
    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
    for (n=0; n<32; n++) rank[n].current = rank[n].base;
    /* place each symbol in its bucket, keeping the bucket sorted by count */
    for (n=0; n<=maxSymbolValue; n++) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rank[r].current++;
        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
}
334 | |
335 | |
/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 *  Builds the huffman tree for `count`, limits its height to maxNbBits,
 *  and writes the resulting (val, nbBits) pairs into `tree`.
 * @return : effective maxNbBits, or an error code.
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)   /* first internal-node slot, after all possible leaves */
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    nodeElt* const huffNode0 = (nodeElt*)workSpace;
    nodeElt* const huffNode = huffNode0+1;   /* huffNode0[0] is reserved as a sentinel */
    U32 n, nonNullRank;
    int lowS, lowN;
    U16 nodeNb = STARTNODE;
    U32 nodeRoot;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue);

    /* init for parents : merge the two rarest leaves into the first internal node */
    nonNullRank = maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly combine the two lowest-count nodes */
    while (nodeNb <= nodeRoot) {
        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

    /* fill result into tree (val, nbBits) */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
        for (n=0; n<=nonNullRank; n++)
            nbPerRank[huffNode[n].nbBits]++;
        /* determine starting value per rank */
        {   U16 min = 0;
            for (n=maxNbBits; n>0; n--) {
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        for (n=0; n<=maxSymbolValue; n++)
            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        for (n=0; n<=maxSymbolValue; n++)
            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
    }

    return maxNbBits;
}
411 | |
412 | /** HUF_buildCTable() : |
413 | * @return : maxNbBits |
414 | * Note : count is used before tree is written, so they can safely overlap |
415 | */ |
416 | size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) |
417 | { |
418 | huffNodeTable nodeTable; |
419 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); |
420 | } |
421 | |
422 | static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) |
423 | { |
424 | size_t nbBits = 0; |
425 | int s; |
426 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
427 | nbBits += CTable[s].nbBits * count[s]; |
428 | } |
429 | return nbBits >> 3; |
430 | } |
431 | |
432 | static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { |
433 | int bad = 0; |
434 | int s; |
435 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
436 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); |
437 | } |
438 | return !bad; |
439 | } |
440 | |
441 | size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } |
442 | |
/* HUF_encodeSymbol() :
 * Push the code (val, nbBits) of `symbol` into the bit stream.
 * Uses the Fast variant : caller is responsible for flushing often enough. */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
448 | |
/* HUF_FLUSHBITS : unconditional flush of the bit container */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* HUF_FLUSHBITS_1 : flush only when the container cannot be guaranteed to hold
 * two max-length codes (i.e. on narrow bit containers) */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

/* HUF_FLUSHBITS_2 : flush only when the container cannot be guaranteed to hold
 * four max-length codes */
#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
456 | |
/** HUF_compress1X_usingCTable_internal_body() :
 *  Encode `src` into a single huffman bit stream, using `CTable`.
 *  Symbols are encoded from the end of `src` towards the beginning,
 *  4 symbols per loop iteration (leftover srcSize%4 handled first).
 * @return : compressed size, or 0 if dst is too small. */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;  /* join to mod 4 */
    /* handle the trailing 1-3 symbols first, so the main loop works in groups of 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
		 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
		 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
		 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    /* main loop : 4 symbols per iteration, flushing as the container allows */
    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}
503 | |
#if DYNAMIC_BMI2

/* BMI2-targeted instantiation of the encoding body (selected at runtime). */
static TARGET_ATTRIBUTE("bmi2" ) size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Portable instantiation of the encoding body. */
static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Runtime dispatcher : picks the bmi2 variant when the caller says it's available. */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* No dynamic dispatch : single instantiation, `bmi2` flag ignored. */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif
545 | |
546 | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
547 | { |
548 | return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
549 | } |
550 | |
551 | |
552 | static size_t |
553 | HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, |
554 | const void* src, size_t srcSize, |
555 | const HUF_CElt* CTable, int bmi2) |
556 | { |
557 | size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ |
558 | const BYTE* ip = (const BYTE*) src; |
559 | const BYTE* const iend = ip + srcSize; |
560 | BYTE* const ostart = (BYTE*) dst; |
561 | BYTE* const oend = ostart + dstSize; |
562 | BYTE* op = ostart; |
563 | |
564 | if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ |
565 | if (srcSize < 12) return 0; /* no saving possible : too small input */ |
566 | op += 6; /* jumpTable */ |
567 | |
568 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
569 | if (cSize==0) return 0; |
570 | assert(cSize <= 65535); |
571 | MEM_writeLE16(ostart, (U16)cSize); |
572 | op += cSize; |
573 | } |
574 | |
575 | ip += segmentSize; |
576 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
577 | if (cSize==0) return 0; |
578 | assert(cSize <= 65535); |
579 | MEM_writeLE16(ostart+2, (U16)cSize); |
580 | op += cSize; |
581 | } |
582 | |
583 | ip += segmentSize; |
584 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
585 | if (cSize==0) return 0; |
586 | assert(cSize <= 65535); |
587 | MEM_writeLE16(ostart+4, (U16)cSize); |
588 | op += cSize; |
589 | } |
590 | |
591 | ip += segmentSize; |
592 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); |
593 | if (cSize==0) return 0; |
594 | op += cSize; |
595 | } |
596 | |
597 | return op-ostart; |
598 | } |
599 | |
600 | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
601 | { |
602 | return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
603 | } |
604 | |
605 | |
606 | static size_t HUF_compressCTable_internal( |
607 | BYTE* const ostart, BYTE* op, BYTE* const oend, |
608 | const void* src, size_t srcSize, |
609 | unsigned singleStream, const HUF_CElt* CTable, const int bmi2) |
610 | { |
611 | size_t const cSize = singleStream ? |
612 | HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : |
613 | HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); |
614 | if (HUF_isError(cSize)) { return cSize; } |
615 | if (cSize==0) { return 0; } /* uncompressible */ |
616 | op += cSize; |
617 | /* check compressibility */ |
618 | if ((size_t)(op-ostart) >= srcSize-1) { return 0; } |
619 | return op-ostart; |
620 | } |
621 | |
/* Aggregated workspace layout used by HUF_compress_internal() :
 * symbol histogram, the CTable under construction, and the node table
 * scratch area for tree building. */
typedef struct {
    U32 count[HUF_SYMBOLVALUE_MAX + 1];      /* symbol occurrence counts */
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; /* huffman table being built */
    huffNodeTable nodeTable;                  /* scratch for HUF_buildCTable_wksp() */
} HUF_compress_tables_t;
627 | |
/* HUF_compress_internal() :
 * Full huffman compression pipeline : histogram, table construction,
 * header emission, then payload encoding (single- or 4-stream).
 * When `repeat` is provided, may re-use `oldHufTable` if it is valid and
 * estimated to be cheaper than emitting a new table.
 * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t HUF_compress_internal (
                void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog,
                unsigned singleStream,
                void* workSpace, size_t wkspSize,
                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    /* checks & inits */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+1) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
                                                maxSymbolValue, huffLog,
                                                table->nodeTable, sizeof(table->nodeTable)) );
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        memset(table->CTable + (maxSymbolValue + 1), 0,
               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   singleStream, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone would eat the gain */
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       singleStream, table->CTable, bmi2);
}
715 | |
716 | |
/* HUF_compress1X_wksp() :
 * single-stream compression, using a caller-provided workspace
 * (at least HUF_WORKSPACE_SIZE_U32 unsigned, 4-byte aligned). */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}
727 | |
/* HUF_compress1X_repeat() :
 * single-stream compression with optional re-use of an existing huffman table
 * (`hufTable` + `repeat` state), and optional bmi2 acceleration. */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}
739 | |
740 | size_t HUF_compress1X (void* dst, size_t dstSize, |
741 | const void* src, size_t srcSize, |
742 | unsigned maxSymbolValue, unsigned huffLog) |
743 | { |
744 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
745 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
746 | } |
747 | |
/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables
 * (at least HUF_WORKSPACE_SIZE_U32 unsigned, 4-byte aligned). */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 0 /*4 streams*/,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}
761 | |
/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * re-use an existing huffman compression table (`hufTable` + `repeat` state),
 * with optional bmi2 acceleration. */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 0 /* 4 streams */,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
}
776 | |
777 | size_t HUF_compress2 (void* dst, size_t dstSize, |
778 | const void* src, size_t srcSize, |
779 | unsigned maxSymbolValue, unsigned huffLog) |
780 | { |
781 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
782 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
783 | } |
784 | |
785 | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) |
786 | { |
787 | return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); |
788 | } |
789 | |