1 | /* ****************************************************************** |
2 | FSE : Finite State Entropy encoder |
3 | Copyright (C) 2013-2015, Yann Collet. |
4 | |
5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
6 | |
7 | Redistribution and use in source and binary forms, with or without |
8 | modification, are permitted provided that the following conditions are |
9 | met: |
10 | |
11 | * Redistributions of source code must retain the above copyright |
12 | notice, this list of conditions and the following disclaimer. |
13 | * Redistributions in binary form must reproduce the above |
14 | copyright notice, this list of conditions and the following disclaimer |
15 | in the documentation and/or other materials provided with the |
16 | distribution. |
17 | |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | You can contact the author at : |
31 | - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy |
32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
33 | ****************************************************************** */ |
34 | |
35 | /* ************************************************************** |
36 | * Includes |
37 | ****************************************************************/ |
38 | #include <stdlib.h> /* malloc, free, qsort */ |
39 | #include <string.h> /* memcpy, memset */ |
40 | #include <stdio.h> /* printf (debug) */ |
41 | #include "bitstream.h" |
42 | #include "compiler.h" |
43 | #define FSE_STATIC_LINKING_ONLY |
44 | #include "fse.h" |
45 | #include "error_private.h" |
46 | |
47 | |
48 | /* ************************************************************** |
49 | * Error Management |
50 | ****************************************************************/ |
51 | #define FSE_isError ERR_isError |
52 | #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ |
53 | |
54 | |
55 | /* ************************************************************** |
56 | * Templates |
57 | ****************************************************************/ |
58 | /* |
59 | designed to be included |
60 | for type-specific functions (template emulation in C) |
61 | Objective is to write these functions only once, for improved maintenance |
62 | */ |
63 | |
64 | /* safety checks */ |
65 | #ifndef FSE_FUNCTION_EXTENSION |
66 | # error "FSE_FUNCTION_EXTENSION must be defined" |
67 | #endif |
68 | #ifndef FSE_FUNCTION_TYPE |
69 | # error "FSE_FUNCTION_TYPE must be defined" |
70 | #endif |
71 | |
72 | /* Function names */ |
73 | #define FSE_CAT(X,Y) X##Y |
74 | #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) |
75 | #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) |
76 | |
77 | |
78 | /* Function templates */ |
79 | |
80 | /* FSE_buildCTable_wksp() : |
81 | * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). |
82 | * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` |
83 | * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements |
84 | */ |
85 | size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) |
86 | { |
87 | U32 const tableSize = 1 << tableLog; |
88 | U32 const tableMask = tableSize - 1; |
89 | void* const ptr = ct; |
90 | U16* const tableU16 = ( (U16*) ptr) + 2; |
91 | void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; |
92 | FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); |
93 | U32 const step = FSE_TABLESTEP(tableSize); |
94 | U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; |
95 | |
96 | FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; |
97 | U32 highThreshold = tableSize-1; |
98 | |
99 | /* CTable header */ |
100 | if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); |
101 | tableU16[-2] = (U16) tableLog; |
102 | tableU16[-1] = (U16) maxSymbolValue; |
103 | |
104 | /* For explanations on how to distribute symbol values over the table : |
105 | * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ |
106 | |
107 | /* symbol start positions */ |
108 | { U32 u; |
109 | cumul[0] = 0; |
110 | for (u=1; u<=maxSymbolValue+1; u++) { |
111 | if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ |
112 | cumul[u] = cumul[u-1] + 1; |
113 | tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); |
114 | } else { |
115 | cumul[u] = cumul[u-1] + normalizedCounter[u-1]; |
116 | } } |
117 | cumul[maxSymbolValue+1] = tableSize+1; |
118 | } |
119 | |
120 | /* Spread symbols */ |
121 | { U32 position = 0; |
122 | U32 symbol; |
123 | for (symbol=0; symbol<=maxSymbolValue; symbol++) { |
124 | int nbOccurences; |
125 | for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) { |
126 | tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; |
127 | position = (position + step) & tableMask; |
128 | while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */ |
129 | } } |
130 | |
131 | if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ |
132 | } |
133 | |
134 | /* Build table */ |
135 | { U32 u; for (u=0; u<tableSize; u++) { |
136 | FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ |
137 | tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ |
138 | } } |
139 | |
140 | /* Build Symbol Transformation Table */ |
141 | { unsigned total = 0; |
142 | unsigned s; |
143 | for (s=0; s<=maxSymbolValue; s++) { |
144 | switch (normalizedCounter[s]) |
145 | { |
146 | case 0: break; |
147 | |
148 | case -1: |
149 | case 1: |
150 | symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); |
151 | symbolTT[s].deltaFindState = total - 1; |
152 | total ++; |
153 | break; |
154 | default : |
155 | { |
156 | U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1); |
157 | U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; |
158 | symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; |
159 | symbolTT[s].deltaFindState = total - normalizedCounter[s]; |
160 | total += normalizedCounter[s]; |
161 | } } } } |
162 | |
163 | return 0; |
164 | } |
165 | |
166 | |
167 | size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) |
168 | { |
169 | FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ |
170 | return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); |
171 | } |
172 | |
173 | |
174 | |
175 | #ifndef FSE_COMMONDEFS_ONLY |
176 | |
177 | /*-************************************************************** |
178 | * FSE NCount encoding-decoding |
179 | ****************************************************************/ |
180 | size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) |
181 | { |
182 | size_t const = (((maxSymbolValue+1) * tableLog) >> 3) + 3; |
183 | return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ |
184 | } |
185 | |
186 | static size_t FSE_writeNCount_generic (void* , size_t , |
187 | const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, |
188 | unsigned writeIsSafe) |
189 | { |
190 | BYTE* const ostart = (BYTE*) header; |
191 | BYTE* out = ostart; |
192 | BYTE* const oend = ostart + headerBufferSize; |
193 | int nbBits; |
194 | const int tableSize = 1 << tableLog; |
195 | int remaining; |
196 | int threshold; |
197 | U32 bitStream; |
198 | int bitCount; |
199 | unsigned charnum = 0; |
200 | int previous0 = 0; |
201 | |
202 | bitStream = 0; |
203 | bitCount = 0; |
204 | /* Table Size */ |
205 | bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; |
206 | bitCount += 4; |
207 | |
208 | /* Init */ |
209 | remaining = tableSize+1; /* +1 for extra accuracy */ |
210 | threshold = tableSize; |
211 | nbBits = tableLog+1; |
212 | |
213 | while (remaining>1) { /* stops at 1 */ |
214 | if (previous0) { |
215 | unsigned start = charnum; |
216 | while (!normalizedCounter[charnum]) charnum++; |
217 | while (charnum >= start+24) { |
218 | start+=24; |
219 | bitStream += 0xFFFFU << bitCount; |
220 | if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ |
221 | out[0] = (BYTE) bitStream; |
222 | out[1] = (BYTE)(bitStream>>8); |
223 | out+=2; |
224 | bitStream>>=16; |
225 | } |
226 | while (charnum >= start+3) { |
227 | start+=3; |
228 | bitStream += 3 << bitCount; |
229 | bitCount += 2; |
230 | } |
231 | bitStream += (charnum-start) << bitCount; |
232 | bitCount += 2; |
233 | if (bitCount>16) { |
234 | if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ |
235 | out[0] = (BYTE)bitStream; |
236 | out[1] = (BYTE)(bitStream>>8); |
237 | out += 2; |
238 | bitStream >>= 16; |
239 | bitCount -= 16; |
240 | } } |
241 | { int count = normalizedCounter[charnum++]; |
242 | int const max = (2*threshold-1)-remaining; |
243 | remaining -= count < 0 ? -count : count; |
244 | count++; /* +1 for extra accuracy */ |
245 | if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ |
246 | bitStream += count << bitCount; |
247 | bitCount += nbBits; |
248 | bitCount -= (count<max); |
249 | previous0 = (count==1); |
250 | if (remaining<1) return ERROR(GENERIC); |
251 | while (remaining<threshold) { nbBits--; threshold>>=1; } |
252 | } |
253 | if (bitCount>16) { |
254 | if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ |
255 | out[0] = (BYTE)bitStream; |
256 | out[1] = (BYTE)(bitStream>>8); |
257 | out += 2; |
258 | bitStream >>= 16; |
259 | bitCount -= 16; |
260 | } } |
261 | |
262 | /* flush remaining bitStream */ |
263 | if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ |
264 | out[0] = (BYTE)bitStream; |
265 | out[1] = (BYTE)(bitStream>>8); |
266 | out+= (bitCount+7) /8; |
267 | |
268 | if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); |
269 | |
270 | return (out-ostart); |
271 | } |
272 | |
273 | |
274 | size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) |
275 | { |
276 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ |
277 | if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ |
278 | |
279 | if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) |
280 | return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); |
281 | |
282 | return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); |
283 | } |
284 | |
285 | |
286 | |
287 | /*-************************************************************** |
288 | * Counting histogram |
289 | ****************************************************************/ |
290 | /*! FSE_count_simple |
291 | This function counts byte values within `src`, and store the histogram into table `count`. |
292 | It doesn't use any additional memory. |
293 | But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. |
294 | For this reason, prefer using a table `count` with 256 elements. |
295 | @return : count of most numerous element. |
296 | */ |
297 | size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
298 | const void* src, size_t srcSize) |
299 | { |
300 | const BYTE* ip = (const BYTE*)src; |
301 | const BYTE* const end = ip + srcSize; |
302 | unsigned maxSymbolValue = *maxSymbolValuePtr; |
303 | unsigned max=0; |
304 | |
305 | memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); |
306 | if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } |
307 | |
308 | while (ip<end) { |
309 | assert(*ip <= maxSymbolValue); |
310 | count[*ip++]++; |
311 | } |
312 | |
313 | while (!count[maxSymbolValue]) maxSymbolValue--; |
314 | *maxSymbolValuePtr = maxSymbolValue; |
315 | |
316 | { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; } |
317 | |
318 | return (size_t)max; |
319 | } |
320 | |
321 | |
322 | /* FSE_count_parallel_wksp() : |
323 | * Same as FSE_count_parallel(), but using an externally provided scratch buffer. |
324 | * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`. |
325 | * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ |
326 | static size_t FSE_count_parallel_wksp( |
327 | unsigned* count, unsigned* maxSymbolValuePtr, |
328 | const void* source, size_t sourceSize, |
329 | unsigned checkMax, unsigned* const workSpace) |
330 | { |
331 | const BYTE* ip = (const BYTE*)source; |
332 | const BYTE* const iend = ip+sourceSize; |
333 | unsigned maxSymbolValue = *maxSymbolValuePtr; |
334 | unsigned max=0; |
335 | U32* const Counting1 = workSpace; |
336 | U32* const Counting2 = Counting1 + 256; |
337 | U32* const Counting3 = Counting2 + 256; |
338 | U32* const Counting4 = Counting3 + 256; |
339 | |
340 | memset(workSpace, 0, 4*256*sizeof(unsigned)); |
341 | |
342 | /* safety checks */ |
343 | if (!sourceSize) { |
344 | memset(count, 0, maxSymbolValue + 1); |
345 | *maxSymbolValuePtr = 0; |
346 | return 0; |
347 | } |
348 | if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ |
349 | |
350 | /* by stripes of 16 bytes */ |
351 | { U32 cached = MEM_read32(ip); ip += 4; |
352 | while (ip < iend-15) { |
353 | U32 c = cached; cached = MEM_read32(ip); ip += 4; |
354 | Counting1[(BYTE) c ]++; |
355 | Counting2[(BYTE)(c>>8) ]++; |
356 | Counting3[(BYTE)(c>>16)]++; |
357 | Counting4[ c>>24 ]++; |
358 | c = cached; cached = MEM_read32(ip); ip += 4; |
359 | Counting1[(BYTE) c ]++; |
360 | Counting2[(BYTE)(c>>8) ]++; |
361 | Counting3[(BYTE)(c>>16)]++; |
362 | Counting4[ c>>24 ]++; |
363 | c = cached; cached = MEM_read32(ip); ip += 4; |
364 | Counting1[(BYTE) c ]++; |
365 | Counting2[(BYTE)(c>>8) ]++; |
366 | Counting3[(BYTE)(c>>16)]++; |
367 | Counting4[ c>>24 ]++; |
368 | c = cached; cached = MEM_read32(ip); ip += 4; |
369 | Counting1[(BYTE) c ]++; |
370 | Counting2[(BYTE)(c>>8) ]++; |
371 | Counting3[(BYTE)(c>>16)]++; |
372 | Counting4[ c>>24 ]++; |
373 | } |
374 | ip-=4; |
375 | } |
376 | |
377 | /* finish last symbols */ |
378 | while (ip<iend) Counting1[*ip++]++; |
379 | |
380 | if (checkMax) { /* verify stats will fit into destination table */ |
381 | U32 s; for (s=255; s>maxSymbolValue; s--) { |
382 | Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; |
383 | if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); |
384 | } } |
385 | |
386 | { U32 s; |
387 | if (maxSymbolValue > 255) maxSymbolValue = 255; |
388 | for (s=0; s<=maxSymbolValue; s++) { |
389 | count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; |
390 | if (count[s] > max) max = count[s]; |
391 | } } |
392 | |
393 | while (!count[maxSymbolValue]) maxSymbolValue--; |
394 | *maxSymbolValuePtr = maxSymbolValue; |
395 | return (size_t)max; |
396 | } |
397 | |
398 | /* FSE_countFast_wksp() : |
399 | * Same as FSE_countFast(), but using an externally provided scratch buffer. |
400 | * `workSpace` size must be table of >= `1024` unsigned */ |
401 | size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
402 | const void* source, size_t sourceSize, |
403 | unsigned* workSpace) |
404 | { |
405 | if (sourceSize < 1500) /* heuristic threshold */ |
406 | return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); |
407 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); |
408 | } |
409 | |
410 | /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ |
411 | size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
412 | const void* source, size_t sourceSize) |
413 | { |
414 | unsigned tmpCounters[1024]; |
415 | return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); |
416 | } |
417 | |
418 | /* FSE_count_wksp() : |
419 | * Same as FSE_count(), but using an externally provided scratch buffer. |
420 | * `workSpace` size must be table of >= `1024` unsigned */ |
421 | size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
422 | const void* source, size_t sourceSize, unsigned* workSpace) |
423 | { |
424 | if (*maxSymbolValuePtr < 255) |
425 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); |
426 | *maxSymbolValuePtr = 255; |
427 | return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); |
428 | } |
429 | |
430 | size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, |
431 | const void* src, size_t srcSize) |
432 | { |
433 | unsigned tmpCounters[1024]; |
434 | return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); |
435 | } |
436 | |
437 | |
438 | |
439 | /*-************************************************************** |
440 | * FSE Compression Code |
441 | ****************************************************************/ |
442 | /*! FSE_sizeof_CTable() : |
443 | FSE_CTable is a variable size structure which contains : |
444 | `U16 tableLog;` |
445 | `U16 maxSymbolValue;` |
446 | `U16 nextStateNumber[1 << tableLog];` // This size is variable |
447 | `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable |
448 | Allocation is manual (C standard does not support variable-size structures). |
449 | */ |
450 | size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) |
451 | { |
452 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); |
453 | return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); |
454 | } |
455 | |
456 | FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) |
457 | { |
458 | size_t size; |
459 | if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; |
460 | size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); |
461 | return (FSE_CTable*)malloc(size); |
462 | } |
463 | |
464 | void FSE_freeCTable (FSE_CTable* ct) { free(ct); } |
465 | |
466 | /* provides the minimum logSize to safely represent a distribution */ |
467 | static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) |
468 | { |
469 | U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; |
470 | U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; |
471 | U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; |
472 | assert(srcSize > 1); /* Not supported, RLE should be used instead */ |
473 | return minBits; |
474 | } |
475 | |
476 | unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) |
477 | { |
478 | U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; |
479 | U32 tableLog = maxTableLog; |
480 | U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); |
481 | assert(srcSize > 1); /* Not supported, RLE should be used instead */ |
482 | if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; |
483 | if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ |
484 | if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ |
485 | if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; |
486 | if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; |
487 | return tableLog; |
488 | } |
489 | |
490 | unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) |
491 | { |
492 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); |
493 | } |
494 | |
495 | |
496 | /* Secondary normalization method. |
497 | To be used when primary method fails. */ |
498 | |
499 | static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) |
500 | { |
501 | short const NOT_YET_ASSIGNED = -2; |
502 | U32 s; |
503 | U32 distributed = 0; |
504 | U32 ToDistribute; |
505 | |
506 | /* Init */ |
507 | U32 const lowThreshold = (U32)(total >> tableLog); |
508 | U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); |
509 | |
510 | for (s=0; s<=maxSymbolValue; s++) { |
511 | if (count[s] == 0) { |
512 | norm[s]=0; |
513 | continue; |
514 | } |
515 | if (count[s] <= lowThreshold) { |
516 | norm[s] = -1; |
517 | distributed++; |
518 | total -= count[s]; |
519 | continue; |
520 | } |
521 | if (count[s] <= lowOne) { |
522 | norm[s] = 1; |
523 | distributed++; |
524 | total -= count[s]; |
525 | continue; |
526 | } |
527 | |
528 | norm[s]=NOT_YET_ASSIGNED; |
529 | } |
530 | ToDistribute = (1 << tableLog) - distributed; |
531 | |
532 | if ((total / ToDistribute) > lowOne) { |
533 | /* risk of rounding to zero */ |
534 | lowOne = (U32)((total * 3) / (ToDistribute * 2)); |
535 | for (s=0; s<=maxSymbolValue; s++) { |
536 | if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { |
537 | norm[s] = 1; |
538 | distributed++; |
539 | total -= count[s]; |
540 | continue; |
541 | } } |
542 | ToDistribute = (1 << tableLog) - distributed; |
543 | } |
544 | |
545 | if (distributed == maxSymbolValue+1) { |
546 | /* all values are pretty poor; |
547 | probably incompressible data (should have already been detected); |
548 | find max, then give all remaining points to max */ |
549 | U32 maxV = 0, maxC = 0; |
550 | for (s=0; s<=maxSymbolValue; s++) |
551 | if (count[s] > maxC) { maxV=s; maxC=count[s]; } |
552 | norm[maxV] += (short)ToDistribute; |
553 | return 0; |
554 | } |
555 | |
556 | if (total == 0) { |
557 | /* all of the symbols were low enough for the lowOne or lowThreshold */ |
558 | for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) |
559 | if (norm[s] > 0) { ToDistribute--; norm[s]++; } |
560 | return 0; |
561 | } |
562 | |
563 | { U64 const vStepLog = 62 - tableLog; |
564 | U64 const mid = (1ULL << (vStepLog-1)) - 1; |
565 | U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ |
566 | U64 tmpTotal = mid; |
567 | for (s=0; s<=maxSymbolValue; s++) { |
568 | if (norm[s]==NOT_YET_ASSIGNED) { |
569 | U64 const end = tmpTotal + (count[s] * rStep); |
570 | U32 const sStart = (U32)(tmpTotal >> vStepLog); |
571 | U32 const sEnd = (U32)(end >> vStepLog); |
572 | U32 const weight = sEnd - sStart; |
573 | if (weight < 1) |
574 | return ERROR(GENERIC); |
575 | norm[s] = (short)weight; |
576 | tmpTotal = end; |
577 | } } } |
578 | |
579 | return 0; |
580 | } |
581 | |
582 | |
583 | size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, |
584 | const unsigned* count, size_t total, |
585 | unsigned maxSymbolValue) |
586 | { |
587 | /* Sanity checks */ |
588 | if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; |
589 | if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ |
590 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ |
591 | if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ |
592 | |
593 | { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; |
594 | U64 const scale = 62 - tableLog; |
595 | U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ |
596 | U64 const vStep = 1ULL<<(scale-20); |
597 | int stillToDistribute = 1<<tableLog; |
598 | unsigned s; |
599 | unsigned largest=0; |
600 | short largestP=0; |
601 | U32 lowThreshold = (U32)(total >> tableLog); |
602 | |
603 | for (s=0; s<=maxSymbolValue; s++) { |
604 | if (count[s] == total) return 0; /* rle special case */ |
605 | if (count[s] == 0) { normalizedCounter[s]=0; continue; } |
606 | if (count[s] <= lowThreshold) { |
607 | normalizedCounter[s] = -1; |
608 | stillToDistribute--; |
609 | } else { |
610 | short proba = (short)((count[s]*step) >> scale); |
611 | if (proba<8) { |
612 | U64 restToBeat = vStep * rtbTable[proba]; |
613 | proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; |
614 | } |
615 | if (proba > largestP) { largestP=proba; largest=s; } |
616 | normalizedCounter[s] = proba; |
617 | stillToDistribute -= proba; |
618 | } } |
619 | if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { |
620 | /* corner case, need another normalization method */ |
621 | size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); |
622 | if (FSE_isError(errorCode)) return errorCode; |
623 | } |
624 | else normalizedCounter[largest] += (short)stillToDistribute; |
625 | } |
626 | |
627 | #if 0 |
628 | { /* Print Table (debug) */ |
629 | U32 s; |
630 | U32 nTotal = 0; |
631 | for (s=0; s<=maxSymbolValue; s++) |
632 | printf("%3i: %4i \n" , s, normalizedCounter[s]); |
633 | for (s=0; s<=maxSymbolValue; s++) |
634 | nTotal += abs(normalizedCounter[s]); |
635 | if (nTotal != (1U<<tableLog)) |
636 | printf("Warning !!! Total == %u != %u !!!" , nTotal, 1U<<tableLog); |
637 | getchar(); |
638 | } |
639 | #endif |
640 | |
641 | return tableLog; |
642 | } |
643 | |
644 | |
645 | /* fake FSE_CTable, for raw (uncompressed) input */ |
646 | size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) |
647 | { |
648 | const unsigned tableSize = 1 << nbBits; |
649 | const unsigned tableMask = tableSize - 1; |
650 | const unsigned maxSymbolValue = tableMask; |
651 | void* const ptr = ct; |
652 | U16* const tableU16 = ( (U16*) ptr) + 2; |
653 | void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ |
654 | FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); |
655 | unsigned s; |
656 | |
657 | /* Sanity checks */ |
658 | if (nbBits < 1) return ERROR(GENERIC); /* min size */ |
659 | |
660 | /* header */ |
661 | tableU16[-2] = (U16) nbBits; |
662 | tableU16[-1] = (U16) maxSymbolValue; |
663 | |
664 | /* Build table */ |
665 | for (s=0; s<tableSize; s++) |
666 | tableU16[s] = (U16)(tableSize + s); |
667 | |
668 | /* Build Symbol Transformation Table */ |
669 | { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); |
670 | for (s=0; s<=maxSymbolValue; s++) { |
671 | symbolTT[s].deltaNbBits = deltaNbBits; |
672 | symbolTT[s].deltaFindState = s-1; |
673 | } } |
674 | |
675 | return 0; |
676 | } |
677 | |
678 | /* fake FSE_CTable, for rle input (always same symbol) */ |
679 | size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) |
680 | { |
681 | void* ptr = ct; |
682 | U16* tableU16 = ( (U16*) ptr) + 2; |
683 | void* FSCTptr = (U32*)ptr + 2; |
684 | FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; |
685 | |
686 | /* header */ |
687 | tableU16[-2] = (U16) 0; |
688 | tableU16[-1] = (U16) symbolValue; |
689 | |
690 | /* Build table */ |
691 | tableU16[0] = 0; |
692 | tableU16[1] = 0; /* just in case */ |
693 | |
694 | /* Build Symbol Transformation Table */ |
695 | symbolTT[symbolValue].deltaNbBits = 0; |
696 | symbolTT[symbolValue].deltaFindState = 0; |
697 | |
698 | return 0; |
699 | } |
700 | |
701 | |
702 | static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, |
703 | const void* src, size_t srcSize, |
704 | const FSE_CTable* ct, const unsigned fast) |
705 | { |
706 | const BYTE* const istart = (const BYTE*) src; |
707 | const BYTE* const iend = istart + srcSize; |
708 | const BYTE* ip=iend; |
709 | |
710 | BIT_CStream_t bitC; |
711 | FSE_CState_t CState1, CState2; |
712 | |
713 | /* init */ |
714 | if (srcSize <= 2) return 0; |
715 | { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); |
716 | if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } |
717 | |
718 | #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) |
719 | |
720 | if (srcSize & 1) { |
721 | FSE_initCState2(&CState1, ct, *--ip); |
722 | FSE_initCState2(&CState2, ct, *--ip); |
723 | FSE_encodeSymbol(&bitC, &CState1, *--ip); |
724 | FSE_FLUSHBITS(&bitC); |
725 | } else { |
726 | FSE_initCState2(&CState2, ct, *--ip); |
727 | FSE_initCState2(&CState1, ct, *--ip); |
728 | } |
729 | |
730 | /* join to mod 4 */ |
731 | srcSize -= 2; |
732 | if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ |
733 | FSE_encodeSymbol(&bitC, &CState2, *--ip); |
734 | FSE_encodeSymbol(&bitC, &CState1, *--ip); |
735 | FSE_FLUSHBITS(&bitC); |
736 | } |
737 | |
738 | /* 2 or 4 encoding per loop */ |
739 | while ( ip>istart ) { |
740 | |
741 | FSE_encodeSymbol(&bitC, &CState2, *--ip); |
742 | |
743 | if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ |
744 | FSE_FLUSHBITS(&bitC); |
745 | |
746 | FSE_encodeSymbol(&bitC, &CState1, *--ip); |
747 | |
748 | if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ |
749 | FSE_encodeSymbol(&bitC, &CState2, *--ip); |
750 | FSE_encodeSymbol(&bitC, &CState1, *--ip); |
751 | } |
752 | |
753 | FSE_FLUSHBITS(&bitC); |
754 | } |
755 | |
756 | FSE_flushCState(&bitC, &CState2); |
757 | FSE_flushCState(&bitC, &CState1); |
758 | return BIT_closeCStream(&bitC); |
759 | } |
760 | |
761 | size_t FSE_compress_usingCTable (void* dst, size_t dstSize, |
762 | const void* src, size_t srcSize, |
763 | const FSE_CTable* ct) |
764 | { |
765 | unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); |
766 | |
767 | if (fast) |
768 | return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); |
769 | else |
770 | return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); |
771 | } |
772 | |
773 | |
774 | size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |
775 | |
776 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e |
777 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } |
778 | |
779 | /* FSE_compress_wksp() : |
780 | * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). |
781 | * `wkspSize` size must be `(1<<tableLog)`. |
782 | */ |
783 | size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) |
784 | { |
785 | BYTE* const ostart = (BYTE*) dst; |
786 | BYTE* op = ostart; |
787 | BYTE* const oend = ostart + dstSize; |
788 | |
789 | U32 count[FSE_MAX_SYMBOL_VALUE+1]; |
790 | S16 norm[FSE_MAX_SYMBOL_VALUE+1]; |
791 | FSE_CTable* CTable = (FSE_CTable*)workSpace; |
792 | size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); |
793 | void* scratchBuffer = (void*)(CTable + CTableSize); |
794 | size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); |
795 | |
796 | /* init conditions */ |
797 | if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); |
798 | if (srcSize <= 1) return 0; /* Not compressible */ |
799 | if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; |
800 | if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; |
801 | |
802 | /* Scan input and build symbol stats */ |
803 | { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) ); |
804 | if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ |
805 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
806 | if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ |
807 | } |
808 | |
809 | tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); |
810 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); |
811 | |
812 | /* Write table description header */ |
813 | { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
814 | op += nc_err; |
815 | } |
816 | |
817 | /* Compress */ |
818 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); |
819 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); |
820 | if (cSize == 0) return 0; /* not enough space for compressed data */ |
821 | op += cSize; |
822 | } |
823 | |
824 | /* check compressibility */ |
825 | if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; |
826 | |
827 | return op-ostart; |
828 | } |
829 | |
830 | typedef struct { |
831 | FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; |
832 | BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; |
833 | } fseWkspMax_t; |
834 | |
835 | size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) |
836 | { |
837 | fseWkspMax_t scratchBuffer; |
838 | FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ |
839 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); |
840 | return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); |
841 | } |
842 | |
843 | size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
844 | { |
845 | return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); |
846 | } |
847 | |
848 | |
849 | #endif /* FSE_COMMONDEFS_ONLY */ |
850 | |