1 | /* ****************************************************************** |
2 | Huffman decoder, part of New Generation Entropy library |
3 | Copyright (C) 2013-2016, Yann Collet. |
4 | |
5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
6 | |
7 | Redistribution and use in source and binary forms, with or without |
8 | modification, are permitted provided that the following conditions are |
9 | met: |
10 | |
11 | * Redistributions of source code must retain the above copyright |
12 | notice, this list of conditions and the following disclaimer. |
13 | * Redistributions in binary form must reproduce the above |
14 | copyright notice, this list of conditions and the following disclaimer |
15 | in the documentation and/or other materials provided with the |
16 | distribution. |
17 | |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | You can contact the author at : |
31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
33 | ****************************************************************** */ |
34 | |
35 | /* ************************************************************** |
36 | * Dependencies |
37 | ****************************************************************/ |
38 | #include <string.h> /* memcpy, memset */ |
39 | #include "bitstream.h" /* BIT_* */ |
40 | #include "compiler.h" |
41 | #include "fse.h" /* header compression */ |
42 | #define HUF_STATIC_LINKING_ONLY |
43 | #include "huf.h" |
44 | #include "error_private.h" |
45 | |
46 | |
47 | /* ************************************************************** |
48 | * Error Management |
49 | ****************************************************************/ |
50 | #define HUF_isError ERR_isError |
51 | #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ |
52 | #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } |
53 | |
54 | |
55 | /* ************************************************************** |
56 | * Byte alignment for workSpace management |
57 | ****************************************************************/ |
58 | #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) |
59 | #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) |
60 | |
61 | |
62 | /*-***************************/ |
63 | /* generic DTableDesc */ |
64 | /*-***************************/ |
65 | typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; |
66 | |
67 | static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) |
68 | { |
69 | DTableDesc dtd; |
70 | memcpy(&dtd, table, sizeof(dtd)); |
71 | return dtd; |
72 | } |
73 | |
74 | |
75 | /*-***************************/ |
76 | /* single-symbol decoding */ |
77 | /*-***************************/ |
78 | typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ |
79 | |
80 | size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) |
81 | { |
82 | U32 tableLog = 0; |
83 | U32 nbSymbols = 0; |
84 | size_t iSize; |
85 | void* const dtPtr = DTable + 1; |
86 | HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; |
87 | |
88 | U32* rankVal; |
89 | BYTE* huffWeight; |
90 | size_t spaceUsed32 = 0; |
91 | |
92 | rankVal = (U32 *)workSpace + spaceUsed32; |
93 | spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; |
94 | huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); |
95 | spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; |
96 | |
97 | if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); |
98 | |
99 | HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); |
100 | /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ |
101 | |
102 | iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); |
103 | if (HUF_isError(iSize)) return iSize; |
104 | |
105 | /* Table header */ |
106 | { DTableDesc dtd = HUF_getDTableDesc(DTable); |
107 | if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ |
108 | dtd.tableType = 0; |
109 | dtd.tableLog = (BYTE)tableLog; |
110 | memcpy(DTable, &dtd, sizeof(dtd)); |
111 | } |
112 | |
113 | /* Calculate starting value for each rank */ |
114 | { U32 n, = 0; |
115 | for (n=1; n<tableLog+1; n++) { |
116 | U32 const current = nextRankStart; |
117 | nextRankStart += (rankVal[n] << (n-1)); |
118 | rankVal[n] = current; |
119 | } } |
120 | |
121 | /* fill DTable */ |
122 | { U32 n; |
123 | for (n=0; n<nbSymbols; n++) { |
124 | U32 const w = huffWeight[n]; |
125 | U32 const length = (1 << w) >> 1; |
126 | U32 u; |
127 | HUF_DEltX2 D; |
128 | D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); |
129 | for (u = rankVal[w]; u < rankVal[w] + length; u++) |
130 | dt[u] = D; |
131 | rankVal[w] += length; |
132 | } } |
133 | |
134 | return iSize; |
135 | } |
136 | |
137 | size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) |
138 | { |
139 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
140 | return HUF_readDTableX2_wksp(DTable, src, srcSize, |
141 | workSpace, sizeof(workSpace)); |
142 | } |
143 | |
144 | typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ |
145 | |
146 | FORCE_INLINE_TEMPLATE BYTE |
147 | HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) |
148 | { |
149 | size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ |
150 | BYTE const c = dt[val].byte; |
151 | BIT_skipBits(Dstream, dt[val].nbBits); |
152 | return c; |
153 | } |
154 | |
155 | #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ |
156 | *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) |
157 | |
158 | #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ |
159 | if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ |
160 | HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) |
161 | |
162 | #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ |
163 | if (MEM_64bits()) \ |
164 | HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) |
165 | |
166 | HINT_INLINE size_t |
167 | HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) |
168 | { |
169 | BYTE* const pStart = p; |
170 | |
171 | /* up to 4 symbols at a time */ |
172 | while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { |
173 | HUF_DECODE_SYMBOLX2_2(p, bitDPtr); |
174 | HUF_DECODE_SYMBOLX2_1(p, bitDPtr); |
175 | HUF_DECODE_SYMBOLX2_2(p, bitDPtr); |
176 | HUF_DECODE_SYMBOLX2_0(p, bitDPtr); |
177 | } |
178 | |
179 | /* [0-3] symbols remaining */ |
180 | if (MEM_32bits()) |
181 | while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) |
182 | HUF_DECODE_SYMBOLX2_0(p, bitDPtr); |
183 | |
184 | /* no more data to retrieve from bitstream, no need to reload */ |
185 | while (p < pEnd) |
186 | HUF_DECODE_SYMBOLX2_0(p, bitDPtr); |
187 | |
188 | return pEnd-pStart; |
189 | } |
190 | |
191 | FORCE_INLINE_TEMPLATE size_t |
192 | HUF_decompress1X2_usingDTable_internal_body( |
193 | void* dst, size_t dstSize, |
194 | const void* cSrc, size_t cSrcSize, |
195 | const HUF_DTable* DTable) |
196 | { |
197 | BYTE* op = (BYTE*)dst; |
198 | BYTE* const oend = op + dstSize; |
199 | const void* dtPtr = DTable + 1; |
200 | const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; |
201 | BIT_DStream_t bitD; |
202 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
203 | U32 const dtLog = dtd.tableLog; |
204 | |
205 | CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); |
206 | |
207 | HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); |
208 | |
209 | if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); |
210 | |
211 | return dstSize; |
212 | } |
213 | |
214 | FORCE_INLINE_TEMPLATE size_t |
215 | HUF_decompress4X2_usingDTable_internal_body( |
216 | void* dst, size_t dstSize, |
217 | const void* cSrc, size_t cSrcSize, |
218 | const HUF_DTable* DTable) |
219 | { |
220 | /* Check */ |
221 | if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ |
222 | |
223 | { const BYTE* const istart = (const BYTE*) cSrc; |
224 | BYTE* const ostart = (BYTE*) dst; |
225 | BYTE* const oend = ostart + dstSize; |
226 | const void* const dtPtr = DTable + 1; |
227 | const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; |
228 | |
229 | /* Init */ |
230 | BIT_DStream_t bitD1; |
231 | BIT_DStream_t bitD2; |
232 | BIT_DStream_t bitD3; |
233 | BIT_DStream_t bitD4; |
234 | size_t const length1 = MEM_readLE16(istart); |
235 | size_t const length2 = MEM_readLE16(istart+2); |
236 | size_t const length3 = MEM_readLE16(istart+4); |
237 | size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); |
238 | const BYTE* const istart1 = istart + 6; /* jumpTable */ |
239 | const BYTE* const istart2 = istart1 + length1; |
240 | const BYTE* const istart3 = istart2 + length2; |
241 | const BYTE* const istart4 = istart3 + length3; |
242 | const size_t segmentSize = (dstSize+3) / 4; |
243 | BYTE* const opStart2 = ostart + segmentSize; |
244 | BYTE* const opStart3 = opStart2 + segmentSize; |
245 | BYTE* const opStart4 = opStart3 + segmentSize; |
246 | BYTE* op1 = ostart; |
247 | BYTE* op2 = opStart2; |
248 | BYTE* op3 = opStart3; |
249 | BYTE* op4 = opStart4; |
250 | U32 endSignal = BIT_DStream_unfinished; |
251 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
252 | U32 const dtLog = dtd.tableLog; |
253 | |
254 | if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ |
255 | CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); |
256 | CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); |
257 | CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); |
258 | CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); |
259 | |
260 | /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ |
261 | endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); |
262 | while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { |
263 | HUF_DECODE_SYMBOLX2_2(op1, &bitD1); |
264 | HUF_DECODE_SYMBOLX2_2(op2, &bitD2); |
265 | HUF_DECODE_SYMBOLX2_2(op3, &bitD3); |
266 | HUF_DECODE_SYMBOLX2_2(op4, &bitD4); |
267 | HUF_DECODE_SYMBOLX2_1(op1, &bitD1); |
268 | HUF_DECODE_SYMBOLX2_1(op2, &bitD2); |
269 | HUF_DECODE_SYMBOLX2_1(op3, &bitD3); |
270 | HUF_DECODE_SYMBOLX2_1(op4, &bitD4); |
271 | HUF_DECODE_SYMBOLX2_2(op1, &bitD1); |
272 | HUF_DECODE_SYMBOLX2_2(op2, &bitD2); |
273 | HUF_DECODE_SYMBOLX2_2(op3, &bitD3); |
274 | HUF_DECODE_SYMBOLX2_2(op4, &bitD4); |
275 | HUF_DECODE_SYMBOLX2_0(op1, &bitD1); |
276 | HUF_DECODE_SYMBOLX2_0(op2, &bitD2); |
277 | HUF_DECODE_SYMBOLX2_0(op3, &bitD3); |
278 | HUF_DECODE_SYMBOLX2_0(op4, &bitD4); |
279 | BIT_reloadDStream(&bitD1); |
280 | BIT_reloadDStream(&bitD2); |
281 | BIT_reloadDStream(&bitD3); |
282 | BIT_reloadDStream(&bitD4); |
283 | } |
284 | |
285 | /* check corruption */ |
286 | /* note : should not be necessary : op# advance in lock step, and we control op4. |
287 | * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ |
288 | if (op1 > opStart2) return ERROR(corruption_detected); |
289 | if (op2 > opStart3) return ERROR(corruption_detected); |
290 | if (op3 > opStart4) return ERROR(corruption_detected); |
291 | /* note : op4 supposed already verified within main loop */ |
292 | |
293 | /* finish bitStreams one by one */ |
294 | HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); |
295 | HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); |
296 | HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); |
297 | HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); |
298 | |
299 | /* check */ |
300 | { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); |
301 | if (!endCheck) return ERROR(corruption_detected); } |
302 | |
303 | /* decoded size */ |
304 | return dstSize; |
305 | } |
306 | } |
307 | |
308 | |
309 | FORCE_INLINE_TEMPLATE U32 |
310 | HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) |
311 | { |
312 | size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ |
313 | memcpy(op, dt+val, 2); |
314 | BIT_skipBits(DStream, dt[val].nbBits); |
315 | return dt[val].length; |
316 | } |
317 | |
318 | FORCE_INLINE_TEMPLATE U32 |
319 | HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) |
320 | { |
321 | size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ |
322 | memcpy(op, dt+val, 1); |
323 | if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); |
324 | else { |
325 | if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { |
326 | BIT_skipBits(DStream, dt[val].nbBits); |
327 | if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) |
328 | /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ |
329 | DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); |
330 | } } |
331 | return 1; |
332 | } |
333 | |
334 | #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ |
335 | ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) |
336 | |
337 | #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ |
338 | if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ |
339 | ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) |
340 | |
341 | #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ |
342 | if (MEM_64bits()) \ |
343 | ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) |
344 | |
345 | HINT_INLINE size_t |
346 | HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, |
347 | const HUF_DEltX4* const dt, const U32 dtLog) |
348 | { |
349 | BYTE* const pStart = p; |
350 | |
351 | /* up to 8 symbols at a time */ |
352 | while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { |
353 | HUF_DECODE_SYMBOLX4_2(p, bitDPtr); |
354 | HUF_DECODE_SYMBOLX4_1(p, bitDPtr); |
355 | HUF_DECODE_SYMBOLX4_2(p, bitDPtr); |
356 | HUF_DECODE_SYMBOLX4_0(p, bitDPtr); |
357 | } |
358 | |
359 | /* closer to end : up to 2 symbols at a time */ |
360 | while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) |
361 | HUF_DECODE_SYMBOLX4_0(p, bitDPtr); |
362 | |
363 | while (p <= pEnd-2) |
364 | HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ |
365 | |
366 | if (p < pEnd) |
367 | p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); |
368 | |
369 | return p-pStart; |
370 | } |
371 | |
372 | FORCE_INLINE_TEMPLATE size_t |
373 | HUF_decompress1X4_usingDTable_internal_body( |
374 | void* dst, size_t dstSize, |
375 | const void* cSrc, size_t cSrcSize, |
376 | const HUF_DTable* DTable) |
377 | { |
378 | BIT_DStream_t bitD; |
379 | |
380 | /* Init */ |
381 | CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); |
382 | |
383 | /* decode */ |
384 | { BYTE* const ostart = (BYTE*) dst; |
385 | BYTE* const oend = ostart + dstSize; |
386 | const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ |
387 | const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; |
388 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
389 | HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); |
390 | } |
391 | |
392 | /* check */ |
393 | if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); |
394 | |
395 | /* decoded size */ |
396 | return dstSize; |
397 | } |
398 | |
399 | |
400 | FORCE_INLINE_TEMPLATE size_t |
401 | HUF_decompress4X4_usingDTable_internal_body( |
402 | void* dst, size_t dstSize, |
403 | const void* cSrc, size_t cSrcSize, |
404 | const HUF_DTable* DTable) |
405 | { |
406 | if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ |
407 | |
408 | { const BYTE* const istart = (const BYTE*) cSrc; |
409 | BYTE* const ostart = (BYTE*) dst; |
410 | BYTE* const oend = ostart + dstSize; |
411 | const void* const dtPtr = DTable+1; |
412 | const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; |
413 | |
414 | /* Init */ |
415 | BIT_DStream_t bitD1; |
416 | BIT_DStream_t bitD2; |
417 | BIT_DStream_t bitD3; |
418 | BIT_DStream_t bitD4; |
419 | size_t const length1 = MEM_readLE16(istart); |
420 | size_t const length2 = MEM_readLE16(istart+2); |
421 | size_t const length3 = MEM_readLE16(istart+4); |
422 | size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); |
423 | const BYTE* const istart1 = istart + 6; /* jumpTable */ |
424 | const BYTE* const istart2 = istart1 + length1; |
425 | const BYTE* const istart3 = istart2 + length2; |
426 | const BYTE* const istart4 = istart3 + length3; |
427 | size_t const segmentSize = (dstSize+3) / 4; |
428 | BYTE* const opStart2 = ostart + segmentSize; |
429 | BYTE* const opStart3 = opStart2 + segmentSize; |
430 | BYTE* const opStart4 = opStart3 + segmentSize; |
431 | BYTE* op1 = ostart; |
432 | BYTE* op2 = opStart2; |
433 | BYTE* op3 = opStart3; |
434 | BYTE* op4 = opStart4; |
435 | U32 endSignal; |
436 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
437 | U32 const dtLog = dtd.tableLog; |
438 | |
439 | if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ |
440 | CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); |
441 | CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); |
442 | CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); |
443 | CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); |
444 | |
445 | /* 16-32 symbols per loop (4-8 symbols per stream) */ |
446 | endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); |
447 | for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { |
448 | HUF_DECODE_SYMBOLX4_2(op1, &bitD1); |
449 | HUF_DECODE_SYMBOLX4_2(op2, &bitD2); |
450 | HUF_DECODE_SYMBOLX4_2(op3, &bitD3); |
451 | HUF_DECODE_SYMBOLX4_2(op4, &bitD4); |
452 | HUF_DECODE_SYMBOLX4_1(op1, &bitD1); |
453 | HUF_DECODE_SYMBOLX4_1(op2, &bitD2); |
454 | HUF_DECODE_SYMBOLX4_1(op3, &bitD3); |
455 | HUF_DECODE_SYMBOLX4_1(op4, &bitD4); |
456 | HUF_DECODE_SYMBOLX4_2(op1, &bitD1); |
457 | HUF_DECODE_SYMBOLX4_2(op2, &bitD2); |
458 | HUF_DECODE_SYMBOLX4_2(op3, &bitD3); |
459 | HUF_DECODE_SYMBOLX4_2(op4, &bitD4); |
460 | HUF_DECODE_SYMBOLX4_0(op1, &bitD1); |
461 | HUF_DECODE_SYMBOLX4_0(op2, &bitD2); |
462 | HUF_DECODE_SYMBOLX4_0(op3, &bitD3); |
463 | HUF_DECODE_SYMBOLX4_0(op4, &bitD4); |
464 | |
465 | endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); |
466 | } |
467 | |
468 | /* check corruption */ |
469 | if (op1 > opStart2) return ERROR(corruption_detected); |
470 | if (op2 > opStart3) return ERROR(corruption_detected); |
471 | if (op3 > opStart4) return ERROR(corruption_detected); |
472 | /* note : op4 already verified within main loop */ |
473 | |
474 | /* finish bitStreams one by one */ |
475 | HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); |
476 | HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); |
477 | HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); |
478 | HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); |
479 | |
480 | /* check */ |
481 | { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); |
482 | if (!endCheck) return ERROR(corruption_detected); } |
483 | |
484 | /* decoded size */ |
485 | return dstSize; |
486 | } |
487 | } |
488 | |
489 | |
490 | typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, |
491 | const void *cSrc, |
492 | size_t cSrcSize, |
493 | const HUF_DTable *DTable); |
494 | #if DYNAMIC_BMI2 |
495 | |
496 | #define X(fn) \ |
497 | \ |
498 | static size_t fn##_default( \ |
499 | void* dst, size_t dstSize, \ |
500 | const void* cSrc, size_t cSrcSize, \ |
501 | const HUF_DTable* DTable) \ |
502 | { \ |
503 | return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ |
504 | } \ |
505 | \ |
506 | static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ |
507 | void* dst, size_t dstSize, \ |
508 | const void* cSrc, size_t cSrcSize, \ |
509 | const HUF_DTable* DTable) \ |
510 | { \ |
511 | return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ |
512 | } \ |
513 | \ |
514 | static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ |
515 | size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ |
516 | { \ |
517 | if (bmi2) { \ |
518 | return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ |
519 | } \ |
520 | return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ |
521 | } |
522 | |
523 | #else |
524 | |
525 | #define X(fn) \ |
526 | static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ |
527 | size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ |
528 | { \ |
529 | (void)bmi2; \ |
530 | return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ |
531 | } |
532 | |
533 | #endif |
534 | |
535 | X(HUF_decompress1X2_usingDTable_internal) |
536 | X(HUF_decompress4X2_usingDTable_internal) |
537 | X(HUF_decompress1X4_usingDTable_internal) |
538 | X(HUF_decompress4X4_usingDTable_internal) |
539 | |
540 | #undef X |
541 | |
542 | |
543 | size_t HUF_decompress1X2_usingDTable( |
544 | void* dst, size_t dstSize, |
545 | const void* cSrc, size_t cSrcSize, |
546 | const HUF_DTable* DTable) |
547 | { |
548 | DTableDesc dtd = HUF_getDTableDesc(DTable); |
549 | if (dtd.tableType != 0) return ERROR(GENERIC); |
550 | return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
551 | } |
552 | |
553 | size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, |
554 | const void* cSrc, size_t cSrcSize, |
555 | void* workSpace, size_t wkspSize) |
556 | { |
557 | const BYTE* ip = (const BYTE*) cSrc; |
558 | |
559 | size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); |
560 | if (HUF_isError(hSize)) return hSize; |
561 | if (hSize >= cSrcSize) return ERROR(srcSize_wrong); |
562 | ip += hSize; cSrcSize -= hSize; |
563 | |
564 | return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); |
565 | } |
566 | |
567 | |
568 | size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, |
569 | const void* cSrc, size_t cSrcSize) |
570 | { |
571 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
572 | return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, |
573 | workSpace, sizeof(workSpace)); |
574 | } |
575 | |
576 | size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
577 | { |
578 | HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); |
579 | return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); |
580 | } |
581 | |
582 | size_t HUF_decompress4X2_usingDTable( |
583 | void* dst, size_t dstSize, |
584 | const void* cSrc, size_t cSrcSize, |
585 | const HUF_DTable* DTable) |
586 | { |
587 | DTableDesc dtd = HUF_getDTableDesc(DTable); |
588 | if (dtd.tableType != 0) return ERROR(GENERIC); |
589 | return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
590 | } |
591 | |
592 | static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, |
593 | const void* cSrc, size_t cSrcSize, |
594 | void* workSpace, size_t wkspSize, int bmi2) |
595 | { |
596 | const BYTE* ip = (const BYTE*) cSrc; |
597 | |
598 | size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize, |
599 | workSpace, wkspSize); |
600 | if (HUF_isError(hSize)) return hSize; |
601 | if (hSize >= cSrcSize) return ERROR(srcSize_wrong); |
602 | ip += hSize; cSrcSize -= hSize; |
603 | |
604 | return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); |
605 | } |
606 | |
607 | size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, |
608 | const void* cSrc, size_t cSrcSize, |
609 | void* workSpace, size_t wkspSize) |
610 | { |
611 | return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); |
612 | } |
613 | |
614 | |
615 | size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
616 | { |
617 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
618 | return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, |
619 | workSpace, sizeof(workSpace)); |
620 | } |
621 | size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
622 | { |
623 | HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); |
624 | return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); |
625 | } |
626 | |
627 | |
628 | /* *************************/ |
629 | /* double-symbols decoding */ |
630 | /* *************************/ |
631 | typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; |
632 | |
633 | /* HUF_fillDTableX4Level2() : |
634 | * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ |
635 | static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, |
636 | const U32* rankValOrigin, const int minWeight, |
637 | const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, |
638 | U32 nbBitsBaseline, U16 baseSeq) |
639 | { |
640 | HUF_DEltX4 DElt; |
641 | U32 rankVal[HUF_TABLELOG_MAX + 1]; |
642 | |
643 | /* get pre-calculated rankVal */ |
644 | memcpy(rankVal, rankValOrigin, sizeof(rankVal)); |
645 | |
646 | /* fill skipped values */ |
647 | if (minWeight>1) { |
648 | U32 i, skipSize = rankVal[minWeight]; |
649 | MEM_writeLE16(&(DElt.sequence), baseSeq); |
650 | DElt.nbBits = (BYTE)(consumed); |
651 | DElt.length = 1; |
652 | for (i = 0; i < skipSize; i++) |
653 | DTable[i] = DElt; |
654 | } |
655 | |
656 | /* fill DTable */ |
657 | { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */ |
658 | const U32 symbol = sortedSymbols[s].symbol; |
659 | const U32 weight = sortedSymbols[s].weight; |
660 | const U32 nbBits = nbBitsBaseline - weight; |
661 | const U32 length = 1 << (sizeLog-nbBits); |
662 | const U32 start = rankVal[weight]; |
663 | U32 i = start; |
664 | const U32 end = start + length; |
665 | |
666 | MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); |
667 | DElt.nbBits = (BYTE)(nbBits + consumed); |
668 | DElt.length = 2; |
669 | do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */ |
670 | |
671 | rankVal[weight] += length; |
672 | } } |
673 | } |
674 | |
675 | typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; |
676 | typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; |
677 | |
678 | static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, |
679 | const sortedSymbol_t* sortedList, const U32 sortedListSize, |
680 | const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, |
681 | const U32 nbBitsBaseline) |
682 | { |
683 | U32 rankVal[HUF_TABLELOG_MAX + 1]; |
684 | const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ |
685 | const U32 minBits = nbBitsBaseline - maxWeight; |
686 | U32 s; |
687 | |
688 | memcpy(rankVal, rankValOrigin, sizeof(rankVal)); |
689 | |
690 | /* fill DTable */ |
691 | for (s=0; s<sortedListSize; s++) { |
692 | const U16 symbol = sortedList[s].symbol; |
693 | const U32 weight = sortedList[s].weight; |
694 | const U32 nbBits = nbBitsBaseline - weight; |
695 | const U32 start = rankVal[weight]; |
696 | const U32 length = 1 << (targetLog-nbBits); |
697 | |
698 | if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */ |
699 | U32 sortedRank; |
700 | int minWeight = nbBits + scaleLog; |
701 | if (minWeight < 1) minWeight = 1; |
702 | sortedRank = rankStart[minWeight]; |
703 | HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, |
704 | rankValOrigin[nbBits], minWeight, |
705 | sortedList+sortedRank, sortedListSize-sortedRank, |
706 | nbBitsBaseline, symbol); |
707 | } else { |
708 | HUF_DEltX4 DElt; |
709 | MEM_writeLE16(&(DElt.sequence), symbol); |
710 | DElt.nbBits = (BYTE)(nbBits); |
711 | DElt.length = 1; |
712 | { U32 const end = start + length; |
713 | U32 u; |
714 | for (u = start; u < end; u++) DTable[u] = DElt; |
715 | } } |
716 | rankVal[weight] += length; |
717 | } |
718 | } |
719 | |
720 | size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src, |
721 | size_t srcSize, void* workSpace, |
722 | size_t wkspSize) |
723 | { |
724 | U32 tableLog, maxW, sizeOfSort, nbSymbols; |
725 | DTableDesc dtd = HUF_getDTableDesc(DTable); |
726 | U32 const maxTableLog = dtd.maxTableLog; |
727 | size_t iSize; |
728 | void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ |
729 | HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; |
730 | U32 *rankStart; |
731 | |
732 | rankValCol_t* rankVal; |
733 | U32* rankStats; |
734 | U32* rankStart0; |
735 | sortedSymbol_t* sortedSymbol; |
736 | BYTE* weightList; |
737 | size_t spaceUsed32 = 0; |
738 | |
739 | rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); |
740 | spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; |
741 | rankStats = (U32 *)workSpace + spaceUsed32; |
742 | spaceUsed32 += HUF_TABLELOG_MAX + 1; |
743 | rankStart0 = (U32 *)workSpace + spaceUsed32; |
744 | spaceUsed32 += HUF_TABLELOG_MAX + 2; |
745 | sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); |
746 | spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; |
747 | weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); |
748 | spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; |
749 | |
750 | if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); |
751 | |
752 | rankStart = rankStart0 + 1; |
753 | memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); |
754 | |
755 | HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ |
756 | if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
757 | /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ |
758 | |
759 | iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); |
760 | if (HUF_isError(iSize)) return iSize; |
761 | |
762 | /* check result */ |
763 | if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ |
764 | |
765 | /* find maxWeight */ |
766 | for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ |
767 | |
768 | /* Get start index of each weight */ |
769 | { U32 w, = 0; |
770 | for (w=1; w<maxW+1; w++) { |
771 | U32 current = nextRankStart; |
772 | nextRankStart += rankStats[w]; |
773 | rankStart[w] = current; |
774 | } |
775 | rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ |
776 | sizeOfSort = nextRankStart; |
777 | } |
778 | |
779 | /* sort symbols by weight */ |
780 | { U32 s; |
781 | for (s=0; s<nbSymbols; s++) { |
782 | U32 const w = weightList[s]; |
783 | U32 const r = rankStart[w]++; |
784 | sortedSymbol[r].symbol = (BYTE)s; |
785 | sortedSymbol[r].weight = (BYTE)w; |
786 | } |
787 | rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ |
788 | } |
789 | |
790 | /* Build rankVal */ |
791 | { U32* const rankVal0 = rankVal[0]; |
792 | { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ |
793 | U32 = 0; |
794 | U32 w; |
795 | for (w=1; w<maxW+1; w++) { |
796 | U32 current = nextRankVal; |
797 | nextRankVal += rankStats[w] << (w+rescale); |
798 | rankVal0[w] = current; |
799 | } } |
800 | { U32 const minBits = tableLog+1 - maxW; |
801 | U32 consumed; |
802 | for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { |
803 | U32* const rankValPtr = rankVal[consumed]; |
804 | U32 w; |
805 | for (w = 1; w < maxW+1; w++) { |
806 | rankValPtr[w] = rankVal0[w] >> consumed; |
807 | } } } } |
808 | |
809 | HUF_fillDTableX4(dt, maxTableLog, |
810 | sortedSymbol, sizeOfSort, |
811 | rankStart0, rankVal, maxW, |
812 | tableLog+1); |
813 | |
814 | dtd.tableLog = (BYTE)maxTableLog; |
815 | dtd.tableType = 1; |
816 | memcpy(DTable, &dtd, sizeof(dtd)); |
817 | return iSize; |
818 | } |
819 | |
820 | size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize) |
821 | { |
822 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
823 | return HUF_readDTableX4_wksp(DTable, src, srcSize, |
824 | workSpace, sizeof(workSpace)); |
825 | } |
826 | |
827 | size_t HUF_decompress1X4_usingDTable( |
828 | void* dst, size_t dstSize, |
829 | const void* cSrc, size_t cSrcSize, |
830 | const HUF_DTable* DTable) |
831 | { |
832 | DTableDesc dtd = HUF_getDTableDesc(DTable); |
833 | if (dtd.tableType != 1) return ERROR(GENERIC); |
834 | return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
835 | } |
836 | |
837 | size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, |
838 | const void* cSrc, size_t cSrcSize, |
839 | void* workSpace, size_t wkspSize) |
840 | { |
841 | const BYTE* ip = (const BYTE*) cSrc; |
842 | |
843 | size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, |
844 | workSpace, wkspSize); |
845 | if (HUF_isError(hSize)) return hSize; |
846 | if (hSize >= cSrcSize) return ERROR(srcSize_wrong); |
847 | ip += hSize; cSrcSize -= hSize; |
848 | |
849 | return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); |
850 | } |
851 | |
852 | |
853 | size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, |
854 | const void* cSrc, size_t cSrcSize) |
855 | { |
856 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
857 | return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, |
858 | workSpace, sizeof(workSpace)); |
859 | } |
860 | |
861 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
862 | { |
863 | HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); |
864 | return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); |
865 | } |
866 | |
867 | size_t HUF_decompress4X4_usingDTable( |
868 | void* dst, size_t dstSize, |
869 | const void* cSrc, size_t cSrcSize, |
870 | const HUF_DTable* DTable) |
871 | { |
872 | DTableDesc dtd = HUF_getDTableDesc(DTable); |
873 | if (dtd.tableType != 1) return ERROR(GENERIC); |
874 | return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
875 | } |
876 | |
877 | static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, |
878 | const void* cSrc, size_t cSrcSize, |
879 | void* workSpace, size_t wkspSize, int bmi2) |
880 | { |
881 | const BYTE* ip = (const BYTE*) cSrc; |
882 | |
883 | size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, |
884 | workSpace, wkspSize); |
885 | if (HUF_isError(hSize)) return hSize; |
886 | if (hSize >= cSrcSize) return ERROR(srcSize_wrong); |
887 | ip += hSize; cSrcSize -= hSize; |
888 | |
889 | return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); |
890 | } |
891 | |
892 | size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, |
893 | const void* cSrc, size_t cSrcSize, |
894 | void* workSpace, size_t wkspSize) |
895 | { |
896 | return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); |
897 | } |
898 | |
899 | |
900 | size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, |
901 | const void* cSrc, size_t cSrcSize) |
902 | { |
903 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
904 | return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, |
905 | workSpace, sizeof(workSpace)); |
906 | } |
907 | |
908 | size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
909 | { |
910 | HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); |
911 | return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); |
912 | } |
913 | |
914 | |
915 | /* ********************************/ |
916 | /* Generic decompression selector */ |
917 | /* ********************************/ |
918 | |
919 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, |
920 | const void* cSrc, size_t cSrcSize, |
921 | const HUF_DTable* DTable) |
922 | { |
923 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
924 | return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : |
925 | HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
926 | } |
927 | |
928 | size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, |
929 | const void* cSrc, size_t cSrcSize, |
930 | const HUF_DTable* DTable) |
931 | { |
932 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
933 | return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : |
934 | HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); |
935 | } |
936 | |
937 | |
938 | typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; |
939 | static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = |
940 | { |
941 | /* single, double, quad */ |
942 | {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ |
943 | {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ |
944 | {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ |
945 | {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ |
946 | {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ |
947 | {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ |
948 | {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ |
949 | {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ |
950 | {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ |
951 | {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ |
952 | {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ |
953 | {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ |
954 | {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ |
955 | {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ |
956 | {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ |
957 | {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ |
958 | }; |
959 | |
960 | /** HUF_selectDecoder() : |
961 | * Tells which decoder is likely to decode faster, |
962 | * based on a set of pre-computed metrics. |
963 | * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . |
964 | * Assumption : 0 < dstSize <= 128 KB */ |
965 | U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) |
966 | { |
967 | assert(dstSize > 0); |
968 | assert(dstSize <= 128 KB); |
969 | /* decoder timing evaluation */ |
970 | { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ |
971 | U32 const D256 = (U32)(dstSize >> 8); |
972 | U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); |
973 | U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); |
974 | DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ |
975 | return DTime1 < DTime0; |
976 | } } |
977 | |
978 | |
979 | typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); |
980 | |
981 | size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
982 | { |
983 | static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; |
984 | |
985 | /* validation checks */ |
986 | if (dstSize == 0) return ERROR(dstSize_tooSmall); |
987 | if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ |
988 | if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ |
989 | if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ |
990 | |
991 | { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); |
992 | return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); |
993 | } |
994 | } |
995 | |
996 | size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
997 | { |
998 | /* validation checks */ |
999 | if (dstSize == 0) return ERROR(dstSize_tooSmall); |
1000 | if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ |
1001 | if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ |
1002 | if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ |
1003 | |
1004 | { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); |
1005 | return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : |
1006 | HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; |
1007 | } |
1008 | } |
1009 | |
1010 | size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) |
1011 | { |
1012 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
1013 | return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, |
1014 | workSpace, sizeof(workSpace)); |
1015 | } |
1016 | |
1017 | |
1018 | size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, |
1019 | size_t dstSize, const void* cSrc, |
1020 | size_t cSrcSize, void* workSpace, |
1021 | size_t wkspSize) |
1022 | { |
1023 | /* validation checks */ |
1024 | if (dstSize == 0) return ERROR(dstSize_tooSmall); |
1025 | if (cSrcSize == 0) return ERROR(corruption_detected); |
1026 | |
1027 | { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); |
1028 | return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): |
1029 | HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); |
1030 | } |
1031 | } |
1032 | |
1033 | size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, |
1034 | const void* cSrc, size_t cSrcSize, |
1035 | void* workSpace, size_t wkspSize) |
1036 | { |
1037 | /* validation checks */ |
1038 | if (dstSize == 0) return ERROR(dstSize_tooSmall); |
1039 | if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ |
1040 | if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ |
1041 | if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ |
1042 | |
1043 | { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); |
1044 | return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, |
1045 | cSrcSize, workSpace, wkspSize): |
1046 | HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, |
1047 | cSrcSize, workSpace, wkspSize); |
1048 | } |
1049 | } |
1050 | |
1051 | size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, |
1052 | const void* cSrc, size_t cSrcSize) |
1053 | { |
1054 | U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; |
1055 | return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, |
1056 | workSpace, sizeof(workSpace)); |
1057 | } |
1058 | |
1059 | |
1060 | size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) |
1061 | { |
1062 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
1063 | return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : |
1064 | HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); |
1065 | } |
1066 | |
1067 | size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) |
1068 | { |
1069 | const BYTE* ip = (const BYTE*) cSrc; |
1070 | |
1071 | size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); |
1072 | if (HUF_isError(hSize)) return hSize; |
1073 | if (hSize >= cSrcSize) return ERROR(srcSize_wrong); |
1074 | ip += hSize; cSrcSize -= hSize; |
1075 | |
1076 | return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); |
1077 | } |
1078 | |
1079 | size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) |
1080 | { |
1081 | DTableDesc const dtd = HUF_getDTableDesc(DTable); |
1082 | return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : |
1083 | HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); |
1084 | } |
1085 | |
1086 | size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) |
1087 | { |
1088 | /* validation checks */ |
1089 | if (dstSize == 0) return ERROR(dstSize_tooSmall); |
1090 | if (cSrcSize == 0) return ERROR(corruption_detected); |
1091 | |
1092 | { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); |
1093 | return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : |
1094 | HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); |
1095 | } |
1096 | } |
1097 | |