1 | /* ****************************************************************** |
2 | * bitstream |
3 | * Part of FSE library |
4 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
5 | * |
6 | * You can contact the author at : |
7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy |
8 | * |
9 | * This source code is licensed under both the BSD-style license (found in the |
10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
11 | * in the COPYING file in the root directory of this source tree). |
12 | * You may select, at your option, one of the above-listed licenses. |
13 | ****************************************************************** */ |
14 | #ifndef BITSTREAM_H_MODULE |
15 | #define BITSTREAM_H_MODULE |
16 | |
17 | #if defined (__cplusplus) |
18 | extern "C" { |
19 | #endif |
/*
 * This API consists of small unitary functions, which must be inlined for best performance.
 * Since link-time-optimization is not available for all compilers,
 * these functions are defined in a header (.h) file, to be included where they are needed.
 */
25 | |
26 | /*-**************************************** |
27 | * Dependencies |
28 | ******************************************/ |
29 | #include "mem.h" /* unaligned access routines */ |
30 | #include "compiler.h" /* UNLIKELY() */ |
31 | #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ |
32 | #include "error_private.h" /* error codes and messages */ |
33 | #include "bits.h" /* ZSTD_highbit32 */ |
34 | |
35 | |
36 | /*========================================= |
37 | * Target specific |
38 | =========================================*/ |
39 | #ifndef ZSTD_NO_INTRINSICS |
40 | # if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__) |
41 | # include <immintrin.h> /* support for bextr (experimental)/bzhi */ |
42 | # elif defined(__ICCARM__) |
43 | # include <intrinsics.h> |
44 | # endif |
45 | #endif |
46 | |
/* These constants equal 8*sizeof(size_t) - 7 for a 32-bit and a 64-bit size_t respectively,
 * which is the number of bits BIT_reloadDStream() is documented to provide
 * when it returns BIT_DStream_unfinished. */
#define STREAM_ACCUMULATOR_MIN_32 25
#define STREAM_ACCUMULATOR_MIN_64 57
/* Picks one of the two values above, depending on the result of MEM_32bits(). */
#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
50 | |
51 | |
52 | /*-****************************************** |
53 | * bitStream encoding API (write forward) |
54 | ********************************************/ |
55 | /* bitStream can mix input from multiple sources. |
56 | * A critical property of these streams is that they encode and decode in **reverse** direction. |
57 | * So the first bit sequence you add will be the last to be read, like a LIFO stack. |
58 | */ |
/* Encoding state.
 * Bits are accumulated into `bitContainer` by BIT_addBits() / BIT_addBitsFast(),
 * then stored into the destination buffer by BIT_flushBits() / BIT_flushBitsFast(). */
typedef struct {
    size_t bitContainer;   /* local register receiving bits before they are flushed to memory */
    unsigned bitPos;       /* nb of bits currently stored in bitContainer */
    char* startPtr;        /* beginning of destination buffer */
    char* ptr;             /* position of the next write into destination buffer */
    char* endPtr;          /* startPtr + dstCapacity - sizeof(bitContainer) ; BIT_flushBits() never moves ptr beyond it */
} BIT_CStream_t;
66 | |
/* Encoding entry points; their definitions are located further down in this file. */
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);   /* @return 0, or an error code */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);   /* adds the nbBits lowest bits of value to the local register */
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);   /* stores the local register into the buffer; keeps at most 7 pending bits */
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);   /* adds the end mark; @return size in bytes, or 0 if it did not fit */
71 | |
/* Start with initCStream, providing the size of buffer to write into.
 * bitStream will never write outside of this buffer.
 * `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code.
 *
 * bits are first added to a local register.
 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
 * Writing data into memory is an explicit operation, performed by the flushBits function.
 * Hence keep track of how many bits are potentially stored into local register to avoid register overflow.
 * After a flushBits, a maximum of 7 bits might still be stored into local register.
 *
 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
 *
 * Last operation is to close the bitStream.
 * The function returns the final size of CStream in bytes.
 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
 */
88 | |
89 | |
90 | /*-******************************************** |
91 | * bitStream decoding API (read backward) |
92 | **********************************************/ |
/* Decoding state.
 * `bitContainer` holds a chunk of the bitstream loaded from `ptr`;
 * the read functions extract bit fields from it and advance `bitsConsumed`. */
typedef struct {
    size_t bitContainer;      /* local register, (re)loaded from memory at position ptr */
    unsigned bitsConsumed;    /* nb of bits of bitContainer already consumed */
    const char* ptr;          /* position in source buffer from which bitContainer was last loaded */
    const char* start;        /* beginning of source buffer */
    const char* limitPtr;     /* start + sizeof(bitContainer) ; BIT_reloadDStreamFast() refuses to reload when ptr is below it */
} BIT_DStream_t;
100 | |
typedef enum { BIT_DStream_unfinished = 0,    /* register reloaded, more data remains in the source buffer */
               BIT_DStream_endOfBuffer = 1,   /* ptr has reached start of buffer, but some bits remain unconsumed */
               BIT_DStream_completed = 2,     /* ptr is at start of buffer and all bits of the register are consumed */
               BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() ; overflow : more bits consumed than the register holds */
               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
106 | |
/* Decoding entry points; their definitions are located further down in this file. */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);   /* @return srcSize, or an error code */
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);   /* extracts and consumes the next nbBits bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);   /* refills the local register from the source buffer */
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);   /* @return 1 when all bits have been consumed exactly */
111 | |
112 | |
/* Start by invoking BIT_initDStream().
 * A chunk of the bitStream is then stored into a local register.
 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
 * You can then retrieve bitFields stored into the local register, **in reverse order**.
 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
 * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
 * Otherwise, it can be less than that, so proceed accordingly.
 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
 */
122 | |
123 | |
124 | /*-**************************************** |
125 | * unsafe API |
126 | ******************************************/ |
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
/* faster than BIT_addBits(), but works only if value is "clean", meaning all high bits above nbBits are 0
 * (value is inserted without being masked) */

MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
/* unsafe version of BIT_flushBits(); does not check buffer overflow (ptr is not clamped to endPtr) */

MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster than BIT_readBits(), but works only if nbBits >= 1 */
135 | |
136 | /*===== Local Constants =====*/ |
/* BIT_mask[n] has its n lowest bits set, and all other bits cleared, for n in [0, 31].
 * It is used to keep only the n lowest bits of a value. */
static const unsigned BIT_mask[] = {
    0,          1,         3,         7,         0xF,       0x1F,
    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
/* nb of entries in BIT_mask[] ; any index used with the table must be strictly below this value */
#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
145 | |
146 | /*-************************************************************** |
147 | * bitStream encoding |
148 | ****************************************************************/ |
149 | /*! BIT_initCStream() : |
150 | * `dstCapacity` must be > sizeof(size_t) |
151 | * @return : 0 if success, |
152 | * otherwise an error code (can be tested using ERR_isError()) */ |
153 | MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, |
154 | void* startPtr, size_t dstCapacity) |
155 | { |
156 | bitC->bitContainer = 0; |
157 | bitC->bitPos = 0; |
158 | bitC->startPtr = (char*)startPtr; |
159 | bitC->ptr = bitC->startPtr; |
160 | bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); |
161 | if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); |
162 | return 0; |
163 | } |
164 | |
165 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |
166 | { |
167 | #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) |
168 | return _bzhi_u64(bitContainer, nbBits); |
169 | #else |
170 | assert(nbBits < BIT_MASK_SIZE); |
171 | return bitContainer & BIT_mask[nbBits]; |
172 | #endif |
173 | } |
174 | |
175 | /*! BIT_addBits() : |
176 | * can add up to 31 bits into `bitC`. |
177 | * Note : does not check for register overflow ! */ |
178 | MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, |
179 | size_t value, unsigned nbBits) |
180 | { |
181 | DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); |
182 | assert(nbBits < BIT_MASK_SIZE); |
183 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
184 | bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; |
185 | bitC->bitPos += nbBits; |
186 | } |
187 | |
188 | /*! BIT_addBitsFast() : |
189 | * works only if `value` is _clean_, |
190 | * meaning all high bits above nbBits are 0 */ |
191 | MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, |
192 | size_t value, unsigned nbBits) |
193 | { |
194 | assert((value>>nbBits) == 0); |
195 | assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
196 | bitC->bitContainer |= value << bitC->bitPos; |
197 | bitC->bitPos += nbBits; |
198 | } |
199 | |
200 | /*! BIT_flushBitsFast() : |
201 | * assumption : bitContainer has not overflowed |
202 | * unsafe version; does not check buffer overflow */ |
203 | MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) |
204 | { |
205 | size_t const nbBytes = bitC->bitPos >> 3; |
206 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
207 | assert(bitC->ptr <= bitC->endPtr); |
208 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
209 | bitC->ptr += nbBytes; |
210 | bitC->bitPos &= 7; |
211 | bitC->bitContainer >>= nbBytes*8; |
212 | } |
213 | |
214 | /*! BIT_flushBits() : |
215 | * assumption : bitContainer has not overflowed |
216 | * safe version; check for buffer overflow, and prevents it. |
217 | * note : does not signal buffer overflow. |
218 | * overflow will be revealed later on using BIT_closeCStream() */ |
219 | MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) |
220 | { |
221 | size_t const nbBytes = bitC->bitPos >> 3; |
222 | assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); |
223 | assert(bitC->ptr <= bitC->endPtr); |
224 | MEM_writeLEST(bitC->ptr, bitC->bitContainer); |
225 | bitC->ptr += nbBytes; |
226 | if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; |
227 | bitC->bitPos &= 7; |
228 | bitC->bitContainer >>= nbBytes*8; |
229 | } |
230 | |
231 | /*! BIT_closeCStream() : |
232 | * @return : size of CStream, in bytes, |
233 | * or 0 if it could not fit into dstBuffer */ |
234 | MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) |
235 | { |
236 | BIT_addBitsFast(bitC, 1, 1); /* endMark */ |
237 | BIT_flushBits(bitC); |
238 | if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ |
239 | return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); |
240 | } |
241 | |
242 | |
243 | /*-******************************************************** |
244 | * bitStream decoding |
245 | **********************************************************/ |
246 | /*! BIT_initDStream() : |
247 | * Initialize a BIT_DStream_t. |
248 | * `bitD` : a pointer to an already allocated BIT_DStream_t structure. |
249 | * `srcSize` must be the *exact* size of the bitStream, in bytes. |
250 | * @return : size of stream (== srcSize), or an errorCode if a problem is detected |
251 | */ |
252 | MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) |
253 | { |
254 | if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } |
255 | |
256 | bitD->start = (const char*)srcBuffer; |
257 | bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); |
258 | |
259 | if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ |
260 | bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); |
261 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
262 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
263 | bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ |
264 | if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } |
265 | } else { |
266 | bitD->ptr = bitD->start; |
267 | bitD->bitContainer = *(const BYTE*)(bitD->start); |
268 | switch(srcSize) |
269 | { |
270 | case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); |
271 | ZSTD_FALLTHROUGH; |
272 | |
273 | case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); |
274 | ZSTD_FALLTHROUGH; |
275 | |
276 | case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); |
277 | ZSTD_FALLTHROUGH; |
278 | |
279 | case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; |
280 | ZSTD_FALLTHROUGH; |
281 | |
282 | case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; |
283 | ZSTD_FALLTHROUGH; |
284 | |
285 | case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; |
286 | ZSTD_FALLTHROUGH; |
287 | |
288 | default: break; |
289 | } |
290 | { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; |
291 | bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; |
292 | if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ |
293 | } |
294 | bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; |
295 | } |
296 | |
297 | return srcSize; |
298 | } |
299 | |
300 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) |
301 | { |
302 | return bitContainer >> start; |
303 | } |
304 | |
305 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) |
306 | { |
307 | U32 const regMask = sizeof(bitContainer)*8 - 1; |
308 | /* if start > regMask, bitstream is corrupted, and result is undefined */ |
309 | assert(nbBits < BIT_MASK_SIZE); |
310 | /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better |
311 | * than accessing memory. When bmi2 instruction is not present, we consider |
312 | * such cpus old (pre-Haswell, 2013) and their performance is not of that |
313 | * importance. |
314 | */ |
315 | #if defined(__x86_64__) || defined(_M_X86) |
316 | return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); |
317 | #else |
318 | return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; |
319 | #endif |
320 | } |
321 | |
322 | /*! BIT_lookBits() : |
323 | * Provides next n bits from local register. |
324 | * local register is not modified. |
325 | * On 32-bits, maxNbBits==24. |
326 | * On 64-bits, maxNbBits==56. |
327 | * @return : value extracted */ |
328 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) |
329 | { |
330 | /* arbitrate between double-shift and shift+mask */ |
331 | #if 1 |
332 | /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, |
333 | * bitstream is likely corrupted, and result is undefined */ |
334 | return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); |
335 | #else |
336 | /* this code path is slower on my os-x laptop */ |
337 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
338 | return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); |
339 | #endif |
340 | } |
341 | |
342 | /*! BIT_lookBitsFast() : |
343 | * unsafe version; only works if nbBits >= 1 */ |
344 | MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) |
345 | { |
346 | U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; |
347 | assert(nbBits >= 1); |
348 | return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); |
349 | } |
350 | |
351 | MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) |
352 | { |
353 | bitD->bitsConsumed += nbBits; |
354 | } |
355 | |
356 | /*! BIT_readBits() : |
357 | * Read (consume) next n bits from local register and update. |
358 | * Pay attention to not read more than nbBits contained into local register. |
359 | * @return : extracted value. */ |
360 | MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) |
361 | { |
362 | size_t const value = BIT_lookBits(bitD, nbBits); |
363 | BIT_skipBits(bitD, nbBits); |
364 | return value; |
365 | } |
366 | |
367 | /*! BIT_readBitsFast() : |
368 | * unsafe version; only works if nbBits >= 1 */ |
369 | MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) |
370 | { |
371 | size_t const value = BIT_lookBitsFast(bitD, nbBits); |
372 | assert(nbBits >= 1); |
373 | BIT_skipBits(bitD, nbBits); |
374 | return value; |
375 | } |
376 | |
377 | /*! BIT_reloadDStreamFast() : |
378 | * Similar to BIT_reloadDStream(), but with two differences: |
379 | * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! |
380 | * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this |
381 | * point you must use BIT_reloadDStream() to reload. |
382 | */ |
383 | MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) |
384 | { |
385 | if (UNLIKELY(bitD->ptr < bitD->limitPtr)) |
386 | return BIT_DStream_overflow; |
387 | assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); |
388 | bitD->ptr -= bitD->bitsConsumed >> 3; |
389 | bitD->bitsConsumed &= 7; |
390 | bitD->bitContainer = MEM_readLEST(bitD->ptr); |
391 | return BIT_DStream_unfinished; |
392 | } |
393 | |
394 | /*! BIT_reloadDStream() : |
395 | * Refill `bitD` from buffer previously set in BIT_initDStream() . |
396 | * This function is safe, it guarantees it will not read beyond src buffer. |
397 | * @return : status of `BIT_DStream_t` internal register. |
398 | * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ |
399 | MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) |
400 | { |
401 | if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ |
402 | return BIT_DStream_overflow; |
403 | |
404 | if (bitD->ptr >= bitD->limitPtr) { |
405 | return BIT_reloadDStreamFast(bitD); |
406 | } |
407 | if (bitD->ptr == bitD->start) { |
408 | if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; |
409 | return BIT_DStream_completed; |
410 | } |
411 | /* start < ptr < limitPtr */ |
412 | { U32 nbBytes = bitD->bitsConsumed >> 3; |
413 | BIT_DStream_status result = BIT_DStream_unfinished; |
414 | if (bitD->ptr - nbBytes < bitD->start) { |
415 | nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ |
416 | result = BIT_DStream_endOfBuffer; |
417 | } |
418 | bitD->ptr -= nbBytes; |
419 | bitD->bitsConsumed -= nbBytes*8; |
420 | bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ |
421 | return result; |
422 | } |
423 | } |
424 | |
425 | /*! BIT_endOfDStream() : |
426 | * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). |
427 | */ |
428 | MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) |
429 | { |
430 | return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); |
431 | } |
432 | |
433 | #if defined (__cplusplus) |
434 | } |
435 | #endif |
436 | |
437 | #endif /* BITSTREAM_H_MODULE */ |
438 | |