1 | /* |
2 | * Copyright (c) 2020 - 2023 the ThorVG project. All rights reserved. |
3 | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | * of this software and associated documentation files (the "Software"), to deal |
6 | * in the Software without restriction, including without limitation the rights |
7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | * copies of the Software, and to permit persons to whom the Software is |
9 | * furnished to do so, subject to the following conditions: |
10 | |
11 | * The above copyright notice and this permission notice shall be included in all |
12 | * copies or substantial portions of the Software. |
13 | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
20 | * SOFTWARE. |
21 | */ |
22 | |
23 | /* |
24 | * Lempel–Ziv–Welch (LZW) encoder/decoder by Guilherme R. Lampert(guilherme.ronaldo.lampert@gmail.com) |
25 | |
26 | * This is the compression scheme used by the GIF image format and the Unix 'compress' tool. |
27 | * Main differences from this implementation is that End Of Input (EOI) and Clear Codes (CC) |
28 | * are not stored in the output and the max code length in bits is 12, vs 16 in compress. |
29 | * |
30 | * EOI is simply detected by the end of the data stream, while CC happens if the |
31 | * dictionary gets filled. Data is written/read from bit streams, which handle |
32 | * byte-alignment for us in a transparent way. |
33 | |
34 | * The decoder relies on the hardcoded data layout produced by the encoder, since |
35 | * no additional reconstruction data is added to the output, so they must match. |
36 | * The nice thing about LZW is that we can reconstruct the dictionary directly from |
37 | * the stream of codes generated by the encoder, so this avoids storing additional |
38 | * headers in the bit stream. |
39 | |
40 | * The output code length is variable. It starts with the minimum number of bits |
41 | * required to store the base byte-sized dictionary and automatically increases |
42 | * as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when |
43 | * code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary |
44 | * is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and |
45 | * the process starts over. This is the main reason why the encoder and the decoder |
46 | * must match perfectly, since the lengths of the codes will not be specified with |
47 | * the data itself. |
48 | |
49 | * USEFUL LINKS: |
50 | * https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch |
51 | * http://rosettacode.org/wiki/LZW_compression |
52 | * http://www.cs.duke.edu/csed/curious/compression/lzw.html |
53 | * http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html |
54 | * http://marknelson.us/1989/10/01/lzw-data-compression/ |
55 | */ |
56 | #include "config.h" |
57 | |
58 | #if defined(THORVG_TVG_SAVER_SUPPORT) || defined(THORVG_TVG_LOADER_SUPPORT) |
59 | |
60 | /************************************************************************/ |
61 | /* Internal Class Implementation */ |
62 | /************************************************************************/ |
63 | |
64 | #include <string> |
65 | #include <memory.h> |
66 | #include "tvgLzw.h" |
67 | |
68 | namespace { |
69 | //LZW Dictionary helper: |
70 | constexpr int Nil = -1; |
71 | constexpr int MaxDictBits = 12; |
72 | constexpr int StartBits = 9; |
73 | constexpr int FirstCode = (1 << (StartBits - 1)); // 256 |
74 | constexpr int MaxDictEntries = (1 << MaxDictBits); // 4096 |
75 | |
76 | |
77 | //Round up to the next power-of-two number, e.g. 37 => 64 |
78 | static int nextPowerOfTwo(int num) |
79 | { |
80 | --num; |
81 | for (size_t i = 1; i < sizeof(num) * 8; i <<= 1) { |
82 | num = num | num >> i; |
83 | } |
84 | return ++num; |
85 | } |
86 | |
87 | |
88 | struct BitStreamWriter |
89 | { |
90 | uint8_t* stream; //Growable buffer to store our bits. Heap allocated & owned by the class instance. |
91 | int bytesAllocated; //Current size of heap-allocated stream buffer *in bytes*. |
92 | int granularity; //Amount bytesAllocated multiplies by when auto-resizing in appendBit(). |
93 | int currBytePos; //Current byte being written to, from 0 to bytesAllocated-1. |
94 | int nextBitPos; //Bit position within the current byte to access next. 0 to 7. |
95 | int numBitsWritten; //Number of bits in use from the stream buffer, not including byte-rounding padding. |
96 | |
97 | void internalInit() |
98 | { |
99 | stream = nullptr; |
100 | bytesAllocated = 0; |
101 | granularity = 2; |
102 | currBytePos = 0; |
103 | nextBitPos = 0; |
104 | numBitsWritten = 0; |
105 | } |
106 | |
107 | uint8_t* allocBytes(const int bytesWanted, uint8_t * oldPtr, const int oldSize) |
108 | { |
109 | auto newMemory = static_cast<uint8_t *>(malloc(bytesWanted)); |
110 | memset(newMemory, 0, bytesWanted); |
111 | |
112 | if (oldPtr) { |
113 | memcpy(newMemory, oldPtr, oldSize); |
114 | free(oldPtr); |
115 | } |
116 | return newMemory; |
117 | } |
118 | |
119 | BitStreamWriter() |
120 | { |
121 | /* 8192 bits for a start (1024 bytes). It will resize if needed. |
122 | Default granularity is 2. */ |
123 | internalInit(); |
124 | allocate(8192); |
125 | } |
126 | |
127 | BitStreamWriter(const int initialSizeInBits, const int growthGranularity = 2) |
128 | { |
129 | internalInit(); |
130 | setGranularity(growthGranularity); |
131 | allocate(initialSizeInBits); |
132 | } |
133 | |
134 | ~BitStreamWriter() |
135 | { |
136 | free(stream); |
137 | } |
138 | |
139 | void allocate(int bitsWanted) |
140 | { |
141 | //Require at least a byte. |
142 | if (bitsWanted <= 0) bitsWanted = 8; |
143 | |
144 | //Round upwards if needed: |
145 | if ((bitsWanted % 8) != 0) bitsWanted = nextPowerOfTwo(bitsWanted); |
146 | |
147 | //We might already have the required count. |
148 | const int sizeInBytes = bitsWanted / 8; |
149 | if (sizeInBytes <= bytesAllocated) return; |
150 | |
151 | stream = allocBytes(sizeInBytes, stream, bytesAllocated); |
152 | bytesAllocated = sizeInBytes; |
153 | } |
154 | |
155 | void appendBit(const int bit) |
156 | { |
157 | const uint32_t mask = uint32_t(1) << nextBitPos; |
158 | stream[currBytePos] = (stream[currBytePos] & ~mask) | (-bit & mask); |
159 | ++numBitsWritten; |
160 | |
161 | if (++nextBitPos == 8) { |
162 | nextBitPos = 0; |
163 | if (++currBytePos == bytesAllocated) allocate(bytesAllocated * granularity * 8); |
164 | } |
165 | } |
166 | |
167 | void appendBitsU64(const uint64_t num, const int bitCount) |
168 | { |
169 | for (int b = 0; b < bitCount; ++b) { |
170 | const uint64_t mask = uint64_t(1) << b; |
171 | const int bit = !!(num & mask); |
172 | appendBit(bit); |
173 | } |
174 | } |
175 | |
176 | uint8_t* release() |
177 | { |
178 | auto oldPtr = stream; |
179 | internalInit(); |
180 | return oldPtr; |
181 | } |
182 | |
183 | void setGranularity(const int growthGranularity) |
184 | { |
185 | granularity = (growthGranularity >= 2) ? growthGranularity : 2; |
186 | } |
187 | |
188 | int getByteCount() const |
189 | { |
190 | int usedBytes = numBitsWritten / 8; |
191 | int leftovers = numBitsWritten % 8; |
192 | if (leftovers != 0) ++usedBytes; |
193 | return usedBytes; |
194 | } |
195 | }; |
196 | |
197 | |
198 | struct BitStreamReader |
199 | { |
200 | const uint8_t* stream; // Pointer to the external bit stream. Not owned by the reader. |
201 | const int sizeInBytes; // Size of the stream *in bytes*. Might include padding. |
202 | const int sizeInBits; // Size of the stream *in bits*, padding *not* include. |
203 | int currBytePos = 0; // Current byte being read in the stream. |
204 | int nextBitPos = 0; // Bit position within the current byte to access next. 0 to 7. |
205 | int numBitsRead = 0; // Total bits read from the stream so far. Never includes byte-rounding padding. |
206 | |
207 | BitStreamReader(const uint8_t* bitStream, const int byteCount, const int bitCount) : stream(bitStream), sizeInBytes(byteCount), sizeInBits(bitCount) |
208 | { |
209 | } |
210 | |
211 | bool readNextBit(int& bitOut) |
212 | { |
213 | if (numBitsRead >= sizeInBits) return false; //We are done. |
214 | |
215 | const uint32_t mask = uint32_t(1) << nextBitPos; |
216 | bitOut = !!(stream[currBytePos] & mask); |
217 | ++numBitsRead; |
218 | |
219 | if (++nextBitPos == 8) { |
220 | nextBitPos = 0; |
221 | ++currBytePos; |
222 | } |
223 | return true; |
224 | } |
225 | |
226 | uint64_t readBitsU64(const int bitCount) |
227 | { |
228 | uint64_t num = 0; |
229 | for (int b = 0; b < bitCount; ++b) { |
230 | int bit; |
231 | if (!readNextBit(bit)) break; |
232 | /* Based on a "Stanford bit-hack": |
233 | http://graphics.stanford.edu/~seander/bithacks.html#ConditionalSetOrClearBitsWithoutBranching */ |
234 | const uint64_t mask = uint64_t(1) << b; |
235 | num = (num & ~mask) | (-bit & mask); |
236 | } |
237 | return num; |
238 | } |
239 | |
240 | bool isEndOfStream() const |
241 | { |
242 | return numBitsRead >= sizeInBits; |
243 | } |
244 | }; |
245 | |
246 | |
247 | struct Dictionary |
248 | { |
249 | struct Entry |
250 | { |
251 | int code; |
252 | int value; |
253 | }; |
254 | |
255 | //Dictionary entries 0-255 are always reserved to the byte/ASCII range. |
256 | int size; |
257 | Entry entries[MaxDictEntries]; |
258 | |
259 | Dictionary() |
260 | { |
261 | /* First 256 dictionary entries are reserved to the byte/ASCII range. |
262 | Additional entries follow for the character sequences found in the input. |
263 | Up to 4096 - 256 (MaxDictEntries - FirstCode). */ |
264 | size = FirstCode; |
265 | |
266 | for (int i = 0; i < size; ++i) { |
267 | entries[i].code = Nil; |
268 | entries[i].value = i; |
269 | } |
270 | } |
271 | |
272 | int findIndex(const int code, const int value) const |
273 | { |
274 | if (code == Nil) return value; |
275 | |
276 | //Linear search for now. |
277 | //TODO: Worth optimizing with a proper hash-table? |
278 | for (int i = 0; i < size; ++i) { |
279 | if (entries[i].code == code && entries[i].value == value) return i; |
280 | } |
281 | return Nil; |
282 | } |
283 | |
284 | bool add(const int code, const int value) |
285 | { |
286 | if (size == MaxDictEntries) return false; |
287 | entries[size].code = code; |
288 | entries[size].value = value; |
289 | ++size; |
290 | return true; |
291 | } |
292 | |
293 | bool flush(int & codeBitsWidth) |
294 | { |
295 | if (size == (1 << codeBitsWidth)) { |
296 | ++codeBitsWidth; |
297 | if (codeBitsWidth > MaxDictBits) { |
298 | //Clear the dictionary (except the first 256 byte entries). |
299 | codeBitsWidth = StartBits; |
300 | size = FirstCode; |
301 | return true; |
302 | } |
303 | } |
304 | return false; |
305 | } |
306 | }; |
307 | |
308 | |
309 | static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar) |
310 | { |
311 | if (bytesDecodedSoFar >= outputSizeBytes) return false; |
312 | *output++ = static_cast<uint8_t>(code); |
313 | ++bytesDecodedSoFar; |
314 | return true; |
315 | } |
316 | |
317 | |
318 | static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte) |
319 | { |
320 | /* A sequence is stored backwards, so we have to write |
321 | it to a temp then output the buffer in reverse. */ |
322 | int i = 0; |
323 | uint8_t sequence[MaxDictEntries]; |
324 | |
325 | do { |
326 | sequence[i++] = dict.entries[code].value; |
327 | code = dict.entries[code].code; |
328 | } while (code >= 0); |
329 | |
330 | firstByte = sequence[--i]; |
331 | |
332 | for (; i >= 0; --i) { |
333 | if (!outputByte(sequence[i], output, outputSizeBytes, bytesDecodedSoFar)) return false; |
334 | } |
335 | return true; |
336 | } |
337 | } |
338 | |
339 | |
340 | /************************************************************************/ |
341 | /* External Class Implementation */ |
342 | /************************************************************************/ |
343 | |
344 | namespace tvg { |
345 | |
346 | uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes) |
347 | { |
348 | int code = Nil; |
349 | int prevCode = Nil; |
350 | int firstByte = 0; |
351 | int bytesDecoded = 0; |
352 | int codeBitsWidth = StartBits; |
353 | auto uncompressed = (uint8_t*) malloc(sizeof(uint8_t) * uncompressedSizeBytes); |
354 | auto ptr = uncompressed; |
355 | |
356 | /* We'll reconstruct the dictionary based on the bit stream codes. |
357 | Unlike Huffman encoding, we don't store the dictionary as a prefix to the data. */ |
358 | Dictionary dictionary; |
359 | BitStreamReader bitStream(compressed, compressedSizeBytes, compressedSizeBits); |
360 | |
361 | /* We check to avoid an overflow of the user buffer. |
362 | If the buffer is smaller than the decompressed size, we break the loop and return the current decompression count. */ |
363 | while (!bitStream.isEndOfStream()) { |
364 | code = static_cast<int>(bitStream.readBitsU64(codeBitsWidth)); |
365 | |
366 | if (prevCode == Nil) { |
367 | if (!outputByte(code, ptr, uncompressedSizeBytes, bytesDecoded)) break; |
368 | firstByte = code; |
369 | prevCode = code; |
370 | continue; |
371 | } |
372 | if (code >= dictionary.size) { |
373 | if (!outputSequence(dictionary, prevCode, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break; |
374 | if (!outputByte(firstByte, ptr, uncompressedSizeBytes, bytesDecoded)) break; |
375 | } else if (!outputSequence(dictionary, code, ptr, uncompressedSizeBytes, bytesDecoded, firstByte)) break; |
376 | |
377 | dictionary.add(prevCode, firstByte); |
378 | if (dictionary.flush(codeBitsWidth)) prevCode = Nil; |
379 | else prevCode = code; |
380 | } |
381 | |
382 | return uncompressed; |
383 | } |
384 | |
385 | |
386 | uint8_t* lzwEncode(const uint8_t* uncompressed, uint32_t uncompressedSizeBytes, uint32_t* compressedSizeBytes, uint32_t* compressedSizeBits) |
387 | { |
388 | //LZW encoding context: |
389 | int code = Nil; |
390 | int codeBitsWidth = StartBits; |
391 | Dictionary dictionary; |
392 | |
393 | //Output bit stream we write to. This will allocate memory as needed to accommodate the encoded data. |
394 | BitStreamWriter bitStream; |
395 | |
396 | for (; uncompressedSizeBytes > 0; --uncompressedSizeBytes, ++uncompressed) { |
397 | const int value = *uncompressed; |
398 | const int index = dictionary.findIndex(code, value); |
399 | |
400 | if (index != Nil) { |
401 | code = index; |
402 | continue; |
403 | } |
404 | |
405 | //Write the dictionary code using the minimum bit-with: |
406 | bitStream.appendBitsU64(code, codeBitsWidth); |
407 | |
408 | //Flush it when full so we can restart the sequences. |
409 | if (!dictionary.flush(codeBitsWidth)) { |
410 | //There's still space for this sequence. |
411 | dictionary.add(code, value); |
412 | } |
413 | code = value; |
414 | } |
415 | |
416 | //Residual code at the end: |
417 | if (code != Nil) bitStream.appendBitsU64(code, codeBitsWidth); |
418 | |
419 | //Pass ownership of the compressed data buffer to the user pointer: |
420 | *compressedSizeBytes = bitStream.getByteCount(); |
421 | *compressedSizeBits = bitStream.numBitsWritten; |
422 | |
423 | return bitStream.release(); |
424 | } |
425 | |
426 | } |
427 | |
428 | #endif |
429 | |