1 | // basisu_transcoder_internal.h - Universal texture format transcoder library. |
2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
3 | // |
4 | // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing |
5 | // |
6 | // Licensed under the Apache License, Version 2.0 (the "License"); |
7 | // you may not use this file except in compliance with the License. |
8 | // You may obtain a copy of the License at |
9 | // |
10 | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | // |
12 | // Unless required by applicable law or agreed to in writing, software |
13 | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | // See the License for the specific language governing permissions and |
16 | // limitations under the License. |
17 | #pragma once |
18 | |
19 | #ifdef _MSC_VER |
20 | #pragma warning (disable: 4127) // conditional expression is constant |
21 | #endif |
22 | |
23 | #define BASISD_LIB_VERSION 116 |
24 | #define BASISD_VERSION_STRING "01.16" |
25 | |
26 | #ifdef _DEBUG |
27 | #define BASISD_BUILD_DEBUG |
28 | #else |
29 | #define BASISD_BUILD_RELEASE |
30 | #endif |
31 | |
32 | #include "basisu.h" |
33 | |
34 | #define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16)) |
35 | |
36 | namespace basisu |
37 | { |
38 | extern bool g_debug_printf; |
39 | } |
40 | |
41 | namespace basist |
42 | { |
// Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats).
// You probably don't care about these enums unless you are going pretty low-level and calling the transcoder to decode individual slices.
45 | enum class block_format |
46 | { |
47 | cETC1, // ETC1S RGB |
48 | cETC2_RGBA, // full ETC2 EAC RGBA8 block |
49 | cBC1, // DXT1 RGB |
50 | cBC3, // BC4 block followed by a four color BC1 block |
51 | cBC4, // DXT5A (alpha block only) |
52 | cBC5, // two BC4 blocks |
53 | cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp |
54 | cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA |
55 | cBC7, // Full BC7 block, any mode |
56 | cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) |
57 | cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) |
cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-byte ETC2 EAC RGBA8 block)
cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder currently always assumes sRGB is not enabled when outputting ASTC
// data. If you use an sRGB ASTC format you'll get ~1 LSB of additional error, because ASTC decoders scale 8-bit endpoints to 16 bits differently during unpacking. (See the note following this enum.)
61 | |
62 | cATC_RGB, |
63 | cATC_RGBA_INTERPOLATED_ALPHA, |
64 | cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size |
65 | |
66 | cPVRTC2_4_RGB, |
67 | cPVRTC2_4_RGBA, |
68 | |
69 | cETC2_EAC_R11, |
70 | cETC2_EAC_RG11, |
71 | |
72 | cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) |
73 | |
74 | cRGB32, // Writes RGB components to 32bpp output pixels |
cRGBA32, // Writes RGBA components to 32bpp output pixels
76 | cA32, // Writes alpha component to 32bpp output pixels |
77 | |
78 | cRGB565, |
79 | cBGR565, |
80 | |
81 | cRGBA4444_COLOR, |
82 | cRGBA4444_ALPHA, |
83 | cRGBA4444_COLOR_OPAQUE, |
84 | cRGBA4444, |
85 | |
86 | cUASTC_4x4, |
87 | |
88 | cTotalBlockFormats |
89 | }; |
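// Note on the cASTC_4x4 sRGB caveat above (illustrative, per our reading of the ASTC LDR decode rules): in linear
// mode an 8-bit endpoint e is expanded to 16 bits as (e << 8) | e, while in sRGB mode the low byte is instead fixed
// at 0x80, i.e. (e << 8) | 0x80. For example e = 255 expands to 0xFFFF in linear mode but 0xFF80 in sRGB mode, which
// is where the small extra error mentioned above comes from. A minimal sketch (hypothetical helper, not part of the
// transcoder):
//
//   inline uint16_t astc_expand_endpoint(uint8_t e, bool srgb) { return (uint16_t)((e << 8) | (srgb ? 0x80 : e)); }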
90 | |
91 | const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31; |
92 | const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21; |
93 | const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9; |
94 | const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3; |
95 | |
96 | const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1; |
97 | const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1; |
98 | const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3; |
99 | const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4; |
100 | |
101 | const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds); |
102 | const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; |
103 | const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS; |
104 | const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; |
105 | const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; |
106 | const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; |
107 | const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); |
108 | |
109 | uint16_t crc16(const void *r, size_t size, uint16_t crc); |
110 | |
111 | class huffman_decoding_table |
112 | { |
113 | friend class bitwise_decoder; |
114 | |
115 | public: |
116 | huffman_decoding_table() |
117 | { |
118 | } |
119 | |
120 | void clear() |
121 | { |
122 | basisu::clear_vector(m_code_sizes); |
123 | basisu::clear_vector(m_lookup); |
124 | basisu::clear_vector(m_tree); |
125 | } |
126 | |
127 | bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) |
128 | { |
129 | if (!total_syms) |
130 | { |
131 | clear(); |
132 | return true; |
133 | } |
134 | |
135 | m_code_sizes.resize(total_syms); |
136 | memcpy(&m_code_sizes[0], pCode_sizes, total_syms); |
137 | |
138 | const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; |
139 | |
140 | m_lookup.resize(0); |
141 | m_lookup.resize(huffman_fast_lookup_size); |
142 | |
143 | m_tree.resize(0); |
144 | m_tree.resize(total_syms * 2); |
145 | |
146 | uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; |
147 | basisu::clear_obj(syms_using_codesize); |
148 | for (uint32_t i = 0; i < total_syms; i++) |
149 | { |
150 | if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize) |
151 | return false; |
152 | syms_using_codesize[pCode_sizes[i]]++; |
153 | } |
154 | |
155 | uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; |
156 | next_code[0] = next_code[1] = 0; |
157 | |
158 | uint32_t used_syms = 0, total = 0; |
159 | for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++) |
160 | { |
161 | used_syms += syms_using_codesize[i]; |
162 | next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1)); |
163 | } |
164 | |
165 | if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms != 1U)) |
166 | return false; |
167 | |
168 | for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index) |
169 | { |
170 | uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index]; |
171 | if (!code_size) |
172 | continue; |
173 | |
174 | cur_code = next_code[code_size]++; |
175 | |
176 | for (l = code_size; l > 0; l--, cur_code >>= 1) |
177 | rev_code = (rev_code << 1) | (cur_code & 1); |
178 | |
179 | if (code_size <= fast_lookup_bits) |
180 | { |
181 | uint32_t k = (code_size << 16) | sym_index; |
182 | while (rev_code < huffman_fast_lookup_size) |
183 | { |
184 | if (m_lookup[rev_code] != 0) |
185 | { |
186 | // Supplied codesizes can't create a valid prefix code. |
187 | return false; |
188 | } |
189 | |
190 | m_lookup[rev_code] = k; |
191 | rev_code += (1 << code_size); |
192 | } |
193 | continue; |
194 | } |
195 | |
196 | int tree_cur; |
197 | if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) |
198 | { |
199 | const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); |
200 | if (m_lookup[idx] != 0) |
201 | { |
202 | // Supplied codesizes can't create a valid prefix code. |
203 | return false; |
204 | } |
205 | |
206 | m_lookup[idx] = tree_next; |
207 | tree_cur = tree_next; |
208 | tree_next -= 2; |
209 | } |
210 | |
211 | if (tree_cur >= 0) |
212 | { |
213 | // Supplied codesizes can't create a valid prefix code. |
214 | return false; |
215 | } |
216 | |
217 | rev_code >>= (fast_lookup_bits - 1); |
218 | |
219 | for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) |
220 | { |
221 | tree_cur -= ((rev_code >>= 1) & 1); |
222 | |
223 | const int idx = -tree_cur - 1; |
224 | if (idx < 0) |
225 | return false; |
226 | else if (idx >= (int)m_tree.size()) |
227 | m_tree.resize(idx + 1); |
228 | |
229 | if (!m_tree[idx]) |
230 | { |
231 | m_tree[idx] = (int16_t)tree_next; |
232 | tree_cur = tree_next; |
233 | tree_next -= 2; |
234 | } |
235 | else |
236 | { |
237 | tree_cur = m_tree[idx]; |
238 | if (tree_cur >= 0) |
239 | { |
240 | // Supplied codesizes can't create a valid prefix code. |
241 | return false; |
242 | } |
243 | } |
244 | } |
245 | |
246 | tree_cur -= ((rev_code >>= 1) & 1); |
247 | |
248 | const int idx = -tree_cur - 1; |
249 | if (idx < 0) |
250 | return false; |
251 | else if (idx >= (int)m_tree.size()) |
252 | m_tree.resize(idx + 1); |
253 | |
254 | if (m_tree[idx] != 0) |
255 | { |
256 | // Supplied codesizes can't create a valid prefix code. |
257 | return false; |
258 | } |
259 | |
260 | m_tree[idx] = (int16_t)sym_index; |
261 | } |
262 | |
263 | return true; |
264 | } |
265 | |
266 | const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } |
const basisu::int_vec &get_lookup() const { return m_lookup; }
const basisu::int16_vec &get_tree() const { return m_tree; }
269 | |
270 | bool is_valid() const { return m_code_sizes.size() > 0; } |
271 | |
272 | private: |
273 | basisu::uint8_vec m_code_sizes; |
274 | basisu::int_vec m_lookup; |
275 | basisu::int16_vec m_tree; |
276 | }; |
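// Usage sketch (illustrative only, not used by the transcoder itself): building a decoding table from a canonical
// set of code lengths. Symbol 0 gets a 1-bit code, symbol 1 a 2-bit code, and symbols 2/3 get 3-bit codes, which is
// a complete prefix code, so init() succeeds; and since every code fits in cHuffmanFastLookupBits bits, all symbols
// are resolved through the fast lookup array rather than the tree.
//
//   const uint8_t code_sizes[4] = { 1, 2, 3, 3 };
//   basist::huffman_decoding_table table;
//   const bool ok = table.init(4, code_sizes); // ok == true, and table.is_valid() now returns true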
277 | |
278 | class bitwise_decoder |
279 | { |
280 | public: |
281 | bitwise_decoder() : |
282 | m_buf_size(0), |
283 | m_pBuf(nullptr), |
284 | m_pBuf_start(nullptr), |
285 | m_pBuf_end(nullptr), |
286 | m_bit_buf(0), |
287 | m_bit_buf_size(0) |
288 | { |
289 | } |
290 | |
291 | void clear() |
292 | { |
293 | m_buf_size = 0; |
294 | m_pBuf = nullptr; |
295 | m_pBuf_start = nullptr; |
296 | m_pBuf_end = nullptr; |
297 | m_bit_buf = 0; |
298 | m_bit_buf_size = 0; |
299 | } |
300 | |
301 | bool init(const uint8_t *pBuf, uint32_t buf_size) |
302 | { |
303 | if ((!pBuf) && (buf_size)) |
304 | return false; |
305 | |
306 | m_buf_size = buf_size; |
307 | m_pBuf = pBuf; |
308 | m_pBuf_start = pBuf; |
309 | m_pBuf_end = pBuf + buf_size; |
310 | m_bit_buf = 0; |
311 | m_bit_buf_size = 0; |
312 | return true; |
313 | } |
314 | |
315 | void stop() |
316 | { |
317 | } |
318 | |
319 | inline uint32_t peek_bits(uint32_t num_bits) |
320 | { |
321 | if (!num_bits) |
322 | return 0; |
323 | |
324 | assert(num_bits <= 25); |
325 | |
326 | while (m_bit_buf_size < num_bits) |
327 | { |
328 | uint32_t c = 0; |
329 | if (m_pBuf < m_pBuf_end) |
330 | c = *m_pBuf++; |
331 | |
332 | m_bit_buf |= (c << m_bit_buf_size); |
333 | m_bit_buf_size += 8; |
334 | assert(m_bit_buf_size <= 32); |
335 | } |
336 | |
337 | return m_bit_buf & ((1 << num_bits) - 1); |
338 | } |
339 | |
340 | void remove_bits(uint32_t num_bits) |
341 | { |
342 | assert(m_bit_buf_size >= num_bits); |
343 | |
344 | m_bit_buf >>= num_bits; |
345 | m_bit_buf_size -= num_bits; |
346 | } |
347 | |
348 | uint32_t get_bits(uint32_t num_bits) |
349 | { |
350 | if (num_bits > 25) |
351 | { |
352 | assert(num_bits <= 32); |
353 | |
354 | const uint32_t bits0 = peek_bits(25); |
355 | m_bit_buf >>= 25; |
356 | m_bit_buf_size -= 25; |
357 | num_bits -= 25; |
358 | |
359 | const uint32_t bits = peek_bits(num_bits); |
360 | m_bit_buf >>= num_bits; |
361 | m_bit_buf_size -= num_bits; |
362 | |
363 | return bits0 | (bits << 25); |
364 | } |
365 | |
366 | const uint32_t bits = peek_bits(num_bits); |
367 | |
368 | m_bit_buf >>= num_bits; |
369 | m_bit_buf_size -= num_bits; |
370 | |
371 | return bits; |
372 | } |
373 | |
374 | uint32_t decode_truncated_binary(uint32_t n) |
375 | { |
376 | assert(n >= 2); |
377 | |
378 | const uint32_t k = basisu::floor_log2i(n); |
379 | const uint32_t u = (1 << (k + 1)) - n; |
380 | |
381 | uint32_t result = get_bits(k); |
382 | |
383 | if (result >= u) |
384 | result = ((result << 1) | get_bits(1)) - u; |
385 | |
386 | return result; |
387 | } |
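// Worked example for decode_truncated_binary() above (illustrative): for n = 5, k = floor_log2i(5) = 2 and
// u = (1 << 3) - 5 = 3, so values 0..2 are sent in 2 bits while values 3 and 4 take a third bit. If the first
// 2 bits decode to 3, one extra bit b is read and the result is ((3 << 1) | b) - 3, i.e. 3 or 4.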
388 | |
389 | uint32_t decode_rice(uint32_t m) |
390 | { |
391 | assert(m); |
392 | |
393 | uint32_t q = 0; |
394 | for (;;) |
395 | { |
396 | uint32_t k = peek_bits(16); |
397 | |
398 | uint32_t l = 0; |
399 | while (k & 1) |
400 | { |
401 | l++; |
402 | k >>= 1; |
403 | } |
404 | |
405 | q += l; |
406 | |
407 | remove_bits(l); |
408 | |
409 | if (l < 16) |
410 | break; |
411 | } |
412 | |
413 | return (q << m) + (get_bits(m + 1) >> 1); |
414 | } |
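// Worked example for decode_rice() above (illustrative): the quotient is coded as a run of 1 bits terminated by a
// 0 bit, and the final get_bits(m + 1) >> 1 consumes that 0 terminator together with the m remainder bits. For
// m = 2 and the value 11 (q = 2, r = 3), the bits arrive in the order 1,1 (quotient run), 0 (terminator),
// 1,1 (remainder), and the decoder returns (2 << 2) + 3 = 11.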
415 | |
416 | inline uint32_t decode_vlc(uint32_t chunk_bits) |
417 | { |
418 | assert(chunk_bits); |
419 | |
420 | const uint32_t chunk_size = 1 << chunk_bits; |
421 | const uint32_t chunk_mask = chunk_size - 1; |
422 | |
423 | uint32_t v = 0; |
424 | uint32_t ofs = 0; |
425 | |
426 | for ( ; ; ) |
427 | { |
428 | uint32_t s = get_bits(chunk_bits + 1); |
429 | v |= ((s & chunk_mask) << ofs); |
430 | ofs += chunk_bits; |
431 | |
432 | if ((s & chunk_size) == 0) |
433 | break; |
434 | |
435 | if (ofs >= 32) |
436 | { |
437 | assert(0); |
438 | break; |
439 | } |
440 | } |
441 | |
442 | return v; |
443 | } |
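// Worked example for decode_vlc() above (illustrative): the value is sent low chunk first, each chunk_bits-bit
// chunk followed by a continuation bit in the next (high) position. With chunk_bits = 3, the value 100
// (binary 1100100) splits into the chunks 4, 4, 1, so the stream holds the 4-bit groups (4 | 8), (4 | 8), (1),
// and the decoder rebuilds 4 + (4 << 3) + (1 << 6) = 100.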
444 | |
445 | inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) |
446 | { |
447 | assert(ct.m_code_sizes.size()); |
448 | |
449 | const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; |
450 | |
451 | while (m_bit_buf_size < 16) |
452 | { |
453 | uint32_t c = 0; |
454 | if (m_pBuf < m_pBuf_end) |
455 | c = *m_pBuf++; |
456 | |
457 | m_bit_buf |= (c << m_bit_buf_size); |
458 | m_bit_buf_size += 8; |
459 | assert(m_bit_buf_size <= 32); |
460 | } |
461 | |
462 | int code_len; |
463 | |
464 | int sym; |
465 | if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) |
466 | { |
467 | code_len = sym >> 16; |
468 | sym &= 0xFFFF; |
469 | } |
470 | else |
471 | { |
472 | code_len = fast_lookup_bits; |
473 | do |
474 | { |
475 | sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 |
476 | } while (sym < 0); |
477 | } |
478 | |
479 | m_bit_buf >>= code_len; |
480 | m_bit_buf_size -= code_len; |
481 | |
482 | return sym; |
483 | } |
484 | |
485 | bool read_huffman_table(huffman_decoding_table &ct) |
486 | { |
487 | ct.clear(); |
488 | |
489 | const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2); |
490 | |
491 | if (!total_used_syms) |
492 | return true; |
493 | if (total_used_syms > basisu::cHuffmanMaxSyms) |
494 | return false; |
495 | |
496 | uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes]; |
497 | basisu::clear_obj(code_length_code_sizes); |
498 | |
499 | const uint32_t num_codelength_codes = get_bits(5); |
500 | if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes)) |
501 | return false; |
502 | |
503 | for (uint32_t i = 0; i < num_codelength_codes; i++) |
504 | code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3)); |
505 | |
506 | huffman_decoding_table code_length_table; |
507 | if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes)) |
508 | return false; |
509 | |
510 | if (!code_length_table.is_valid()) |
511 | return false; |
512 | |
513 | basisu::uint8_vec code_sizes(total_used_syms); |
514 | |
515 | uint32_t cur = 0; |
516 | while (cur < total_used_syms) |
517 | { |
518 | int c = decode_huffman(code_length_table); |
519 | |
520 | if (c <= 16) |
521 | code_sizes[cur++] = static_cast<uint8_t>(c); |
522 | else if (c == basisu::cHuffmanSmallZeroRunCode) |
523 | cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin; |
524 | else if (c == basisu::cHuffmanBigZeroRunCode) |
525 | cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin; |
526 | else |
527 | { |
528 | if (!cur) |
529 | return false; |
530 | |
531 | uint32_t l; |
532 | if (c == basisu::cHuffmanSmallRepeatCode) |
533 | l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin; |
534 | else |
535 | l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin; |
536 | |
537 | const uint8_t prev = code_sizes[cur - 1]; |
538 | if (prev == 0) |
539 | return false; |
540 | do |
541 | { |
542 | if (cur >= total_used_syms) |
543 | return false; |
544 | code_sizes[cur++] = prev; |
545 | } while (--l > 0); |
546 | } |
547 | } |
548 | |
549 | if (cur != total_used_syms) |
550 | return false; |
551 | |
552 | return ct.init(total_used_syms, &code_sizes[0]); |
553 | } |
554 | |
555 | private: |
556 | uint32_t m_buf_size; |
557 | const uint8_t *m_pBuf; |
558 | const uint8_t *m_pBuf_start; |
559 | const uint8_t *m_pBuf_end; |
560 | |
561 | uint32_t m_bit_buf; |
562 | uint32_t m_bit_buf_size; |
563 | }; |
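// Usage sketch (illustrative only): driving a bitwise_decoder by hand over a caller-supplied buffer. Bits are read
// LSB-first within each byte, so the first get_bits(4) below returns the low nibble of the first byte. 'buf' is a
// made-up input buffer, not data from any real .basis stream.
//
//   const uint8_t buf[4] = { 0xA5, 0x3C, 0x00, 0x00 };
//   basist::bitwise_decoder dec;
//   if (dec.init(buf, sizeof(buf)))
//   {
//      const uint32_t lo = dec.get_bits(4); // 0x5, the low nibble of 0xA5
//      const uint32_t hi = dec.get_bits(4); // 0xA, the high nibble of 0xA5
//      dec.stop();
//   }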
564 | |
565 | inline uint32_t basisd_rand(uint32_t seed) |
566 | { |
567 | if (!seed) |
568 | seed++; |
569 | uint32_t z = seed; |
570 | BASISD_znew; |
571 | return z; |
572 | } |
573 | |
574 | // Returns random number in [0,limit). Max limit is 0xFFFF. |
575 | inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit) |
576 | { |
577 | seed = basisd_rand(seed); |
578 | return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16; |
579 | } |
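// Illustrative note: BASISD_znew is Marsaglia's multiply-with-carry "znew" step, and basisd_urand() maps the low
// 16 bits of the mixed state to [0, limit) with a fixed-point multiply rather than a modulo, so for limit = 6 a
// 16-bit value r maps to (r * 6) >> 16 (e.g. r = 0x8000 yields 3).
//
//   uint32_t seed = 1;
//   const uint32_t die = basisd_urand(seed, 6); // some value in [0, 6)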
580 | |
581 | class approx_move_to_front |
582 | { |
583 | public: |
584 | approx_move_to_front(uint32_t n) |
585 | { |
586 | init(n); |
587 | } |
588 | |
589 | void init(uint32_t n) |
590 | { |
591 | m_values.resize(n); |
592 | m_rover = n / 2; |
593 | } |
594 | |
595 | const basisu::int_vec& get_values() const { return m_values; } |
596 | basisu::int_vec& get_values() { return m_values; } |
597 | |
598 | uint32_t size() const { return (uint32_t)m_values.size(); } |
599 | |
600 | const int& operator[] (uint32_t index) const { return m_values[index]; } |
601 | int operator[] (uint32_t index) { return m_values[index]; } |
602 | |
603 | void add(int new_value) |
604 | { |
605 | m_values[m_rover++] = new_value; |
606 | if (m_rover == m_values.size()) |
607 | m_rover = (uint32_t)m_values.size() / 2; |
608 | } |
609 | |
610 | void use(uint32_t index) |
611 | { |
612 | if (index) |
613 | { |
614 | //std::swap(m_values[index / 2], m_values[index]); |
615 | int x = m_values[index / 2]; |
616 | int y = m_values[index]; |
617 | m_values[index / 2] = y; |
618 | m_values[index] = x; |
619 | } |
620 | } |
621 | |
622 | // returns -1 if not found |
623 | int find(int value) const |
624 | { |
625 | for (uint32_t i = 0; i < m_values.size(); i++) |
626 | if (m_values[i] == value) |
627 | return i; |
628 | return -1; |
629 | } |
630 | |
631 | void reset() |
632 | { |
633 | const uint32_t n = (uint32_t)m_values.size(); |
634 | |
635 | m_values.clear(); |
636 | |
637 | init(n); |
638 | } |
639 | |
640 | private: |
641 | basisu::int_vec m_values; |
642 | uint32_t m_rover; |
643 | }; |
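// Usage sketch (illustrative only): the ETC1S decoder uses this approximate move-to-front list for its selector
// history buffer (see MAX_SELECTOR_HISTORY_BUF_SIZE above). add() overwrites the middle slot, and use() swaps an
// entry halfway toward the front, approximating MTF without shifting the whole array.
//
//   basist::approx_move_to_front mtf(basist::MAX_SELECTOR_HISTORY_BUF_SIZE);
//   mtf.add(42);                  // lands in the middle of the list
//   const int idx = mtf.find(42); // linear search; returns -1 if absent
//   if (idx > 0)
//      mtf.use((uint32_t)idx);    // promote it halfway toward slot 0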
644 | |
645 | struct decoder_etc_block; |
646 | |
// Branchless clamp of a signed 32-bit value to [0, 255]: if any bits above the low 8 are set, the value is out of
// range, and ~(i >> 31) then evaluates to 0 for negative i and to all-ones (255 after truncation) for i > 255.
inline uint8_t clamp255(int32_t i)
{
	return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i);
}
651 | |
652 | enum eNoClamp |
653 | { |
654 | cNoClamp = 0 |
655 | }; |
656 | |
657 | struct color32 |
658 | { |
659 | union |
660 | { |
661 | struct |
662 | { |
663 | uint8_t r; |
664 | uint8_t g; |
665 | uint8_t b; |
666 | uint8_t a; |
667 | }; |
668 | |
669 | uint8_t c[4]; |
670 | |
671 | uint32_t m; |
672 | }; |
673 | |
674 | color32() { } |
675 | |
676 | color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } |
677 | color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } |
678 | |
679 | void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); } |
680 | |
681 | void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); } |
682 | void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } |
683 | |
684 | void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } |
685 | |
686 | uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } |
687 | uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } |
688 | |
689 | bool operator== (const color32&rhs) const { return m == rhs.m; } |
690 | |
691 | static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } |
692 | static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } |
693 | }; |
694 | |
695 | struct endpoint |
696 | { |
697 | color32 m_color5; |
698 | uint8_t m_inten5; |
699 | bool operator== (const endpoint& rhs) const |
700 | { |
701 | return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); |
702 | } |
703 | bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } |
704 | }; |
705 | |
706 | struct selector |
707 | { |
708 | // Plain selectors (2-bits per value) |
709 | uint8_t m_selectors[4]; |
710 | |
711 | // ETC1 selectors |
712 | uint8_t m_bytes[4]; |
713 | |
714 | uint8_t m_lo_selector, m_hi_selector; |
715 | uint8_t m_num_unique_selectors; |
716 | bool operator== (const selector& rhs) const |
717 | { |
718 | return (m_selectors[0] == rhs.m_selectors[0]) && |
719 | (m_selectors[1] == rhs.m_selectors[1]) && |
720 | (m_selectors[2] == rhs.m_selectors[2]) && |
721 | (m_selectors[3] == rhs.m_selectors[3]); |
722 | } |
723 | bool operator!= (const selector& rhs) const |
724 | { |
725 | return !(*this == rhs); |
726 | } |
727 | |
728 | void init_flags() |
729 | { |
730 | uint32_t hist[4] = { 0, 0, 0, 0 }; |
731 | for (uint32_t y = 0; y < 4; y++) |
732 | { |
733 | for (uint32_t x = 0; x < 4; x++) |
734 | { |
735 | uint32_t s = get_selector(x, y); |
736 | hist[s]++; |
737 | } |
738 | } |
739 | |
740 | m_lo_selector = 3; |
741 | m_hi_selector = 0; |
742 | m_num_unique_selectors = 0; |
743 | |
744 | for (uint32_t i = 0; i < 4; i++) |
745 | { |
746 | if (hist[i]) |
747 | { |
748 | m_num_unique_selectors++; |
749 | if (i < m_lo_selector) m_lo_selector = static_cast<uint8_t>(i); |
750 | if (i > m_hi_selector) m_hi_selector = static_cast<uint8_t>(i); |
751 | } |
752 | } |
753 | } |
754 | |
755 | // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. |
756 | inline uint32_t get_selector(uint32_t x, uint32_t y) const |
757 | { |
758 | assert((x < 4) && (y < 4)); |
759 | return (m_selectors[y] >> (x * 2)) & 3; |
760 | } |
761 | |
762 | void set_selector(uint32_t x, uint32_t y, uint32_t val) |
763 | { |
764 | static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; |
765 | |
766 | assert((x | y | val) < 4); |
767 | |
768 | m_selectors[y] &= ~(3 << (x * 2)); |
769 | m_selectors[y] |= (val << (x * 2)); |
770 | |
771 | const uint32_t etc1_bit_index = x * 4 + y; |
772 | |
773 | uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)]; |
774 | |
775 | const uint32_t byte_bit_ofs = etc1_bit_index & 7; |
776 | const uint32_t mask = 1 << byte_bit_ofs; |
777 | |
778 | const uint32_t etc1_val = s_selector_index_to_etc1[val]; |
779 | |
780 | const uint32_t lsb = etc1_val & 1; |
781 | const uint32_t msb = etc1_val >> 1; |
782 | |
783 | p[0] &= ~mask; |
784 | p[0] |= (lsb << byte_bit_ofs); |
785 | |
786 | p[-2] &= ~mask; |
787 | p[-2] |= (msb << byte_bit_ofs); |
788 | } |
789 | }; |
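// Usage sketch (illustrative only): the selector struct keeps the same 4x4 selector grid in two layouts:
// m_selectors[] packs 2 bits per texel per row, while m_bytes[] holds the native ETC1 ordering (MSB plane in bytes
// 0-1, LSB plane in bytes 2-3, one bit per texel in column-major order). set_selector() keeps both views in sync.
//
//   basist::selector sel;
//   memset(&sel, 0, sizeof(sel));
//   for (uint32_t y = 0; y < 4; y++)
//      for (uint32_t x = 0; x < 4; x++)
//         sel.set_selector(x, y, (x ^ y) & 3);
//   sel.init_flags();                           // m_num_unique_selectors == 4 for this pattern
//   const uint32_t s = sel.get_selector(1, 2);  // 3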
790 | |
791 | bool basis_block_format_is_uncompressed(block_format tex_type); |
792 | |
793 | } // namespace basist |
794 | |
795 | |
796 | |
797 | |