1// basisu_transcoder.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include "basisu_transcoder.h"
17#include <limits.h>
18#include "basisu_containers_impl.h"
19
// Endianness is not auto-detected: unless the build defines BASISD_IS_BIG_ENDIAN itself,
// the fallback below selects little endian (0).
#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
// #define BASISD_IS_BIG_ENDIAN (1)
//#else
 #define BASISD_IS_BIG_ENDIAN (0)
//#endif
#endif

// Unaligned word reads default to on only for x86/x64 targets, and are off everywhere else.
#ifndef BASISD_USE_UNALIGNED_WORD_READS
 #ifdef __EMSCRIPTEN__
 // Can't use unaligned loads/stores with WebAssembly.
 #define BASISD_USE_UNALIGNED_WORD_READS (0)
 #elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)
 #define BASISD_USE_UNALIGNED_WORD_READS (1)
 #else
 #define BASISD_USE_UNALIGNED_WORD_READS (0)
 #endif
#endif

// Using unaligned loads and stores causes errors when using UBSan. Jam it off.
// This overrides any value chosen above, including one supplied by the build.
#if defined(__has_feature)
#if __has_feature(undefined_behavior_sanitizer)
#undef BASISD_USE_UNALIGNED_WORD_READS
#define BASISD_USE_UNALIGNED_WORD_READS 0
#endif
#endif

#define BASISD_SUPPORTED_BASIS_VERSION (0x13)

// BASISD_SUPPORT_KTX2 and BASISD_SUPPORT_KTX2_ZSTD have no default in this file:
// they must already be defined (to 0 or 1) by the time this point is reached.
#ifndef BASISD_SUPPORT_KTX2
 #error Must have defined BASISD_SUPPORT_KTX2
#endif

#ifndef BASISD_SUPPORT_KTX2_ZSTD
#error Must have defined BASISD_SUPPORT_KTX2_ZSTD
#endif

// Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data.
#ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
 #define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0
#endif

// Per-format support switches. Each defaults to 1 (enabled) unless the build predefines it.
#ifndef BASISD_SUPPORT_DXT1
 #define BASISD_SUPPORT_DXT1 1
#endif

#ifndef BASISD_SUPPORT_DXT5A
 #define BASISD_SUPPORT_DXT5A 1
#endif

// Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript)
#if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7
 #ifndef BASISD_SUPPORT_BC7_MODE5
 #define BASISD_SUPPORT_BC7_MODE5 0
 #endif
#endif // !BASISD_SUPPORT_BC7

// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory than BC1.
#ifndef BASISD_SUPPORT_BC7_MODE5
 #define BASISD_SUPPORT_BC7_MODE5 1
#endif

#ifndef BASISD_SUPPORT_PVRTC1
 #define BASISD_SUPPORT_PVRTC1 1
#endif

#ifndef BASISD_SUPPORT_ETC2_EAC_A8
 #define BASISD_SUPPORT_ETC2_EAC_A8 1
#endif

// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files.
#ifndef BASISD_SUPPORT_UASTC
 #define BASISD_SUPPORT_UASTC 1
#endif

#ifndef BASISD_SUPPORT_ASTC
 #define BASISD_SUPPORT_ASTC 1
#endif

// Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support.
#ifndef BASISD_SUPPORT_ATC
 #define BASISD_SUPPORT_ATC 1
#endif

// Support for ETC2 EAC R11 and ETC2 EAC RG11
#ifndef BASISD_SUPPORT_ETC2_EAC_RG11
 #define BASISD_SUPPORT_ETC2_EAC_RG11 1
#endif

// If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large.
// This impacts grayscale and grayscale+alpha textures the most.
#ifndef BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
 #ifdef __EMSCRIPTEN__
 // Let's assume size matters more than quality when compiling with emscripten.
 #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 0
 #else
 // Compiling native, so an extra 64K lookup table is probably acceptable.
 #define BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY 1
 #endif
#endif

#ifndef BASISD_SUPPORT_FXT1
 #define BASISD_SUPPORT_FXT1 1
#endif

#ifndef BASISD_SUPPORT_PVRTC2
 #define BASISD_SUPPORT_PVRTC2 1
#endif

// Dependency checks between the switches above: PVRTC2 requires ATC, and ATC requires DXT5A.
#if BASISD_SUPPORT_PVRTC2
 #if !BASISD_SUPPORT_ATC
 #error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1
 #endif
#endif

#if BASISD_SUPPORT_ATC
 #if !BASISD_SUPPORT_DXT5A
 #error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1
 #endif
#endif

// NOTE(review): these look like developer-only switches for regenerating the built-in
// lookup tables; all are off here. Confirm against the code that tests them.
#define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0
#define BASISD_WRITE_NEW_DXT1_TABLES 0
#define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0
#define BASISD_WRITE_NEW_ASTC_TABLES 0
#define BASISD_WRITE_NEW_ATC_TABLES 0
#define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0

// When 0, get_debug_flags() always returns 0 and set_debug_flags() stores nothing.
#ifndef BASISD_ENABLE_DEBUG_FLAGS
 #define BASISD_ENABLE_DEBUG_FLAGS 0
#endif

// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header.
#if BASISD_SUPPORT_KTX2
 // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
 #if BASISD_SUPPORT_KTX2_ZSTD
 // We only use two Zstd APIs: ZSTD_decompress() and ZSTD_isError().
 #include <zstd.h>
 #endif
#endif
161
namespace basisu
{
    // Global switch consulted by debug_printf(). As a namespace-scope bool it starts out false.
    bool g_debug_printf;

    // Turns debug_printf() output on (true) or off (false).
    void enable_debug_printf(bool enabled)
    {
        g_debug_printf = enabled;
    }

    // printf-style diagnostic output to stdout.
    // Prints only while g_debug_printf is set; building with BASISU_FORCE_DEVEL_MESSAGES
    // non-zero forces the switch on at every call.
    void debug_printf(const char* pFmt, ...)
    {
#if BASISU_FORCE_DEVEL_MESSAGES
        g_debug_printf = true;
#endif
        if (!g_debug_printf)
            return;

        va_list arg_list;
        va_start(arg_list, pFmt);
        vprintf(pFmt, arg_list);
        va_end(arg_list);
    }
} // namespace basisu
185
186namespace basist
187{
188
#if BASISD_ENABLE_DEBUG_FLAGS
 // Debug flag bits; this variable only exists when debug flags are compiled in.
 static uint32_t g_debug_flags = 0;
#endif

 // Returns the currently stored debug flag bits.
 // Always returns 0 when BASISD_ENABLE_DEBUG_FLAGS is 0.
 uint32_t get_debug_flags()
 {
#if !BASISD_ENABLE_DEBUG_FLAGS
     return 0;
#else
     return g_debug_flags;
#endif
 }
201
202 void set_debug_flags(uint32_t f)
203 {
204 BASISU_NOTE_UNUSED(f);
205#if BASISD_ENABLE_DEBUG_FLAGS
206 g_debug_flags = f;
207#endif
208 }
209
210 inline uint16_t byteswap_uint16(uint16_t v)
211 {
212 return static_cast<uint16_t>((v >> 8) | (v << 8));
213 }
214
215 static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; }
216 static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; }
217 static inline float saturate(float value) { return clampf(value, 0, 1.0f); }
218
219 static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
220
221 uint16_t crc16(const void* r, size_t size, uint16_t crc)
222 {
223 crc = ~crc;
224
225 const uint8_t* p = static_cast<const uint8_t*>(r);
226 for (; size; --size)
227 {
228 const uint16_t q = *p++ ^ (crc >> 8);
229 uint16_t k = (q >> 4) ^ q;
230 crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12);
231 }
232
233 return static_cast<uint16_t>(~crc);
234 }
235
 // Size and bit-layout constants for an 8-byte ETC1 block.
 // The *BitOffset values are bit positions within the block viewed as a big-endian 64-bit
 // integer, which is how decoder_etc_block::set_byte_bits()/get_byte_bits() interpret them.
 enum etc_constants
 {
 cETC1BytesPerBlock = 8U,

 // Each pixel selector is 2 bits wide.
 cETC1SelectorBits = 2U,
 cETC1SelectorValues = 1U << cETC1SelectorBits,
 cETC1SelectorMask = cETC1SelectorValues - 1U,

 cETC1BlockShift = 2U,
 cETC1BlockSize = 1U << cETC1BlockShift,

 // The selector LSBs and MSBs are stored as two separate 16-bit planes.
 cETC1LSBSelectorIndicesBitOffset = 0,
 cETC1MSBSelectorIndicesBitOffset = 16,

 cETC1FlipBitOffset = 32,
 cETC1DiffBitOffset = 33,

 // One 3-bit intensity modifier table index per subblock.
 cETC1IntenModifierNumBits = 3,
 cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
 cETC1RightIntenModifierTableBitOffset = 34,
 cETC1LeftIntenModifierTableBitOffset = 37,

 // Base+Delta encoding (5 bit bases, 3 bit delta)
 cETC1BaseColorCompNumBits = 5,
 cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,

 cETC1DeltaColorCompNumBits = 3,
 cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
 cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,

 cETC1BaseColor5RBitOffset = 59,
 cETC1BaseColor5GBitOffset = 51,
 cETC1BaseColor5BBitOffset = 43,

 cETC1DeltaColor3RBitOffset = 56,
 cETC1DeltaColor3GBitOffset = 48,
 cETC1DeltaColor3BBitOffset = 40,

 // Absolute (non-delta) encoding (two 4-bit per component bases)
 cETC1AbsColorCompNumBits = 4,
 cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,

 cETC1AbsColor4R1BitOffset = 60,
 cETC1AbsColor4G1BitOffset = 52,
 cETC1AbsColor4B1BitOffset = 44,

 cETC1AbsColor4R2BitOffset = 56,
 cETC1AbsColor4G2BitOffset = 48,
 cETC1AbsColor4B2BitOffset = 40,

 // Inclusive range of a signed 3-bit color delta.
 cETC1ColorDeltaMin = -4,
 cETC1ColorDeltaMax = 3,

 // Delta3 (encoded 3-bit value -> signed delta):
 // 0 1 2 3 4 5 6 7
 // 000 001 010 011 100 101 110 111
 // 0 1 2 3 -4 -3 -2 -1
 };
294
// Declares a static table `name` of ETC1 intensity modifiers with every entry multiplied by N.
// Rows are indexed by intensity table index (cETC1IntenModifierValues rows) and columns by
// selector (cETC1SelectorValues columns); each row is ordered from most negative to most positive.
#define DECLARE_ETC1_INTEN_TABLE(name, N) \
 static const int name[cETC1IntenModifierValues][cETC1SelectorValues] = \
 { \
 { N * -8, N * -2, N * 2, N * 8 },{ N * -17, N * -5, N * 5, N * 17 },{ N * -29, N * -9, N * 9, N * 29 },{ N * -42, N * -13, N * 13, N * 42 }, \
 { N * -60, N * -18, N * 18, N * 60 },{ N * -80, N * -24, N * 24, N * 80 },{ N * -106, N * -33, N * 33, N * 106 },{ N * -183, N * -47, N * 47, N * 183 } \
 };

 // Unscaled modifiers, plus copies pre-multiplied by 16 and by 48 (3 * 16).
 DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1);
 DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16);
 DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16);
305
 // Inverse of g_selector_index_to_etc1 (kept for reference only):
 //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
 // Maps a selector that directly indexes g_etc1_inten_tables (0-3) to the raw 2-bit value
 // stored in an ETC1 block.
 const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };

 // Expands a 5-bit color component to 8 bits: entry i equals (i << 3) | (i >> 2).
 static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 };
310
311 struct decoder_etc_block
312 {
313 // big endian uint64:
314 // bit ofs: 56 48 40 32 24 16 8 0
315 // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
316 union
317 {
318 uint64_t m_uint64;
319
320 uint32_t m_uint32[2];
321
322 uint8_t m_bytes[8];
323
324 struct
325 {
326 signed m_dred2 : 3;
327 uint32_t m_red1 : 5;
328
329 signed m_dgreen2 : 3;
330 uint32_t m_green1 : 5;
331
332 signed m_dblue2 : 3;
333 uint32_t m_blue1 : 5;
334
335 uint32_t m_flip : 1;
336 uint32_t m_diff : 1;
337 uint32_t m_cw2 : 3;
338 uint32_t m_cw1 : 3;
339
340 uint32_t m_selectors;
341 } m_differential;
342 };
343
344 inline void clear()
345 {
346 assert(sizeof(*this) == 8);
347 basisu::clear_obj(*this);
348 }
349
350 inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits)
351 {
352 assert((ofs + num) <= 64U);
353 assert(num && (num < 32U));
354 assert((ofs >> 3) == ((ofs + num - 1) >> 3));
355 assert(bits < (1U << num));
356 const uint32_t byte_ofs = 7 - (ofs >> 3);
357 const uint32_t byte_bit_ofs = ofs & 7;
358 const uint32_t mask = (1 << num) - 1;
359 m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
360 m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
361 }
362
363 inline void set_flip_bit(bool flip)
364 {
365 m_bytes[3] &= ~1;
366 m_bytes[3] |= static_cast<uint8_t>(flip);
367 }
368
369 inline void set_diff_bit(bool diff)
370 {
371 m_bytes[3] &= ~2;
372 m_bytes[3] |= (static_cast<uint32_t>(diff) << 1);
373 }
374
375 // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
376 inline void set_inten_table(uint32_t subblock_id, uint32_t t)
377 {
378 assert(subblock_id < 2);
379 assert(t < 8);
380 const uint32_t ofs = subblock_id ? 2 : 5;
381 m_bytes[3] &= ~(7 << ofs);
382 m_bytes[3] |= (t << ofs);
383 }
384
385 // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
386 inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
387 {
388 assert((x | y | val) < 4);
389 const uint32_t bit_index = x * 4 + y;
390
391 uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
392
393 const uint32_t byte_bit_ofs = bit_index & 7;
394 const uint32_t mask = 1 << byte_bit_ofs;
395
396 static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 };
397 const uint32_t etc1_val = s_selector_index_to_etc1[val];
398
399 const uint32_t lsb = etc1_val & 1;
400 const uint32_t msb = etc1_val >> 1;
401
402 p[0] &= ~mask;
403 p[0] |= (lsb << byte_bit_ofs);
404
405 p[-2] &= ~mask;
406 p[-2] |= (msb << byte_bit_ofs);
407 }
408
409 // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector())
410 inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const
411 {
412 assert((x | y) < 4);
413
414 const uint32_t bit_index = x * 4 + y;
415 const uint32_t byte_bit_ofs = bit_index & 7;
416 const uint8_t* p = &m_bytes[7 - (bit_index >> 3)];
417 const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1;
418 const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1;
419 const uint32_t val = lsb | (msb << 1);
420
421 return val;
422 }
423
424 // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
425 inline uint32_t get_selector(uint32_t x, uint32_t y) const
426 {
427 static const uint8_t s_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
428 return s_etc1_to_selector_index[get_raw_selector(x, y)];
429 }
430
431 inline void set_raw_selector_bits(uint32_t bits)
432 {
433 m_bytes[4] = static_cast<uint8_t>(bits);
434 m_bytes[5] = static_cast<uint8_t>(bits >> 8);
435 m_bytes[6] = static_cast<uint8_t>(bits >> 16);
436 m_bytes[7] = static_cast<uint8_t>(bits >> 24);
437 }
438
439 inline bool are_all_selectors_the_same() const
440 {
441 uint32_t v = *reinterpret_cast<const uint32_t*>(&m_bytes[4]);
442
443 if ((v == 0xFFFFFFFF) || (v == 0xFFFF) || (!v) || (v == 0xFFFF0000))
444 return true;
445
446 return false;
447 }
448
449 inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3)
450 {
451 m_bytes[4] = byte0;
452 m_bytes[5] = byte1;
453 m_bytes[6] = byte2;
454 m_bytes[7] = byte3;
455 }
456
457 inline uint32_t get_raw_selector_bits() const
458 {
459 return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24);
460 }
461
462 inline void set_base4_color(uint32_t idx, uint16_t c)
463 {
464 if (idx)
465 {
466 set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
467 set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
468 set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
469 }
470 else
471 {
472 set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
473 set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
474 set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
475 }
476 }
477
478 inline void set_base5_color(uint16_t c)
479 {
480 set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
481 set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
482 set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
483 }
484
485 void set_delta3_color(uint16_t c)
486 {
487 set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
488 set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
489 set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
490 }
491
492 void set_block_color4(const color32& c0_unscaled, const color32& c1_unscaled)
493 {
494 set_diff_bit(false);
495
496 set_base4_color(0, pack_color4(c0_unscaled, false));
497 set_base4_color(1, pack_color4(c1_unscaled, false));
498 }
499
500 void set_block_color5(const color32& c0_unscaled, const color32& c1_unscaled)
501 {
502 set_diff_bit(true);
503
504 set_base5_color(pack_color5(c0_unscaled, false));
505
506 int dr = c1_unscaled.r - c0_unscaled.r;
507 int dg = c1_unscaled.g - c0_unscaled.g;
508 int db = c1_unscaled.b - c0_unscaled.b;
509
510 set_delta3_color(pack_delta3(dr, dg, db));
511 }
512
513 bool set_block_color5_check(const color32& c0_unscaled, const color32& c1_unscaled)
514 {
515 set_diff_bit(true);
516
517 set_base5_color(pack_color5(c0_unscaled, false));
518
519 int dr = c1_unscaled.r - c0_unscaled.r;
520 int dg = c1_unscaled.g - c0_unscaled.g;
521 int db = c1_unscaled.b - c0_unscaled.b;
522
523 if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) ||
524 ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) ||
525 ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax)))
526 return false;
527
528 set_delta3_color(pack_delta3(dr, dg, db));
529
530 return true;
531 }
532
533 inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const
534 {
535 assert((ofs + num) <= 64U);
536 assert(num && (num <= 8U));
537 assert((ofs >> 3) == ((ofs + num - 1) >> 3));
538 const uint32_t byte_ofs = 7 - (ofs >> 3);
539 const uint32_t byte_bit_ofs = ofs & 7;
540 return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
541 }
542
543 inline uint16_t get_base5_color() const
544 {
545 const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
546 const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
547 const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
548 return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
549 }
550
551 inline uint16_t get_base4_color(uint32_t idx) const
552 {
553 uint32_t r, g, b;
554 if (idx)
555 {
556 r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
557 g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
558 b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
559 }
560 else
561 {
562 r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
563 g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
564 b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
565 }
566 return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
567 }
568
569 inline color32 get_base5_color_unscaled() const
570 {
571 return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255);
572 }
573
574 inline bool get_flip_bit() const
575 {
576 return (m_bytes[3] & 1) != 0;
577 }
578
579 inline bool get_diff_bit() const
580 {
581 return (m_bytes[3] & 2) != 0;
582 }
583
584 inline uint32_t get_inten_table(uint32_t subblock_id) const
585 {
586 assert(subblock_id < 2);
587 const uint32_t ofs = subblock_id ? 2 : 5;
588 return (m_bytes[3] >> ofs) & 7;
589 }
590
591 inline uint16_t get_delta3_color() const
592 {
593 const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
594 const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
595 const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
596 return static_cast<uint16_t>(b | (g << 3U) | (r << 6U));
597 }
598
599 void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const
600 {
601 color32 b;
602
603 if (get_diff_bit())
604 {
605 if (subblock_index)
606 unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255);
607 else
608 unpack_color5(b, get_base5_color(), true);
609 }
610 else
611 {
612 b = unpack_color4(get_base4_color(subblock_index), true, 255);
613 }
614
615 const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)];
616
617 pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
618 pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
619 pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
620 pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
621 }
622
623 static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U)
624 {
625 return pack_color4(color.r, color.g, color.b, scaled, bias);
626 }
627
628 static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
629 {
630 if (scaled)
631 {
632 r = (r * 15U + bias) / 255U;
633 g = (g * 15U + bias) / 255U;
634 b = (b * 15U + bias) / 255U;
635 }
636
637 r = basisu::minimum(r, 15U);
638 g = basisu::minimum(g, 15U);
639 b = basisu::minimum(b, 15U);
640
641 return static_cast<uint16_t>(b | (g << 4U) | (r << 8U));
642 }
643
644 static uint16_t pack_color5(const color32& color, bool scaled, uint32_t bias = 127U)
645 {
646 return pack_color5(color.r, color.g, color.b, scaled, bias);
647 }
648
649 static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U)
650 {
651 if (scaled)
652 {
653 r = (r * 31U + bias) / 255U;
654 g = (g * 31U + bias) / 255U;
655 b = (b * 31U + bias) / 255U;
656 }
657
658 r = basisu::minimum(r, 31U);
659 g = basisu::minimum(g, 31U);
660 b = basisu::minimum(b, 31U);
661
662 return static_cast<uint16_t>(b | (g << 5U) | (r << 10U));
663 }
664
665 uint16_t pack_delta3(const color32& color)
666 {
667 return pack_delta3(color.r, color.g, color.b);
668 }
669
670 uint16_t pack_delta3(int r, int g, int b)
671 {
672 assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
673 assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
674 assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
675 if (r < 0) r += 8;
676 if (g < 0) g += 8;
677 if (b < 0) b += 8;
678 return static_cast<uint16_t>(b | (g << 3) | (r << 6));
679 }
680
681 static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3)
682 {
683 r = (packed_delta3 >> 6) & 7;
684 g = (packed_delta3 >> 3) & 7;
685 b = packed_delta3 & 7;
686 if (r >= 4) r -= 8;
687 if (g >= 4) g -= 8;
688 if (b >= 4) b -= 8;
689 }
690
691 static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha)
692 {
693 uint32_t b = packed_color5 & 31U;
694 uint32_t g = (packed_color5 >> 5U) & 31U;
695 uint32_t r = (packed_color5 >> 10U) & 31U;
696
697 if (scaled)
698 {
699 b = (b << 3U) | (b >> 2U);
700 g = (g << 3U) | (g >> 2U);
701 r = (r << 3U) | (r >> 2U);
702 }
703
704 assert(alpha <= 255);
705
706 return color32(cNoClamp, r, g, b, alpha);
707 }
708
709 static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled)
710 {
711 color32 c(unpack_color5(packed_color5, scaled, 0));
712 r = c.r;
713 g = c.g;
714 b = c.b;
715 }
716
717 static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled)
718 {
719 result = unpack_color5(packed_color5, scaled, 255);
720 }
721
722 static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha)
723 {
724 int dr, dg, db;
725 unpack_delta3(dr, dg, db, packed_delta3);
726
727 int r = ((packed_color5 >> 10U) & 31U) + dr;
728 int g = ((packed_color5 >> 5U) & 31U) + dg;
729 int b = (packed_color5 & 31U) + db;
730
731 bool success = true;
732 if (static_cast<uint32_t>(r | g | b) > 31U)
733 {
734 success = false;
735 r = basisu::clamp<int>(r, 0, 31);
736 g = basisu::clamp<int>(g, 0, 31);
737 b = basisu::clamp<int>(b, 0, 31);
738 }
739
740 if (scaled)
741 {
742 b = (b << 3U) | (b >> 2U);
743 g = (g << 3U) | (g >> 2U);
744 r = (r << 3U) | (r >> 2U);
745 }
746
747 result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U));
748 return success;
749 }
750
751 static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha)
752 {
753 uint32_t b = packed_color4 & 15U;
754 uint32_t g = (packed_color4 >> 4U) & 15U;
755 uint32_t r = (packed_color4 >> 8U) & 15U;
756
757 if (scaled)
758 {
759 b = (b << 4U) | b;
760 g = (g << 4U) | g;
761 r = (r << 4U) | r;
762 }
763
764 return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U));
765 }
766
767 static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled)
768 {
769 color32 c(unpack_color4(packed_color4, scaled, 0));
770 r = c.r;
771 g = c.g;
772 b = c.b;
773 }
774
775 static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx)
776 {
777 assert(table_idx < cETC1IntenModifierValues);
778 const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
779
780 uint32_t r, g, b;
781 unpack_color5(r, g, b, packed_color5, true);
782
783 const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
784
785 const int y0 = pInten_modifer_table[0];
786 pDst[0].set(clamp255(ir + y0), clamp255(ig + y0), clamp255(ib + y0), 255);
787
788 const int y1 = pInten_modifer_table[1];
789 pDst[1].set(clamp255(ir + y1), clamp255(ig + y1), clamp255(ib + y1), 255);
790
791 const int y2 = pInten_modifer_table[2];
792 pDst[2].set(clamp255(ir + y2), clamp255(ig + y2), clamp255(ib + y2), 255);
793
794 const int y3 = pInten_modifer_table[3];
795 pDst[3].set(clamp255(ir + y3), clamp255(ig + y3), clamp255(ib + y3), 255);
796 }
797
798 static int clamp255(int x)
799 {
800 if (x & 0xFFFFFF00)
801 {
802 if (x < 0)
803 x = 0;
804 else if (x > 255)
805 x = 255;
806 }
807
808 return x;
809 }
810
811 static void get_block_colors5(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table)
812 {
813 color32 b(base_color5);
814
815 b.r = (b.r << 3) | (b.r >> 2);
816 b.g = (b.g << 3) | (b.g >> 2);
817 b.b = (b.b << 3) | (b.b >> 2);
818
819 const int* pInten_table = g_etc1_inten_tables[inten_table];
820
821 pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255);
822 pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255);
823 pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255);
824 pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255);
825 }
826
827 static void get_block_color5(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t& r, uint32_t &g, uint32_t &b)
828 {
829 assert(index < 4);
830
831 uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
832 uint32_t bg = (base_color5.g << 3) | (base_color5.g >> 2);
833 uint32_t bb = (base_color5.b << 3) | (base_color5.b >> 2);
834
835 const int* pInten_table = g_etc1_inten_tables[inten_table];
836
837 r = clamp255(br + pInten_table[index]);
838 g = clamp255(bg + pInten_table[index]);
839 b = clamp255(bb + pInten_table[index]);
840 }
841
842 static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r)
843 {
844 assert(index < 4);
845
846 uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2);
847
848 const int* pInten_table = g_etc1_inten_tables[inten_table];
849
850 r = clamp255(br + pInten_table[index]);
851 }
852
853 static void get_block_colors5_g(int* pBlock_colors, const color32& base_color5, uint32_t inten_table)
854 {
855 const int g = (base_color5.g << 3) | (base_color5.g >> 2);
856
857 const int* pInten_table = g_etc1_inten_tables[inten_table];
858
859 pBlock_colors[0] = clamp255(g + pInten_table[0]);
860 pBlock_colors[1] = clamp255(g + pInten_table[1]);
861 pBlock_colors[2] = clamp255(g + pInten_table[2]);
862 pBlock_colors[3] = clamp255(g + pInten_table[3]);
863 }
864
865 static void get_block_colors5_bounds(color32* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
866 {
867 color32 b(base_color5);
868
869 b.r = (b.r << 3) | (b.r >> 2);
870 b.g = (b.g << 3) | (b.g >> 2);
871 b.b = (b.b << 3) | (b.b >> 2);
872
873 const int* pInten_table = g_etc1_inten_tables[inten_table];
874
875 pBlock_colors[0].set(clamp255(b.r + pInten_table[l]), clamp255(b.g + pInten_table[l]), clamp255(b.b + pInten_table[l]), 255);
876 pBlock_colors[1].set(clamp255(b.r + pInten_table[h]), clamp255(b.g + pInten_table[h]), clamp255(b.b + pInten_table[h]), 255);
877 }
878
879 static void get_block_colors5_bounds_g(uint32_t* pBlock_colors, const color32& base_color5, uint32_t inten_table, uint32_t l = 0, uint32_t h = 3)
880 {
881 color32 b(base_color5);
882
883 b.g = (b.g << 3) | (b.g >> 2);
884
885 const int* pInten_table = g_etc1_inten_tables[inten_table];
886
887 pBlock_colors[0] = clamp255(b.g + pInten_table[l]);
888 pBlock_colors[1] = clamp255(b.g + pInten_table[h]);
889 }
890 };
891
892 enum dxt_constants
893 {
894 cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U,
895 cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U,
896 };
897
898 static const uint8_t g_etc1_x_selector_unpack[4][256] =
899 {
900 {
901 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
902 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
903 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
904 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
905 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
906 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
907 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
908 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
909 },
910 {
911 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
912 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
913 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
914 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
915 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
916 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
917 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
918 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
919 },
920
921 {
922 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
923 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
924 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
925 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
926 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
927 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
928 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
929 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
930 },
931
932 {
933 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
934 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
935 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
936 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
937 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
938 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
939 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
940 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
941 }
942 };
943
944 struct dxt1_block
945 {
946 enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
947
948 uint8_t m_low_color[cTotalEndpointBytes];
949 uint8_t m_high_color[cTotalEndpointBytes];
950 uint8_t m_selectors[cTotalSelectorBytes];
951
952 inline void clear() { basisu::clear_obj(*this); }
953
954 inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
955 inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
956 inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
957 inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
958 inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; }
959 inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
960
961 static uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U)
962 {
963 uint32_t r = color.r, g = color.g, b = color.b;
964 if (scaled)
965 {
966 r = (r * 31U + bias) / 255U;
967 g = (g * 63U + bias) / 255U;
968 b = (b * 31U + bias) / 255U;
969 }
970 return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
971 }
972
973 static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
974 };
975
	// An inclusive [m_low, m_high] range of selector values. The table generators in this
	// file iterate "for (s = m_low; s <= m_high; s++)" over entries of this type.
	struct dxt_selector_range
	{
		uint32_t m_low;		// first selector in the range
		uint32_t m_high;	// last selector in the range (inclusive)
	};
981
	// One precomputed entry of the ETC1->DXT1 conversion tables (g_etc1_to_dxt_5 / g_etc1_to_dxt_6).
	// The table generators emit each entry as "{best_lo,best_hi,best_err}".
	struct etc1_to_dxt1_56_solution
	{
		uint8_t m_lo;	// best low DXT1 endpoint component (5 or 6 bits, unexpanded)
		uint8_t m_hi;	// best high DXT1 endpoint component (5 or 6 bits, unexpanded)
		uint16_t m_err;	// summed squared error of that endpoint pair (generator asserts it fits in 16 bits)
	};
988
989#if BASISD_SUPPORT_DXT1
	// Inclusive ETC1 selector ranges for which ETC1->DXT1 conversion solutions are precomputed.
	// The position of a range in this array is the "sr" index used when laying out the tables below.
	static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =
	{
		{ 0, 3 },

		{ 1, 3 },
		{ 0, 2 },

		{ 1, 2 },

		{ 2, 3 },
		{ 0, 1 },
	};

	const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_RANGES = sizeof(g_etc1_to_dxt1_selector_ranges) / sizeof(g_etc1_to_dxt1_selector_ranges[0]);

	// Not initialized here; presumably filled at init time to map a (low, high) selector pair
	// to its index in g_etc1_to_dxt1_selector_ranges -- TODO confirm against the init code.
	static uint32_t g_etc1_to_dxt1_selector_range_index[4][4];

	// Candidate ETC1 selector -> DXT1 color mappings. Row m, column s gives the index into the
	// generator's 4-entry color ramp (0 = low endpoint, 1 and 2 = interpolated, 3 = high endpoint)
	// that ETC1 selector s is mapped to.
	const uint32_t NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS = 10;
	static const uint8_t g_etc1_to_dxt1_selector_mappings[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][4] =
	{
		{ 0, 0, 1, 1 },
		{ 0, 0, 1, 2 },
		{ 0, 0, 1, 3 },
		{ 0, 0, 2, 3 },
		{ 0, 1, 1, 1 },
		{ 0, 1, 2, 2 },
		{ 0, 1, 2, 3 },
		{ 0, 2, 3, 3 },
		{ 1, 2, 2, 2 },
		{ 1, 2, 3, 3 },
	};

	// Per-mapping 256-entry byte tables, not initialized here. NOTE(review): presumably translate a
	// packed byte of ETC1 selectors into raw DXT1 selector bits (normal and inverted) -- confirm at the fill site.
	static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];
	static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256];

	// Precomputed {lo, hi, err} solutions for 6-bit DXT1 endpoint components, pulled in from the .inc file.
	// As written by the generator (built only with BASISD_WRITE_NEW_DXT1_TABLES), entries are ordered with
	// ETC1 intensity table (0-7) outermost, then 5-bit base value (0-31), then selector range, then selector mapping innermost.
	static const etc1_to_dxt1_56_solution g_etc1_to_dxt_6[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_dxt1_6.inc"
	};

	// Same layout as g_etc1_to_dxt_6, but for 5-bit DXT1 endpoint components.
	static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_dxt1_5.inc"
	};
1032#endif // BASISD_SUPPORT_DXT1
1033
1034#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
	// First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
	// One lookup result: the pair of endpoint indices chosen by prepare_bc1_single_color_table()
	// for a given 8-bit target value.
	struct bc1_match_entry
	{
		uint8_t m_hi;	// index of the chosen high endpoint
		uint8_t m_lo;	// index of the chosen low endpoint
	};
	// 256-entry tables indexed by 8-bit target value. Not filled in here; presumably populated through
	// prepare_bc1_single_color_table() for 5-bit and 6-bit endpoints -- TODO confirm at the init site.
	static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo
	static bc1_match_entry g_bc1_match5_equals_0[256], g_bc1_match6_equals_0[256]; // selector 0, allow equals hi/lo
1043
1044 static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size0, int size1, int sel)
1045 {
1046 for (int i = 0; i < 256; i++)
1047 {
1048 int lowest_e = 256;
1049 for (int lo = 0; lo < size0; lo++)
1050 {
1051 for (int hi = 0; hi < size1; hi++)
1052 {
1053 const int lo_e = pExpand[lo], hi_e = pExpand[hi];
1054 int e;
1055
1056 if (sel == 1)
1057 {
1058 // Selector 1
1059 e = basisu::iabs(((hi_e * 2 + lo_e) / 3) - i);
1060 e += (basisu::iabs(hi_e - lo_e) * 3) / 100;
1061 }
1062 else
1063 {
1064 assert(sel == 0);
1065
1066 // Selector 0
1067 e = basisu::iabs(hi_e - i);
1068 }
1069
1070 if (e < lowest_e)
1071 {
1072 pTable[i].m_hi = static_cast<uint8_t>(hi);
1073 pTable[i].m_lo = static_cast<uint8_t>(lo);
1074
1075 lowest_e = e;
1076 }
1077
1078 } // hi
1079 } // lo
1080 }
1081 }
1082#endif
1083
1084#if BASISD_WRITE_NEW_DXT1_TABLES
1085 static void create_etc1_to_dxt1_5_conversion_table()
1086 {
1087 FILE* pFile = nullptr;
1088 fopen_s(&pFile, "basisu_transcoder_tables_dxt1_5.inc", "w");
1089
1090 uint32_t n = 0;
1091
1092 for (int inten = 0; inten < 8; inten++)
1093 {
1094 for (uint32_t g = 0; g < 32; g++)
1095 {
1096 color32 block_colors[4];
1097 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1098
1099 for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1100 {
1101 const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1102 const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1103
1104 for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1105 {
1106 uint32_t best_lo = 0;
1107 uint32_t best_hi = 0;
1108 uint64_t best_err = UINT64_MAX;
1109
1110 for (uint32_t hi = 0; hi <= 31; hi++)
1111 {
1112 for (uint32_t lo = 0; lo <= 31; lo++)
1113 {
1114 //if (lo == hi) continue;
1115
1116 uint32_t colors[4];
1117
1118 colors[0] = (lo << 3) | (lo >> 2);
1119 colors[3] = (hi << 3) | (hi >> 2);
1120
1121 colors[1] = (colors[0] * 2 + colors[3]) / 3;
1122 colors[2] = (colors[3] * 2 + colors[0]) / 3;
1123
1124 uint64_t total_err = 0;
1125
1126 for (uint32_t s = low_selector; s <= high_selector; s++)
1127 {
1128 int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1129
1130 total_err += err * err;
1131 }
1132
1133 if (total_err < best_err)
1134 {
1135 best_err = total_err;
1136 best_lo = lo;
1137 best_hi = hi;
1138 }
1139 }
1140 }
1141
1142 assert(best_err <= 0xFFFF);
1143
1144 //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1145 //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1146 //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1147
1148 //assert(best_lo != best_hi);
1149 fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1150 n++;
1151 if ((n & 31) == 31)
1152 fprintf(pFile, "\n");
1153 } // m
1154 } // sr
1155 } // g
1156 } // inten
1157
1158 fclose(pFile);
1159 }
1160
1161 static void create_etc1_to_dxt1_6_conversion_table()
1162 {
1163 FILE* pFile = nullptr;
1164 fopen_s(&pFile, "basisu_transcoder_tables_dxt1_6.inc", "w");
1165
1166 uint32_t n = 0;
1167
1168 for (int inten = 0; inten < 8; inten++)
1169 {
1170 for (uint32_t g = 0; g < 32; g++)
1171 {
1172 color32 block_colors[4];
1173 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
1174
1175 for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
1176 {
1177 const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
1178 const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;
1179
1180 for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
1181 {
1182 uint32_t best_lo = 0;
1183 uint32_t best_hi = 0;
1184 uint64_t best_err = UINT64_MAX;
1185
1186 for (uint32_t hi = 0; hi <= 63; hi++)
1187 {
1188 for (uint32_t lo = 0; lo <= 63; lo++)
1189 {
1190 //if (lo == hi) continue;
1191
1192 uint32_t colors[4];
1193
1194 colors[0] = (lo << 2) | (lo >> 4);
1195 colors[3] = (hi << 2) | (hi >> 4);
1196
1197 colors[1] = (colors[0] * 2 + colors[3]) / 3;
1198 colors[2] = (colors[3] * 2 + colors[0]) / 3;
1199
1200 uint64_t total_err = 0;
1201
1202 for (uint32_t s = low_selector; s <= high_selector; s++)
1203 {
1204 int err = block_colors[s].g - colors[g_etc1_to_dxt1_selector_mappings[m][s]];
1205
1206 total_err += err * err;
1207 }
1208
1209 if (total_err < best_err)
1210 {
1211 best_err = total_err;
1212 best_lo = lo;
1213 best_hi = hi;
1214 }
1215 }
1216 }
1217
1218 assert(best_err <= 0xFFFF);
1219
1220 //table[g + inten * 32].m_solutions[sr][m].m_lo = static_cast<uint8_t>(best_lo);
1221 //table[g + inten * 32].m_solutions[sr][m].m_hi = static_cast<uint8_t>(best_hi);
1222 //table[g + inten * 32].m_solutions[sr][m].m_err = static_cast<uint16_t>(best_err);
1223
1224 //assert(best_lo != best_hi);
1225 fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
1226 n++;
1227 if ((n & 31) == 31)
1228 fprintf(pFile, "\n");
1229
1230 } // m
1231 } // sr
1232 } // g
1233 } // inten
1234
1235 fclose(pFile);
1236 }
1237#endif
1238
1239
1240#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
	// EAC modifier tables: 16 tables of 8 signed modifiers, indexed [table][selector].
	// The exhaustive EAC A8 packer in this file decodes a pixel as
	// base + multiplier * g_eac_modifier_table[table][selector], clamped to [0, 255].
	static const int8_t g_eac_modifier_table[16][8] =
	{
		{ -3, -6, -9, -15, 2, 5, 8, 14 },
		{ -3, -7, -10, -13, 2, 6, 9, 12 },
		{ -2, -5, -8, -13, 1, 4, 7, 12 },
		{ -2, -4, -6, -13, 1, 3, 5, 12 },
		{ -3, -6, -8, -12, 2, 5, 7, 11 },
		{ -3, -7, -9, -11, 2, 6, 8, 10 },
		{ -4, -7, -8, -11, 3, 6, 7, 10 },
		{ -3, -5, -8, -11, 2, 4, 7, 10 },

		{ -2, -6, -8, -10, 1, 5, 7, 9 },
		{ -2, -5, -8, -10, 1, 4, 7, 9 },
		{ -2, -4, -8, -10, 1, 3, 7, 9 },
		{ -2, -5, -7, -10, 1, 4, 6, 9 },
		{ -3, -4, -7, -10, 2, 3, 6, 9 },
		{ -1, -2, -3, -10, 0, 1, 2, 9 }, // entry 13
		{ -4, -6, -8, -9, 3, 5, 7, 8 },
		{ -3, -5, -7, -9, 2, 4, 6, 8 }
	};
1261
1262 // Used by ETC2 EAC A8 and ETC2 EAC R11/RG11.
1263 struct eac_block
1264 {
1265 uint16_t m_base : 8;
1266
1267 uint16_t m_table : 4;
1268 uint16_t m_multiplier : 4;
1269
1270 uint8_t m_selectors[6];
1271
1272 uint32_t get_selector(uint32_t x, uint32_t y) const
1273 {
1274 assert((x < 4) && (y < 4));
1275
1276 const uint32_t ofs = 45 - (y + x * 4) * 3;
1277
1278 const uint64_t pixels = get_selector_bits();
1279
1280 return (pixels >> ofs) & 7;
1281 }
1282
1283 void set_selector(uint32_t x, uint32_t y, uint32_t s)
1284 {
1285 assert((x < 4) && (y < 4) && (s < 8));
1286
1287 const uint32_t ofs = 45 - (y + x * 4) * 3;
1288
1289 uint64_t pixels = get_selector_bits();
1290
1291 pixels &= ~(7ULL << ofs);
1292 pixels |= (static_cast<uint64_t>(s) << ofs);
1293
1294 set_selector_bits(pixels);
1295 }
1296
1297 uint64_t get_selector_bits() const
1298 {
1299 uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) |
1300 ((uint64_t)m_selectors[2] << 24) |
1301 ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5];
1302 return pixels;
1303 }
1304
1305 void set_selector_bits(uint64_t pixels)
1306 {
1307 m_selectors[0] = (uint8_t)(pixels >> 40);
1308 m_selectors[1] = (uint8_t)(pixels >> 32);
1309 m_selectors[2] = (uint8_t)(pixels >> 24);
1310 m_selectors[3] = (uint8_t)(pixels >> 16);
1311 m_selectors[4] = (uint8_t)(pixels >> 8);
1312 m_selectors[5] = (uint8_t)(pixels);
1313 }
1314 };
1315
#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1317
1318#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
	// Inclusive ETC1 selector ranges for which ETC1 -> ETC2 EAC conversion entries are precomputed.
	// The position of a range in this array is the second index of the conversion tables.
	static const dxt_selector_range s_etc2_eac_selector_ranges[] =
	{
		{ 0, 3 },

		{ 1, 3 },
		{ 0, 2 },

		{ 1, 2 },
	};

	const uint32_t NUM_ETC2_EAC_SELECTOR_RANGES = sizeof(s_etc2_eac_selector_ranges) / sizeof(s_etc2_eac_selector_ranges[0]);

	// One precomputed ETC1 -> EAC conversion: the EAC base/table/multiplier to use, plus a
	// translation from each ETC1 selector to an EAC selector.
	struct etc1_g_to_eac_conversion
	{
		uint8_t m_base;			// EAC base value
		uint8_t m_table_mul;	// table*16+mul, as written by create_etc2_eac_a8_conversion_table() (table index in the high nibble, multiplier in the low nibble)
		uint16_t m_trans;		// translates ETC1 selectors to ETC2_EAC_A8: the EAC selector for ETC1 selector s is stored at bits [s*3, s*3+2]
	};
1337#endif // BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11
1338
1339#if BASISD_SUPPORT_ETC2_EAC_A8
1340
1341#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
	// Output of pack_eac_a8_exhaustive(): the best EAC parameters found and the per-pixel selectors that go with them.
	struct pack_eac_a8_results
	{
		uint32_t m_base;		// best base value (0-255)
		uint32_t m_table;		// best modifier table index (0-15)
		uint32_t m_multiplier;	// best multiplier (1-15)
		basisu::vector<uint8_t> m_selectors;		// one selector (0-7) per input pixel for the best parameters
		basisu::vector<uint8_t> m_selectors_temp;	// scratch: selectors of the candidate currently being evaluated
	};
1350
1351 static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels)
1352 {
1353 results.m_selectors.resize(num_pixels);
1354 results.m_selectors_temp.resize(num_pixels);
1355
1356 uint64_t best_err = UINT64_MAX;
1357
1358 for (uint32_t base_color = 0; base_color < 256; base_color++)
1359 {
1360 for (uint32_t multiplier = 1; multiplier < 16; multiplier++)
1361 {
1362 for (uint32_t table = 0; table < 16; table++)
1363 {
1364 uint64_t total_err = 0;
1365
1366 for (uint32_t i = 0; i < num_pixels; i++)
1367 {
1368 const int a = pPixels[i];
1369
1370 uint32_t best_s_err = UINT32_MAX;
1371 uint32_t best_s = 0;
1372 for (uint32_t s = 0; s < 8; s++)
1373 {
1374 int v = (int)multiplier * g_eac_modifier_table[table][s] + (int)base_color;
1375 if (v < 0)
1376 v = 0;
1377 else if (v > 255)
1378 v = 255;
1379
1380 uint32_t err = abs(a - v);
1381 if (err < best_s_err)
1382 {
1383 best_s_err = err;
1384 best_s = s;
1385 }
1386 }
1387
1388 results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);
1389
1390 total_err += best_s_err * best_s_err;
1391 if (total_err >= best_err)
1392 break;
1393 }
1394
1395 if (total_err < best_err)
1396 {
1397 best_err = total_err;
1398 results.m_base = base_color;
1399 results.m_multiplier = multiplier;
1400 results.m_table = table;
1401 results.m_selectors.swap(results.m_selectors_temp);
1402 }
1403
1404 } // table
1405
1406 } // multiplier
1407
1408 } // base_color
1409
1410 return best_err;
1411 }
1412#endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1413
	// Precomputed ETC1 -> ETC2 EAC A8 conversion table.
	// First index: base + inten * 32 (5-bit ETC1 base value 0-31, ETC1 intensity table 0-7).
	// Second index: selector range (see s_etc2_eac_selector_ranges).
	// Each entry is { m_base, m_table_mul, m_trans }.
	// The table is const unless BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES is set, in which case
	// create_etc2_eac_a8_conversion_table() recomputes the entries in place.
	static
#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
	const
#endif
	etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
	{
		{ { 0,1,3328 },{ 0,1,3328 },{ 0,1,256 },{ 0,1,256 } },
		{ { 0,226,3936 },{ 0,226,3936 },{ 0,81,488 },{ 0,81,488 } },
		{ { 6,178,4012 },{ 6,178,4008 },{ 0,146,501 },{ 0,130,496 } },
		{ { 14,178,4012 },{ 14,178,4008 },{ 8,146,501 },{ 6,82,496 } },
		{ { 23,178,4012 },{ 23,178,4008 },{ 17,146,501 },{ 3,228,496 } },
		{ { 31,178,4012 },{ 31,178,4008 },{ 25,146,501 },{ 11,228,496 } },
		{ { 39,178,4012 },{ 39,178,4008 },{ 33,146,501 },{ 19,228,496 } },
		{ { 47,178,4012 },{ 47,178,4008 },{ 41,146,501 },{ 27,228,496 } },
		{ { 56,178,4012 },{ 56,178,4008 },{ 50,146,501 },{ 36,228,496 } },
		{ { 64,178,4012 },{ 64,178,4008 },{ 58,146,501 },{ 44,228,496 } },
		{ { 72,178,4012 },{ 72,178,4008 },{ 66,146,501 },{ 52,228,496 } },
		{ { 80,178,4012 },{ 80,178,4008 },{ 74,146,501 },{ 60,228,496 } },
		{ { 89,178,4012 },{ 89,178,4008 },{ 83,146,501 },{ 69,228,496 } },
		{ { 97,178,4012 },{ 97,178,4008 },{ 91,146,501 },{ 77,228,496 } },
		{ { 105,178,4012 },{ 105,178,4008 },{ 99,146,501 },{ 85,228,496 } },
		{ { 113,178,4012 },{ 113,178,4008 },{ 107,146,501 },{ 93,228,496 } },
		{ { 122,178,4012 },{ 122,178,4008 },{ 116,146,501 },{ 102,228,496 } },
		{ { 130,178,4012 },{ 130,178,4008 },{ 124,146,501 },{ 110,228,496 } },
		{ { 138,178,4012 },{ 138,178,4008 },{ 132,146,501 },{ 118,228,496 } },
		{ { 146,178,4012 },{ 146,178,4008 },{ 140,146,501 },{ 126,228,496 } },
		{ { 155,178,4012 },{ 155,178,4008 },{ 149,146,501 },{ 135,228,496 } },
		{ { 163,178,4012 },{ 163,178,4008 },{ 157,146,501 },{ 143,228,496 } },
		{ { 171,178,4012 },{ 171,178,4008 },{ 165,146,501 },{ 151,228,496 } },
		{ { 179,178,4012 },{ 179,178,4008 },{ 173,146,501 },{ 159,228,496 } },
		{ { 188,178,4012 },{ 188,178,4008 },{ 182,146,501 },{ 168,228,496 } },
		{ { 196,178,4012 },{ 196,178,4008 },{ 190,146,501 },{ 176,228,496 } },
		{ { 204,178,4012 },{ 204,178,4008 },{ 198,146,501 },{ 184,228,496 } },
		{ { 212,178,4012 },{ 212,178,4008 },{ 206,146,501 },{ 192,228,496 } },
		{ { 221,178,4012 },{ 221,178,4008 },{ 215,146,501 },{ 201,228,496 } },
		{ { 229,178,4012 },{ 229,178,4008 },{ 223,146,501 },{ 209,228,496 } },
		{ { 235,66,4012 },{ 221,100,4008 },{ 231,146,501 },{ 217,228,496 } },
		{ { 211,102,4085 },{ 118,31,4080 },{ 211,102,501 },{ 118,31,496 } },
		{ { 1,2,3328 },{ 1,2,3328 },{ 0,1,320 },{ 0,1,320 } },
		{ { 7,162,3905 },{ 7,162,3904 },{ 1,17,480 },{ 1,17,480 } },
		{ { 15,162,3906 },{ 15,162,3904 },{ 1,117,352 },{ 1,117,352 } },
		{ { 23,162,3906 },{ 23,162,3904 },{ 5,34,500 },{ 4,53,424 } },
		{ { 32,162,3906 },{ 32,162,3904 },{ 14,34,500 },{ 3,69,424 } },
		{ { 40,162,3906 },{ 40,162,3904 },{ 22,34,500 },{ 1,133,496 } },
		{ { 48,162,3906 },{ 48,162,3904 },{ 30,34,500 },{ 4,85,496 } },
		{ { 56,162,3906 },{ 56,162,3904 },{ 38,34,500 },{ 12,85,496 } },
		{ { 65,162,3906 },{ 65,162,3904 },{ 47,34,500 },{ 1,106,424 } },
		{ { 73,162,3906 },{ 73,162,3904 },{ 55,34,500 },{ 9,106,424 } },
		{ { 81,162,3906 },{ 81,162,3904 },{ 63,34,500 },{ 7,234,496 } },
		{ { 89,162,3906 },{ 89,162,3904 },{ 71,34,500 },{ 15,234,496 } },
		{ { 98,162,3906 },{ 98,162,3904 },{ 80,34,500 },{ 24,234,496 } },
		{ { 106,162,3906 },{ 106,162,3904 },{ 88,34,500 },{ 32,234,496 } },
		{ { 114,162,3906 },{ 114,162,3904 },{ 96,34,500 },{ 40,234,496 } },
		{ { 122,162,3906 },{ 122,162,3904 },{ 104,34,500 },{ 48,234,496 } },
		{ { 131,162,3906 },{ 131,162,3904 },{ 113,34,500 },{ 57,234,496 } },
		{ { 139,162,3906 },{ 139,162,3904 },{ 121,34,500 },{ 65,234,496 } },
		{ { 147,162,3906 },{ 147,162,3904 },{ 129,34,500 },{ 73,234,496 } },
		{ { 155,162,3906 },{ 155,162,3904 },{ 137,34,500 },{ 81,234,496 } },
		{ { 164,162,3906 },{ 164,162,3904 },{ 146,34,500 },{ 90,234,496 } },
		{ { 172,162,3906 },{ 172,162,3904 },{ 154,34,500 },{ 98,234,496 } },
		{ { 180,162,3906 },{ 180,162,3904 },{ 162,34,500 },{ 106,234,496 } },
		{ { 188,162,3906 },{ 188,162,3904 },{ 170,34,500 },{ 114,234,496 } },
		{ { 197,162,3906 },{ 197,162,3904 },{ 179,34,500 },{ 123,234,496 } },
		{ { 205,162,3906 },{ 205,162,3904 },{ 187,34,500 },{ 131,234,496 } },
		{ { 213,162,3906 },{ 213,162,3904 },{ 195,34,500 },{ 139,234,496 } },
		{ { 221,162,3906 },{ 221,162,3904 },{ 203,34,500 },{ 147,234,496 } },
		{ { 230,162,3906 },{ 230,162,3904 },{ 212,34,500 },{ 156,234,496 } },
		{ { 238,162,3906 },{ 174,106,4008 },{ 220,34,500 },{ 164,234,496 } },
		{ { 240,178,4001 },{ 182,106,4008 },{ 228,34,500 },{ 172,234,496 } },
		{ { 166,108,4085 },{ 115,31,4080 },{ 166,108,501 },{ 115,31,496 } },
		{ { 1,68,3328 },{ 1,68,3328 },{ 0,17,384 },{ 0,17,384 } },
		{ { 1,148,3904 },{ 1,148,3904 },{ 1,2,384 },{ 1,2,384 } },
		{ { 21,18,3851 },{ 21,18,3848 },{ 1,50,488 },{ 1,50,488 } },
		{ { 27,195,3851 },{ 29,18,3848 },{ 0,67,488 },{ 0,67,488 } },
		{ { 34,195,3907 },{ 38,18,3848 },{ 20,66,482 },{ 0,3,496 } },
		{ { 42,195,3907 },{ 46,18,3848 },{ 28,66,482 },{ 2,6,424 } },
		{ { 50,195,3907 },{ 54,18,3848 },{ 36,66,482 },{ 4,22,424 } },
		{ { 58,195,3907 },{ 62,18,3848 },{ 44,66,482 },{ 3,73,424 } },
		{ { 67,195,3907 },{ 71,18,3848 },{ 53,66,482 },{ 3,22,496 } },
		{ { 75,195,3907 },{ 79,18,3848 },{ 61,66,482 },{ 2,137,496 } },
		{ { 83,195,3907 },{ 87,18,3848 },{ 69,66,482 },{ 1,89,496 } },
		{ { 91,195,3907 },{ 95,18,3848 },{ 77,66,482 },{ 9,89,496 } },
		{ { 100,195,3907 },{ 104,18,3848 },{ 86,66,482 },{ 18,89,496 } },
		{ { 108,195,3907 },{ 112,18,3848 },{ 94,66,482 },{ 26,89,496 } },
		{ { 116,195,3907 },{ 120,18,3848 },{ 102,66,482 },{ 34,89,496 } },
		{ { 124,195,3907 },{ 128,18,3848 },{ 110,66,482 },{ 42,89,496 } },
		{ { 133,195,3907 },{ 137,18,3848 },{ 119,66,482 },{ 51,89,496 } },
		{ { 141,195,3907 },{ 145,18,3848 },{ 127,66,482 },{ 59,89,496 } },
		{ { 149,195,3907 },{ 153,18,3848 },{ 135,66,482 },{ 67,89,496 } },
		{ { 157,195,3907 },{ 161,18,3848 },{ 143,66,482 },{ 75,89,496 } },
		{ { 166,195,3907 },{ 170,18,3848 },{ 152,66,482 },{ 84,89,496 } },
		{ { 174,195,3907 },{ 178,18,3848 },{ 160,66,482 },{ 92,89,496 } },
		{ { 182,195,3907 },{ 186,18,3848 },{ 168,66,482 },{ 100,89,496 } },
		{ { 190,195,3907 },{ 194,18,3848 },{ 176,66,482 },{ 108,89,496 } },
		{ { 199,195,3907 },{ 203,18,3848 },{ 185,66,482 },{ 117,89,496 } },
		{ { 207,195,3907 },{ 211,18,3848 },{ 193,66,482 },{ 125,89,496 } },
		{ { 215,195,3907 },{ 219,18,3848 },{ 201,66,482 },{ 133,89,496 } },
		{ { 223,195,3907 },{ 227,18,3848 },{ 209,66,482 },{ 141,89,496 } },
		{ { 231,195,3907 },{ 168,89,4008 },{ 218,66,482 },{ 150,89,496 } },
		{ { 236,18,3907 },{ 176,89,4008 },{ 226,66,482 },{ 158,89,496 } },
		{ { 158,90,4085 },{ 103,31,4080 },{ 158,90,501 },{ 103,31,496 } },
		{ { 166,90,4085 },{ 111,31,4080 },{ 166,90,501 },{ 111,31,496 } },
		{ { 0,70,3328 },{ 0,70,3328 },{ 0,45,256 },{ 0,45,256 } },
		{ { 0,117,3904 },{ 0,117,3904 },{ 0,35,384 },{ 0,35,384 } },
		{ { 13,165,3905 },{ 13,165,3904 },{ 3,221,416 },{ 3,221,416 } },
		{ { 21,165,3906 },{ 21,165,3904 },{ 11,221,416 },{ 11,221,416 } },
		{ { 30,165,3906 },{ 30,165,3904 },{ 7,61,352 },{ 7,61,352 } },
		{ { 38,165,3906 },{ 38,165,3904 },{ 2,125,352 },{ 2,125,352 } },
		{ { 46,165,3906 },{ 46,165,3904 },{ 2,37,500 },{ 10,125,352 } },
		{ { 54,165,3906 },{ 54,165,3904 },{ 10,37,500 },{ 5,61,424 } },
		{ { 63,165,3906 },{ 63,165,3904 },{ 19,37,500 },{ 1,189,424 } },
		{ { 4,254,4012 },{ 71,165,3904 },{ 27,37,500 },{ 9,189,424 } },
		{ { 12,254,4012 },{ 79,165,3904 },{ 35,37,500 },{ 4,77,424 } },
		{ { 20,254,4012 },{ 87,165,3904 },{ 43,37,500 },{ 12,77,424 } },
		{ { 29,254,4012 },{ 96,165,3904 },{ 52,37,500 },{ 8,93,424 } },
		{ { 37,254,4012 },{ 104,165,3904 },{ 60,37,500 },{ 3,141,496 } },
		{ { 45,254,4012 },{ 112,165,3904 },{ 68,37,500 },{ 11,141,496 } },
		{ { 53,254,4012 },{ 120,165,3904 },{ 76,37,500 },{ 6,93,496 } },
		{ { 62,254,4012 },{ 129,165,3904 },{ 85,37,500 },{ 15,93,496 } },
		{ { 70,254,4012 },{ 137,165,3904 },{ 93,37,500 },{ 23,93,496 } },
		{ { 78,254,4012 },{ 145,165,3904 },{ 101,37,500 },{ 31,93,496 } },
		{ { 86,254,4012 },{ 153,165,3904 },{ 109,37,500 },{ 39,93,496 } },
		{ { 95,254,4012 },{ 162,165,3904 },{ 118,37,500 },{ 48,93,496 } },
		{ { 103,254,4012 },{ 170,165,3904 },{ 126,37,500 },{ 56,93,496 } },
		{ { 111,254,4012 },{ 178,165,3904 },{ 134,37,500 },{ 64,93,496 } },
		{ { 119,254,4012 },{ 186,165,3904 },{ 142,37,500 },{ 72,93,496 } },
		{ { 128,254,4012 },{ 195,165,3904 },{ 151,37,500 },{ 81,93,496 } },
		{ { 136,254,4012 },{ 203,165,3904 },{ 159,37,500 },{ 89,93,496 } },
		{ { 212,165,3906 },{ 136,77,4008 },{ 167,37,500 },{ 97,93,496 } },
		{ { 220,165,3394 },{ 131,93,4008 },{ 175,37,500 },{ 105,93,496 } },
		{ { 214,181,4001 },{ 140,93,4008 },{ 184,37,500 },{ 114,93,496 } },
		{ { 222,181,4001 },{ 148,93,4008 },{ 192,37,500 },{ 122,93,496 } },
		{ { 114,95,4085 },{ 99,31,4080 },{ 114,95,501 },{ 99,31,496 } },
		{ { 122,95,4085 },{ 107,31,4080 },{ 122,95,501 },{ 107,31,496 } },
		{ { 0,102,3840 },{ 0,102,3840 },{ 0,18,384 },{ 0,18,384 } },
		{ { 5,167,3904 },{ 5,167,3904 },{ 0,13,256 },{ 0,13,256 } },
		{ { 4,54,3968 },{ 4,54,3968 },{ 1,67,448 },{ 1,67,448 } },
		{ { 30,198,3850 },{ 30,198,3848 },{ 0,3,480 },{ 0,3,480 } },
		{ { 39,198,3850 },{ 39,198,3848 },{ 3,52,488 },{ 3,52,488 } },
		{ { 47,198,3851 },{ 47,198,3848 },{ 3,4,488 },{ 3,4,488 } },
		{ { 55,198,3851 },{ 55,198,3848 },{ 1,70,488 },{ 1,70,488 } },
		{ { 54,167,3906 },{ 63,198,3848 },{ 3,22,488 },{ 3,22,488 } },
		{ { 62,167,3906 },{ 72,198,3848 },{ 24,118,488 },{ 0,6,496 } },
		{ { 70,167,3906 },{ 80,198,3848 },{ 32,118,488 },{ 2,89,488 } },
		{ { 78,167,3906 },{ 88,198,3848 },{ 40,118,488 },{ 1,73,496 } },
		{ { 86,167,3906 },{ 96,198,3848 },{ 48,118,488 },{ 0,28,424 } },
		{ { 95,167,3906 },{ 105,198,3848 },{ 57,118,488 },{ 9,28,424 } },
		{ { 103,167,3906 },{ 113,198,3848 },{ 65,118,488 },{ 5,108,496 } },
		{ { 111,167,3906 },{ 121,198,3848 },{ 73,118,488 },{ 13,108,496 } },
		{ { 119,167,3906 },{ 129,198,3848 },{ 81,118,488 },{ 21,108,496 } },
		{ { 128,167,3906 },{ 138,198,3848 },{ 90,118,488 },{ 6,28,496 } },
		{ { 136,167,3906 },{ 146,198,3848 },{ 98,118,488 },{ 14,28,496 } },
		{ { 144,167,3906 },{ 154,198,3848 },{ 106,118,488 },{ 22,28,496 } },
		{ { 152,167,3906 },{ 162,198,3848 },{ 114,118,488 },{ 30,28,496 } },
		{ { 161,167,3906 },{ 171,198,3848 },{ 123,118,488 },{ 39,28,496 } },
		{ { 169,167,3906 },{ 179,198,3848 },{ 131,118,488 },{ 47,28,496 } },
		{ { 177,167,3906 },{ 187,198,3848 },{ 139,118,488 },{ 55,28,496 } },
		{ { 185,167,3906 },{ 195,198,3848 },{ 147,118,488 },{ 63,28,496 } },
		{ { 194,167,3906 },{ 120,12,4008 },{ 156,118,488 },{ 72,28,496 } },
		{ { 206,198,3907 },{ 116,28,4008 },{ 164,118,488 },{ 80,28,496 } },
		{ { 214,198,3907 },{ 124,28,4008 },{ 172,118,488 },{ 88,28,496 } },
		{ { 222,198,3395 },{ 132,28,4008 },{ 180,118,488 },{ 96,28,496 } },
		{ { 207,134,4001 },{ 141,28,4008 },{ 189,118,488 },{ 105,28,496 } },
		{ { 95,30,4085 },{ 86,31,4080 },{ 95,30,501 },{ 86,31,496 } },
		{ { 103,30,4085 },{ 94,31,4080 },{ 103,30,501 },{ 94,31,496 } },
		{ { 111,30,4085 },{ 102,31,4080 },{ 111,30,501 },{ 102,31,496 } },
		{ { 0,104,3840 },{ 0,104,3840 },{ 0,18,448 },{ 0,18,448 } },
		{ { 4,39,3904 },{ 4,39,3904 },{ 0,4,384 },{ 0,4,384 } },
		{ { 0,56,3968 },{ 0,56,3968 },{ 0,84,448 },{ 0,84,448 } },
		{ { 6,110,3328 },{ 6,110,3328 },{ 0,20,448 },{ 0,20,448 } },
		{ { 41,200,3850 },{ 41,200,3848 },{ 1,4,480 },{ 1,4,480 } },
		{ { 49,200,3850 },{ 49,200,3848 },{ 1,8,416 },{ 1,8,416 } },
		{ { 57,200,3851 },{ 57,200,3848 },{ 1,38,488 },{ 1,38,488 } },
		{ { 65,200,3851 },{ 65,200,3848 },{ 1,120,488 },{ 1,120,488 } },
		{ { 74,200,3851 },{ 74,200,3848 },{ 2,72,488 },{ 2,72,488 } },
		{ { 69,6,3907 },{ 82,200,3848 },{ 2,24,488 },{ 2,24,488 } },
		{ { 77,6,3907 },{ 90,200,3848 },{ 26,120,488 },{ 10,24,488 } },
		{ { 97,63,3330 },{ 98,200,3848 },{ 34,120,488 },{ 2,8,496 } },
		{ { 106,63,3330 },{ 107,200,3848 },{ 43,120,488 },{ 3,92,488 } },
		{ { 114,63,3330 },{ 115,200,3848 },{ 51,120,488 },{ 11,92,488 } },
		{ { 122,63,3330 },{ 123,200,3848 },{ 59,120,488 },{ 7,76,496 } },
		{ { 130,63,3330 },{ 131,200,3848 },{ 67,120,488 },{ 15,76,496 } },
		{ { 139,63,3330 },{ 140,200,3848 },{ 76,120,488 },{ 24,76,496 } },
		{ { 147,63,3330 },{ 148,200,3848 },{ 84,120,488 },{ 32,76,496 } },
		{ { 155,63,3330 },{ 156,200,3848 },{ 92,120,488 },{ 40,76,496 } },
		{ { 163,63,3330 },{ 164,200,3848 },{ 100,120,488 },{ 48,76,496 } },
		{ { 172,63,3330 },{ 173,200,3848 },{ 109,120,488 },{ 57,76,496 } },
		{ { 184,6,3851 },{ 181,200,3848 },{ 117,120,488 },{ 65,76,496 } },
		{ { 192,6,3851 },{ 133,28,3936 },{ 125,120,488 },{ 73,76,496 } },
		{ { 189,200,3907 },{ 141,28,3936 },{ 133,120,488 },{ 81,76,496 } },
		{ { 198,200,3907 },{ 138,108,4000 },{ 142,120,488 },{ 90,76,496 } },
		{ { 206,200,3907 },{ 146,108,4000 },{ 150,120,488 },{ 98,76,496 } },
		{ { 214,200,3395 },{ 154,108,4000 },{ 158,120,488 },{ 106,76,496 } },
		{ { 190,136,4001 },{ 162,108,4000 },{ 166,120,488 },{ 114,76,496 } },
		{ { 123,30,4076 },{ 87,15,4080 },{ 123,30,492 },{ 87,15,496 } },
		{ { 117,110,4084 },{ 80,31,4080 },{ 117,110,500 },{ 80,31,496 } },
		{ { 125,110,4084 },{ 88,31,4080 },{ 125,110,500 },{ 88,31,496 } },
		{ { 133,110,4084 },{ 96,31,4080 },{ 133,110,500 },{ 96,31,496 } },
		{ { 9,56,3904 },{ 9,56,3904 },{ 0,67,448 },{ 0,67,448 } },
		{ { 1,8,3904 },{ 1,8,3904 },{ 1,84,448 },{ 1,84,448 } },
		{ { 1,124,3904 },{ 1,124,3904 },{ 0,39,384 },{ 0,39,384 } },
		{ { 9,124,3904 },{ 9,124,3904 },{ 1,4,448 },{ 1,4,448 } },
		{ { 6,76,3904 },{ 6,76,3904 },{ 0,70,448 },{ 0,70,448 } },
		{ { 62,6,3859 },{ 62,6,3856 },{ 2,38,480 },{ 2,38,480 } },
		{ { 70,6,3859 },{ 70,6,3856 },{ 5,43,416 },{ 5,43,416 } },
		{ { 78,6,3859 },{ 78,6,3856 },{ 2,11,416 },{ 2,11,416 } },
		{ { 87,6,3859 },{ 87,6,3856 },{ 0,171,488 },{ 0,171,488 } },
		{ { 67,8,3906 },{ 95,6,3856 },{ 8,171,488 },{ 8,171,488 } },
		{ { 75,8,3907 },{ 103,6,3856 },{ 5,123,488 },{ 5,123,488 } },
		{ { 83,8,3907 },{ 111,6,3856 },{ 2,75,488 },{ 2,75,488 } },
		{ { 92,8,3907 },{ 120,6,3856 },{ 0,27,488 },{ 0,27,488 } },
		{ { 100,8,3907 },{ 128,6,3856 },{ 8,27,488 },{ 8,27,488 } },
		{ { 120,106,3843 },{ 136,6,3856 },{ 100,6,387 },{ 16,27,488 } },
		{ { 128,106,3843 },{ 144,6,3856 },{ 108,6,387 },{ 2,11,496 } },
		{ { 137,106,3843 },{ 153,6,3856 },{ 117,6,387 },{ 11,11,496 } },
		{ { 145,106,3843 },{ 161,6,3856 },{ 125,6,387 },{ 19,11,496 } },
		{ { 163,8,3851 },{ 137,43,3904 },{ 133,6,387 },{ 27,11,496 } },
		{ { 171,8,3851 },{ 101,11,4000 },{ 141,6,387 },{ 35,11,496 } },
		{ { 180,8,3851 },{ 110,11,4000 },{ 150,6,387 },{ 44,11,496 } },
		{ { 188,8,3851 },{ 118,11,4000 },{ 158,6,387 },{ 52,11,496 } },
		{ { 172,72,3907 },{ 126,11,4000 },{ 166,6,387 },{ 60,11,496 } },
		{ { 174,6,3971 },{ 134,11,4000 },{ 174,6,387 },{ 68,11,496 } },
		{ { 183,6,3971 },{ 143,11,4000 },{ 183,6,387 },{ 77,11,496 } },
		{ { 191,6,3971 },{ 151,11,4000 },{ 191,6,387 },{ 85,11,496 } },
		{ { 199,6,3971 },{ 159,11,4000 },{ 199,6,387 },{ 93,11,496 } },
		{ { 92,12,4084 },{ 69,15,4080 },{ 92,12,500 },{ 69,15,496 } },
		{ { 101,12,4084 },{ 78,15,4080 },{ 101,12,500 },{ 78,15,496 } },
		{ { 109,12,4084 },{ 86,15,4080 },{ 109,12,500 },{ 86,15,496 } },
		{ { 117,12,4084 },{ 79,31,4080 },{ 117,12,500 },{ 79,31,496 } },
		{ { 125,12,4084 },{ 87,31,4080 },{ 125,12,500 },{ 87,31,496 } },
		{ { 71,8,3602 },{ 71,8,3600 },{ 2,21,384 },{ 2,21,384 } },
		{ { 79,8,3611 },{ 79,8,3608 },{ 0,69,448 },{ 0,69,448 } },
		{ { 87,8,3611 },{ 87,8,3608 },{ 0,23,384 },{ 0,23,384 } },
		{ { 95,8,3611 },{ 95,8,3608 },{ 1,5,448 },{ 1,5,448 } },
		{ { 104,8,3611 },{ 104,8,3608 },{ 0,88,448 },{ 0,88,448 } },
		{ { 112,8,3611 },{ 112,8,3608 },{ 0,72,448 },{ 0,72,448 } },
		{ { 120,8,3611 },{ 121,8,3608 },{ 36,21,458 },{ 36,21,456 } },
		{ { 133,47,3091 },{ 129,8,3608 },{ 44,21,458 },{ 44,21,456 } },
		{ { 142,47,3091 },{ 138,8,3608 },{ 53,21,459 },{ 53,21,456 } },
		{ { 98,12,3850 },{ 98,12,3848 },{ 61,21,459 },{ 61,21,456 } },
		{ { 106,12,3850 },{ 106,12,3848 },{ 10,92,480 },{ 69,21,456 } },
		{ { 114,12,3851 },{ 114,12,3848 },{ 18,92,480 },{ 77,21,456 } },
		{ { 87,12,3906 },{ 87,12,3904 },{ 3,44,488 },{ 86,21,456 } },
		{ { 95,12,3906 },{ 95,12,3904 },{ 11,44,488 },{ 94,21,456 } },
		{ { 103,12,3906 },{ 103,12,3904 },{ 19,44,488 },{ 102,21,456 } },
		{ { 111,12,3907 },{ 111,12,3904 },{ 27,44,489 },{ 110,21,456 } },
		{ { 120,12,3907 },{ 120,12,3904 },{ 36,44,489 },{ 119,21,456 } },
		{ { 128,12,3907 },{ 128,12,3904 },{ 44,44,489 },{ 127,21,456 } },
		{ { 136,12,3907 },{ 136,12,3904 },{ 52,44,489 },{ 135,21,456 } },
		{ { 144,12,3907 },{ 144,12,3904 },{ 60,44,489 },{ 143,21,456 } },
		{ { 153,12,3907 },{ 153,12,3904 },{ 69,44,490 },{ 152,21,456 } },
		{ { 161,12,3395 },{ 149,188,3968 },{ 77,44,490 },{ 160,21,456 } },
		{ { 169,12,3395 },{ 198,21,3928 },{ 85,44,490 },{ 168,21,456 } },
		{ { 113,95,4001 },{ 201,69,3992 },{ 125,8,483 },{ 176,21,456 } },
		{ { 122,95,4001 },{ 200,21,3984 },{ 134,8,483 },{ 185,21,456 } },
		{ { 142,8,4067 },{ 208,21,3984 },{ 142,8,483 },{ 193,21,456 } },
		{ { 151,8,4067 },{ 47,15,4080 },{ 151,8,483 },{ 47,15,496 } },
		{ { 159,8,4067 },{ 55,15,4080 },{ 159,8,483 },{ 55,15,496 } },
		{ { 168,8,4067 },{ 64,15,4080 },{ 168,8,483 },{ 64,15,496 } },
		{ { 160,40,4075 },{ 72,15,4080 },{ 160,40,491 },{ 72,15,496 } },
		{ { 168,40,4075 },{ 80,15,4080 },{ 168,40,491 },{ 80,15,496 } },
		{ { 144,8,4082 },{ 88,15,4080 },{ 144,8,498 },{ 88,15,496 } }
	};
1677#endif // BASISD_SUPPORT_ETC2_EAC_A8
1678
1679#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1680 static void create_etc2_eac_a8_conversion_table()
1681 {
1682 FILE* pFile = fopen("basisu_decoder_tables_etc2_eac_a8.inc", "w");
1683
1684 for (uint32_t inten = 0; inten < 8; inten++)
1685 {
1686 for (uint32_t base = 0; base < 32; base++)
1687 {
1688 color32 block_colors[4];
1689 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1690
1691 fprintf(pFile, "{");
1692
1693 for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1694 {
1695 const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1696 const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1697
1698 // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1699 // Now find the best ETC2 EAC A8 base/table/multiplier that fits these colors.
1700
1701 uint8_t pixels[4];
1702 uint32_t num_pixels = 0;
1703 for (uint32_t s = low_selector; s <= high_selector; s++)
1704 pixels[num_pixels++] = block_colors[s].g;
1705
1706 pack_eac_a8_results pack_results;
1707 pack_eac_a8_exhaustive(pack_results, pixels, num_pixels);
1708
1709 etc1_g_to_eac_conversion& c = s_etc1_g_to_etc2_a8[base + inten * 32][sel_range];
1710
1711 c.m_base = pack_results.m_base;
1712 c.m_table_mul = pack_results.m_table * 16 + pack_results.m_multiplier;
1713 c.m_trans = 0;
1714
1715 for (uint32_t s = 0; s < 4; s++)
1716 {
1717 if ((s < low_selector) || (s > high_selector))
1718 continue;
1719
1720 uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1721
1722 c.m_trans |= (etc2_selector << (s * 3));
1723 }
1724
1725 fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1726 if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1727 fprintf(pFile, ",");
1728 }
1729
1730 fprintf(pFile, "},\n");
1731 }
1732 }
1733
1734 fclose(pFile);
1735 }
1736#endif
1737
1738#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1739 struct pack_eac_r11_results
1740 {
1741 uint32_t m_base;
1742 uint32_t m_table;
1743 uint32_t m_multiplier;
1744 basisu::vector<uint8_t> m_selectors;
1745 basisu::vector<uint8_t> m_selectors_temp;
1746 };
1747
1748 static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels)
1749 {
1750 results.m_selectors.resize(num_pixels);
1751 results.m_selectors_temp.resize(num_pixels);
1752
1753 uint64_t best_err = UINT64_MAX;
1754
1755 for (uint32_t base_color = 0; base_color < 256; base_color++)
1756 {
1757 for (uint32_t multiplier = 0; multiplier < 16; multiplier++)
1758 {
1759 for (uint32_t table = 0; table < 16; table++)
1760 {
1761 uint64_t total_err = 0;
1762
1763 for (uint32_t i = 0; i < num_pixels; i++)
1764 {
1765 // Convert 8-bit input to 11-bits
1766 const int a = (pPixels[i] * 2047 + 128) / 255;
1767
1768 uint32_t best_s_err = UINT32_MAX;
1769 uint32_t best_s = 0;
1770 for (uint32_t s = 0; s < 8; s++)
1771 {
1772 int v = (int)(multiplier ? (multiplier * 8) : 1) * g_eac_modifier_table[table][s] + (int)base_color * 8 + 4;
1773 if (v < 0)
1774 v = 0;
1775 else if (v > 2047)
1776 v = 2047;
1777
1778 uint32_t err = abs(a - v);
1779 if (err < best_s_err)
1780 {
1781 best_s_err = err;
1782 best_s = s;
1783 }
1784 }
1785
1786 results.m_selectors_temp[i] = static_cast<uint8_t>(best_s);
1787
1788 total_err += best_s_err * best_s_err;
1789 if (total_err >= best_err)
1790 break;
1791 }
1792
1793 if (total_err < best_err)
1794 {
1795 best_err = total_err;
1796 results.m_base = base_color;
1797 results.m_multiplier = multiplier;
1798 results.m_table = table;
1799 results.m_selectors.swap(results.m_selectors_temp);
1800 }
1801
1802 } // table
1803
1804 } // multiplier
1805
1806 } // base_color
1807
1808 return best_err;
1809 }
1810
1811 static void create_etc2_eac_r11_conversion_table()
1812 {
1813 FILE* pFile = nullptr;
1814 fopen_s(&pFile, "basisu_decoder_tables_etc2_eac_r11.inc", "w");
1815
1816 for (uint32_t inten = 0; inten < 8; inten++)
1817 {
1818 for (uint32_t base = 0; base < 32; base++)
1819 {
1820 color32 block_colors[4];
1821 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(base, base, base, 255), false), inten);
1822
1823 fprintf(pFile, "{");
1824
1825 for (uint32_t sel_range = 0; sel_range < NUM_ETC2_EAC_SELECTOR_RANGES; sel_range++)
1826 {
1827 const uint32_t low_selector = s_etc2_eac_selector_ranges[sel_range].m_low;
1828 const uint32_t high_selector = s_etc2_eac_selector_ranges[sel_range].m_high;
1829
1830 // We have a ETC1 base color and intensity, and a used selector range from low_selector-high_selector.
1831 // Now find the best ETC2 EAC R11 base/table/multiplier that fits these colors.
1832
1833 uint8_t pixels[4];
1834 uint32_t num_pixels = 0;
1835 for (uint32_t s = low_selector; s <= high_selector; s++)
1836 pixels[num_pixels++] = block_colors[s].g;
1837
1838 pack_eac_r11_results pack_results;
1839 pack_eac_r11_exhaustive(pack_results, pixels, num_pixels);
1840
1841 etc1_g_to_eac_conversion c;
1842
1843 c.m_base = (uint8_t)pack_results.m_base;
1844 c.m_table_mul = (uint8_t)(pack_results.m_table * 16 + pack_results.m_multiplier);
1845 c.m_trans = 0;
1846
1847 for (uint32_t s = 0; s < 4; s++)
1848 {
1849 if ((s < low_selector) || (s > high_selector))
1850 continue;
1851
1852 uint32_t etc2_selector = pack_results.m_selectors[s - low_selector];
1853
1854 c.m_trans |= (etc2_selector << (s * 3));
1855 }
1856
1857 fprintf(pFile, "{%u,%u,%u}", c.m_base, c.m_table_mul, c.m_trans);
1858 if (sel_range < (NUM_ETC2_EAC_SELECTOR_RANGES - 1))
1859 fprintf(pFile, ",");
1860 }
1861
1862 fprintf(pFile, "},\n");
1863 }
1864 }
1865
1866 fclose(pFile);
1867 }
1868#endif // BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1869
// Forward declarations for the per-format setup routines and offline table generators that
// basisu_transcoder_init() calls. Each one is only declared when its feature macro is enabled.

// ASTC table generators (table-writing builds only).
#if BASISD_WRITE_NEW_ASTC_TABLES
 static void create_etc1_to_astc_conversion_table_0_47();
 static void create_etc1_to_astc_conversion_table_0_255();
#endif

// ASTC runtime initialization.
#if BASISD_SUPPORT_ASTC
 static void transcoder_init_astc();
#endif

// BC7 mode 5 table generators (table-writing builds only).
#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
 static void create_etc1_to_bc7_m5_color_conversion_table();
 static void create_etc1_to_bc7_m5_alpha_conversion_table();
#endif

// BC7 mode 5 runtime initialization.
#if BASISD_SUPPORT_BC7_MODE5
 static void transcoder_init_bc7_mode5();
#endif

// ATC table generator (table-writing builds only).
#if BASISD_WRITE_NEW_ATC_TABLES
 static void create_etc1s_to_atc_conversion_tables();
#endif

// ATC runtime initialization.
#if BASISD_SUPPORT_ATC
 static void transcoder_init_atc();
#endif

// PVRTC2 runtime initialization.
#if BASISD_SUPPORT_PVRTC2
 static void transcoder_init_pvrtc2();
#endif

// UASTC runtime initialization. Unlike the declarations above this one is not static.
// NOTE(review): its definition is not visible here - confirm where it lives.
#if BASISD_SUPPORT_UASTC
 void uastc_init();
#endif

 // Set to true at the end of basisu_transcoder_init(); checked there to reject repeated initialization.
 // Has static storage duration, so it starts out false.
 static bool g_transcoder_initialized;
1905
1906 // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz.
 // If this is too slow, these computed tables can easily be moved to be compiled in.
1908 void basisu_transcoder_init()
1909 {
1910 if (g_transcoder_initialized)
1911 {
1912 BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");
1913 return;
1914 }
1915
1916 BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");
1917
1918#if BASISD_SUPPORT_UASTC
1919 uastc_init();
1920#endif
1921
1922#if BASISD_SUPPORT_ASTC
1923 transcoder_init_astc();
1924#endif
1925
1926#if BASISD_WRITE_NEW_ASTC_TABLES
1927 create_etc1_to_astc_conversion_table_0_47();
1928 create_etc1_to_astc_conversion_table_0_255();
1929 exit(0);
1930#endif
1931
1932#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
1933 create_etc1_to_bc7_m5_color_conversion_table();
1934 create_etc1_to_bc7_m5_alpha_conversion_table();
1935 exit(0);
1936#endif
1937
1938#if BASISD_WRITE_NEW_DXT1_TABLES
1939 create_etc1_to_dxt1_5_conversion_table();
1940 create_etc1_to_dxt1_6_conversion_table();
1941 exit(0);
1942#endif
1943
1944#if BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES
1945 create_etc2_eac_a8_conversion_table();
1946 exit(0);
1947#endif
1948
1949#if BASISD_WRITE_NEW_ATC_TABLES
1950 create_etc1s_to_atc_conversion_tables();
1951 exit(0);
1952#endif
1953
1954#if BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES
1955 create_etc2_eac_r11_conversion_table();
1956 exit(0);
1957#endif
1958
1959#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC
1960 uint8_t bc1_expand5[32];
1961 for (int i = 0; i < 32; i++)
1962 bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2));
1963 prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 32, 1);
1964 prepare_bc1_single_color_table(g_bc1_match5_equals_0, bc1_expand5, 1, 32, 0);
1965
1966 uint8_t bc1_expand6[64];
1967 for (int i = 0; i < 64; i++)
1968 bc1_expand6[i] = static_cast<uint8_t>((i << 2) | (i >> 4));
1969 prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1);
1970 prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0);
1971
1972#if 0
1973 for (uint32_t i = 0; i < 256; i++)
1974 {
1975 printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi);
1976 }
1977 exit(0);
1978#endif
1979
1980#endif
1981
1982#if BASISD_SUPPORT_DXT1
1983 for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++)
1984 {
1985 uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low;
1986 uint32_t h = g_etc1_to_dxt1_selector_ranges[i].m_high;
1987 g_etc1_to_dxt1_selector_range_index[l][h] = i;
1988 }
1989
1990 for (uint32_t sm = 0; sm < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; sm++)
1991 {
1992 uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1[4];
1993 uint8_t etc1_to_dxt1_selector_mappings_raw_dxt1_inv[4];
1994
1995 for (uint32_t j = 0; j < 4; j++)
1996 {
1997 static const uint8_t s_linear_dxt1_to_dxt1[4] = { 0, 2, 3, 1 };
1998 static const uint8_t s_dxt1_inverted_xlat[4] = { 1, 0, 3, 2 };
1999
2000 etc1_to_dxt1_selector_mappings_raw_dxt1[j] = (uint8_t)s_linear_dxt1_to_dxt1[g_etc1_to_dxt1_selector_mappings[sm][j]];
2001 etc1_to_dxt1_selector_mappings_raw_dxt1_inv[j] = (uint8_t)s_dxt1_inverted_xlat[etc1_to_dxt1_selector_mappings_raw_dxt1[j]];
2002 }
2003
2004 for (uint32_t i = 0; i < 256; i++)
2005 {
2006 uint32_t k = 0, k_inv = 0;
2007 for (uint32_t s = 0; s < 4; s++)
2008 {
2009 k |= (etc1_to_dxt1_selector_mappings_raw_dxt1[(i >> (s * 2)) & 3] << (s * 2));
2010 k_inv |= (etc1_to_dxt1_selector_mappings_raw_dxt1_inv[(i >> (s * 2)) & 3] << (s * 2));
2011 }
2012 g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[sm][i] = (uint8_t)k;
2013 g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[sm][i] = (uint8_t)k_inv;
2014 }
2015 }
2016#endif
2017
2018#if BASISD_SUPPORT_BC7_MODE5
2019 transcoder_init_bc7_mode5();
2020#endif
2021
2022#if BASISD_SUPPORT_ATC
2023 transcoder_init_atc();
2024#endif
2025
2026#if BASISD_SUPPORT_PVRTC2
2027 transcoder_init_pvrtc2();
2028#endif
2029
2030 g_transcoder_initialized = true;
2031 }
2032
2033#if BASISD_SUPPORT_DXT1
2034 static void convert_etc1s_to_dxt1(dxt1_block* pDst_block, const endpoint *pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
2035 {
2036#if !BASISD_WRITE_NEW_DXT1_TABLES
2037 const uint32_t low_selector = pSelector->m_lo_selector;
2038 const uint32_t high_selector = pSelector->m_hi_selector;
2039
2040 const color32& base_color = pEndpoints->m_color5;
2041 const uint32_t inten_table = pEndpoints->m_inten5;
2042
2043 if (low_selector == high_selector)
2044 {
2045 uint32_t r, g, b;
2046 decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
2047
2048 uint32_t mask = 0xAA;
2049 uint32_t max16 = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi;
2050 uint32_t min16 = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo;
2051
2052 if ((!use_threecolor_blocks) && (min16 == max16))
2053 {
2054 // This is an annoying edge case that impacts BC3.
2055 // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
2056 mask = 0;
2057
2058 // Make l > h
2059 if (min16 > 0)
2060 min16--;
2061 else
2062 {
2063 // l = h = 0
2064 assert(min16 == max16 && max16 == 0);
2065
2066 max16 = 1;
2067 min16 = 0;
2068 mask = 0x55;
2069 }
2070
2071 assert(max16 > min16);
2072 }
2073
2074 if (max16 < min16)
2075 {
2076 std::swap(max16, min16);
2077 mask ^= 0x55;
2078 }
2079
2080 pDst_block->set_low_color(static_cast<uint16_t>(max16));
2081 pDst_block->set_high_color(static_cast<uint16_t>(min16));
2082 pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
2083 pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
2084 pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
2085 pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
2086
2087 return;
2088 }
2089 else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
2090 {
2091 color32 block_colors[4];
2092
2093 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
2094
2095 const uint32_t r0 = block_colors[0].r;
2096 const uint32_t g0 = block_colors[0].g;
2097 const uint32_t b0 = block_colors[0].b;
2098
2099 const uint32_t r1 = block_colors[3].r;
2100 const uint32_t g1 = block_colors[3].g;
2101 const uint32_t b1 = block_colors[3].b;
2102
2103 uint32_t max16 = (g_bc1_match5_equals_0[r0].m_hi << 11) | (g_bc1_match6_equals_0[g0].m_hi << 5) | g_bc1_match5_equals_0[b0].m_hi;
2104 uint32_t min16 = (g_bc1_match5_equals_0[r1].m_hi << 11) | (g_bc1_match6_equals_0[g1].m_hi << 5) | g_bc1_match5_equals_0[b1].m_hi;
2105
2106 uint32_t l = 0, h = 1;
2107
2108 if (min16 == max16)
2109 {
2110 // Make l > h
2111 if (min16 > 0)
2112 {
2113 min16--;
2114
2115 l = 0;
2116 h = 0;
2117 }
2118 else
2119 {
2120 // l = h = 0
2121 assert(min16 == max16 && max16 == 0);
2122
2123 max16 = 1;
2124 min16 = 0;
2125
2126 l = 1;
2127 h = 1;
2128 }
2129
2130 assert(max16 > min16);
2131 }
2132
2133 if (max16 < min16)
2134 {
2135 std::swap(max16, min16);
2136 l = 1;
2137 h = 0;
2138 }
2139
2140 pDst_block->set_low_color((uint16_t)max16);
2141 pDst_block->set_high_color((uint16_t)min16);
2142
2143 for (uint32_t y = 0; y < 4; y++)
2144 {
2145 for (uint32_t x = 0; x < 4; x++)
2146 {
2147 uint32_t s = pSelector->get_selector(x, y);
2148 pDst_block->set_selector(x, y, (s == 3) ? h : l);
2149 }
2150 }
2151
2152 return;
2153 }
2154
2155 const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector];
2156
2157 //[32][8][RANGES][MAPPING]
2158 const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
2159 const etc1_to_dxt1_56_solution* pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
2160 const etc1_to_dxt1_56_solution* pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
2161
2162 uint32_t best_err = UINT_MAX;
2163 uint32_t best_mapping = 0;
2164
2165 assert(NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS == 10);
2166#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
2167 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
2168 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
2169#undef DO_ITER
2170
2171 uint32_t l = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
2172 uint32_t h = dxt1_block::pack_unscaled_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
2173
2174 const uint8_t* pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[best_mapping][0];
2175
2176 if (l < h)
2177 {
2178 std::swap(l, h);
2179 pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0];
2180 }
2181
2182 pDst_block->set_low_color(static_cast<uint16_t>(l));
2183 pDst_block->set_high_color(static_cast<uint16_t>(h));
2184
2185 if (l == h)
2186 {
2187 uint8_t mask = 0;
2188
2189 if (!use_threecolor_blocks)
2190 {
2191 // This is an annoying edge case that impacts BC3.
2192
2193 // Make l > h
2194 if (h > 0)
2195 h--;
2196 else
2197 {
2198 // l = h = 0
2199 assert(l == h && h == 0);
2200
2201 h = 0;
2202 l = 1;
2203 mask = 0x55;
2204 }
2205
2206 assert(l > h);
2207 pDst_block->set_low_color(static_cast<uint16_t>(l));
2208 pDst_block->set_high_color(static_cast<uint16_t>(h));
2209 }
2210
2211 pDst_block->m_selectors[0] = mask;
2212 pDst_block->m_selectors[1] = mask;
2213 pDst_block->m_selectors[2] = mask;
2214 pDst_block->m_selectors[3] = mask;
2215
2216 return;
2217 }
2218
2219 pDst_block->m_selectors[0] = pSelectors_xlat_256[pSelector->m_selectors[0]];
2220 pDst_block->m_selectors[1] = pSelectors_xlat_256[pSelector->m_selectors[1]];
2221 pDst_block->m_selectors[2] = pSelectors_xlat_256[pSelector->m_selectors[2]];
2222 pDst_block->m_selectors[3] = pSelectors_xlat_256[pSelector->m_selectors[3]];
2223#endif
2224 }
2225
2226#if BASISD_ENABLE_DEBUG_FLAGS
2227 static void convert_etc1s_to_dxt1_vis(dxt1_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool use_threecolor_blocks)
2228 {
2229 convert_etc1s_to_dxt1(pDst_block, pEndpoints, pSelector, use_threecolor_blocks);
2230
2231 if (g_debug_flags & cDebugFlagVisBC1Sels)
2232 {
2233 uint32_t l = dxt1_block::pack_unscaled_color(31, 63, 31);
2234 uint32_t h = dxt1_block::pack_unscaled_color(0, 0, 0);
2235 pDst_block->set_low_color(static_cast<uint16_t>(l));
2236 pDst_block->set_high_color(static_cast<uint16_t>(h));
2237 }
2238 else if (g_debug_flags & cDebugFlagVisBC1Endpoints)
2239 {
2240 for (uint32_t y = 0; y < 4; y++)
2241 for (uint32_t x = 0; x < 4; x++)
2242 pDst_block->set_selector(x, y, (y < 2) ? 0 : 1);
2243 }
2244 }
2245#endif
2246#endif
2247
2248#if BASISD_SUPPORT_FXT1
 // One FXT1 block as written by convert_etc1s_to_fxt1(): two 64-bit halves, each an anonymous
 // union so the same bits can be accessed as named bitfields, as a whole 64-bit word, or (low half
 // only) as bytes.
 // NOTE(review): the bitfield views depend on the compiler's bitfield allocation order - confirm
 // for any new target.
 struct fxt1_block
 {
 // Low 64 bits: 32 two-bit texel selectors (m_t00..m_t31), also addressable as 8 bytes of
 // four selectors each. convert_etc1s_to_fxt1() writes these through m_sels.
 union
 {
 struct
 {
 uint64_t m_t00 : 2;
 uint64_t m_t01 : 2;
 uint64_t m_t02 : 2;
 uint64_t m_t03 : 2;
 uint64_t m_t04 : 2;
 uint64_t m_t05 : 2;
 uint64_t m_t06 : 2;
 uint64_t m_t07 : 2;
 uint64_t m_t08 : 2;
 uint64_t m_t09 : 2;
 uint64_t m_t10 : 2;
 uint64_t m_t11 : 2;
 uint64_t m_t12 : 2;
 uint64_t m_t13 : 2;
 uint64_t m_t14 : 2;
 uint64_t m_t15 : 2;
 uint64_t m_t16 : 2;
 uint64_t m_t17 : 2;
 uint64_t m_t18 : 2;
 uint64_t m_t19 : 2;
 uint64_t m_t20 : 2;
 uint64_t m_t21 : 2;
 uint64_t m_t22 : 2;
 uint64_t m_t23 : 2;
 uint64_t m_t24 : 2;
 uint64_t m_t25 : 2;
 uint64_t m_t26 : 2;
 uint64_t m_t27 : 2;
 uint64_t m_t28 : 2;
 uint64_t m_t29 : 2;
 uint64_t m_t30 : 2;
 uint64_t m_t31 : 2;
 } m_lo;
 uint64_t m_lo_bits;
 uint8_t m_sels[8];
 };
 // High 64 bits: four colors of 5 bits per channel (60 bits) followed by the
 // m_alpha (1 bit), m_glsb (2 bits) and m_mode (1 bit) fields.
 union
 {
 struct
 {
 // The two layouts differ only in the order of the colors within each pair
 // (1 before 0 and 3 before 2 when BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING is defined).
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
 uint64_t m_b1 : 5;
 uint64_t m_g1 : 5;
 uint64_t m_r1 : 5;
 uint64_t m_b0 : 5;
 uint64_t m_g0 : 5;
 uint64_t m_r0 : 5;
 uint64_t m_b3 : 5;
 uint64_t m_g3 : 5;
 uint64_t m_r3 : 5;
 uint64_t m_b2 : 5;
 uint64_t m_g2 : 5;
 uint64_t m_r2 : 5;
#else
 uint64_t m_b0 : 5;
 uint64_t m_g0 : 5;
 uint64_t m_r0 : 5;
 uint64_t m_b1 : 5;
 uint64_t m_g1 : 5;
 uint64_t m_r1 : 5;
 uint64_t m_b2 : 5;
 uint64_t m_g2 : 5;
 uint64_t m_r2 : 5;
 uint64_t m_b3 : 5;
 uint64_t m_g3 : 5;
 uint64_t m_r3 : 5;
#endif
 // convert_etc1s_to_fxt1() writes m_alpha = 0 and m_mode = 1, and stores in m_glsb the
 // low green bits it strips from the 6-bit DXT1 greens.
 uint64_t m_alpha : 1;
 uint64_t m_glsb : 2;
 uint64_t m_mode : 1;
 } m_hi;
 uint64_t m_hi_bits;
 };
 };
2329
2330 static uint8_t conv_dxt1_to_fxt1_sels(uint32_t sels)
2331 {
2332 static uint8_t s_conv_table[16] = { 0, 3, 1, 2, 12, 15, 13, 14, 4, 7, 5, 6, 8, 11, 9, 10 };
2333 return s_conv_table[sels & 15] | (s_conv_table[sels >> 4] << 4);
2334 }
2335
2336 static void convert_etc1s_to_fxt1(void *pDst, const endpoint *pEndpoints, const selector *pSelectors, uint32_t fxt1_subblock)
2337 {
2338 fxt1_block* pBlock = static_cast<fxt1_block*>(pDst);
2339
2340 // CC_MIXED is basically DXT1 with different encoding tricks.
2341 // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless.
2342 // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.)
2343 dxt1_block blk;
2344 convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false);
2345
2346 const uint32_t l = blk.get_low_color();
2347 const uint32_t h = blk.get_high_color();
2348
2349 color32 color0((l >> 11) & 31, (l >> 5) & 63, l & 31, 255);
2350 color32 color1((h >> 11) & 31, (h >> 5) & 63, h & 31, 255);
2351
2352 uint32_t g0 = color0.g & 1;
2353 uint32_t g1 = color1.g & 1;
2354
2355 color0.g >>= 1;
2356 color1.g >>= 1;
2357
2358 blk.m_selectors[0] = conv_dxt1_to_fxt1_sels(blk.m_selectors[0]);
2359 blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]);
2360 blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]);
2361 blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]);
2362
2363 if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1))
2364 {
2365 std::swap(color0, color1);
2366 std::swap(g0, g1);
2367
2368 blk.m_selectors[0] ^= 0xFF;
2369 blk.m_selectors[1] ^= 0xFF;
2370 blk.m_selectors[2] ^= 0xFF;
2371 blk.m_selectors[3] ^= 0xFF;
2372 }
2373
2374 if (fxt1_subblock == 0)
2375 {
2376 pBlock->m_hi.m_mode = 1;
2377 pBlock->m_hi.m_alpha = 0;
2378 pBlock->m_hi.m_glsb = g1 | (g1 << 1);
2379 pBlock->m_hi.m_r0 = color0.r;
2380 pBlock->m_hi.m_g0 = color0.g;
2381 pBlock->m_hi.m_b0 = color0.b;
2382 pBlock->m_hi.m_r1 = color1.r;
2383 pBlock->m_hi.m_g1 = color1.g;
2384 pBlock->m_hi.m_b1 = color1.b;
2385 pBlock->m_hi.m_r2 = color0.r;
2386 pBlock->m_hi.m_g2 = color0.g;
2387 pBlock->m_hi.m_b2 = color0.b;
2388 pBlock->m_hi.m_r3 = color1.r;
2389 pBlock->m_hi.m_g3 = color1.g;
2390 pBlock->m_hi.m_b3 = color1.b;
2391 pBlock->m_sels[0] = blk.m_selectors[0];
2392 pBlock->m_sels[1] = blk.m_selectors[1];
2393 pBlock->m_sels[2] = blk.m_selectors[2];
2394 pBlock->m_sels[3] = blk.m_selectors[3];
2395
2396 static const uint8_t s_border_dup[4] = { 0, 85, 170, 255 };
2397 pBlock->m_sels[4] = s_border_dup[blk.m_selectors[0] >> 6];
2398 pBlock->m_sels[5] = s_border_dup[blk.m_selectors[1] >> 6];
2399 pBlock->m_sels[6] = s_border_dup[blk.m_selectors[2] >> 6];
2400 pBlock->m_sels[7] = s_border_dup[blk.m_selectors[3] >> 6];
2401 }
2402 else
2403 {
2404 pBlock->m_hi.m_glsb = (pBlock->m_hi.m_glsb & 1) | (g1 << 1);
2405 pBlock->m_hi.m_r2 = color0.r;
2406 pBlock->m_hi.m_g2 = color0.g;
2407 pBlock->m_hi.m_b2 = color0.b;
2408 pBlock->m_hi.m_r3 = color1.r;
2409 pBlock->m_hi.m_g3 = color1.g;
2410 pBlock->m_hi.m_b3 = color1.b;
2411 pBlock->m_sels[4] = blk.m_selectors[0];
2412 pBlock->m_sels[5] = blk.m_selectors[1];
2413 pBlock->m_sels[6] = blk.m_selectors[2];
2414 pBlock->m_sels[7] = blk.m_selectors[3];
2415 }
2416 }
2417#endif // BASISD_SUPPORT_FXT1
2418#if BASISD_SUPPORT_DXT5A
2419 static dxt_selector_range s_dxt5a_selector_ranges[] =
2420 {
2421 { 0, 3 },
2422
2423 { 1, 3 },
2424 { 0, 2 },
2425
2426 { 1, 2 },
2427 };
2428
2429 const uint32_t NUM_DXT5A_SELECTOR_RANGES = sizeof(s_dxt5a_selector_ranges) / sizeof(s_dxt5a_selector_ranges[0]);
2430
2431 struct etc1_g_to_dxt5a_conversion
2432 {
2433 uint8_t m_lo, m_hi;
2434 uint16_t m_trans;
2435 };
2436
2437 static etc1_g_to_dxt5a_conversion g_etc1_g_to_dxt5a[32 * 8][NUM_DXT5A_SELECTOR_RANGES] =
2438 {
2439 { { 8, 0, 393 },{ 8, 0, 392 },{ 2, 0, 9 },{ 2, 0, 8 }, }, { { 6, 16, 710 },{ 16, 6, 328 },{ 0, 10, 96 },{ 10, 6, 8 }, },
2440 { { 28, 5, 1327 },{ 24, 14, 328 },{ 8, 18, 96 },{ 18, 14, 8 }, }, { { 36, 13, 1327 },{ 32, 22, 328 },{ 16, 26, 96 },{ 26, 22, 8 }, },
2441 { { 45, 22, 1327 },{ 41, 31, 328 },{ 25, 35, 96 },{ 35, 31, 8 }, }, { { 53, 30, 1327 },{ 49, 39, 328 },{ 33, 43, 96 },{ 43, 39, 8 }, },
2442 { { 61, 38, 1327 },{ 57, 47, 328 },{ 41, 51, 96 },{ 51, 47, 8 }, }, { { 69, 46, 1327 },{ 65, 55, 328 },{ 49, 59, 96 },{ 59, 55, 8 }, },
2443 { { 78, 55, 1327 },{ 74, 64, 328 },{ 58, 68, 96 },{ 68, 64, 8 }, }, { { 86, 63, 1327 },{ 82, 72, 328 },{ 66, 76, 96 },{ 76, 72, 8 }, },
2444 { { 94, 71, 1327 },{ 90, 80, 328 },{ 74, 84, 96 },{ 84, 80, 8 }, }, { { 102, 79, 1327 },{ 98, 88, 328 },{ 82, 92, 96 },{ 92, 88, 8 }, },
2445 { { 111, 88, 1327 },{ 107, 97, 328 },{ 91, 101, 96 },{ 101, 97, 8 }, }, { { 119, 96, 1327 },{ 115, 105, 328 },{ 99, 109, 96 },{ 109, 105, 8 }, },
2446 { { 127, 104, 1327 },{ 123, 113, 328 },{ 107, 117, 96 },{ 117, 113, 8 }, }, { { 135, 112, 1327 },{ 131, 121, 328 },{ 115, 125, 96 },{ 125, 121, 8 }, },
2447 { { 144, 121, 1327 },{ 140, 130, 328 },{ 124, 134, 96 },{ 134, 130, 8 }, }, { { 152, 129, 1327 },{ 148, 138, 328 },{ 132, 142, 96 },{ 142, 138, 8 }, },
2448 { { 160, 137, 1327 },{ 156, 146, 328 },{ 140, 150, 96 },{ 150, 146, 8 }, }, { { 168, 145, 1327 },{ 164, 154, 328 },{ 148, 158, 96 },{ 158, 154, 8 }, },
2449 { { 177, 154, 1327 },{ 173, 163, 328 },{ 157, 167, 96 },{ 167, 163, 8 }, }, { { 185, 162, 1327 },{ 181, 171, 328 },{ 165, 175, 96 },{ 175, 171, 8 }, },
2450 { { 193, 170, 1327 },{ 189, 179, 328 },{ 173, 183, 96 },{ 183, 179, 8 }, }, { { 201, 178, 1327 },{ 197, 187, 328 },{ 181, 191, 96 },{ 191, 187, 8 }, },
2451 { { 210, 187, 1327 },{ 206, 196, 328 },{ 190, 200, 96 },{ 200, 196, 8 }, }, { { 218, 195, 1327 },{ 214, 204, 328 },{ 198, 208, 96 },{ 208, 204, 8 }, },
2452 { { 226, 203, 1327 },{ 222, 212, 328 },{ 206, 216, 96 },{ 216, 212, 8 }, }, { { 234, 211, 1327 },{ 230, 220, 328 },{ 214, 224, 96 },{ 224, 220, 8 }, },
2453 { { 243, 220, 1327 },{ 239, 229, 328 },{ 223, 233, 96 },{ 233, 229, 8 }, }, { { 251, 228, 1327 },{ 247, 237, 328 },{ 231, 241, 96 },{ 241, 237, 8 }, },
2454 { { 239, 249, 3680 },{ 245, 249, 3648 },{ 239, 249, 96 },{ 249, 245, 8 }, }, { { 247, 253, 4040 },{ 255, 253, 8 },{ 247, 253, 456 },{ 255, 253, 8 }, },
2455 { { 5, 17, 566 },{ 5, 17, 560 },{ 5, 0, 9 },{ 5, 0, 8 }, }, { { 25, 0, 313 },{ 25, 3, 328 },{ 13, 0, 49 },{ 13, 3, 8 }, },
2456 { { 39, 0, 1329 },{ 33, 11, 328 },{ 11, 21, 70 },{ 21, 11, 8 }, }, { { 47, 7, 1329 },{ 41, 19, 328 },{ 29, 7, 33 },{ 29, 19, 8 }, },
2457 { { 50, 11, 239 },{ 50, 28, 328 },{ 38, 16, 33 },{ 38, 28, 8 }, }, { { 92, 13, 2423 },{ 58, 36, 328 },{ 46, 24, 33 },{ 46, 36, 8 }, },
2458 { { 100, 21, 2423 },{ 66, 44, 328 },{ 54, 32, 33 },{ 54, 44, 8 }, }, { { 86, 7, 1253 },{ 74, 52, 328 },{ 62, 40, 33 },{ 62, 52, 8 }, },
2459 { { 95, 16, 1253 },{ 83, 61, 328 },{ 71, 49, 33 },{ 71, 61, 8 }, }, { { 103, 24, 1253 },{ 91, 69, 328 },{ 79, 57, 33 },{ 79, 69, 8 }, },
2460 { { 111, 32, 1253 },{ 99, 77, 328 },{ 87, 65, 33 },{ 87, 77, 8 }, }, { { 119, 40, 1253 },{ 107, 85, 328 },{ 95, 73, 33 },{ 95, 85, 8 }, },
2461 { { 128, 49, 1253 },{ 116, 94, 328 },{ 104, 82, 33 },{ 104, 94, 8 }, }, { { 136, 57, 1253 },{ 124, 102, 328 },{ 112, 90, 33 },{ 112, 102, 8 }, },
2462 { { 144, 65, 1253 },{ 132, 110, 328 },{ 120, 98, 33 },{ 120, 110, 8 }, }, { { 152, 73, 1253 },{ 140, 118, 328 },{ 128, 106, 33 },{ 128, 118, 8 }, },
2463 { { 161, 82, 1253 },{ 149, 127, 328 },{ 137, 115, 33 },{ 137, 127, 8 }, }, { { 169, 90, 1253 },{ 157, 135, 328 },{ 145, 123, 33 },{ 145, 135, 8 }, },
2464 { { 177, 98, 1253 },{ 165, 143, 328 },{ 153, 131, 33 },{ 153, 143, 8 }, }, { { 185, 106, 1253 },{ 173, 151, 328 },{ 161, 139, 33 },{ 161, 151, 8 }, },
2465 { { 194, 115, 1253 },{ 182, 160, 328 },{ 170, 148, 33 },{ 170, 160, 8 }, }, { { 202, 123, 1253 },{ 190, 168, 328 },{ 178, 156, 33 },{ 178, 168, 8 }, },
2466 { { 210, 131, 1253 },{ 198, 176, 328 },{ 186, 164, 33 },{ 186, 176, 8 }, }, { { 218, 139, 1253 },{ 206, 184, 328 },{ 194, 172, 33 },{ 194, 184, 8 }, },
2467 { { 227, 148, 1253 },{ 215, 193, 328 },{ 203, 181, 33 },{ 203, 193, 8 }, }, { { 235, 156, 1253 },{ 223, 201, 328 },{ 211, 189, 33 },{ 211, 201, 8 }, },
2468 { { 243, 164, 1253 },{ 231, 209, 328 },{ 219, 197, 33 },{ 219, 209, 8 }, }, { { 183, 239, 867 },{ 239, 217, 328 },{ 227, 205, 33 },{ 227, 217, 8 }, },
2469 { { 254, 214, 1329 },{ 248, 226, 328 },{ 236, 214, 33 },{ 236, 226, 8 }, }, { { 222, 244, 3680 },{ 234, 244, 3648 },{ 244, 222, 33 },{ 244, 234, 8 }, },
2470 { { 230, 252, 3680 },{ 242, 252, 3648 },{ 252, 230, 33 },{ 252, 242, 8 }, }, { { 238, 250, 4040 },{ 255, 250, 8 },{ 238, 250, 456 },{ 255, 250, 8 }, },
2471 { { 9, 29, 566 },{ 9, 29, 560 },{ 9, 0, 9 },{ 9, 0, 8 }, }, { { 17, 37, 566 },{ 17, 37, 560 },{ 17, 0, 9 },{ 17, 0, 8 }, },
2472 { { 45, 0, 313 },{ 45, 0, 312 },{ 25, 0, 49 },{ 25, 7, 8 }, }, { { 14, 63, 2758 },{ 5, 53, 784 },{ 15, 33, 70 },{ 33, 15, 8 }, },
2473 { { 71, 6, 1329 },{ 72, 4, 1328 },{ 42, 4, 33 },{ 42, 24, 8 }, }, { { 70, 3, 239 },{ 70, 2, 232 },{ 50, 12, 33 },{ 50, 32, 8 }, },
2474 { { 0, 98, 2842 },{ 78, 10, 232 },{ 58, 20, 33 },{ 58, 40, 8 }, }, { { 97, 27, 1329 },{ 86, 18, 232 },{ 66, 28, 33 },{ 66, 48, 8 }, },
2475 { { 0, 94, 867 },{ 95, 27, 232 },{ 75, 37, 33 },{ 75, 57, 8 }, }, { { 8, 102, 867 },{ 103, 35, 232 },{ 83, 45, 33 },{ 83, 65, 8 }, },
2476 { { 12, 112, 867 },{ 111, 43, 232 },{ 91, 53, 33 },{ 91, 73, 8 }, }, { { 139, 2, 1253 },{ 119, 51, 232 },{ 99, 61, 33 },{ 99, 81, 8 }, },
2477 { { 148, 13, 1253 },{ 128, 60, 232 },{ 108, 70, 33 },{ 108, 90, 8 }, }, { { 156, 21, 1253 },{ 136, 68, 232 },{ 116, 78, 33 },{ 116, 98, 8 }, },
2478 { { 164, 29, 1253 },{ 144, 76, 232 },{ 124, 86, 33 },{ 124, 106, 8 }, }, { { 172, 37, 1253 },{ 152, 84, 232 },{ 132, 94, 33 },{ 132, 114, 8 }, },
2479 { { 181, 46, 1253 },{ 161, 93, 232 },{ 141, 103, 33 },{ 141, 123, 8 }, }, { { 189, 54, 1253 },{ 169, 101, 232 },{ 149, 111, 33 },{ 149, 131, 8 }, },
2480 { { 197, 62, 1253 },{ 177, 109, 232 },{ 157, 119, 33 },{ 157, 139, 8 }, }, { { 205, 70, 1253 },{ 185, 117, 232 },{ 165, 127, 33 },{ 165, 147, 8 }, },
2481 { { 214, 79, 1253 },{ 194, 126, 232 },{ 174, 136, 33 },{ 174, 156, 8 }, }, { { 222, 87, 1253 },{ 202, 134, 232 },{ 182, 144, 33 },{ 182, 164, 8 }, },
2482 { { 230, 95, 1253 },{ 210, 142, 232 },{ 190, 152, 33 },{ 190, 172, 8 }, }, { { 238, 103, 1253 },{ 218, 150, 232 },{ 198, 160, 33 },{ 198, 180, 8 }, },
2483 { { 247, 112, 1253 },{ 227, 159, 232 },{ 207, 169, 33 },{ 207, 189, 8 }, }, { { 255, 120, 1253 },{ 235, 167, 232 },{ 215, 177, 33 },{ 215, 197, 8 }, },
2484 { { 146, 243, 867 },{ 243, 175, 232 },{ 223, 185, 33 },{ 223, 205, 8 }, }, { { 184, 231, 3682 },{ 203, 251, 784 },{ 231, 193, 33 },{ 231, 213, 8 }, },
2485 { { 193, 240, 3682 },{ 222, 240, 3648 },{ 240, 202, 33 },{ 240, 222, 8 }, }, { { 255, 210, 169 },{ 230, 248, 3648 },{ 248, 210, 33 },{ 248, 230, 8 }, },
2486 { { 218, 238, 4040 },{ 255, 238, 8 },{ 218, 238, 456 },{ 255, 238, 8 }, }, { { 226, 246, 4040 },{ 255, 246, 8 },{ 226, 246, 456 },{ 255, 246, 8 }, },
2487 { { 13, 42, 566 },{ 13, 42, 560 },{ 13, 0, 9 },{ 13, 0, 8 }, }, { { 50, 0, 329 },{ 50, 0, 328 },{ 21, 0, 9 },{ 21, 0, 8 }, },
2488 { { 29, 58, 566 },{ 67, 2, 1352 },{ 3, 29, 70 },{ 29, 3, 8 }, }, { { 10, 79, 2758 },{ 76, 11, 1352 },{ 11, 37, 70 },{ 37, 11, 8 }, },
2489 { { 7, 75, 790 },{ 7, 75, 784 },{ 20, 46, 70 },{ 46, 20, 8 }, }, { { 15, 83, 790 },{ 97, 1, 1328 },{ 28, 54, 70 },{ 54, 28, 8 }, },
2490 { { 101, 7, 1329 },{ 105, 9, 1328 },{ 62, 0, 39 },{ 62, 36, 8 }, }, { { 99, 1, 239 },{ 99, 3, 232 },{ 1, 71, 98 },{ 70, 44, 8 }, },
2491 { { 107, 11, 239 },{ 108, 12, 232 },{ 10, 80, 98 },{ 79, 53, 8 }, }, { { 115, 19, 239 },{ 116, 20, 232 },{ 18, 88, 98 },{ 87, 61, 8 }, },
2492 { { 123, 27, 239 },{ 124, 28, 232 },{ 26, 96, 98 },{ 95, 69, 8 }, }, { { 131, 35, 239 },{ 132, 36, 232 },{ 34, 104, 98 },{ 103, 77, 8 }, },
2493 { { 140, 44, 239 },{ 141, 45, 232 },{ 43, 113, 98 },{ 112, 86, 8 }, }, { { 148, 52, 239 },{ 149, 53, 232 },{ 51, 121, 98 },{ 120, 94, 8 }, },
2494 { { 156, 60, 239 },{ 157, 61, 232 },{ 59, 129, 98 },{ 128, 102, 8 }, }, { { 164, 68, 239 },{ 165, 69, 232 },{ 67, 137, 98 },{ 136, 110, 8 }, },
2495 { { 173, 77, 239 },{ 174, 78, 232 },{ 76, 146, 98 },{ 145, 119, 8 }, }, { { 181, 85, 239 },{ 182, 86, 232 },{ 84, 154, 98 },{ 153, 127, 8 }, },
2496 { { 189, 93, 239 },{ 190, 94, 232 },{ 92, 162, 98 },{ 161, 135, 8 }, }, { { 197, 101, 239 },{ 198, 102, 232 },{ 100, 170, 98 },{ 169, 143, 8 }, },
2497 { { 206, 110, 239 },{ 207, 111, 232 },{ 109, 179, 98 },{ 178, 152, 8 }, }, { { 214, 118, 239 },{ 215, 119, 232 },{ 117, 187, 98 },{ 186, 160, 8 }, },
2498 { { 222, 126, 239 },{ 223, 127, 232 },{ 125, 195, 98 },{ 194, 168, 8 }, }, { { 230, 134, 239 },{ 231, 135, 232 },{ 133, 203, 98 },{ 202, 176, 8 }, },
2499 { { 239, 143, 239 },{ 240, 144, 232 },{ 142, 212, 98 },{ 211, 185, 8 }, }, { { 247, 151, 239 },{ 180, 248, 784 },{ 150, 220, 98 },{ 219, 193, 8 }, },
2500 { { 159, 228, 3682 },{ 201, 227, 3648 },{ 158, 228, 98 },{ 227, 201, 8 }, }, { { 181, 249, 3928 },{ 209, 235, 3648 },{ 166, 236, 98 },{ 235, 209, 8 }, },
2501 { { 255, 189, 169 },{ 218, 244, 3648 },{ 175, 245, 98 },{ 244, 218, 8 }, }, { { 197, 226, 4040 },{ 226, 252, 3648 },{ 183, 253, 98 },{ 252, 226, 8 }, },
2502 { { 205, 234, 4040 },{ 255, 234, 8 },{ 205, 234, 456 },{ 255, 234, 8 }, }, { { 213, 242, 4040 },{ 255, 242, 8 },{ 213, 242, 456 },{ 255, 242, 8 }, },
2503 { { 18, 60, 566 },{ 18, 60, 560 },{ 18, 0, 9 },{ 18, 0, 8 }, }, { { 26, 68, 566 },{ 26, 68, 560 },{ 26, 0, 9 },{ 26, 0, 8 }, },
2504 { { 34, 76, 566 },{ 34, 76, 560 },{ 34, 0, 9 },{ 34, 0, 8 }, }, { { 5, 104, 2758 },{ 98, 5, 1352 },{ 42, 0, 57 },{ 42, 6, 8 }, },
2505 { { 92, 0, 313 },{ 93, 1, 312 },{ 15, 51, 70 },{ 51, 15, 8 }, }, { { 3, 101, 790 },{ 3, 101, 784 },{ 0, 59, 88 },{ 59, 23, 8 }, },
2506 { { 14, 107, 790 },{ 11, 109, 784 },{ 31, 67, 70 },{ 67, 31, 8 }, }, { { 19, 117, 790 },{ 19, 117, 784 },{ 39, 75, 70 },{ 75, 39, 8 }, },
2507 { { 28, 126, 790 },{ 28, 126, 784 },{ 83, 5, 33 },{ 84, 48, 8 }, }, { { 132, 0, 239 },{ 36, 134, 784 },{ 91, 13, 33 },{ 92, 56, 8 }, },
2508 { { 142, 4, 239 },{ 44, 142, 784 },{ 99, 21, 33 },{ 100, 64, 8 }, }, { { 150, 12, 239 },{ 52, 150, 784 },{ 107, 29, 33 },{ 108, 72, 8 }, },
2509 { { 159, 21, 239 },{ 61, 159, 784 },{ 116, 38, 33 },{ 117, 81, 8 }, }, { { 167, 29, 239 },{ 69, 167, 784 },{ 124, 46, 33 },{ 125, 89, 8 }, },
2510 { { 175, 37, 239 },{ 77, 175, 784 },{ 132, 54, 33 },{ 133, 97, 8 }, }, { { 183, 45, 239 },{ 85, 183, 784 },{ 140, 62, 33 },{ 141, 105, 8 }, },
2511 { { 192, 54, 239 },{ 94, 192, 784 },{ 149, 71, 33 },{ 150, 114, 8 }, }, { { 200, 62, 239 },{ 102, 200, 784 },{ 157, 79, 33 },{ 158, 122, 8 }, },
2512 { { 208, 70, 239 },{ 110, 208, 784 },{ 165, 87, 33 },{ 166, 130, 8 }, }, { { 216, 78, 239 },{ 118, 216, 784 },{ 173, 95, 33 },{ 174, 138, 8 }, },
2513 { { 225, 87, 239 },{ 127, 225, 784 },{ 182, 104, 33 },{ 183, 147, 8 }, }, { { 233, 95, 239 },{ 135, 233, 784 },{ 190, 112, 33 },{ 191, 155, 8 }, },
2514 { { 241, 103, 239 },{ 143, 241, 784 },{ 198, 120, 33 },{ 199, 163, 8 }, }, { { 111, 208, 3682 },{ 151, 249, 784 },{ 206, 128, 33 },{ 207, 171, 8 }, },
2515 { { 120, 217, 3682 },{ 180, 216, 3648 },{ 215, 137, 33 },{ 216, 180, 8 }, }, { { 128, 225, 3682 },{ 188, 224, 3648 },{ 223, 145, 33 },{ 224, 188, 8 }, },
2516 { { 155, 253, 3928 },{ 196, 232, 3648 },{ 231, 153, 33 },{ 232, 196, 8 }, }, { { 144, 241, 3682 },{ 204, 240, 3648 },{ 239, 161, 33 },{ 240, 204, 8 }, },
2517 { { 153, 250, 3682 },{ 213, 249, 3648 },{ 248, 170, 33 },{ 249, 213, 8 }, }, { { 179, 221, 4040 },{ 255, 221, 8 },{ 179, 221, 456 },{ 255, 221, 8 }, },
2518 { { 187, 229, 4040 },{ 255, 229, 8 },{ 187, 229, 456 },{ 255, 229, 8 }, }, { { 195, 237, 4040 },{ 255, 237, 8 },{ 195, 237, 456 },{ 255, 237, 8 }, },
2519 { { 24, 80, 566 },{ 24, 80, 560 },{ 24, 0, 9 },{ 24, 0, 8 }, }, { { 32, 88, 566 },{ 32, 88, 560 },{ 32, 0, 9 },{ 32, 0, 8 }, },
2520 { { 40, 96, 566 },{ 40, 96, 560 },{ 40, 0, 9 },{ 40, 0, 8 }, }, { { 48, 104, 566 },{ 48, 104, 560 },{ 48, 0, 9 },{ 48, 0, 8 }, },
2521 { { 9, 138, 2758 },{ 130, 7, 1352 },{ 9, 57, 70 },{ 57, 9, 8 }, }, { { 119, 0, 313 },{ 120, 0, 312 },{ 17, 65, 70 },{ 65, 17, 8 }, },
2522 { { 0, 128, 784 },{ 128, 6, 312 },{ 25, 73, 70 },{ 73, 25, 8 }, }, { { 6, 137, 790 },{ 5, 136, 784 },{ 33, 81, 70 },{ 81, 33, 8 }, },
2523 { { 42, 171, 2758 },{ 14, 145, 784 },{ 42, 90, 70 },{ 90, 42, 8 }, }, { { 50, 179, 2758 },{ 22, 153, 784 },{ 50, 98, 70 },{ 98, 50, 8 }, },
2524 { { 58, 187, 2758 },{ 30, 161, 784 },{ 58, 106, 70 },{ 106, 58, 8 }, }, { { 191, 18, 1329 },{ 38, 169, 784 },{ 112, 9, 33 },{ 114, 66, 8 }, },
2525 { { 176, 0, 239 },{ 47, 178, 784 },{ 121, 18, 33 },{ 123, 75, 8 }, }, { { 187, 1, 239 },{ 55, 186, 784 },{ 129, 26, 33 },{ 131, 83, 8 }, },
2526 { { 195, 10, 239 },{ 63, 194, 784 },{ 137, 34, 33 },{ 139, 91, 8 }, }, { { 203, 18, 239 },{ 71, 202, 784 },{ 145, 42, 33 },{ 147, 99, 8 }, },
2527 { { 212, 27, 239 },{ 80, 211, 784 },{ 154, 51, 33 },{ 156, 108, 8 }, }, { { 220, 35, 239 },{ 88, 219, 784 },{ 162, 59, 33 },{ 164, 116, 8 }, },
2528 { { 228, 43, 239 },{ 96, 227, 784 },{ 170, 67, 33 },{ 172, 124, 8 }, }, { { 236, 51, 239 },{ 104, 235, 784 },{ 178, 75, 33 },{ 180, 132, 8 }, },
2529 { { 245, 60, 239 },{ 113, 244, 784 },{ 187, 84, 33 },{ 189, 141, 8 }, }, { { 91, 194, 3680 },{ 149, 197, 3648 },{ 195, 92, 33 },{ 197, 149, 8 }, },
2530 { { 99, 202, 3680 },{ 157, 205, 3648 },{ 203, 100, 33 },{ 205, 157, 8 }, }, { { 107, 210, 3680 },{ 165, 213, 3648 },{ 211, 108, 33 },{ 213, 165, 8 }, },
2531 { { 119, 249, 3928 },{ 174, 222, 3648 },{ 220, 117, 33 },{ 222, 174, 8 }, }, { { 127, 255, 856 },{ 182, 230, 3648 },{ 228, 125, 33 },{ 230, 182, 8 }, },
2532 { { 255, 135, 169 },{ 190, 238, 3648 },{ 236, 133, 33 },{ 238, 190, 8 }, }, { { 140, 243, 3680 },{ 198, 246, 3648 },{ 244, 141, 33 },{ 246, 198, 8 }, },
2533 { { 151, 207, 4040 },{ 255, 207, 8 },{ 151, 207, 456 },{ 255, 207, 8 }, }, { { 159, 215, 4040 },{ 255, 215, 8 },{ 159, 215, 456 },{ 255, 215, 8 }, },
2534 { { 167, 223, 4040 },{ 255, 223, 8 },{ 167, 223, 456 },{ 255, 223, 8 }, }, { { 175, 231, 4040 },{ 255, 231, 8 },{ 175, 231, 456 },{ 255, 231, 8 }, },
2535 { { 33, 106, 566 },{ 33, 106, 560 },{ 33, 0, 9 },{ 33, 0, 8 }, }, { { 41, 114, 566 },{ 41, 114, 560 },{ 41, 0, 9 },{ 41, 0, 8 }, },
2536 { { 49, 122, 566 },{ 49, 122, 560 },{ 49, 0, 9 },{ 49, 0, 8 }, }, { { 57, 130, 566 },{ 57, 130, 560 },{ 57, 0, 9 },{ 57, 0, 8 }, },
2537 { { 66, 139, 566 },{ 66, 139, 560 },{ 66, 0, 9 },{ 66, 0, 8 }, }, { { 74, 147, 566 },{ 170, 7, 1352 },{ 8, 74, 70 },{ 74, 8, 8 }, },
2538 { { 152, 0, 313 },{ 178, 15, 1352 },{ 0, 82, 80 },{ 82, 16, 8 }, }, { { 162, 0, 313 },{ 186, 23, 1352 },{ 24, 90, 70 },{ 90, 24, 8 }, },
2539 { { 0, 171, 784 },{ 195, 32, 1352 },{ 33, 99, 70 },{ 99, 33, 8 }, }, { { 6, 179, 790 },{ 203, 40, 1352 },{ 41, 107, 70 },{ 107, 41, 8 }, },
2540 { { 15, 187, 790 },{ 211, 48, 1352 },{ 115, 0, 41 },{ 115, 49, 8 }, }, { { 61, 199, 710 },{ 219, 56, 1352 },{ 57, 123, 70 },{ 123, 57, 8 }, },
2541 { { 70, 208, 710 },{ 228, 65, 1352 },{ 66, 132, 70 },{ 132, 66, 8 }, }, { { 78, 216, 710 },{ 236, 73, 1352 },{ 74, 140, 70 },{ 140, 74, 8 }, },
2542 { { 86, 224, 710 },{ 244, 81, 1352 },{ 145, 7, 33 },{ 148, 82, 8 }, }, { { 222, 8, 233 },{ 252, 89, 1352 },{ 153, 15, 33 },{ 156, 90, 8 }, },
2543 { { 235, 0, 239 },{ 241, 101, 328 },{ 166, 6, 39 },{ 165, 99, 8 }, }, { { 32, 170, 3680 },{ 249, 109, 328 },{ 0, 175, 98 },{ 173, 107, 8 }, },
2544 { { 40, 178, 3680 },{ 115, 181, 3648 },{ 8, 183, 98 },{ 181, 115, 8 }, }, { { 48, 186, 3680 },{ 123, 189, 3648 },{ 16, 191, 98 },{ 189, 123, 8 }, },
2545 { { 57, 195, 3680 },{ 132, 198, 3648 },{ 25, 200, 98 },{ 198, 132, 8 }, }, { { 67, 243, 3928 },{ 140, 206, 3648 },{ 33, 208, 98 },{ 206, 140, 8 }, },
2546 { { 76, 251, 3928 },{ 148, 214, 3648 },{ 41, 216, 98 },{ 214, 148, 8 }, }, { { 86, 255, 856 },{ 156, 222, 3648 },{ 49, 224, 98 },{ 222, 156, 8 }, },
2547 { { 255, 93, 169 },{ 165, 231, 3648 },{ 58, 233, 98 },{ 231, 165, 8 }, }, { { 98, 236, 3680 },{ 173, 239, 3648 },{ 66, 241, 98 },{ 239, 173, 8 }, },
2548 { { 108, 181, 4040 },{ 181, 247, 3648 },{ 74, 249, 98 },{ 247, 181, 8 }, }, { { 116, 189, 4040 },{ 255, 189, 8 },{ 116, 189, 456 },{ 255, 189, 8 }, },
2549 { { 125, 198, 4040 },{ 255, 198, 8 },{ 125, 198, 456 },{ 255, 198, 8 }, }, { { 133, 206, 4040 },{ 255, 206, 8 },{ 133, 206, 456 },{ 255, 206, 8 }, },
2550 { { 141, 214, 4040 },{ 255, 214, 8 },{ 141, 214, 456 },{ 255, 214, 8 }, }, { { 149, 222, 4040 },{ 255, 222, 8 },{ 149, 222, 456 },{ 255, 222, 8 }, },
2551 { { 47, 183, 566 },{ 47, 183, 560 },{ 47, 0, 9 },{ 47, 0, 8 }, }, { { 55, 191, 566 },{ 55, 191, 560 },{ 55, 0, 9 },{ 55, 0, 8 }, },
2552 { { 63, 199, 566 },{ 63, 199, 560 },{ 63, 0, 9 },{ 63, 0, 8 }, }, { { 71, 207, 566 },{ 71, 207, 560 },{ 71, 0, 9 },{ 71, 0, 8 }, },
2553 { { 80, 216, 566 },{ 80, 216, 560 },{ 80, 0, 9 },{ 80, 0, 8 }, }, { { 88, 224, 566 },{ 88, 224, 560 },{ 88, 0, 9 },{ 88, 0, 8 }, },
2554 { { 3, 233, 710 },{ 3, 233, 704 },{ 2, 96, 70 },{ 96, 2, 8 }, }, { { 11, 241, 710 },{ 11, 241, 704 },{ 10, 104, 70 },{ 104, 10, 8 }, },
2555 { { 20, 250, 710 },{ 20, 250, 704 },{ 19, 113, 70 },{ 113, 19, 8 }, }, { { 27, 121, 3654 },{ 27, 121, 3648 },{ 27, 121, 70 },{ 121, 27, 8 }, },
2556 { { 35, 129, 3654 },{ 35, 129, 3648 },{ 35, 129, 70 },{ 129, 35, 8 }, }, { { 43, 137, 3654 },{ 43, 137, 3648 },{ 43, 137, 70 },{ 137, 43, 8 }, },
2557 { { 52, 146, 3654 },{ 52, 146, 3648 },{ 52, 146, 70 },{ 146, 52, 8 }, }, { { 60, 154, 3654 },{ 60, 154, 3648 },{ 60, 154, 70 },{ 154, 60, 8 }, },
2558 { { 68, 162, 3654 },{ 68, 162, 3648 },{ 68, 162, 70 },{ 162, 68, 8 }, }, { { 76, 170, 3654 },{ 76, 170, 3648 },{ 76, 170, 70 },{ 170, 76, 8 }, },
2559 { { 85, 179, 3654 },{ 85, 179, 3648 },{ 85, 179, 70 },{ 179, 85, 8 }, }, { { 93, 187, 3654 },{ 93, 187, 3648 },{ 93, 187, 70 },{ 187, 93, 8 }, },
2560 { { 101, 195, 3654 },{ 101, 195, 3648 },{ 101, 195, 70 },{ 195, 101, 8 }, }, { { 109, 203, 3654 },{ 109, 203, 3648 },{ 109, 203, 70 },{ 203, 109, 8 }, },
2561 { { 118, 212, 3654 },{ 118, 212, 3648 },{ 118, 212, 70 },{ 212, 118, 8 }, }, { { 126, 220, 3654 },{ 126, 220, 3648 },{ 126, 220, 70 },{ 220, 126, 8 }, },
2562 { { 134, 228, 3654 },{ 134, 228, 3648 },{ 134, 228, 70 },{ 228, 134, 8 }, }, { { 5, 236, 3680 },{ 142, 236, 3648 },{ 5, 236, 96 },{ 236, 142, 8 }, },
2563 { { 14, 245, 3680 },{ 151, 245, 3648 },{ 14, 245, 96 },{ 245, 151, 8 }, }, { { 23, 159, 4040 },{ 159, 253, 3648 },{ 23, 159, 456 },{ 253, 159, 8 }, },
2564 { { 31, 167, 4040 },{ 255, 167, 8 },{ 31, 167, 456 },{ 255, 167, 8 }, }, { { 39, 175, 4040 },{ 255, 175, 8 },{ 39, 175, 456 },{ 255, 175, 8 }, },
2565 { { 48, 184, 4040 },{ 255, 184, 8 },{ 48, 184, 456 },{ 255, 184, 8 }, }, { { 56, 192, 4040 },{ 255, 192, 8 },{ 56, 192, 456 },{ 255, 192, 8 }, },
2566 { { 64, 200, 4040 },{ 255, 200, 8 },{ 64, 200, 456 },{ 255, 200, 8 }, },{ { 72, 208, 4040 },{ 255, 208, 8 },{ 72, 208, 456 },{ 255, 208, 8 }, },
2567
2568 };
2569
2570 struct dxt5a_block
2571 {
2572 uint8_t m_endpoints[2];
2573
2574 enum { cTotalSelectorBytes = 6 };
2575 uint8_t m_selectors[cTotalSelectorBytes];
2576
2577 inline void clear()
2578 {
2579 basisu::clear_obj(*this);
2580 }
2581
2582 inline uint32_t get_low_alpha() const
2583 {
2584 return m_endpoints[0];
2585 }
2586
2587 inline uint32_t get_high_alpha() const
2588 {
2589 return m_endpoints[1];
2590 }
2591
2592 inline void set_low_alpha(uint32_t i)
2593 {
2594 assert(i <= UINT8_MAX);
2595 m_endpoints[0] = static_cast<uint8_t>(i);
2596 }
2597
2598 inline void set_high_alpha(uint32_t i)
2599 {
2600 assert(i <= UINT8_MAX);
2601 m_endpoints[1] = static_cast<uint8_t>(i);
2602 }
2603
2604 inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
2605
2606 uint32_t get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); }
2607 uint32_t get_selectors_as_word(uint32_t index) { assert(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); }
2608
2609 inline uint32_t get_selector(uint32_t x, uint32_t y) const
2610 {
2611 assert((x < 4U) && (y < 4U));
2612
2613 uint32_t selector_index = (y * 4) + x;
2614 uint32_t bit_index = selector_index * cDXT5SelectorBits;
2615
2616 uint32_t byte_index = bit_index >> 3;
2617 uint32_t bit_ofs = bit_index & 7;
2618
2619 uint32_t v = m_selectors[byte_index];
2620 if (byte_index < (cTotalSelectorBytes - 1))
2621 v |= (m_selectors[byte_index + 1] << 8);
2622
2623 return (v >> bit_ofs) & 7;
2624 }
2625
2626 inline void set_selector(uint32_t x, uint32_t y, uint32_t val)
2627 {
2628 assert((x < 4U) && (y < 4U) && (val < 8U));
2629
2630 uint32_t selector_index = (y * 4) + x;
2631 uint32_t bit_index = selector_index * cDXT5SelectorBits;
2632
2633 uint32_t byte_index = bit_index >> 3;
2634 uint32_t bit_ofs = bit_index & 7;
2635
2636 uint32_t v = m_selectors[byte_index];
2637 if (byte_index < (cTotalSelectorBytes - 1))
2638 v |= (m_selectors[byte_index + 1] << 8);
2639
2640 v &= (~(7 << bit_ofs));
2641 v |= (val << bit_ofs);
2642
2643 m_selectors[byte_index] = static_cast<uint8_t>(v);
2644 if (byte_index < (cTotalSelectorBytes - 1))
2645 m_selectors[byte_index + 1] = static_cast<uint8_t>(v >> 8);
2646 }
2647
2648 enum { cMaxSelectorValues = 8 };
2649
2650 static uint32_t get_block_values6(color32* pDst, uint32_t l, uint32_t h)
2651 {
2652 pDst[0].a = static_cast<uint8_t>(l);
2653 pDst[1].a = static_cast<uint8_t>(h);
2654 pDst[2].a = static_cast<uint8_t>((l * 4 + h) / 5);
2655 pDst[3].a = static_cast<uint8_t>((l * 3 + h * 2) / 5);
2656 pDst[4].a = static_cast<uint8_t>((l * 2 + h * 3) / 5);
2657 pDst[5].a = static_cast<uint8_t>((l + h * 4) / 5);
2658 pDst[6].a = 0;
2659 pDst[7].a = 255;
2660 return 6;
2661 }
2662
2663 static uint32_t get_block_values8(color32* pDst, uint32_t l, uint32_t h)
2664 {
2665 pDst[0].a = static_cast<uint8_t>(l);
2666 pDst[1].a = static_cast<uint8_t>(h);
2667 pDst[2].a = static_cast<uint8_t>((l * 6 + h) / 7);
2668 pDst[3].a = static_cast<uint8_t>((l * 5 + h * 2) / 7);
2669 pDst[4].a = static_cast<uint8_t>((l * 4 + h * 3) / 7);
2670 pDst[5].a = static_cast<uint8_t>((l * 3 + h * 4) / 7);
2671 pDst[6].a = static_cast<uint8_t>((l * 2 + h * 5) / 7);
2672 pDst[7].a = static_cast<uint8_t>((l + h * 6) / 7);
2673 return 8;
2674 }
2675
2676 static uint32_t get_block_values(color32* pDst, uint32_t l, uint32_t h)
2677 {
2678 if (l > h)
2679 return get_block_values8(pDst, l, h);
2680 else
2681 return get_block_values6(pDst, l, h);
2682 }
2683 };
2684
2685 static void convert_etc1s_to_dxt5a(dxt5a_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
2686 {
2687 const uint32_t low_selector = pSelector->m_lo_selector;
2688 const uint32_t high_selector = pSelector->m_hi_selector;
2689
2690 const color32& base_color = pEndpoints->m_color5;
2691 const uint32_t inten_table = pEndpoints->m_inten5;
2692
2693 if (low_selector == high_selector)
2694 {
2695 uint32_t r;
2696 decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
2697
2698 pDst_block->set_low_alpha(r);
2699 pDst_block->set_high_alpha(r);
2700 pDst_block->m_selectors[0] = 0;
2701 pDst_block->m_selectors[1] = 0;
2702 pDst_block->m_selectors[2] = 0;
2703 pDst_block->m_selectors[3] = 0;
2704 pDst_block->m_selectors[4] = 0;
2705 pDst_block->m_selectors[5] = 0;
2706 return;
2707 }
2708 else if (pSelector->m_num_unique_selectors == 2)
2709 {
2710 color32 block_colors[4];
2711
2712 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
2713
2714 const uint32_t r0 = block_colors[low_selector].r;
2715 const uint32_t r1 = block_colors[high_selector].r;
2716
2717 pDst_block->set_low_alpha(r0);
2718 pDst_block->set_high_alpha(r1);
2719
2720 // TODO: Optimize this
2721 for (uint32_t y = 0; y < 4; y++)
2722 {
2723 for (uint32_t x = 0; x < 4; x++)
2724 {
2725 uint32_t s = pSelector->get_selector(x, y);
2726 pDst_block->set_selector(x, y, (s == high_selector) ? 1 : 0);
2727 }
2728 }
2729
2730 return;
2731 }
2732
2733 uint32_t selector_range_table = 0;
2734 for (selector_range_table = 0; selector_range_table < NUM_DXT5A_SELECTOR_RANGES; selector_range_table++)
2735 if ((low_selector == s_dxt5a_selector_ranges[selector_range_table].m_low) && (high_selector == s_dxt5a_selector_ranges[selector_range_table].m_high))
2736 break;
2737 if (selector_range_table >= NUM_DXT5A_SELECTOR_RANGES)
2738 selector_range_table = 0;
2739
2740 const etc1_g_to_dxt5a_conversion* pTable_entry = &g_etc1_g_to_dxt5a[base_color.r + inten_table * 32][selector_range_table];
2741
2742 pDst_block->set_low_alpha(pTable_entry->m_lo);
2743 pDst_block->set_high_alpha(pTable_entry->m_hi);
2744
2745 // TODO: Optimize this (like ETC1->BC1)
2746 for (uint32_t y = 0; y < 4; y++)
2747 {
2748 for (uint32_t x = 0; x < 4; x++)
2749 {
2750 uint32_t s = pSelector->get_selector(x, y);
2751
2752 uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
2753
2754 pDst_block->set_selector(x, y, ds);
2755 }
2756 }
2757 }
2758#endif //BASISD_SUPPORT_DXT5A
2759
2760 // PVRTC
2761
2762#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC
2763 static const uint16_t g_pvrtc_swizzle_table[256] =
2764 {
2765 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
2766 0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
2767 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
2768 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
2769 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
2770 0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
2771 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
2772 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
2773 };
2774
// Expansion tables mapping PVRTC1 encoded endpoint components to 8-bit values.
// Note these are lookup tables because simple calculations can't be used to convert PVRTC1
// encoded endpoint components to/from 8-bits, due to hardware approximations.

// 5-bit component -> 8-bit value.
static const uint8_t g_pvrtc_5[32] =
{
	0, 8, 16, 24, 33, 41, 49, 57,
	66, 74, 82, 90, 99, 107, 115, 123,
	132, 140, 148, 156, 165, 173, 181, 189,
	198, 206, 214, 222, 231, 239, 247, 255
};

// 4-bit component -> 8-bit value.
static const uint8_t g_pvrtc_4[16] =
{
	0, 16, 33, 49, 66, 82, 99, 115,
	140, 156, 173, 189, 206, 222, 239, 255
};

// 3-bit component -> 8-bit value.
static const uint8_t g_pvrtc_3[8] =
{
	0, 33, 74, 107, 148, 181, 222, 255
};

// Alpha level -> 8-bit value. Nine entries: levels 0..7 plus level 8 for fully opaque (255).
static const uint8_t g_pvrtc_alpha[9] =
{
	0, 34, 68, 102, 136, 170, 204, 238, 255
};
2780
2781 static const uint8_t g_pvrtc_5_floor[256] =
2782 {
2783 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,
2784 3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,
2785 7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
2786 11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,
2787 15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,
2788 19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,
2789 23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,
2790 27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31
2791 };
2792
2793 static const uint8_t g_pvrtc_5_ceil[256] =
2794 {
2795 0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,
2796 4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,
2797 8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,
2798 12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,
2799 16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,
2800 20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,
2801 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,
2802 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31
2803 };
2804
2805 static const uint8_t g_pvrtc_4_floor[256] =
2806 {
2807 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2808 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2809 3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
2810 5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,
2811 7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,
2812 9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,
2813 11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,
2814 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15
2815 };
2816
2817 static const uint8_t g_pvrtc_4_ceil[256] =
2818 {
2819 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2820 2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2821 4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,
2822 6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,
2823 8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,
2824 10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,
2825 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,
2826 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
2827 };
2828
2829 static const uint8_t g_pvrtc_3_floor[256] =
2830 {
2831 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2832 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2833 1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2834 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2835 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,
2836 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
2837 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,
2838 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7
2839 };
2840
2841 static const uint8_t g_pvrtc_3_ceil[256] =
2842 {
2843 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2844 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2845 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2846 3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2847 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,
2848 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,
2849 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,
2850 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
2851 };
2852
2853 static const uint8_t g_pvrtc_alpha_floor[256] =
2854 {
2855 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2856 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2857 1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2858 2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2859 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2860 4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
2861 5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
2862 6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8
2863 };
2864
2865 static const uint8_t g_pvrtc_alpha_ceil[256] =
2866 {
2867 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2868 1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2869 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2870 3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
2871 4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
2872 5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
2873 6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
2874 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
2875 };
2876
2877 struct pvrtc4_block
2878 {
2879 uint32_t m_modulation;
2880 uint32_t m_endpoints;
2881
2882 pvrtc4_block() : m_modulation(0), m_endpoints(0) { }
2883
2884 inline bool operator== (const pvrtc4_block& rhs) const
2885 {
2886 return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints);
2887 }
2888
2889 inline void clear()
2890 {
2891 m_modulation = 0;
2892 m_endpoints = 0;
2893 }
2894
2895 inline bool get_block_uses_transparent_modulation() const
2896 {
2897 return (m_endpoints & 1) != 0;
2898 }
2899
2900 inline void set_block_uses_transparent_modulation(bool m)
2901 {
2902 m_endpoints = (m_endpoints & ~1U) | static_cast<uint32_t>(m);
2903 }
2904
2905 inline bool is_endpoint_opaque(uint32_t endpoint_index) const
2906 {
2907 static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
2908 return (m_endpoints & s_bitmasks[basisu::open_range_check(endpoint_index, 2U)]) != 0;
2909 }
2910
2911 inline void set_endpoint_opaque(uint32_t endpoint_index, bool opaque)
2912 {
2913 assert(endpoint_index < 2);
2914 static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U };
2915 if (opaque)
2916 m_endpoints |= s_bitmasks[endpoint_index];
2917 else
2918 m_endpoints &= ~s_bitmasks[endpoint_index];
2919 }
2920
2921 inline color32 get_endpoint_5554(uint32_t endpoint_index) const
2922 {
2923 assert(endpoint_index < 2);
2924 static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
2925 uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
2926
2927 uint32_t r, g, b, a;
2928 if (packed & 0x8000)
2929 {
2930 // opaque 554 or 555
2931 r = (packed >> 10) & 31;
2932 g = (packed >> 5) & 31;
2933 b = packed & 31;
2934
2935 if (!endpoint_index)
2936 b |= (b >> 4);
2937
2938 a = 0xF;
2939 }
2940 else
2941 {
2942 // translucent 4433 or 4443
2943 r = (packed >> 7) & 0x1E;
2944 g = (packed >> 3) & 0x1E;
2945 b = (packed & 0xF) << 1;
2946
2947 r |= (r >> 4);
2948 g |= (g >> 4);
2949
2950 if (!endpoint_index)
2951 b |= (b >> 3);
2952 else
2953 b |= (b >> 4);
2954
2955 a = (packed >> 11) & 0xE;
2956 }
2957
2958 assert((r < 32) && (g < 32) && (b < 32) && (a < 16));
2959
2960 return color32(r, g, b, a);
2961 }
2962
2963 inline color32 get_endpoint_8888(uint32_t endpoint_index) const
2964 {
2965 assert(endpoint_index < 2);
2966 static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
2967 uint32_t packed = (m_endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
2968
2969 uint32_t r, g, b, a;
2970 if (packed & 0x8000)
2971 {
2972 // opaque 554 or 555
2973 // 1RRRRRGGGGGBBBBM
2974 // 1RRRRRGGGGGBBBBB
2975 r = (packed >> 10) & 31;
2976 g = (packed >> 5) & 31;
2977 b = packed & 31;
2978
2979 r = g_pvrtc_5[r];
2980 g = g_pvrtc_5[g];
2981
2982 if (!endpoint_index)
2983 b = g_pvrtc_4[b >> 1];
2984 else
2985 b = g_pvrtc_5[b];
2986
2987 a = 255;
2988 }
2989 else
2990 {
2991 // translucent 4433 or 4443
2992 // 0AAA RRRR GGGG BBBM
2993 // 0AAA RRRR GGGG BBBB
2994 r = (packed >> 8) & 0xF;
2995 g = (packed >> 4) & 0xF;
2996 b = packed & 0xF;
2997 a = (packed >> 12) & 7;
2998
2999 r = g_pvrtc_4[r];
3000 g = g_pvrtc_4[g];
3001
3002 if (!endpoint_index)
3003 b = g_pvrtc_3[b >> 1];
3004 else
3005 b = g_pvrtc_4[b];
3006
3007 a = g_pvrtc_alpha[a];
3008 }
3009
3010 return color32(r, g, b, a);
3011 }
3012
3013 inline uint32_t get_endpoint_l8(uint32_t endpoint_index) const
3014 {
3015 color32 c(get_endpoint_8888(endpoint_index));
3016 return c.r + c.g + c.b + c.a;
3017 }
3018
3019 inline uint32_t get_opaque_endpoint_l0() const
3020 {
3021 uint32_t packed = m_endpoints & 0xFFFE;
3022
3023 uint32_t r, g, b;
3024 assert(packed & 0x8000);
3025
3026 // opaque 554 or 555
3027 r = (packed >> 10) & 31;
3028 g = (packed >> 5) & 31;
3029 b = packed & 31;
3030 b |= (b >> 4);
3031
3032 return r + g + b;
3033 }
3034
3035 inline uint32_t get_opaque_endpoint_l1() const
3036 {
3037 uint32_t packed = m_endpoints >> 16;
3038
3039 uint32_t r, g, b;
3040 assert(packed & 0x8000);
3041
3042 // opaque 554 or 555
3043 r = (packed >> 10) & 31;
3044 g = (packed >> 5) & 31;
3045 b = packed & 31;
3046
3047 return r + g + b;
3048 }
3049
3050 static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint)
3051 {
3052 static const uint32_t s_comp_prec[4][4] =
3053 {
3054 // R0 G0 B0 A0 R1 G1 B1 A1
3055 { 4, 4, 3, 3 },{ 4, 4, 4, 3 }, // transparent endpoint
3056
3057 { 5, 5, 4, 0 },{ 5, 5, 5, 0 } // opaque endpoint
3058 };
3059 return s_comp_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][basisu::open_range_check(c, 4U)];
3060 }
3061
3062 static color32 get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint)
3063 {
3064 static const color32 s_color_prec[4] =
3065 {
3066 color32(4, 4, 3, 3), color32(4, 4, 4, 3), // transparent endpoint
3067 color32(5, 5, 4, 0), color32(5, 5, 5, 0) // opaque endpoint
3068 };
3069 return s_color_prec[basisu::open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)];
3070 }
3071
3072 inline void set_opaque_endpoint_floor(uint32_t endpoint_index, const color32& c)
3073 {
3074 assert(endpoint_index < 2);
3075 const uint32_t m = m_endpoints & 1;
3076
3077 uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3078
3079 if (!endpoint_index)
3080 b = g_pvrtc_4_floor[b] << 1;
3081 else
3082 b = g_pvrtc_5_floor[b];
3083
3084 // rgba=555 here
3085 assert((r < 32) && (g < 32) && (b < 32));
3086
3087 // 1RRRRRGGGGGBBBBM
3088 // 1RRRRRGGGGGBBBBB
3089
3090 // opaque 554 or 555
3091 uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3092 if (!endpoint_index)
3093 packed = (packed & ~1) | m;
3094
3095 assert(packed <= 0xFFFF);
3096
3097 if (endpoint_index)
3098 m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3099 else
3100 m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3101 }
3102
3103 inline void set_opaque_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3104 {
3105 assert(endpoint_index < 2);
3106 const uint32_t m = m_endpoints & 1;
3107
3108 uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3109
3110 if (!endpoint_index)
3111 b = g_pvrtc_4_ceil[b] << 1;
3112 else
3113 b = g_pvrtc_5_ceil[b];
3114
3115 // rgba=555 here
3116 assert((r < 32) && (g < 32) && (b < 32));
3117
3118 // 1RRRRRGGGGGBBBBM
3119 // 1RRRRRGGGGGBBBBB
3120
3121 // opaque 554 or 555
3122 uint32_t packed = 0x8000 | (r << 10) | (g << 5) | b;
3123 if (!endpoint_index)
3124 packed |= m;
3125
3126 assert(packed <= 0xFFFF);
3127
3128 if (endpoint_index)
3129 m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3130 else
3131 m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3132 }
3133
3134 // opaque endpoints: 554 or 555
3135 // transparent endpoints: 3443 or 3444
3136 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint)
3137 {
3138 assert(endpoint_index < 2);
3139 const uint32_t m = m_endpoints & 1;
3140 uint32_t r = c[0], g = c[1], b = c[2], a = c[3];
3141
3142 uint32_t packed;
3143
3144 if (opaque_endpoint)
3145 {
3146 if (!endpoint_index)
3147 {
3148 // 554
3149 // 1RRRRRGGGGGBBBBM
3150 assert((r < 32) && (g < 32) && (b < 16));
3151 packed = 0x8000 | (r << 10) | (g << 5) | (b << 1) | m;
3152 }
3153 else
3154 {
3155 // 555
3156 // 1RRRRRGGGGGBBBBB
3157 assert((r < 32) && (g < 32) && (b < 32));
3158 packed = 0x8000 | (r << 10) | (g << 5) | b;
3159 }
3160 }
3161 else
3162 {
3163 if (!endpoint_index)
3164 {
3165 // 3443
3166 // 0AAA RRRR GGGG BBBM
3167 assert((r < 16) && (g < 16) && (b < 8) && (a < 8));
3168 packed = (a << 12) | (r << 8) | (g << 4) | (b << 1) | m;
3169 }
3170 else
3171 {
3172 // 3444
3173 // 0AAA RRRR GGGG BBBB
3174 assert((r < 16) && (g < 16) && (b < 16) && (a < 8));
3175 packed = (a << 12) | (r << 8) | (g << 4) | b;
3176 }
3177 }
3178
3179 assert(packed <= 0xFFFF);
3180
3181 if (endpoint_index)
3182 m_endpoints = (m_endpoints & 0xFFFFU) | (packed << 16);
3183 else
3184 m_endpoints = (m_endpoints & 0xFFFF0000U) | packed;
3185 }
3186
3187 inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c)
3188 {
3189 assert(endpoint_index < 2);
3190
3191 int a = g_pvrtc_alpha_floor[c.a];
3192 if (a == 8)
3193 {
3194 // 554 or 555
3195 uint32_t r = g_pvrtc_5_floor[c[0]], g = g_pvrtc_5_floor[c[1]], b = c[2];
3196
3197 if (!endpoint_index)
3198 b = g_pvrtc_4_floor[b];
3199 else
3200 b = g_pvrtc_5_floor[b];
3201
3202 set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3203 }
3204 else
3205 {
3206 // 4433 or 4443
3207 uint32_t r = g_pvrtc_4_floor[c[0]], g = g_pvrtc_4_floor[c[1]], b = c[2];
3208
3209 if (!endpoint_index)
3210 b = g_pvrtc_3_floor[b];
3211 else
3212 b = g_pvrtc_4_floor[b];
3213
3214 set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3215 }
3216 }
3217
3218 inline void set_endpoint_ceil(uint32_t endpoint_index, const color32& c)
3219 {
3220 assert(endpoint_index < 2);
3221
3222 int a = g_pvrtc_alpha_ceil[c.a];
3223 if (a == 8)
3224 {
3225 // 554 or 555
3226 uint32_t r = g_pvrtc_5_ceil[c[0]], g = g_pvrtc_5_ceil[c[1]], b = c[2];
3227
3228 if (!endpoint_index)
3229 b = g_pvrtc_4_ceil[b];
3230 else
3231 b = g_pvrtc_5_ceil[b];
3232
3233 set_endpoint_raw(endpoint_index, color32(r, g, b, a), true);
3234 }
3235 else
3236 {
3237 // 4433 or 4443
3238 uint32_t r = g_pvrtc_4_ceil[c[0]], g = g_pvrtc_4_ceil[c[1]], b = c[2];
3239
3240 if (!endpoint_index)
3241 b = g_pvrtc_3_ceil[b];
3242 else
3243 b = g_pvrtc_4_ceil[b];
3244
3245 set_endpoint_raw(endpoint_index, color32(r, g, b, a), false);
3246 }
3247 }
3248
3249 inline uint32_t get_modulation(uint32_t x, uint32_t y) const
3250 {
3251 assert((x < 4) && (y < 4));
3252 return (m_modulation >> ((y * 4 + x) * 2)) & 3;
3253 }
3254
3255 // Scaled by 8
3256 inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const
3257 {
3258 static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 },{ 0, 4, 4, 8 } };
3259 return s_block_scales[block_uses_transparent_modulation];
3260 }
3261
3262 // Scaled by 8
3263 inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const
3264 {
3265 return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)];
3266 }
3267
3268 inline void set_modulation(uint32_t x, uint32_t y, uint32_t s)
3269 {
3270 assert((x < 4) && (y < 4) && (s < 4));
3271 uint32_t n = (y * 4 + x) * 2;
3272 m_modulation = (m_modulation & (~(3 << n))) | (s << n);
3273 assert(get_modulation(x, y) == s);
3274 }
3275
3276 // Assumes modulation was initialized to 0
3277 inline void set_modulation_fast(uint32_t x, uint32_t y, uint32_t s)
3278 {
3279 assert((x < 4) && (y < 4) && (s < 4));
3280 uint32_t n = (y * 4 + x) * 2;
3281 m_modulation |= (s << n);
3282 assert(get_modulation(x, y) == s);
3283 }
3284 };
3285
// Disabled reference table, kept for documentation only.
// Entry [y * 4 + x] holds four weights that sum to 16. The same sixteen weight tuples appear,
// in the same order, as the w0..w3 arguments of the DO_PIX(x, y, ...) invocations in the
// fixup_pvrtc1_4_modulation_* functions below, where they blend the endpoint values of four
// neighbouring blocks.
3286#if 0
3287 static const uint8_t g_pvrtc_bilinear_weights[16][4] =
3288 {
3289 { 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 },
3290 { 2, 2, 6, 6 }, { 1, 3, 3, 9 }, { 4, 0, 12, 0 }, { 3, 1, 9, 3 },
3291 { 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 },
3292 { 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 },
3293 };
3294#endif
3295
// Pairs one ETC1 block with one packed PVRTC endpoint word.
// NOTE(review): not used anywhere in this section; presumably intermediate storage for the
// two-pass PVRTC1 transcode - confirm against the callers.
3296 struct pvrtc1_temp_block
3297 {
 // ETC1 block data.
3298 decoder_etc_block m_etc1_block;
 // Packed PVRTC endpoint pair (presumably the same layout as pvrtc4_block::m_endpoints - confirm).
3299 uint32_t m_pvrtc_endpoints;
3300 };
3301
// Returns R + G + B of opaque endpoint 0, each channel expressed as a 5-bit value (result 0..93).
// Endpoint 0 is the low 16 bits of `endpoints`, packed as 1RRRRRGGGGGBBBBM; bit 15 must be set.
static inline uint32_t get_opaque_endpoint_l0(uint32_t endpoints)
{
    assert(endpoints & 0x8000);

    const uint32_t red = (endpoints >> 10) & 31;
    const uint32_t green = (endpoints >> 5) & 31;

    // Blue is 4 bits wide and sits in bits 4..1 (bit 0 is the M bit). Masking with 30 leaves it
    // as a 5-bit value with a zero LSB; copying its top bit into the LSB expands it to 5 bits.
    const uint32_t blue_hi4 = endpoints & 30;
    const uint32_t blue = blue_hi4 | (blue_hi4 >> 4);

    return red + green + blue;
}
3316
// Returns R + G + B of opaque endpoint 1, each channel a 5-bit value (result 0..93).
// Endpoint 1 is the high 16 bits of `endpoints`, packed as 1RRRRRGGGGGBBBBB; its bit 15 must be set.
static inline uint32_t get_opaque_endpoint_l1(uint32_t endpoints)
{
    const uint32_t hi_word = endpoints >> 16;
    assert(hi_word & 0x8000);

    const uint32_t red = (hi_word >> 10) & 31;
    const uint32_t green = (hi_word >> 5) & 31;
    const uint32_t blue = hi_word & 31;

    return red + green + blue;
}
3330
3331 static color32 get_endpoint_8888(uint32_t endpoints, uint32_t endpoint_index)
3332 {
3333 assert(endpoint_index < 2);
3334 static const uint32_t s_endpoint_mask[2] = { 0xFFFE, 0xFFFF };
3335 uint32_t packed = (endpoints >> (basisu::open_range_check(endpoint_index, 2U) ? 16 : 0)) & s_endpoint_mask[endpoint_index];
3336
3337 uint32_t r, g, b, a;
3338 if (packed & 0x8000)
3339 {
3340 // opaque 554 or 555
3341 // 1RRRRRGGGGGBBBBM
3342 // 1RRRRRGGGGGBBBBB
3343 r = (packed >> 10) & 31;
3344 g = (packed >> 5) & 31;
3345 b = packed & 31;
3346
3347 r = g_pvrtc_5[r];
3348 g = g_pvrtc_5[g];
3349
3350 if (!endpoint_index)
3351 b = g_pvrtc_4[b >> 1];
3352 else
3353 b = g_pvrtc_5[b];
3354
3355 a = 255;
3356 }
3357 else
3358 {
3359 // translucent 4433 or 4443
3360 // 0AAA RRRR GGGG BBBM
3361 // 0AAA RRRR GGGG BBBB
3362 r = (packed >> 8) & 0xF;
3363 g = (packed >> 4) & 0xF;
3364 b = packed & 0xF;
3365 a = (packed >> 12) & 7;
3366
3367 r = g_pvrtc_4[r];
3368 g = g_pvrtc_4[g];
3369
3370 if (!endpoint_index)
3371 b = g_pvrtc_3[b >> 1];
3372 else
3373 b = g_pvrtc_4[b];
3374
3375 a = g_pvrtc_alpha[a];
3376 }
3377
3378 return color32(r, g, b, a);
3379 }
3380
3381 static uint32_t get_endpoint_l8(uint32_t endpoints, uint32_t endpoint_index)
3382 {
3383 color32 c(get_endpoint_8888(endpoints, endpoint_index));
3384 return c.r + c.g + c.b + c.a;
3385 }
3386#endif
3387
3388#if BASISD_SUPPORT_PVRTC1
3389 // TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture.
// Final pass of the opaque ETC1 -> PVRTC1 4bpp conversion. For every block this function
//  - copies the block's precomputed packed endpoints into its swizzled slot in pDst_blocks, and
//  - derives the sixteen 2-bit modulation values by comparing each source texel's brightness
//    with the brightness of the two endpoint colors interpolated from the neighbouring blocks.
//
// pETC_Blocks      - source ETC1 blocks, row-major, num_blocks_x * num_blocks_y entries.
// pPVRTC_endpoints - packed endpoint pair per block, row-major. Both endpoints must be opaque
//                    (get_opaque_endpoint_l0/l1 assert on bit 15).
// pDst_blocks      - output array of pvrtc4_block, written at the swizzled block index.
// num_blocks_x/y   - block counts. Neighbour wrap-around is done with "& (count - 1)", which
//                    only wraps correctly for power-of-two counts.
3390 static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
3391 {
 // Wrap masks, plus the number of address bits shared by both axes (used for non-square swizzling).
3392 const uint32_t x_mask = num_blocks_x - 1;
3393 const uint32_t y_mask = num_blocks_y - 1;
3394 const uint32_t x_bits = basisu::total_bits(x_mask);
3395 const uint32_t y_bits = basisu::total_bits(y_mask);
3396 const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3397 //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3398 const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3399
3400 uint32_t block_index = 0;
3401
 // Sliding 3x3 window (indexed [column][row]) of endpoint brightness around the current block:
 // e0 for endpoint 0, e1 for endpoint 1. Entry [1][1] is the current block.
3402 // really 3x3
3403 int e0[4][4], e1[4][4];
3404
3405 for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3406 {
3407 const uint32_t* pE_rows[3];
3408
 // Prime the window for x == 0: rows y-1..y+1 and columns -1..1, wrapped with the masks.
3409 for (int ey = 0; ey < 3; ey++)
3410 {
3411 int by = y + ey - 1;
3412
3413 const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3414
3415 pE_rows[ey] = pE;
3416
3417 for (int ex = 0; ex < 3; ex++)
3418 {
3419 int bx = 0 + ex - 1;
3420
3421 const uint32_t e = pE[bx & x_mask];
3422
 // Sum of three 5-bit channels (0..93), rescaled by 255/31 to the range of a sum of three 8-bit channels.
3423 e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
3424 e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
3425 }
3426 }
3427
 // Destination addressing: y contributes the unshifted table bits and x the same bits shifted
 // left by one. NOTE(review): presumably g_pvrtc_swizzle_table spreads the 8 bits of its index
 // over the even bit positions, which would make this a bit interleave - confirm.
3428 const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3429
3430 for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3431 {
3432 const decoder_etc_block& src_block = pETC_Blocks[block_index];
3433
3434 const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3435
 // Non-square textures: only the low 2 * min_bits address bits are combined from x and y; the
 // remaining high bits of the larger axis are appended above them.
3436 uint32_t swizzled = x_swizzle | y_swizzle;
3437 if (num_blocks_x != num_blocks_y)
3438 {
3439 swizzled &= swizzle_mask;
3440
3441 if (num_blocks_x > num_blocks_y)
3442 swizzled |= ((x >> min_bits) << (min_bits * 2));
3443 else
3444 swizzled |= ((y >> min_bits) << (min_bits * 2));
3445 }
3446
3447 pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3448 pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3449
 // Brightness (R + G + B, scaled by 16) of the four colors this ETC1 block can produce.
 // The intensity table entries are read in the order 2, 3, 1, 0.
 // NOTE(review): presumably g_etc1_inten_tables48 holds the ETC1 intensity modifiers
 // pre-multiplied by 48 (3 channels * 16), and the permutation matches the selector encoding
 // produced by g_etc1_x_selector_unpack - confirm. Unlike the RGBA variant, no clamping is done here.
3450 uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3451 uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3452 uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3453
3454 const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3455 int by = (base_r + base_g + base_b) * 16;
3456 int block_colors_y_x16[4];
3457 block_colors_y_x16[0] = by + pInten_table48[2];
3458 block_colors_y_x16[1] = by + pInten_table48[3];
3459 block_colors_y_x16[2] = by + pInten_table48[1];
3460 block_colors_y_x16[3] = by + pInten_table48[0];
3461
 // Load the right-hand column of the window (block x + 1, wrapped) for all three rows.
3462 {
3463 const uint32_t ex = 2;
3464 int bx = x + ex - 1;
3465 bx &= x_mask;
3466
3467#define DO_ROW(ey) \
3468 { \
3469 const uint32_t e = pE_rows[ey][bx]; \
3470 e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
3471 e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
3472 }
3473
3474 DO_ROW(0);
3475 DO_ROW(1);
3476 DO_ROW(2);
3477#undef DO_ROW
3478 }
3479
3480 uint32_t mod = 0;
3481
 // For each texel column lx, build an 8-bit key from one nibble of selector byte 7 or 6 (low
 // four key bits) and the matching nibble of byte 5 or 4 (high four key bits). The key indexes
 // g_etc1_x_selector_unpack[ly][...] inside DO_PIX to obtain the texel's selector.
3482 uint32_t lookup_x[4];
3483
3484#define DO_LOOKUP(lx) { \
3485 const uint32_t byte_ofs = 7 - (((lx) * 4) >> 3); \
3486 const uint32_t lsb_bits = src_block.m_bytes[byte_ofs] >> (((lx) & 1) * 4); \
3487 const uint32_t msb_bits = src_block.m_bytes[byte_ofs - 2] >> (((lx) & 1) * 4); \
3488 lookup_x[lx] = (lsb_bits & 0xF) | ((msb_bits & 0xF) << 4); }
3489
3490 DO_LOOKUP(0);
3491 DO_LOOKUP(1);
3492 DO_LOOKUP(2);
3493 DO_LOOKUP(3);
3494#undef DO_LOOKUP
3495
 // DO_PIX(lx, ly, w0..w3): picks the modulation value of texel (lx, ly).
 //  ca_l / cb_l - endpoint 0 / endpoint 1 brightness blended from four neighbouring blocks
 //                (a0..a3 / b0..b3) with weights that sum to 16.
 //  cl          - brightness of the source texel, also scaled by 16.
 //  The texel's position between the two blended endpoints is compared against the thresholds
 //  3/16, 8/16 and 13/16 (p = 16 * (cl - ca_l) versus 3, 8, 13 times (cb_l - ca_l)), giving
 //  modulation 0..3. Both sides are negated when endpoint 0 is the brighter one.
 //  The result is OR-ed into bits (ly * 8 + lx * 2) of `mod`.
3496#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3497 { \
3498 int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3499 int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3500 int cl = block_colors_y_x16[g_etc1_x_selector_unpack[ly][lookup_x[lx]]]; \
3501 int dl = cb_l - ca_l; \
3502 int vl = cl - ca_l; \
3503 int p = vl * 16; \
3504 if (ca_l > cb_l) { p = -p; dl = -dl; } \
3505 uint32_t m = 0; \
3506 if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3507 if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3508 if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3509 mod |= m; \
3510 }
3511
 // The block is processed as four 2x2 texel quadrants. (ex, ey) selects which 2x2 group of the
 // 3x3 window supplies a0..a3 / b0..b3 for that quadrant.
 // Top-left quadrant.
3512 {
3513 const uint32_t ex = 0, ey = 0;
3514 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3515 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3516 DO_PIX(0, 0, 4, 4, 4, 4);
3517 DO_PIX(1, 0, 2, 6, 2, 6);
3518 DO_PIX(0, 1, 2, 2, 6, 6);
3519 DO_PIX(1, 1, 1, 3, 3, 9);
3520 }
3521
 // Top-right quadrant.
3522 {
3523 const uint32_t ex = 1, ey = 0;
3524 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3525 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3526 DO_PIX(2, 0, 8, 0, 8, 0);
3527 DO_PIX(3, 0, 6, 2, 6, 2);
3528 DO_PIX(2, 1, 4, 0, 12, 0);
3529 DO_PIX(3, 1, 3, 1, 9, 3);
3530 }
3531
 // Bottom-left quadrant.
3532 {
3533 const uint32_t ex = 0, ey = 1;
3534 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3535 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3536 DO_PIX(0, 2, 8, 8, 0, 0);
3537 DO_PIX(1, 2, 4, 12, 0, 0);
3538 DO_PIX(0, 3, 6, 6, 2, 2);
3539 DO_PIX(1, 3, 3, 9, 1, 3);
3540 }
3541
 // Bottom-right quadrant; texel (2, 2) uses the current block's endpoints only (weight 16).
3542 {
3543 const uint32_t ex = 1, ey = 1;
3544 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3545 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3546 DO_PIX(2, 2, 16, 0, 0, 0);
3547 DO_PIX(3, 2, 12, 4, 0, 0);
3548 DO_PIX(2, 3, 12, 0, 4, 0);
3549 DO_PIX(3, 3, 9, 3, 3, 1);
3550 }
3551#undef DO_PIX
3552
3553 pDst_block->m_modulation = mod;
3554
 // Slide the window one block to the right; column 2 is reloaded at the top of the next iteration.
3555 e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3556 e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3557 e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3558
3559 e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3560 e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3561 e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3562
3563 } // x
3564 } // y
3565 }
3566
// Final pass of the ETC1 (color + alpha) -> PVRTC1 4bpp RGBA conversion. For every block this
// function copies the precomputed packed endpoints into the swizzled destination slot and
// derives the sixteen 2-bit modulation values. Brightness here is R + G + B + A.
//
// pETC_Blocks      - source color blocks, row-major, num_blocks_x * num_blocks_y entries.
// pPVRTC_endpoints - packed endpoint pair per block, row-major (opaque or translucent layout).
// pDst_blocks      - output array of pvrtc4_block, written at the swizzled block index.
// num_blocks_x/y   - block counts. Neighbour wrap-around is done with "& (count - 1)", which
//                    only wraps correctly for power-of-two counts.
// pAlpha_blocks    - one 32-bit entry per block, row-major, read as two uint16_t values:
//                    [0] indexes pEndpoints and [1] indexes pSelectors.
// pEndpoints       - endpoint table; the green channel of m_color5 and m_inten5 give the alpha values.
// pSelectors       - selector table; m_selectors[row] holds 2 bits per texel.
3567 static void fixup_pvrtc1_4_modulation_rgba(
3568 const decoder_etc_block* pETC_Blocks,
3569 const uint32_t* pPVRTC_endpoints,
3570 void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks,
3571 const endpoint* pEndpoints, const selector* pSelectors)
3572 {
 // Wrap masks, plus the number of address bits shared by both axes (used for non-square swizzling).
3573 const uint32_t x_mask = num_blocks_x - 1;
3574 const uint32_t y_mask = num_blocks_y - 1;
3575 const uint32_t x_bits = basisu::total_bits(x_mask);
3576 const uint32_t y_bits = basisu::total_bits(y_mask);
3577 const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
3578 //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
3579 const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
3580
3581 uint32_t block_index = 0;
3582
 // Sliding 3x3 window (indexed [column][row]) of endpoint brightness around the current block:
 // e0 for endpoint 0, e1 for endpoint 1. Entry [1][1] is the current block.
3583 // really 3x3
3584 int e0[4][4], e1[4][4];
3585
3586 for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
3587 {
3588 const uint32_t* pE_rows[3];
3589
 // Prime the window for x == 0: rows y-1..y+1 and columns -1..1, wrapped with the masks.
3590 for (int ey = 0; ey < 3; ey++)
3591 {
3592 int by = y + ey - 1;
3593
3594 const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
3595
3596 pE_rows[ey] = pE;
3597
3598 for (int ex = 0; ex < 3; ex++)
3599 {
3600 int bx = 0 + ex - 1;
3601
3602 const uint32_t e = pE[bx & x_mask];
3603
 // R + G + B + A of the expanded 8-bit endpoint color.
3604 e0[ex][ey] = get_endpoint_l8(e, 0);
3605 e1[ex][ey] = get_endpoint_l8(e, 1);
3606 }
3607 }
3608
 // Destination addressing: y contributes the unshifted table bits and x the same bits shifted
 // left by one. NOTE(review): presumably g_pvrtc_swizzle_table spreads the 8 bits of its index
 // over the even bit positions, which would make this a bit interleave - confirm.
3609 const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
3610
3611 for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
3612 {
3613 const decoder_etc_block& src_block = pETC_Blocks[block_index];
3614
 // Look up this block's alpha endpoint and selector entries through the two 16-bit indices.
3615 const uint16_t* pSrc_alpha_block = reinterpret_cast<const uint16_t*>(static_cast<const uint32_t*>(pAlpha_blocks) + x + (y * num_blocks_x));
3616 const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]];
3617 const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]];
3618
3619 const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
3620
 // Non-square textures: only the low 2 * min_bits address bits are combined from x and y; the
 // remaining high bits of the larger axis are appended above them.
3621 uint32_t swizzled = x_swizzle | y_swizzle;
3622 if (num_blocks_x != num_blocks_y)
3623 {
3624 swizzled &= swizzle_mask;
3625
3626 if (num_blocks_x > num_blocks_y)
3627 swizzled |= ((x >> min_bits) << (min_bits * 2));
3628 else
3629 swizzled |= ((y >> min_bits) << (min_bits * 2));
3630 }
3631
3632 pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
3633 pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
3634
 // Color brightness (R + G + B, scaled by 16) for each of the four selectors, clamped to
 // [0, 48 * 255]. Here the table is indexed directly by selector, without the permutation
 // used in the RGB-only variant.
3635 uint32_t base_r = g_etc_5_to_8[src_block.m_differential.m_red1];
3636 uint32_t base_g = g_etc_5_to_8[src_block.m_differential.m_green1];
3637 uint32_t base_b = g_etc_5_to_8[src_block.m_differential.m_blue1];
3638
3639 const int* pInten_table48 = g_etc1_inten_tables48[src_block.m_differential.m_cw1];
3640 int by = (base_r + base_g + base_b) * 16;
3641 int block_colors_y_x16[4];
3642 block_colors_y_x16[0] = basisu::clamp<int>(by + pInten_table48[0], 0, 48 * 255);
3643 block_colors_y_x16[1] = basisu::clamp<int>(by + pInten_table48[1], 0, 48 * 255);
3644 block_colors_y_x16[2] = basisu::clamp<int>(by + pInten_table48[2], 0, 48 * 255);
3645 block_colors_y_x16[3] = basisu::clamp<int>(by + pInten_table48[3], 0, 48 * 255);
3646
 // Alpha (taken from the green channel of the alpha endpoint, scaled by 16) for each of the
 // four selectors, clamped to [0, 16 * 255].
3647 uint32_t alpha_base_g = g_etc_5_to_8[pAlpha_endpoints->m_color5.g] * 16;
3648 const int* pInten_table16 = g_etc1_inten_tables16[pAlpha_endpoints->m_inten5];
3649 int alpha_block_colors_x16[4];
3650 alpha_block_colors_x16[0] = basisu::clamp<int>(alpha_base_g + pInten_table16[0], 0, 16 * 255);
3651 alpha_block_colors_x16[1] = basisu::clamp<int>(alpha_base_g + pInten_table16[1], 0, 16 * 255);
3652 alpha_block_colors_x16[2] = basisu::clamp<int>(alpha_base_g + pInten_table16[2], 0, 16 * 255);
3653 alpha_block_colors_x16[3] = basisu::clamp<int>(alpha_base_g + pInten_table16[3], 0, 16 * 255);
3654
3655 // clamp((base_r + base_g + base_b) * 16 + color_inten[s] * 48) + clamp(alpha_base_g * 16 + alpha_inten[as] * 16)
3656
 // Load the right-hand column of the window (block x + 1, wrapped) for all three rows.
3657 {
3658 const uint32_t ex = 2;
3659 int bx = x + ex - 1;
3660 bx &= x_mask;
3661
3662#define DO_ROW(ey) \
3663 { \
3664 const uint32_t e = pE_rows[ey][bx]; \
3665 e0[ex][ey] = get_endpoint_l8(e, 0); \
3666 e1[ex][ey] = get_endpoint_l8(e, 1); \
3667 }
3668
3669 DO_ROW(0);
3670 DO_ROW(1);
3671 DO_ROW(2);
3672#undef DO_ROW
3673 }
3674
3675 uint32_t mod = 0;
3676
 // DO_PIX(lx, ly, w0..w3): picks the modulation value of texel (lx, ly).
 //  ca_l / cb_l - endpoint 0 / endpoint 1 brightness blended from four neighbouring blocks
 //                (a0..a3 / b0..b3) with weights that sum to 16.
 //  cl          - source texel brightness: color term plus alpha term, both scaled by 16.
 //                The color selector is read from src_block.m_bytes[4 + ly] and the alpha
 //                selector from pAlpha_selectors->m_selectors[ly], 2 bits per texel.
 //  The texel's position between the two blended endpoints is compared against the thresholds
 //  3/16, 8/16 and 13/16, giving modulation 0..3. Both sides are negated when endpoint 0 is
 //  the brighter one. The result is OR-ed into bits (ly * 8 + lx * 2) of `mod`.
3677#define DO_PIX(lx, ly, w0, w1, w2, w3) \
3678 { \
3679 int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
3680 int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
3681 int cl = block_colors_y_x16[(src_block.m_bytes[4 + ly] >> (lx * 2)) & 3] + alpha_block_colors_x16[(pAlpha_selectors->m_selectors[ly] >> (lx * 2)) & 3]; \
3682 int dl = cb_l - ca_l; \
3683 int vl = cl - ca_l; \
3684 int p = vl * 16; \
3685 if (ca_l > cb_l) { p = -p; dl = -dl; } \
3686 uint32_t m = 0; \
3687 if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
3688 if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
3689 if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
3690 mod |= m; \
3691 }
3692
 // The block is processed as four 2x2 texel quadrants. (ex, ey) selects which 2x2 group of the
 // 3x3 window supplies a0..a3 / b0..b3 for that quadrant.
 // Top-left quadrant.
3693 {
3694 const uint32_t ex = 0, ey = 0;
3695 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3696 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3697 DO_PIX(0, 0, 4, 4, 4, 4);
3698 DO_PIX(1, 0, 2, 6, 2, 6);
3699 DO_PIX(0, 1, 2, 2, 6, 6);
3700 DO_PIX(1, 1, 1, 3, 3, 9);
3701 }
3702
 // Top-right quadrant.
3703 {
3704 const uint32_t ex = 1, ey = 0;
3705 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3706 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3707 DO_PIX(2, 0, 8, 0, 8, 0);
3708 DO_PIX(3, 0, 6, 2, 6, 2);
3709 DO_PIX(2, 1, 4, 0, 12, 0);
3710 DO_PIX(3, 1, 3, 1, 9, 3);
3711 }
3712
 // Bottom-left quadrant.
3713 {
3714 const uint32_t ex = 0, ey = 1;
3715 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3716 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3717 DO_PIX(0, 2, 8, 8, 0, 0);
3718 DO_PIX(1, 2, 4, 12, 0, 0);
3719 DO_PIX(0, 3, 6, 6, 2, 2);
3720 DO_PIX(1, 3, 3, 9, 1, 3);
3721 }
3722
 // Bottom-right quadrant; texel (2, 2) uses the current block's endpoints only (weight 16).
3723 {
3724 const uint32_t ex = 1, ey = 1;
3725 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
3726 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
3727 DO_PIX(2, 2, 16, 0, 0, 0);
3728 DO_PIX(3, 2, 12, 4, 0, 0);
3729 DO_PIX(2, 3, 12, 0, 4, 0);
3730 DO_PIX(3, 3, 9, 3, 3, 1);
3731 }
3732#undef DO_PIX
3733
3734 pDst_block->m_modulation = mod;
3735
 // Slide the window one block to the right; column 2 is reloaded at the top of the next iteration.
3736 e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
3737 e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
3738 e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
3739
3740 e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
3741 e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
3742 e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
3743
3744 } // x
3745 } // y
3746 }
3747#endif // BASISD_SUPPORT_PVRTC1
3748
3749#if BASISD_SUPPORT_BC7_MODE5
// (low, high) ETC1 selector ranges for which a precomputed BC7 mode 5 color solution exists.
// The order of the entries is the order of the "sr" dimension of g_etc1_to_bc7_m5_color.
3750 static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] =
3751 {
3752 { 0, 3 },
3753 { 1, 3 },
3754 { 0, 2 },
3755 { 1, 2 },
3756 { 2, 3 },
3757 { 0, 1 },
3758 };
3759
 // Number of entries in g_etc1_to_bc7_m5_selector_ranges.
3760 const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]);
3761
 // Reverse lookup [low][high] -> index into g_etc1_to_bc7_m5_selector_ranges.
 // Filled in by transcoder_init_bc7_mode5(); combinations not listed above stay 0.
3762 static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4];
3763
 // Candidate mappings from an ETC1 selector (array index 0..3) to a BC7 mode 5 2-bit color index.
 // The table generator evaluates every mapping and stores the best endpoints for each of them.
3764 const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10;
3765 static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] =
3766 {
3767 { 0, 0, 1, 1 },
3768 { 0, 0, 1, 2 },
3769 { 0, 0, 1, 3 },
3770 { 0, 0, 2, 3 },
3771 { 0, 1, 1, 1 },
3772 { 0, 1, 2, 2 },
3773 { 0, 1, 2, 3 },
3774 { 0, 2, 3, 3 },
3775 { 1, 2, 2, 2 },
3776 { 1, 2, 3, 3 },
3777 };
3778
// One precomputed ETC1 -> BC7 mode 5 single-channel color solution.
3779 struct etc1_to_bc7_m5_solution
3780 {
 // Best low / high 7-bit endpoint found (the generator searches 0..127 for each).
3781 uint8_t m_lo;
3782 uint8_t m_hi;
 // Squared error of that solution, saturated to 0xFFFF by the generator.
3783 uint16_t m_err;
3784 };
3785
 // Precomputed solutions, emitted by create_etc1_to_bc7_m5_color_conversion_table() in the
 // nesting order [intensity table 0..7][5-bit base value 0..31][selector range][selector mapping].
3786 static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = {
3787#include "basisu_transcoder_tables_bc7_m5_color.inc"
3788 };
3789
// (low, high) ETC1 selector ranges for which a precomputed BC7 mode 5 alpha conversion exists.
// The order of the entries is the order of the "sr" dimension of g_etc1_g_to_bc7_m5a.
3790 static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] =
3791 {
3792 { 0, 3 },
3793 { 1, 3 },
3794 { 0, 2 },
3795 { 1, 2 },
3796 { 2, 3 },
3797 { 0, 1 }
3798 };
3799
 // Number of entries in g_etc1_to_bc7_m5a_selector_ranges.
3800 const uint32_t NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5a_selector_ranges) / sizeof(g_etc1_to_bc7_m5a_selector_ranges[0]);
3801
 // Reverse lookup [low][high] -> index into g_etc1_to_bc7_m5a_selector_ranges.
 // Filled in by transcoder_init_bc7_mode5(); combinations not listed above stay 0.
3802 static uint32_t g_etc1_to_bc7_m5a_selector_range_index[4][4];
3803
// One precomputed ETC1 (single channel) -> BC7 mode 5 alpha conversion.
3804 struct etc1_g_to_bc7_m5a_conversion
3805 {
 // Best low / high 8-bit alpha endpoint found (the generator searches 0..255 for each).
3806 uint8_t m_lo, m_hi;
 // Selector translation: bits (s * 2)..(s * 2 + 1) hold the BC7 index chosen for ETC1 selector s.
3807 uint8_t m_trans;
3808 };
3809
 // Precomputed conversions, emitted by create_etc1_to_bc7_m5_alpha_conversion_table() in the
 // nesting order [intensity table 0..7][5-bit base value 0..31][selector range].
3810 static etc1_g_to_bc7_m5a_conversion g_etc1_g_to_bc7_m5a[8 * 32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES] =
3811 {
3812 #include "basisu_transcoder_tables_bc7_m5_alpha.inc"
3813 };
3814
3815 static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs)
3816 {
3817 assert(num_bits < 32);
3818 assert(val < (1ULL << num_bits));
3819
3820 uint32_t mask = static_cast<uint32_t>((1ULL << num_bits) - 1);
3821
3822 while (num_bits)
3823 {
3824 const uint32_t n = basisu::minimum<uint32_t>(8 - (cur_ofs & 7), num_bits);
3825
3826 pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7));
3827 pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7));
3828
3829 val >>= n;
3830 mask >>= n;
3831
3832 num_bits -= n;
3833 cur_ofs += n;
3834 }
3835
3836 return cur_ofs;
3837 }
3838
// Bitfield view of a 128-bit BC7 mode 5 block, split into two 64-bit words. Each word can be
// accessed either through named bitfields (m_lo / m_hi) or as a raw integer (m_lo_bits / m_hi_bits).
// The field widths of each word add up to exactly 64 bits.
// NOTE(review): the mapping of bitfields to bit positions is implementation-defined; this
// presumably relies on LSB-first allocation on a little-endian target - confirm.
3839 struct bc7_mode_5
3840 {
3841 union
3842 {
3843 struct
3844 {
 // Mode field (6 bits) followed by the 2-bit rotation field.
3845 uint64_t m_mode : 6;
3846 uint64_t m_rot : 2;
3847
 // 7-bit color endpoints: r0, r1, g0, g1, b0, b1.
3848 uint64_t m_r0 : 7;
3849 uint64_t m_r1 : 7;
3850 uint64_t m_g0 : 7;
3851 uint64_t m_g1 : 7;
3852 uint64_t m_b0 : 7;
3853 uint64_t m_b1 : 7;
 // 8-bit alpha endpoint 0, then the low 6 bits of alpha endpoint 1 (the top 2 bits are m_hi.m_a1_1).
3854 uint64_t m_a0 : 8;
3855 uint64_t m_a1_0 : 6;
3856
3857 } m_lo;
3858
3859 uint64_t m_lo_bits;
3860 };
3861
3862 union
3863 {
3864 struct
3865 {
 // Top 2 bits of alpha endpoint 1.
3866 uint64_t m_a1_1 : 2;
3867
 // Color indices m_cXY for texel (X, Y): 2 bits each, except the first texel which has 1 bit.
3868 // bit 2
3869 uint64_t m_c00 : 1;
3870 uint64_t m_c10 : 2;
3871 uint64_t m_c20 : 2;
3872 uint64_t m_c30 : 2;
3873
3874 uint64_t m_c01 : 2;
3875 uint64_t m_c11 : 2;
3876 uint64_t m_c21 : 2;
3877 uint64_t m_c31 : 2;
3878
3879 uint64_t m_c02 : 2;
3880 uint64_t m_c12 : 2;
3881 uint64_t m_c22 : 2;
3882 uint64_t m_c32 : 2;
3883
3884 uint64_t m_c03 : 2;
3885 uint64_t m_c13 : 2;
3886 uint64_t m_c23 : 2;
3887 uint64_t m_c33 : 2;
3888
 // Alpha indices m_aXY for texel (X, Y): 2 bits each, except the first texel which has 1 bit.
3889 // bit 33
3890 uint64_t m_a00 : 1;
3891 uint64_t m_a10 : 2;
3892 uint64_t m_a20 : 2;
3893 uint64_t m_a30 : 2;
3894
3895 uint64_t m_a01 : 2;
3896 uint64_t m_a11 : 2;
3897 uint64_t m_a21 : 2;
3898 uint64_t m_a31 : 2;
3899
3900 uint64_t m_a02 : 2;
3901 uint64_t m_a12 : 2;
3902 uint64_t m_a22 : 2;
3903 uint64_t m_a32 : 2;
3904
3905 uint64_t m_a03 : 2;
3906 uint64_t m_a13 : 2;
3907 uint64_t m_a23 : 2;
3908 uint64_t m_a33 : 2;
3909
3910 } m_hi;
3911
3912 uint64_t m_hi_bits;
3913 };
3914 };
3915
3916#if BASISD_WRITE_NEW_BC7_MODE5_TABLES
3917 static void create_etc1_to_bc7_m5_color_conversion_table()
3918 {
3919 FILE* pFile = nullptr;
3920 fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_color.inc", "w");
3921
3922 uint32_t n = 0;
3923
3924 for (int inten = 0; inten < 8; inten++)
3925 {
3926 for (uint32_t g = 0; g < 32; g++)
3927 {
3928 color32 block_colors[4];
3929 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
3930
3931 for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; sr++)
3932 {
3933 const uint32_t low_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_low;
3934 const uint32_t high_selector = g_etc1_to_bc7_m5_selector_ranges[sr].m_high;
3935
3936 for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS; m++)
3937 {
3938 uint32_t best_lo = 0;
3939 uint32_t best_hi = 0;
3940 uint64_t best_err = UINT64_MAX;
3941
3942 for (uint32_t hi = 0; hi <= 127; hi++)
3943 {
3944 for (uint32_t lo = 0; lo <= 127; lo++)
3945 {
3946 uint32_t colors[4];
3947
3948 colors[0] = (lo << 1) | (lo >> 6);
3949 colors[3] = (hi << 1) | (hi >> 6);
3950
3951 colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
3952 colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
3953
3954 uint64_t total_err = 0;
3955
3956 for (uint32_t s = low_selector; s <= high_selector; s++)
3957 {
3958 int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]];
3959
3960 int err_scale = 1;
3961 // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
3962 // the low/high selectors which are clamping to either 0 or 255.
3963 if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
3964 err_scale = 5;
3965
3966 total_err += (err * err) * err_scale;
3967 }
3968
3969 if (total_err < best_err)
3970 {
3971 best_err = total_err;
3972 best_lo = lo;
3973 best_hi = hi;
3974 }
3975 }
3976 }
3977
3978 best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF);
3979
3980 fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
3981 n++;
3982 if ((n & 31) == 31)
3983 fprintf(pFile, "\n");
3984 } // m
3985 } // sr
3986 } // g
3987 } // inten
3988
3989 fclose(pFile);
3990 }
3991
3992 static void create_etc1_to_bc7_m5_alpha_conversion_table()
3993 {
3994 FILE* pFile = nullptr;
3995 fopen_s(&pFile, "basisu_transcoder_tables_bc7_m5_alpha.inc", "w");
3996
3997 uint32_t n = 0;
3998
3999 for (int inten = 0; inten < 8; inten++)
4000 {
4001 for (uint32_t g = 0; g < 32; g++)
4002 {
4003 color32 block_colors[4];
4004 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4005
4006 for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; sr++)
4007 {
4008 const uint32_t low_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_low;
4009 const uint32_t high_selector = g_etc1_to_bc7_m5a_selector_ranges[sr].m_high;
4010
4011 uint32_t best_lo = 0;
4012 uint32_t best_hi = 0;
4013 uint64_t best_err = UINT64_MAX;
4014 uint32_t best_output_selectors = 0;
4015
4016 for (uint32_t hi = 0; hi <= 255; hi++)
4017 {
4018 for (uint32_t lo = 0; lo <= 255; lo++)
4019 {
4020 uint32_t colors[4];
4021
4022 colors[0] = lo;
4023 colors[3] = hi;
4024
4025 colors[1] = (colors[0] * (64 - 21) + colors[3] * 21 + 32) / 64;
4026 colors[2] = (colors[0] * (64 - 43) + colors[3] * 43 + 32) / 64;
4027
4028 uint64_t total_err = 0;
4029 uint32_t output_selectors = 0;
4030
4031 for (uint32_t s = low_selector; s <= high_selector; s++)
4032 {
4033 int best_mapping_err = INT_MAX;
4034 int best_k = 0;
4035 for (int k = 0; k < 4; k++)
4036 {
4037 int mapping_err = block_colors[s].g - colors[k];
4038 mapping_err *= mapping_err;
4039
4040 // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4041 // the low/high selectors which are clamping to either 0 or 255.
4042 if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4043 mapping_err *= 5;
4044
4045 if (mapping_err < best_mapping_err)
4046 {
4047 best_mapping_err = mapping_err;
4048 best_k = k;
4049 }
4050 } // k
4051
4052 total_err += best_mapping_err;
4053 output_selectors |= (best_k << (s * 2));
4054 } // s
4055
4056 if (total_err < best_err)
4057 {
4058 best_err = total_err;
4059 best_lo = lo;
4060 best_hi = hi;
4061 best_output_selectors = output_selectors;
4062 }
4063
4064 } // lo
4065 } // hi
4066
4067 fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors);
4068 n++;
4069 if ((n & 31) == 31)
4070 fprintf(pFile, "\n");
4071
4072 } // sr
4073 } // g
4074 } // inten
4075
4076 fclose(pFile);
4077 }
4078#endif // BASISD_WRITE_NEW_BC7_MODE5_TABLES
4079
// A pair of 7-bit BC7 mode 5 endpoints. Note the member order: high endpoint first, then low.
4080 struct bc7_m5_match_entry
4081 {
4082 uint8_t m_hi;
4083 uint8_t m_lo;
4084 };
4085
// Indexed by an 8-bit channel value i. Entry i is the {hi, lo} pair of 7-bit endpoints whose
// selector-1 interpolant, (lo8 * 43 + hi8 * 21 + 32) >> 6 with lo8/hi8 the endpoints expanded
// to 8 bits, is closest to i. The table was precomputed by the disabled (#if 0) loop in
// transcoder_init_bc7_mode5() and is used for solid-color blocks, where all selectors are set to 1.
4086 static bc7_m5_match_entry g_bc7_m5_equals_1[256] =
4087 {
4088 {0,0},{1,0},{3,0},{4,0},{6,0},{7,0},{9,0},{10,0},{12,0},{13,0},{15,0},{16,0},{18,0},{20,0},{21,0},{23,0},
4089 {24,0},{26,0},{27,0},{29,0},{30,0},{32,0},{33,0},{35,0},{36,0},{38,0},{39,0},{41,0},{42,0},{44,0},{45,0},{47,0},
4090 {48,0},{50,0},{52,0},{53,0},{55,0},{56,0},{58,0},{59,0},{61,0},{62,0},{64,0},{65,0},{66,0},{68,0},{69,0},{71,0},
4091 {72,0},{74,0},{75,0},{77,0},{78,0},{80,0},{82,0},{83,0},{85,0},{86,0},{88,0},{89,0},{91,0},{92,0},{94,0},{95,0},
4092 {97,0},{98,0},{100,0},{101,0},{103,0},{104,0},{106,0},{107,0},{109,0},{110,0},{112,0},{114,0},{115,0},{117,0},{118,0},{120,0},
4093 {121,0},{123,0},{124,0},{126,0},{127,0},{127,1},{126,2},{126,3},{127,3},{127,4},{126,5},{126,6},{127,6},{127,7},{126,8},{126,9},
4094 {127,9},{127,10},{126,11},{126,12},{127,12},{127,13},{126,14},{125,15},{127,15},{126,16},{126,17},{127,17},{127,18},{126,19},{126,20},{127,20},
4095 {127,21},{126,22},{126,23},{127,23},{127,24},{126,25},{126,26},{127,26},{127,27},{126,28},{126,29},{127,29},{127,30},{126,31},{126,32},{127,32},
4096 {127,33},{126,34},{126,35},{127,35},{127,36},{126,37},{126,38},{127,38},{127,39},{126,40},{126,41},{127,41},{127,42},{126,43},{126,44},{127,44},
4097 {127,45},{126,46},{125,47},{127,47},{126,48},{126,49},{127,49},{127,50},{126,51},{126,52},{127,52},{127,53},{126,54},{126,55},{127,55},{127,56},
4098 {126,57},{126,58},{127,58},{127,59},{126,60},{126,61},{127,61},{127,62},{126,63},{125,64},{126,64},{126,65},{127,65},{127,66},{126,67},{126,68},
4099 {127,68},{127,69},{126,70},{126,71},{127,71},{127,72},{126,73},{126,74},{127,74},{127,75},{126,76},{125,77},{127,77},{126,78},{126,79},{127,79},
4100 {127,80},{126,81},{126,82},{127,82},{127,83},{126,84},{126,85},{127,85},{127,86},{126,87},{126,88},{127,88},{127,89},{126,90},{126,91},{127,91},
4101 {127,92},{126,93},{126,94},{127,94},{127,95},{126,96},{126,97},{127,97},{127,98},{126,99},{126,100},{127,100},{127,101},{126,102},{126,103},{127,103},
4102 {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115},
4103 {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127}
4104 };
4105
4106 static void transcoder_init_bc7_mode5()
4107 {
4108#if 0
4109 // This is a little too much work to do at init time, so precompute it.
4110 for (int i = 0; i < 256; i++)
4111 {
4112 int lowest_e = 256;
4113 for (int lo = 0; lo < 128; lo++)
4114 {
4115 for (int hi = 0; hi < 128; hi++)
4116 {
4117 const int lo_e = (lo << 1) | (lo >> 6);
4118 const int hi_e = (hi << 1) | (hi >> 6);
4119
4120 // Selector 1
4121 int v = (lo_e * (64 - 21) + hi_e * 21 + 32) >> 6;
4122 int e = abs(v - i);
4123
4124 if (e < lowest_e)
4125 {
4126 g_bc7_m5_equals_1[i].m_hi = static_cast<uint8_t>(hi);
4127 g_bc7_m5_equals_1[i].m_lo = static_cast<uint8_t>(lo);
4128
4129 lowest_e = e;
4130 }
4131
4132 } // hi
4133
4134 } // lo
4135
4136 printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo);
4137 if ((i & 15) == 15) printf("\n");
4138 }
4139#endif
4140
4141 for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES; i++)
4142 {
4143 uint32_t l = g_etc1_to_bc7_m5_selector_ranges[i].m_low;
4144 uint32_t h = g_etc1_to_bc7_m5_selector_ranges[i].m_high;
4145 g_etc1_to_bc7_m5_selector_range_index[l][h] = i;
4146 }
4147
4148 for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES; i++)
4149 {
4150 uint32_t l = g_etc1_to_bc7_m5a_selector_ranges[i].m_low;
4151 uint32_t h = g_etc1_to_bc7_m5a_selector_ranges[i].m_high;
4152 g_etc1_to_bc7_m5a_selector_range_index[l][h] = i;
4153 }
4154 }
4155
4156 static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4157 {
4158 bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4159
4160 // First ensure the block is cleared to all 0's
4161 static_cast<uint64_t*>(pDst)[0] = 0;
4162 static_cast<uint64_t*>(pDst)[1] = 0;
4163
4164 // Set alpha to 255
4165 pDst_block->m_lo.m_mode = 1 << 5;
4166 pDst_block->m_lo.m_a0 = 255;
4167 pDst_block->m_lo.m_a1_0 = 63;
4168 pDst_block->m_hi.m_a1_1 = 3;
4169
4170 const uint32_t low_selector = pSelector->m_lo_selector;
4171 const uint32_t high_selector = pSelector->m_hi_selector;
4172
4173 const uint32_t base_color_r = pEndpoints->m_color5.r;
4174 const uint32_t base_color_g = pEndpoints->m_color5.g;
4175 const uint32_t base_color_b = pEndpoints->m_color5.b;
4176 const uint32_t inten_table = pEndpoints->m_inten5;
4177
4178 if (pSelector->m_num_unique_selectors == 1)
4179 {
4180 // Solid color block - use precomputed tables and set selectors to 1.
4181 uint32_t r, g, b;
4182 decoder_etc_block::get_block_color5(pEndpoints->m_color5, inten_table, low_selector, r, g, b);
4183
4184 pDst_block->m_lo.m_r0 = g_bc7_m5_equals_1[r].m_lo;
4185 pDst_block->m_lo.m_g0 = g_bc7_m5_equals_1[g].m_lo;
4186 pDst_block->m_lo.m_b0 = g_bc7_m5_equals_1[b].m_lo;
4187
4188 pDst_block->m_lo.m_r1 = g_bc7_m5_equals_1[r].m_hi;
4189 pDst_block->m_lo.m_g1 = g_bc7_m5_equals_1[g].m_hi;
4190 pDst_block->m_lo.m_b1 = g_bc7_m5_equals_1[b].m_hi;
4191
4192 set_block_bits((uint8_t*)pDst, 0x2aaaaaab, 31, 66);
4193 return;
4194 }
4195 else if (pSelector->m_num_unique_selectors == 2)
4196 {
4197 // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4198 color32 block_colors[4];
4199
4200 decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table);
4201
4202 const uint32_t r0 = block_colors[low_selector].r;
4203 const uint32_t g0 = block_colors[low_selector].g;
4204 const uint32_t b0 = block_colors[low_selector].b;
4205
4206 const uint32_t r1 = block_colors[high_selector].r;
4207 const uint32_t g1 = block_colors[high_selector].g;
4208 const uint32_t b1 = block_colors[high_selector].b;
4209
4210 pDst_block->m_lo.m_r0 = r0 >> 1;
4211 pDst_block->m_lo.m_g0 = g0 >> 1;
4212 pDst_block->m_lo.m_b0 = b0 >> 1;
4213
4214 pDst_block->m_lo.m_r1 = r1 >> 1;
4215 pDst_block->m_lo.m_g1 = g1 >> 1;
4216 pDst_block->m_lo.m_b1 = b1 >> 1;
4217
4218 uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4219
4220 for (uint32_t y = 0; y < 4; y++)
4221 {
4222 for (uint32_t x = 0; x < 4; x++)
4223 {
4224 uint32_t s = pSelector->get_selector(x, y);
4225 uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4226
4227 uint32_t num_bits = 2;
4228
4229 if ((x | y) == 0)
4230 {
4231 if (os & 2)
4232 {
4233 pDst_block->m_lo.m_r0 = r1 >> 1;
4234 pDst_block->m_lo.m_g0 = g1 >> 1;
4235 pDst_block->m_lo.m_b0 = b1 >> 1;
4236
4237 pDst_block->m_lo.m_r1 = r0 >> 1;
4238 pDst_block->m_lo.m_g1 = g0 >> 1;
4239 pDst_block->m_lo.m_b1 = b0 >> 1;
4240
4241 output_low_selector = 3;
4242 os = 0;
4243 }
4244
4245 num_bits = 1;
4246 }
4247
4248 output_bits |= (os << output_bit_offset);
4249 output_bit_offset += num_bits;
4250 }
4251 }
4252
4253 set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4254 return;
4255 }
4256
4257 const uint32_t selector_range_table = g_etc1_to_bc7_m5_selector_range_index[low_selector][high_selector];
4258
4259 //[32][8][RANGES][MAPPING]
4260 const etc1_to_bc7_m5_solution* pTable_r = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_r) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4261 const etc1_to_bc7_m5_solution* pTable_g = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_g) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4262 const etc1_to_bc7_m5_solution* pTable_b = &g_etc1_to_bc7_m5_color[(inten_table * 32 + base_color_b) * (NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS];
4263
4264 uint32_t best_err = UINT_MAX;
4265 uint32_t best_mapping = 0;
4266
4267 assert(NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS == 10);
4268#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
4269 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
4270 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
4271#undef DO_ITER
4272
4273 const uint8_t* pSelectors_xlat = &g_etc1_to_bc7_m5_selector_mappings[best_mapping][0];
4274
4275 uint32_t s_inv = 0;
4276 if (pSelectors_xlat[pSelector->get_selector(0, 0)] & 2)
4277 {
4278 pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_hi;
4279 pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_hi;
4280 pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_hi;
4281
4282 pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo;
4283 pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo;
4284 pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo;
4285
4286 s_inv = 3;
4287 }
4288 else
4289 {
4290 pDst_block->m_lo.m_r0 = pTable_r[best_mapping].m_lo;
4291 pDst_block->m_lo.m_g0 = pTable_g[best_mapping].m_lo;
4292 pDst_block->m_lo.m_b0 = pTable_b[best_mapping].m_lo;
4293
4294 pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_hi;
4295 pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_hi;
4296 pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_hi;
4297 }
4298
4299 uint32_t output_bits = 0, output_bit_ofs = 0;
4300
4301 for (uint32_t y = 0; y < 4; y++)
4302 {
4303 for (uint32_t x = 0; x < 4; x++)
4304 {
4305 const uint32_t s = pSelector->get_selector(x, y);
4306
4307 const uint32_t os = pSelectors_xlat[s] ^ s_inv;
4308
4309 output_bits |= (os << output_bit_ofs);
4310
4311 output_bit_ofs += (((x | y) == 0) ? 1 : 2);
4312 }
4313 }
4314
4315 set_block_bits((uint8_t*)pDst, output_bits, 31, 66);
4316 }
4317
4318 static void convert_etc1s_to_bc7_m5_alpha(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
4319 {
4320 bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst);
4321
4322 const uint32_t low_selector = pSelector->m_lo_selector;
4323 const uint32_t high_selector = pSelector->m_hi_selector;
4324
4325 const uint32_t base_color_r = pEndpoints->m_color5.r;
4326 const uint32_t inten_table = pEndpoints->m_inten5;
4327
4328 if (pSelector->m_num_unique_selectors == 1)
4329 {
4330 uint32_t r;
4331 decoder_etc_block::get_block_color5_r(pEndpoints->m_color5, inten_table, low_selector, r);
4332
4333 pDst_block->m_lo.m_a0 = r;
4334 pDst_block->m_lo.m_a1_0 = r & 63;
4335 pDst_block->m_hi.m_a1_1 = r >> 6;
4336
4337 return;
4338 }
4339 else if (pSelector->m_num_unique_selectors == 2)
4340 {
4341 // Only one or two unique selectors, so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks.
4342 int block_colors[4];
4343
4344 decoder_etc_block::get_block_colors5_g(block_colors, pEndpoints->m_color5, inten_table);
4345
4346 pDst_block->m_lo.m_a0 = block_colors[low_selector];
4347 pDst_block->m_lo.m_a1_0 = block_colors[high_selector] & 63;
4348 pDst_block->m_hi.m_a1_1 = block_colors[high_selector] >> 6;
4349
4350 uint32_t output_low_selector = 0, output_bit_offset = 0, output_bits = 0;
4351
4352 for (uint32_t y = 0; y < 4; y++)
4353 {
4354 for (uint32_t x = 0; x < 4; x++)
4355 {
4356 const uint32_t s = pSelector->get_selector(x, y);
4357 uint32_t os = (s == low_selector) ? output_low_selector : (3 ^ output_low_selector);
4358
4359 uint32_t num_bits = 2;
4360
4361 if ((x | y) == 0)
4362 {
4363 if (os & 2)
4364 {
4365 pDst_block->m_lo.m_a0 = block_colors[high_selector];
4366 pDst_block->m_lo.m_a1_0 = block_colors[low_selector] & 63;
4367 pDst_block->m_hi.m_a1_1 = block_colors[low_selector] >> 6;
4368
4369 output_low_selector = 3;
4370 os = 0;
4371 }
4372
4373 num_bits = 1;
4374 }
4375
4376 output_bits |= (os << output_bit_offset);
4377 output_bit_offset += num_bits;
4378 }
4379 }
4380
4381 set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4382 return;
4383 }
4384
4385 const uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector];
4386
4387 const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table];
4388
4389 pDst_block->m_lo.m_a0 = pTable->m_lo;
4390 pDst_block->m_lo.m_a1_0 = pTable->m_hi & 63;
4391 pDst_block->m_hi.m_a1_1 = pTable->m_hi >> 6;
4392
4393 uint32_t output_bit_offset = 0, output_bits = 0, selector_trans = pTable->m_trans;
4394
4395 for (uint32_t y = 0; y < 4; y++)
4396 {
4397 for (uint32_t x = 0; x < 4; x++)
4398 {
4399 const uint32_t s = pSelector->get_selector(x, y);
4400 uint32_t os = (selector_trans >> (s * 2)) & 3;
4401
4402 uint32_t num_bits = 2;
4403
4404 if ((x | y) == 0)
4405 {
4406 if (os & 2)
4407 {
4408 pDst_block->m_lo.m_a0 = pTable->m_hi;
4409 pDst_block->m_lo.m_a1_0 = pTable->m_lo & 63;
4410 pDst_block->m_hi.m_a1_1 = pTable->m_lo >> 6;
4411
4412 selector_trans ^= 0xFF;
4413 os ^= 3;
4414 }
4415
4416 num_bits = 1;
4417 }
4418
4419 output_bits |= (os << output_bit_offset);
4420 output_bit_offset += num_bits;
4421 }
4422 }
4423
4424 set_block_bits((uint8_t*)pDst, output_bits, 31, 97);
4425 }
4426#endif // BASISD_SUPPORT_BC7_MODE5
4427
4428#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC
// Packed EAC selector bytes copied into eac_block::m_selectors for constant blocks:
// 48 bits holding sixteen 3-bit selectors that are all 4 (binary 100 repeated).
4429 static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
4430#endif
4431
4432#if BASISD_SUPPORT_ETC2_EAC_A8
4433 static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
4434 {
4435 const uint32_t low_selector = pSelector->m_lo_selector;
4436 const uint32_t high_selector = pSelector->m_hi_selector;
4437
4438 const color32& base_color = pEndpoints->m_color5;
4439 const uint32_t inten_table = pEndpoints->m_inten5;
4440
4441 if (low_selector == high_selector)
4442 {
4443 uint32_t r;
4444 decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
4445
4446 // Constant alpha block
4447 // Select table 13, use selector 4 (0), set multiplier to 1 and base color g
4448 pDst_block->m_base = r;
4449 pDst_block->m_table = 13;
4450 pDst_block->m_multiplier = 1;
4451
4452 // selectors are all 4's
4453 memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
4454
4455 return;
4456 }
4457
4458 uint32_t selector_range_table = 0;
4459 for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++)
4460 if ((low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high))
4461 break;
4462 if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES)
4463 selector_range_table = 0;
4464
4465 const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_a8[base_color.r + inten_table * 32][selector_range_table];
4466
4467 pDst_block->m_base = pTable_entry->m_base;
4468 pDst_block->m_table = pTable_entry->m_table_mul >> 4;
4469 pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;
4470
4471 uint64_t selector_bits = 0;
4472
4473 for (uint32_t y = 0; y < 4; y++)
4474 {
4475 for (uint32_t x = 0; x < 4; x++)
4476 {
4477 uint32_t s = pSelector->get_selector(x, y);
4478
4479 uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
4480
4481 const uint32_t dst_ofs = 45 - (y + x * 4) * 3;
4482 selector_bits |= (static_cast<uint64_t>(ds) << dst_ofs);
4483 }
4484 }
4485
4486 pDst_block->set_selector_bits(selector_bits);
4487 }
4488#endif // BASISD_SUPPORT_ETC2_EAC_A8
4489
4490#if BASISD_SUPPORT_ETC2_EAC_RG11
// ETC1S -> ETC2 EAC R11 conversion table. convert_etc1s_to_etc2_eac_r11() indexes it as
// [base_color.r + inten_table * 32][selector_range_table] and reads each entry's m_base,
// m_table_mul (EAC table index in the high nibble, multiplier in the low nibble) and
// m_trans (one 3-bit EAC selector per ETC1S selector, ETC1S selector 0 in the lowest bits).
// NOTE(review): the values look machine-generated; the generator is not visible in this part of the file.
4491 static const etc1_g_to_eac_conversion s_etc1_g_to_etc2_r11[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] =
4492 {
4493 {{0,1,3328},{0,1,3328},{0,16,457},{0,16,456}},
4494 {{0,226,3936},{0,226,3936},{0,17,424},{8,0,472}},
4495 {{6,178,4012},{6,178,4008},{0,146,501},{16,0,472}},
4496 {{14,178,4012},{14,178,4008},{8,146,501},{24,0,472}},
4497 {{23,178,4012},{23,178,4008},{17,146,501},{33,0,472}},
4498 {{31,178,4012},{31,178,4008},{25,146,501},{41,0,472}},
4499 {{39,178,4012},{39,178,4008},{33,146,501},{49,0,472}},
4500 {{47,178,4012},{47,178,4008},{41,146,501},{27,228,496}},
4501 {{56,178,4012},{56,178,4008},{50,146,501},{36,228,496}},
4502 {{64,178,4012},{64,178,4008},{58,146,501},{44,228,496}},
4503 {{72,178,4012},{72,178,4008},{66,146,501},{52,228,496}},
4504 {{80,178,4012},{80,178,4008},{74,146,501},{60,228,496}},
4505 {{89,178,4012},{89,178,4008},{83,146,501},{69,228,496}},
4506 {{97,178,4012},{97,178,4008},{91,146,501},{77,228,496}},
4507 {{105,178,4012},{105,178,4008},{99,146,501},{85,228,496}},
4508 {{113,178,4012},{113,178,4008},{107,146,501},{93,228,496}},
4509 {{122,178,4012},{122,178,4008},{116,146,501},{102,228,496}},
4510 {{130,178,4012},{130,178,4008},{124,146,501},{110,228,496}},
4511 {{138,178,4012},{138,178,4008},{132,146,501},{118,228,496}},
4512 {{146,178,4012},{146,178,4008},{140,146,501},{126,228,496}},
4513 {{155,178,4012},{155,178,4008},{149,146,501},{135,228,496}},
4514 {{163,178,4012},{163,178,4008},{157,146,501},{143,228,496}},
4515 {{171,178,4012},{171,178,4008},{165,146,501},{151,228,496}},
4516 {{179,178,4012},{179,178,4008},{173,146,501},{159,228,496}},
4517 {{188,178,4012},{188,178,4008},{182,146,501},{168,228,496}},
4518 {{196,178,4012},{196,178,4008},{190,146,501},{176,228,496}},
4519 {{204,178,4012},{204,178,4008},{198,146,501},{184,228,496}},
4520 {{212,178,4012},{212,178,4008},{206,146,501},{192,228,496}},
4521 {{221,178,4012},{221,178,4008},{215,146,501},{201,228,496}},
4522 {{229,178,4012},{229,178,4008},{223,146,501},{209,228,496}},
4523 {{235,66,4012},{221,100,4008},{231,146,501},{217,228,496}},
4524 {{211,102,4085},{254,32,4040},{211,102,501},{254,32,456}},
4525 {{0,2,3328},{0,2,3328},{0,1,320},{0,1,320}},
4526 {{7,162,3905},{7,162,3904},{0,17,480},{0,17,480}},
4527 {{15,162,3906},{15,162,3904},{1,117,352},{1,117,352}},
4528 {{23,162,3906},{23,162,3904},{5,34,500},{4,53,424}},
4529 {{32,162,3906},{32,162,3904},{14,34,500},{3,69,424}},
4530 {{40,162,3906},{40,162,3904},{22,34,500},{1,133,496}},
4531 {{48,162,3906},{48,162,3904},{30,34,500},{4,85,496}},
4532 {{56,162,3906},{56,162,3904},{38,34,500},{12,85,496}},
4533 {{65,162,3906},{65,162,3904},{47,34,500},{1,106,424}},
4534 {{73,162,3906},{73,162,3904},{55,34,500},{9,106,424}},
4535 {{81,162,3906},{81,162,3904},{63,34,500},{7,234,496}},
4536 {{89,162,3906},{89,162,3904},{71,34,500},{15,234,496}},
4537 {{98,162,3906},{98,162,3904},{80,34,500},{24,234,496}},
4538 {{106,162,3906},{106,162,3904},{88,34,500},{32,234,496}},
4539 {{114,162,3906},{114,162,3904},{96,34,500},{40,234,496}},
4540 {{122,162,3906},{122,162,3904},{104,34,500},{48,234,496}},
4541 {{131,162,3906},{131,162,3904},{113,34,500},{57,234,496}},
4542 {{139,162,3906},{139,162,3904},{121,34,500},{65,234,496}},
4543 {{147,162,3906},{147,162,3904},{129,34,500},{73,234,496}},
4544 {{155,162,3906},{155,162,3904},{137,34,500},{81,234,496}},
4545 {{164,162,3906},{164,162,3904},{146,34,500},{90,234,496}},
4546 {{172,162,3906},{172,162,3904},{154,34,500},{98,234,496}},
4547 {{180,162,3906},{180,162,3904},{162,34,500},{106,234,496}},
4548 {{188,162,3906},{188,162,3904},{170,34,500},{114,234,496}},
4549 {{197,162,3906},{197,162,3904},{179,34,500},{123,234,496}},
4550 {{205,162,3906},{205,162,3904},{187,34,500},{131,234,496}},
4551 {{213,162,3906},{213,162,3904},{195,34,500},{139,234,496}},
4552 {{221,162,3906},{221,162,3904},{203,34,500},{147,234,496}},
4553 {{230,162,3906},{230,162,3904},{212,34,500},{156,234,496}},
4554 {{238,162,3906},{174,106,4008},{220,34,500},{164,234,496}},
4555 {{240,178,4001},{182,106,4008},{228,34,500},{172,234,496}},
4556 {{166,108,4085},{115,31,4080},{166,108,501},{115,31,496}},
4557 {{1,68,3328},{1,68,3328},{0,1,384},{0,1,384}},
4558 {{1,51,3968},{1,51,3968},{0,2,384},{0,2,384}},
4559 {{21,18,3851},{21,18,3848},{1,50,488},{1,50,488}},
4560 {{26,195,3851},{29,18,3848},{0,67,488},{0,67,488}},
4561 {{35,195,3851},{38,18,3848},{12,115,488},{0,3,496}},
4562 {{43,195,3851},{46,18,3848},{20,115,488},{2,6,424}},
4563 {{51,195,3851},{54,18,3848},{36,66,482},{4,22,424}},
4564 {{59,195,3851},{62,18,3848},{44,66,482},{3,73,424}},
4565 {{68,195,3851},{71,18,3848},{53,66,482},{3,22,496}},
4566 {{76,195,3851},{79,18,3848},{61,66,482},{2,137,496}},
4567 {{84,195,3851},{87,18,3848},{69,66,482},{1,89,496}},
4568 {{92,195,3851},{95,18,3848},{77,66,482},{9,89,496}},
4569 {{101,195,3851},{104,18,3848},{86,66,482},{18,89,496}},
4570 {{109,195,3851},{112,18,3848},{94,66,482},{26,89,496}},
4571 {{117,195,3851},{120,18,3848},{102,66,482},{34,89,496}},
4572 {{125,195,3851},{128,18,3848},{110,66,482},{42,89,496}},
4573 {{134,195,3851},{137,18,3848},{119,66,482},{51,89,496}},
4574 {{141,195,3907},{145,18,3848},{127,66,482},{59,89,496}},
4575 {{149,195,3907},{153,18,3848},{135,66,482},{67,89,496}},
4576 {{157,195,3907},{161,18,3848},{143,66,482},{75,89,496}},
4577 {{166,195,3907},{170,18,3848},{152,66,482},{84,89,496}},
4578 {{174,195,3907},{178,18,3848},{160,66,482},{92,89,496}},
4579 {{182,195,3907},{186,18,3848},{168,66,482},{100,89,496}},
4580 {{190,195,3907},{194,18,3848},{176,66,482},{108,89,496}},
4581 {{199,195,3907},{203,18,3848},{185,66,482},{117,89,496}},
4582 {{207,195,3907},{211,18,3848},{193,66,482},{125,89,496}},
4583 {{215,195,3907},{219,18,3848},{201,66,482},{133,89,496}},
4584 {{223,195,3907},{227,18,3848},{209,66,482},{141,89,496}},
4585 {{232,195,3907},{168,89,4008},{218,66,482},{150,89,496}},
4586 {{236,18,3907},{176,89,4008},{226,66,482},{158,89,496}},
4587 {{158,90,4085},{103,31,4080},{158,90,501},{103,31,496}},
4588 {{166,90,4085},{111,31,4080},{166,90,501},{111,31,496}},
4589 {{0,70,3328},{0,70,3328},{0,17,448},{0,17,448}},
4590 {{0,117,3904},{0,117,3904},{0,35,384},{0,35,384}},
4591 {{13,165,3905},{13,165,3904},{2,211,480},{2,211,480}},
4592 {{21,165,3906},{21,165,3904},{1,51,488},{1,51,488}},
4593 {{30,165,3906},{30,165,3904},{7,61,352},{7,61,352}},
4594 {{38,165,3906},{38,165,3904},{2,125,352},{2,125,352}},
4595 {{46,165,3906},{46,165,3904},{1,37,500},{10,125,352}},
4596 {{54,165,3906},{54,165,3904},{9,37,500},{5,61,424}},
4597 {{63,165,3906},{63,165,3904},{18,37,500},{1,189,424}},
4598 {{71,165,3906},{71,165,3904},{26,37,500},{9,189,424}},
4599 {{79,165,3906},{79,165,3904},{34,37,500},{4,77,424}},
4600 {{87,165,3906},{87,165,3904},{42,37,500},{12,77,424}},
4601 {{96,165,3906},{96,165,3904},{51,37,500},{8,93,424}},
4602 {{104,165,3906},{104,165,3904},{59,37,500},{3,141,496}},
4603 {{112,165,3906},{112,165,3904},{68,37,500},{11,141,496}},
4604 {{120,165,3906},{120,165,3904},{76,37,500},{6,93,496}},
4605 {{129,165,3906},{129,165,3904},{85,37,500},{15,93,496}},
4606 {{70,254,4012},{137,165,3904},{93,37,500},{23,93,496}},
4607 {{145,165,3906},{145,165,3904},{101,37,500},{31,93,496}},
4608 {{86,254,4012},{153,165,3904},{109,37,500},{39,93,496}},
4609 {{163,165,3906},{162,165,3904},{118,37,500},{48,93,496}},
4610 {{171,165,3906},{170,165,3904},{126,37,500},{56,93,496}},
4611 {{179,165,3906},{178,165,3904},{134,37,500},{64,93,496}},
4612 {{187,165,3906},{187,165,3904},{142,37,500},{72,93,496}},
4613 {{196,165,3906},{196,165,3904},{151,37,500},{81,93,496}},
4614 {{204,165,3906},{204,165,3904},{159,37,500},{89,93,496}},
4615 {{212,165,3906},{136,77,4008},{167,37,500},{97,93,496}},
4616 {{220,165,3906},{131,93,4008},{175,37,500},{105,93,496}},
4617 {{214,181,4001},{140,93,4008},{184,37,500},{114,93,496}},
4618 {{222,181,4001},{148,93,4008},{192,37,500},{122,93,496}},
4619 {{115,95,4085},{99,31,4080},{115,95,501},{99,31,496}},
4620 {{123,95,4085},{107,31,4080},{123,95,501},{107,31,496}},
4621 {{0,102,3840},{0,102,3840},{0,18,384},{0,18,384}},
4622 {{5,167,3904},{5,167,3904},{0,13,256},{0,13,256}},
4623 {{4,54,3968},{4,54,3968},{1,67,448},{1,67,448}},
4624 {{30,198,3850},{30,198,3848},{0,3,480},{0,3,480}},
4625 {{39,198,3850},{39,198,3848},{3,52,488},{3,52,488}},
4626 {{47,198,3851},{47,198,3848},{3,4,488},{3,4,488}},
4627 {{55,198,3851},{55,198,3848},{1,70,488},{1,70,488}},
4628 {{53,167,3906},{63,198,3848},{3,22,488},{3,22,488}},
4629 {{62,167,3906},{72,198,3848},{24,118,488},{0,6,496}},
4630 {{70,167,3906},{80,198,3848},{32,118,488},{2,89,488}},
4631 {{78,167,3906},{88,198,3848},{40,118,488},{1,73,496}},
4632 {{86,167,3906},{96,198,3848},{48,118,488},{0,28,424}},
4633 {{95,167,3906},{105,198,3848},{57,118,488},{9,28,424}},
4634 {{103,167,3906},{113,198,3848},{65,118,488},{5,108,496}},
4635 {{111,167,3906},{121,198,3848},{73,118,488},{13,108,496}},
4636 {{119,167,3906},{129,198,3848},{81,118,488},{21,108,496}},
4637 {{128,167,3906},{138,198,3848},{90,118,488},{6,28,496}},
4638 {{136,167,3906},{146,198,3848},{98,118,488},{14,28,496}},
4639 {{145,167,3906},{154,198,3848},{106,118,488},{22,28,496}},
4640 {{153,167,3906},{162,198,3848},{114,118,488},{30,28,496}},
4641 {{162,167,3906},{171,198,3848},{123,118,488},{39,28,496}},
4642 {{170,167,3906},{179,198,3848},{131,118,488},{47,28,496}},
4643 {{178,167,3906},{187,198,3848},{139,118,488},{55,28,496}},
4644 {{186,167,3906},{195,198,3848},{147,118,488},{63,28,496}},
4645 {{194,167,3906},{120,12,4008},{156,118,488},{72,28,496}},
4646 {{206,198,3907},{116,28,4008},{164,118,488},{80,28,496}},
4647 {{214,198,3907},{124,28,4008},{172,118,488},{88,28,496}},
4648 {{222,198,3395},{132,28,4008},{180,118,488},{96,28,496}},
4649 {{207,134,4001},{141,28,4008},{189,118,488},{105,28,496}},
4650 {{95,30,4085},{86,31,4080},{95,30,501},{86,31,496}},
4651 {{103,30,4085},{94,31,4080},{103,30,501},{94,31,496}},
4652 {{111,30,4085},{102,31,4080},{111,30,501},{102,31,496}},
4653 {{0,104,3840},{0,104,3840},{0,18,448},{0,18,448}},
4654 {{4,39,3904},{4,39,3904},{0,4,384},{0,4,384}},
4655 {{0,56,3968},{0,56,3968},{0,84,448},{0,84,448}},
4656 {{6,110,3328},{6,110,3328},{0,20,448},{0,20,448}},
4657 {{41,200,3850},{41,200,3848},{1,4,480},{1,4,480}},
4658 {{49,200,3850},{49,200,3848},{1,8,416},{1,8,416}},
4659 {{57,200,3851},{57,200,3848},{1,38,488},{1,38,488}},
4660 {{65,200,3851},{65,200,3848},{1,120,488},{1,120,488}},
4661 {{74,200,3851},{74,200,3848},{2,72,488},{2,72,488}},
4662 {{68,6,3907},{82,200,3848},{2,24,488},{2,24,488}},
4663 {{77,6,3907},{90,200,3848},{26,120,488},{10,24,488}},
4664 {{97,63,3330},{98,200,3848},{34,120,488},{2,8,496}},
4665 {{106,63,3330},{107,200,3848},{43,120,488},{3,92,488}},
4666 {{114,63,3330},{115,200,3848},{51,120,488},{11,92,488}},
4667 {{122,63,3330},{123,200,3848},{59,120,488},{7,76,496}},
4668 {{130,63,3330},{131,200,3848},{67,120,488},{15,76,496}},
4669 {{139,63,3330},{140,200,3848},{76,120,488},{24,76,496}},
4670 {{147,63,3330},{148,200,3848},{84,120,488},{32,76,496}},
4671 {{155,63,3330},{156,200,3848},{92,120,488},{40,76,496}},
4672 {{164,63,3330},{164,200,3848},{100,120,488},{48,76,496}},
4673 {{173,63,3330},{173,200,3848},{109,120,488},{57,76,496}},
4674 {{184,6,3851},{181,200,3848},{117,120,488},{65,76,496}},
4675 {{192,6,3851},{133,28,3936},{125,120,488},{73,76,496}},
4676 {{189,200,3907},{141,28,3936},{133,120,488},{81,76,496}},
4677 {{198,200,3907},{138,108,4000},{142,120,488},{90,76,496}},
4678 {{206,200,3907},{146,108,4000},{150,120,488},{98,76,496}},
4679 {{214,200,3395},{154,108,4000},{158,120,488},{106,76,496}},
4680 {{190,136,4001},{162,108,4000},{166,120,488},{114,76,496}},
4681 {{123,30,4076},{87,15,4080},{123,30,492},{87,15,496}},
4682 {{117,110,4084},{80,31,4080},{117,110,500},{80,31,496}},
4683 {{125,110,4084},{88,31,4080},{125,110,500},{88,31,496}},
4684 {{133,110,4084},{96,31,4080},{133,110,500},{96,31,496}},
4685 {{9,56,3904},{9,56,3904},{0,67,448},{0,67,448}},
4686 {{1,8,3904},{1,8,3904},{1,84,448},{1,84,448}},
4687 {{1,124,3904},{1,124,3904},{0,39,384},{0,39,384}},
4688 {{9,124,3904},{9,124,3904},{1,4,448},{1,4,448}},
4689 {{6,76,3904},{6,76,3904},{0,70,448},{0,70,448}},
4690 {{62,6,3859},{62,6,3856},{2,38,480},{2,38,480}},
4691 {{70,6,3859},{70,6,3856},{5,43,416},{5,43,416}},
4692 {{78,6,3859},{78,6,3856},{2,11,416},{2,11,416}},
4693 {{87,6,3859},{87,6,3856},{0,171,488},{0,171,488}},
4694 {{67,8,3906},{95,6,3856},{8,171,488},{8,171,488}},
4695 {{75,8,3907},{103,6,3856},{5,123,488},{5,123,488}},
4696 {{83,8,3907},{111,6,3856},{2,75,488},{2,75,488}},
4697 {{92,8,3907},{120,6,3856},{0,27,488},{0,27,488}},
4698 {{100,8,3907},{128,6,3856},{8,27,488},{8,27,488}},
4699 {{120,106,3843},{136,6,3856},{99,6,387},{16,27,488}},
4700 {{128,106,3843},{144,6,3856},{107,6,387},{2,11,496}},
4701 {{137,106,3843},{153,6,3856},{117,6,387},{11,11,496}},
4702 {{145,106,3843},{161,6,3856},{125,6,387},{19,11,496}},
4703 {{163,8,3851},{137,43,3904},{133,6,387},{27,11,496}},
4704 {{171,8,3851},{145,43,3904},{141,6,387},{35,11,496}},
4705 {{180,8,3851},{110,11,4000},{150,6,387},{44,11,496}},
4706 {{188,8,3851},{118,11,4000},{158,6,387},{52,11,496}},
4707 {{172,72,3907},{126,11,4000},{166,6,387},{60,11,496}},
4708 {{174,6,3971},{134,11,4000},{174,6,387},{68,11,496}},
4709 {{183,6,3971},{143,11,4000},{183,6,387},{77,11,496}},
4710 {{191,6,3971},{151,11,4000},{191,6,387},{85,11,496}},
4711 {{199,6,3971},{159,11,4000},{199,6,387},{93,11,496}},
4712 {{92,12,4084},{69,15,4080},{92,12,500},{69,15,496}},
4713 {{101,12,4084},{78,15,4080},{101,12,500},{78,15,496}},
4714 {{110,12,4084},{86,15,4080},{110,12,500},{86,15,496}},
4715 {{118,12,4084},{79,31,4080},{118,12,500},{79,31,496}},
4716 {{126,12,4084},{87,31,4080},{126,12,500},{87,31,496}},
4717 {{71,8,3602},{71,8,3600},{2,21,384},{2,21,384}},
4718 {{79,8,3611},{79,8,3608},{0,69,448},{0,69,448}},
4719 {{87,8,3611},{87,8,3608},{0,23,384},{0,23,384}},
4720 {{95,8,3611},{95,8,3608},{1,5,448},{1,5,448}},
4721 {{104,8,3611},{104,8,3608},{0,88,448},{0,88,448}},
4722 {{112,8,3611},{112,8,3608},{0,72,448},{0,72,448}},
4723 {{120,8,3611},{121,8,3608},{36,21,458},{36,21,456}},
4724 {{133,47,3091},{129,8,3608},{44,21,458},{44,21,456}},
4725 {{142,47,3091},{138,8,3608},{53,21,459},{53,21,456}},
4726 {{98,12,3850},{98,12,3848},{61,21,459},{61,21,456}},
4727 {{106,12,3850},{106,12,3848},{10,92,480},{69,21,456}},
4728 {{114,12,3851},{114,12,3848},{18,92,480},{77,21,456}},
4729 {{123,12,3851},{123,12,3848},{3,44,488},{86,21,456}},
4730 {{95,12,3906},{95,12,3904},{11,44,488},{94,21,456}},
4731 {{103,12,3906},{103,12,3904},{19,44,488},{102,21,456}},
4732 {{111,12,3907},{111,12,3904},{27,44,489},{110,21,456}},
4733 {{120,12,3907},{120,12,3904},{36,44,489},{119,21,456}},
4734 {{128,12,3907},{128,12,3904},{44,44,489},{127,21,456}},
4735 {{136,12,3907},{136,12,3904},{52,44,489},{135,21,456}},
4736 {{144,12,3907},{144,12,3904},{60,44,490},{144,21,456}},
4737 {{153,12,3907},{153,12,3904},{69,44,490},{153,21,456}},
4738 {{161,12,3395},{149,188,3968},{77,44,490},{161,21,456}},
4739 {{169,12,3395},{199,21,3928},{85,44,490},{169,21,456}},
4740 {{113,95,4001},{202,69,3992},{125,8,483},{177,21,456}},
4741 {{122,95,4001},{201,21,3984},{134,8,483},{186,21,456}},
4742 {{143,8,4067},{209,21,3984},{142,8,483},{194,21,456}},
4743 {{151,8,4067},{47,15,4080},{151,8,483},{47,15,496}},
4744 {{159,8,4067},{55,15,4080},{159,8,483},{55,15,496}},
4745 {{168,8,4067},{64,15,4080},{168,8,483},{64,15,496}},
4746 {{160,40,4075},{72,15,4080},{160,40,491},{72,15,496}},
4747 {{168,40,4075},{80,15,4080},{168,40,491},{80,15,496}},
4748 {{144,8,4082},{88,15,4080},{144,8,498},{88,15,496}},
4749 };
4750
4751 static void convert_etc1s_to_etc2_eac_r11(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector)
4752 {
4753 const uint32_t low_selector = pSelector->m_lo_selector;
4754 const uint32_t high_selector = pSelector->m_hi_selector;
4755
4756 const color32& base_color = pEndpoints->m_color5;
4757 const uint32_t inten_table = pEndpoints->m_inten5;
4758
4759 if (low_selector == high_selector)
4760 {
4761 uint32_t r;
4762 decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r);
4763
4764 // Constant alpha block
4765 // Select table 13, use selector 4 (0), set multiplier to 1 and base color r
4766 pDst_block->m_base = r;
4767 pDst_block->m_table = 13;
4768 pDst_block->m_multiplier = 1;
4769
4770 // selectors are all 4's
4771 static const uint8_t s_etc2_eac_r11_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
4772 memcpy(pDst_block->m_selectors, s_etc2_eac_r11_sel4, sizeof(s_etc2_eac_r11_sel4));
4773
4774 return;
4775 }
4776
4777 uint32_t selector_range_table = 0;
4778 for (selector_range_table = 0; selector_range_table < NUM_ETC2_EAC_SELECTOR_RANGES; selector_range_table++)
4779 if ((low_selector == s_etc2_eac_selector_ranges[selector_range_table].m_low) && (high_selector == s_etc2_eac_selector_ranges[selector_range_table].m_high))
4780 break;
4781 if (selector_range_table >= NUM_ETC2_EAC_SELECTOR_RANGES)
4782 selector_range_table = 0;
4783
4784 const etc1_g_to_eac_conversion* pTable_entry = &s_etc1_g_to_etc2_r11[base_color.r + inten_table * 32][selector_range_table];
4785
4786 pDst_block->m_base = pTable_entry->m_base;
4787 pDst_block->m_table = pTable_entry->m_table_mul >> 4;
4788 pDst_block->m_multiplier = pTable_entry->m_table_mul & 15;
4789
4790 uint64_t selector_bits = 0;
4791
4792 for (uint32_t y = 0; y < 4; y++)
4793 {
4794 for (uint32_t x = 0; x < 4; x++)
4795 {
4796 uint32_t s = pSelector->get_selector(x, y);
4797
4798 uint32_t ds = (pTable_entry->m_trans >> (s * 3)) & 7;
4799
4800 const uint32_t dst_ofs = 45 - (y + x * 4) * 3;
4801 selector_bits |= (static_cast<uint64_t>(ds) << dst_ofs);
4802 }
4803 }
4804
4805 pDst_block->set_selector_bits(selector_bits);
4806 }
4807#endif // BASISD_SUPPORT_ETC2_EAC_RG11
4808
4809// ASTC
// Element type of the precomputed g_etc1_to_astc / g_etc1_to_astc_0_255 tables.
// NOTE(review): presumably m_lo/m_hi are the chosen low/high ASTC endpoint values and
// m_err the error of that choice - confirm against the table generator.
4810 struct etc1_to_astc_solution
4811 {
4812 uint8_t m_lo;
4813 uint8_t m_hi;
4814 uint16_t m_err;
4815 };
4816
4817#if BASISD_SUPPORT_ASTC
// The (low, high) ETC1S selector ranges covered by the ETC1S -> ASTC conversion tables.
4818 static dxt_selector_range g_etc1_to_astc_selector_ranges[] =
4819 {
4820 { 0, 3 },
4821
4822 { 1, 3 },
4823 { 0, 2 },
4824
4825 { 1, 2 },
4826
4827 { 2, 3 },
4828 { 0, 1 },
4829 };
4830
// Number of entries in g_etc1_to_astc_selector_ranges, derived from the array itself.
4831 const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_RANGES = sizeof(g_etc1_to_astc_selector_ranges) / sizeof(g_etc1_to_astc_selector_ranges[0]);
4832
// NOTE(review): presumably a [low selector][high selector] -> range index lookup filled at
// init time, like the BC7 mode 5 equivalents - the code that fills it is not visible here.
4833 static uint32_t g_etc1_to_astc_selector_range_index[4][4];
4834
// Candidate selector translations: row m gives, for each of the 4 ETC1S selectors,
// the 2-bit destination selector used by mapping m.
4835 const uint32_t NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS = 10;
4836 static const uint8_t g_etc1_to_astc_selector_mappings[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS][4] =
4837 {
4838 { 0, 0, 1, 1 },
4839 { 0, 0, 1, 2 },
4840 { 0, 0, 1, 3 },
4841 { 0, 0, 2, 3 },
4842 { 0, 1, 1, 1 },
4843 { 0, 1, 2, 2 },
4844 { 0, 1, 2, 3 },
4845 { 0, 2, 3, 3 },
4846 { 1, 2, 2, 2 },
4847 { 1, 2, 3, 3 },
4848 };
4849
// Precomputed ETC1S -> ASTC solutions, one entry per (32 base values x 8 intensity tables x
// selector mapping x selector range). The initializers come from an included .inc file;
// create_etc1_to_astc_conversion_table_0_47() writes a file of that name when
// BASISD_WRITE_NEW_ASTC_TABLES is enabled.
4850 static const etc1_to_astc_solution g_etc1_to_astc[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
4851#include "basisu_transcoder_tables_astc.inc"
4852 };
4853
4854 // The best selector mapping to use given a base color + inten table and used selector range for converting grayscale data.
4855 static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
4856
4857#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
// Second solution table with the same dimensions, only built when
// BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is enabled, plus its own best-grayscale-mapping table.
// NOTE(review): the "_0_255" suffix presumably refers to a 0-255 endpoint range - confirm.
4858 static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = {
4859#include "basisu_transcoder_tables_astc_0_255.inc"
4860 };
4861 static uint8_t g_etc1_to_astc_best_grayscale_mapping_0_255[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES];
4862#endif
4863
// 48-entry table indexed by endpoint values 0..47 in the table generator; each entry is
// the unquantized value for that index. Not initialized here.
4864 static uint32_t g_ise_to_unquant[48];
4865
4866#if BASISD_WRITE_NEW_ASTC_TABLES
4867 static void create_etc1_to_astc_conversion_table_0_47()
4868 {
4869 FILE* pFile = nullptr;
4870 fopen_s(&pFile, "basisu_transcoder_tables_astc.inc", "w");
4871
4872 uint32_t n = 0;
4873
4874 for (int inten = 0; inten < 8; inten++)
4875 {
4876 for (uint32_t g = 0; g < 32; g++)
4877 {
4878 color32 block_colors[4];
4879 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4880
4881 for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
4882 {
4883 const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
4884 const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
4885
4886 uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4887 uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4888 uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4889 uint64_t highest_best_err = 0;
4890
4891 for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
4892 {
4893 uint32_t best_lo = 0;
4894 uint32_t best_hi = 0;
4895 uint64_t best_err = UINT64_MAX;
4896
4897 for (uint32_t hi = 0; hi <= 47; hi++)
4898 {
4899 for (uint32_t lo = 0; lo <= 47; lo++)
4900 {
4901 uint32_t colors[4];
4902
4903 for (uint32_t s = 0; s < 4; s++)
4904 {
4905 uint32_t s_scaled = s | (s << 2) | (s << 4);
4906 if (s_scaled > 32)
4907 s_scaled++;
4908
4909 uint32_t c0 = g_ise_to_unquant[lo] | (g_ise_to_unquant[lo] << 8);
4910 uint32_t c1 = g_ise_to_unquant[hi] | (g_ise_to_unquant[hi] << 8);
4911 colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
4912 }
4913
4914 uint64_t total_err = 0;
4915
4916 for (uint32_t s = low_selector; s <= high_selector; s++)
4917 {
4918 int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
4919
4920 int err_scale = 1;
4921 // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
4922 // the low/high selectors which are clamping to either 0 or 255.
4923 if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
4924 err_scale = 8;
4925
4926 total_err += (err * err) * err_scale;
4927 }
4928
4929 if (total_err < best_err)
4930 {
4931 best_err = total_err;
4932 best_lo = lo;
4933 best_hi = hi;
4934 }
4935 }
4936 }
4937
4938 mapping_best_low[m] = best_lo;
4939 mapping_best_high[m] = best_hi;
4940 mapping_best_err[m] = best_err;
4941 highest_best_err = basisu::maximum(highest_best_err, best_err);
4942
4943 } // m
4944
4945 for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
4946 {
4947 uint64_t err = mapping_best_err[m];
4948
4949 err = basisu::minimum<uint64_t>(err, 0xFFFF);
4950
4951 fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
4952
4953 n++;
4954 if ((n & 31) == 31)
4955 fprintf(pFile, "\n");
4956 } // m
4957
4958 } // sr
4959 } // g
4960 } // inten
4961
4962 fclose(pFile);
4963 }
4964
4965 static void create_etc1_to_astc_conversion_table_0_255()
4966 {
4967 FILE* pFile = nullptr;
4968 fopen_s(&pFile, "basisu_transcoder_tables_astc_0_255.inc", "w");
4969
4970 uint32_t n = 0;
4971
4972 for (int inten = 0; inten < 8; inten++)
4973 {
4974 for (uint32_t g = 0; g < 32; g++)
4975 {
4976 color32 block_colors[4];
4977 decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);
4978
4979 for (uint32_t sr = 0; sr < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; sr++)
4980 {
4981 const uint32_t low_selector = g_etc1_to_astc_selector_ranges[sr].m_low;
4982 const uint32_t high_selector = g_etc1_to_astc_selector_ranges[sr].m_high;
4983
4984 uint32_t mapping_best_low[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4985 uint32_t mapping_best_high[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4986 uint64_t mapping_best_err[NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
4987 uint64_t highest_best_err = 0;
4988
4989 for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
4990 {
4991 uint32_t best_lo = 0;
4992 uint32_t best_hi = 0;
4993 uint64_t best_err = UINT64_MAX;
4994
4995 for (uint32_t hi = 0; hi <= 255; hi++)
4996 {
4997 for (uint32_t lo = 0; lo <= 255; lo++)
4998 {
4999 uint32_t colors[4];
5000
5001 for (uint32_t s = 0; s < 4; s++)
5002 {
5003 uint32_t s_scaled = s | (s << 2) | (s << 4);
5004 if (s_scaled > 32)
5005 s_scaled++;
5006
5007 uint32_t c0 = lo | (lo << 8);
5008 uint32_t c1 = hi | (hi << 8);
5009 colors[s] = ((c0 * (64 - s_scaled) + c1 * s_scaled + 32) / 64) >> 8;
5010 }
5011
5012 uint64_t total_err = 0;
5013
5014 for (uint32_t s = low_selector; s <= high_selector; s++)
5015 {
5016 int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]];
5017
5018 // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
5019 // the low/high selectors which are clamping to either 0 or 255.
5020 int err_scale = 1;
5021 if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
5022 err_scale = 8;
5023
5024 total_err += (err * err) * err_scale;
5025 }
5026
5027 if (total_err < best_err)
5028 {
5029 best_err = total_err;
5030 best_lo = lo;
5031 best_hi = hi;
5032 }
5033 }
5034 }
5035
5036 mapping_best_low[m] = best_lo;
5037 mapping_best_high[m] = best_hi;
5038 mapping_best_err[m] = best_err;
5039 highest_best_err = basisu::maximum(highest_best_err, best_err);
5040 } // m
5041
5042 for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++)
5043 {
5044 uint64_t err = mapping_best_err[m];
5045
5046 err = basisu::minimum<uint64_t>(err, 0xFFFF);
5047
5048 fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err);
5049
5050 n++;
5051 if ((n & 31) == 31)
5052 fprintf(pFile, "\n");
5053 } // m
5054
5055 } // sr
5056 } // g
5057 } // inten
5058
5059 fclose(pFile);
5060 }
5061#endif
5062
5063#endif
5064
5065#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
// Table encodes 5 trits to 8 output bits. 3^5 entries.
// Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
// astc_encode_trits() indexes this table with t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4, where t0..t4 are the trits of its 5 input values.
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
    43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
    131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
    208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
    225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
    191, 223, 124, 125, 126 };
5074
// Extracts bits [low,high] (both inclusive, bit 0 is the least significant) from `bits` and returns them
// right-aligned. Requires 0 <= low <= high <= 31.
// The mask is built with unsigned arithmetic and the full 32-bit field is special-cased, because shifting a
// 32-bit value by 32 (or a signed 1 by 31) is not valid.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
    const int num_bits = high - low + 1;
    const uint32_t mask = (num_bits >= 32) ? 0xFFFFFFFFu : ((1u << num_bits) - 1u);
    return (bits >> low) & mask;
}
5080
5081 // Writes bits to output in an endian safe way
5082 static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits)
5083 {
5084 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5085
5086 while (total_bits)
5087 {
5088 const uint32_t bits_to_write = basisu::minimum<int>(total_bits, 8 - (bit_pos & 7));
5089
5090 pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7));
5091
5092 bit_pos += bits_to_write;
5093 total_bits -= bits_to_write;
5094 value >>= bits_to_write;
5095 }
5096 }
5097
5098 // Encodes 5 values to output, usable for any range that uses trits and bits
5099 static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
5100 {
5101 // First extract the trits and the bits from the 5 input values
5102 int trits = 0, bits[5];
5103 const uint32_t bit_mask = (1 << n) - 1;
5104 for (int i = 0; i < 5; i++)
5105 {
5106 static const int s_muls[5] = { 1, 3, 9, 27, 81 };
5107
5108 const int t = pValues[i] >> n;
5109
5110 trits += t * s_muls[i];
5111 bits[i] = pValues[i] & bit_mask;
5112 }
5113
5114 // Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
5115 // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
5116
5117 assert(trits < 243);
5118 const int T = g_astc_trit_encode[trits];
5119
5120 // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
5121 astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
5122
5123 astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
5124 (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
5125 }
5126#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC
5127
5128#if BASISD_SUPPORT_ASTC
// Unpacked endpoint and weight values for one ASTC block, as consumed by the astc_pack_block_*() functions.
struct astc_block_params
{
    // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00)
    uint8_t m_endpoints[10];
    // One entry per weight. The packers in this file read either all 32 entries or only the first 16.
    uint8_t m_weights[32];
};
5135
5136 // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2).
5137 // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity.
5138 // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color.
5139 // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47.
5140 // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec:
5141 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5142 // 32 total weights, stored as 16 CA CA, each ranging from 0-3.
5143 static void astc_pack_block_cem_12_weight_range2(uint32_t *pOutput, const astc_block_params* pBlock)
5144 {
5145 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5146
5147 // Write constant block mode, color component selector, number of partitions, color endpoint mode
5148 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5149 pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5150 pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5151
5152 pOutput[2] = 0;
5153 pOutput[3] = 0;
5154
5155 // Pack 8 endpoints (each ranging between [0,47]) using BISE starting at bit 17
5156 int bit_pos = 17;
5157 astc_encode_trits(pOutput, pBlock->m_endpoints, bit_pos, 4);
5158 astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4);
5159
5160 // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5161
5162 for (uint32_t i = 0; i < 32; i++)
5163 {
5164 static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5165 const uint32_t ofs = 126 - (i * 2);
5166 pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5167 }
5168 }
5169
5170 // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights
5171 // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient.
5172 static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock)
5173 {
5174 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5175
5176 // Write constant block mode, color component selector, number of partitions, color endpoint mode
5177 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5178 pBytes[0] = 0x41; pBytes[1] = 0x84; pBytes[2] = 0x01; pBytes[3] = 0x00;
5179 pOutput[1] = 0;
5180 pBytes[8] = 0x00; pBytes[9] = 0x00; pBytes[10] = 0x00; pBytes[11] = 0xc0;
5181 pOutput[3] = 0;
5182
5183 // Pack 8 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5184 int bit_pos = 17;
5185 for (uint32_t i = 0; i < 8; i++)
5186 astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5187
5188 // Pack 32 1-bit weights, which are stored from the top down into the block in opposite bit order.
5189 for (uint32_t i = 0; i < 32; i++)
5190 {
5191 const uint32_t ofs = 127 - i;
5192 pBytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
5193 }
5194 }
5195
5196#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5197 // Optional 8-bit endpoint packing functions.
5198
5199 // CEM mode 4 (LDR Luminance+Alpha Direct), 8-bit endpoints, 2 bit weights
5200 static void astc_pack_block_cem_4_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5201 {
5202 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5203
5204 // Write constant block mode, color component selector, number of partitions, color endpoint mode
5205 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5206 pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00;
5207 pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0;
5208
5209 pOutput[2] = 0;
5210 pOutput[3] = 0;
5211
5212 // Pack 4 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5213 int bit_pos = 17;
5214 for (uint32_t i = 0; i < 4; i++)
5215 astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5216
5217 // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order.
5218 for (uint32_t i = 0; i < 32; i++)
5219 {
5220 static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5221 const uint32_t ofs = 126 - (i * 2);
5222 pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5223 }
5224 }
5225
5226 // CEM mode 8 (LDR RGB Direct), 8-bit endpoints, 2 bit weights
5227 static void astc_pack_block_cem_8_weight_range2(uint32_t* pOutput, const astc_block_params* pBlock)
5228 {
5229 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);
5230
5231 // Write constant block mode, color component selector, number of partitions, color endpoint mode
5232 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode
5233 pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00;
5234
5235 pOutput[1] = 0;
5236 pOutput[2] = 0;
5237 pOutput[3] = 0;
5238
5239 // Pack 6 endpoints (each ranging between [0,255]) as 8-bits starting at bit 17
5240 int bit_pos = 17;
5241 for (uint32_t i = 0; i < 6; i++)
5242 astc_set_bits(pOutput, bit_pos, pBlock->m_endpoints[i], 8);
5243
5244 // Pack 16 2-bit weights, which are stored from the top down into the block in opposite bit order.
5245 for (uint32_t i = 0; i < 16; i++)
5246 {
5247 static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 };
5248 const uint32_t ofs = 126 - (i * 2);
5249 pBytes[ofs >> 3] |= (s_reverse_bits[pBlock->m_weights[i]] << (ofs & 7));
5250 }
5251 }
5252#endif
5253
// Optimal quantized [0,47] entry to use given [0,255] input
// (the endpoint index whose g_ise_to_unquant[] value is closest to the input). Filled in by transcoder_init_astc().
static uint8_t g_astc_single_color_encoding_0[256];

// Optimal quantized [0,47] low/high values given [0,255] input assuming a selector of 1
// (i.e. the interpolated value at weight 21/64 is closest to the input). Filled in by transcoder_init_astc().
static struct
{
    uint8_t m_lo, m_hi;
} g_astc_single_color_encoding_1[256];
5262
5263 static void transcoder_init_astc()
5264 {
5265 for (uint32_t base_color = 0; base_color < 32; base_color++)
5266 {
5267 for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5268 {
5269 for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5270 {
5271 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5272
5273 uint32_t best_mapping = 0;
5274 uint32_t best_err = UINT32_MAX;
5275 for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5276 {
5277 if (pTable_g[mapping_index].m_err < best_err)
5278 {
5279 best_err = pTable_g[mapping_index].m_err;
5280 best_mapping = mapping_index;
5281 }
5282 }
5283
5284 g_etc1_to_astc_best_grayscale_mapping[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5285 }
5286 }
5287 }
5288
5289#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5290 for (uint32_t base_color = 0; base_color < 32; base_color++)
5291 {
5292 for (uint32_t inten_table = 0; inten_table < 8; inten_table++)
5293 {
5294 for (uint32_t range_index = 0; range_index < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; range_index++)
5295 {
5296 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + range_index * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5297
5298 uint32_t best_mapping = 0;
5299 uint32_t best_err = UINT32_MAX;
5300 for (uint32_t mapping_index = 0; mapping_index < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; mapping_index++)
5301 {
5302 if (pTable_g[mapping_index].m_err < best_err)
5303 {
5304 best_err = pTable_g[mapping_index].m_err;
5305 best_mapping = mapping_index;
5306 }
5307 }
5308
5309 g_etc1_to_astc_best_grayscale_mapping_0_255[base_color][inten_table][range_index] = static_cast<uint8_t>(best_mapping);
5310 }
5311 }
5312 }
5313#endif
5314
5315 for (uint32_t i = 0; i < NUM_ETC1_TO_ASTC_SELECTOR_RANGES; i++)
5316 {
5317 uint32_t l = g_etc1_to_astc_selector_ranges[i].m_low;
5318 uint32_t h = g_etc1_to_astc_selector_ranges[i].m_high;
5319 g_etc1_to_astc_selector_range_index[l][h] = i;
5320 }
5321
5322 // Endpoint dequantization, see:
5323 // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization
5324 for (uint32_t trit = 0; trit < 3; trit++)
5325 {
5326 for (uint32_t bit = 0; bit < 16; bit++)
5327 {
5328 const uint32_t A = (bit & 1) ? 511 : 0;
5329 const uint32_t B = (bit >> 1) | ((bit >> 1) << 6);
5330 const uint32_t C = 22;
5331 const uint32_t D = trit;
5332
5333 uint32_t unq = D * C + B;
5334 unq = unq ^ A;
5335 unq = (A & 0x80) | (unq >> 2);
5336
5337 g_ise_to_unquant[bit | (trit << 4)] = unq;
5338 }
5339 }
5340
5341 // Compute table used for optimal single color encoding.
5342 for (int i = 0; i < 256; i++)
5343 {
5344 int lowest_e = INT_MAX;
5345
5346 for (int lo = 0; lo < 48; lo++)
5347 {
5348 for (int hi = 0; hi < 48; hi++)
5349 {
5350 const int lo_v = g_ise_to_unquant[lo];
5351 const int hi_v = g_ise_to_unquant[hi];
5352
5353 int l = lo_v | (lo_v << 8);
5354 int h = hi_v | (hi_v << 8);
5355
5356 int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8;
5357
5358 int e = abs(v - i);
5359
5360 if (e < lowest_e)
5361 {
5362 g_astc_single_color_encoding_1[i].m_hi = static_cast<uint8_t>(hi);
5363 g_astc_single_color_encoding_1[i].m_lo = static_cast<uint8_t>(lo);
5364
5365 lowest_e = e;
5366 }
5367
5368 } // hi
5369 } // lo
5370 }
5371
5372 for (int i = 0; i < 256; i++)
5373 {
5374 int lowest_e = INT_MAX;
5375
5376 for (int lo = 0; lo < 48; lo++)
5377 {
5378 const int lo_v = g_ise_to_unquant[lo];
5379
5380 int e = abs(lo_v - i);
5381
5382 if (e < lowest_e)
5383 {
5384 g_astc_single_color_encoding_0[i] = static_cast<uint8_t>(lo);
5385
5386 lowest_e = e;
5387 }
5388 } // lo
5389 }
5390 }
5391
5392 // Converts opaque or color+alpha ETC1S block to ASTC 4x4.
5393 // This function tries to use the best ASTC mode given the block's actual contents.
5394 static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector,
5395 bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook)
5396 {
5397 astc_block_params blk;
5398
5399 blk.m_endpoints[8] = 0;
5400 blk.m_endpoints[9] = 0;
5401
5402 int constant_alpha_val = 255;
5403 int num_unique_alpha_selectors = 1;
5404
5405 if (transcode_alpha)
5406 {
5407 const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5408
5409 num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;
5410
5411 if (num_unique_alpha_selectors == 1)
5412 {
5413 const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5414
5415 const color32& alpha_base_color = alpha_endpoint.m_color5;
5416 const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5417
5418 int alpha_block_colors[4];
5419 decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5420
5421 constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
5422 }
5423 }
5424
5425 const color32& base_color = pEndpoints->m_color5;
5426 const uint32_t inten_table = pEndpoints->m_inten5;
5427
5428 const uint32_t low_selector = pSelector->m_lo_selector;
5429 const uint32_t high_selector = pSelector->m_hi_selector;
5430
5431 // Handle solid color or BTC blocks, which can always be encoded from ETC1S to ASTC losslessly.
5432 if ((pSelector->m_num_unique_selectors == 1) && (num_unique_alpha_selectors == 1))
5433 {
5434 // Both color and alpha are constant, write a solid color block and exit.
5435 // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks
5436 uint32_t r, g, b;
5437 decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
5438
5439 uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
5440 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
5441
5442 pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
5443
5444 pOutput[1] = 0xffffffff;
5445 pOutput[2] = 0;
5446 pOutput[3] = 0;
5447
5448 int bit_pos = 64;
5449 astc_set_bits(pOutput, bit_pos, r | (r << 8), 16);
5450 astc_set_bits(pOutput, bit_pos, g | (g << 8), 16);
5451 astc_set_bits(pOutput, bit_pos, b | (b << 8), 16);
5452 astc_set_bits(pOutput, bit_pos, constant_alpha_val | (constant_alpha_val << 8), 16);
5453
5454 return;
5455 }
5456 else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2))
5457 {
5458 // Both color and alpha use <= 2 unique selectors each.
5459 // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights).
5460 color32 block_colors[4];
5461 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
5462
5463 blk.m_endpoints[0] = block_colors[low_selector].r;
5464 blk.m_endpoints[2] = block_colors[low_selector].g;
5465 blk.m_endpoints[4] = block_colors[low_selector].b;
5466
5467 blk.m_endpoints[1] = block_colors[high_selector].r;
5468 blk.m_endpoints[3] = block_colors[high_selector].g;
5469 blk.m_endpoints[5] = block_colors[high_selector].b;
5470
5471 int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
5472 int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
5473 bool invert = false;
5474 if (s1 < s0)
5475 {
5476 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5477 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5478 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5479 invert = true;
5480 }
5481
5482 if (transcode_alpha)
5483 {
5484 const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5485 const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5486
5487 const color32& alpha_base_color = alpha_endpoint.m_color5;
5488 const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5489
5490 const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5491 const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5492
5493 int alpha_block_colors[4];
5494 decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5495
5496 blk.m_endpoints[6] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5497 blk.m_endpoints[7] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5498
5499 for (uint32_t y = 0; y < 4; y++)
5500 {
5501 for (uint32_t x = 0; x < 4; x++)
5502 {
5503 uint32_t s = alpha_selectors.get_selector(x, y);
5504 s = (s == alpha_high_selector) ? 1 : 0;
5505
5506 blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(s);
5507 } // x
5508 } // y
5509 }
5510 else
5511 {
5512 blk.m_endpoints[6] = 255;
5513 blk.m_endpoints[7] = 255;
5514
5515 for (uint32_t i = 0; i < 16; i++)
5516 blk.m_weights[i * 2 + 1] = 0;
5517 }
5518
5519 for (uint32_t y = 0; y < 4; y++)
5520 {
5521 for (uint32_t x = 0; x < 4; x++)
5522 {
5523 uint32_t s = pSelector->get_selector(x, y);
5524
5525 s = (s == high_selector) ? 1 : 0;
5526
5527 if (invert)
5528 s = 1 - s;
5529
5530 blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(s);
5531 } // x
5532 } // y
5533
5534 astc_pack_block_cem_12_weight_range0(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5535
5536 return;
5537 }
5538
5539 // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex.
5540
5541#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5542 // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints.
5543
5544 // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha.
5545 if ((base_color.r == base_color.g) && (base_color.r == base_color.b))
5546 {
5547 if (transcode_alpha)
5548 {
5549 const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5550 const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5551
5552 const color32& alpha_base_color = alpha_endpoint.m_color5;
5553 const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5554
5555 const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5556 const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5557
5558 if (num_unique_alpha_selectors <= 2)
5559 {
5560 // Simple alpha block with only 1 or 2 unique values, so use BTC. This is lossless.
5561 int alpha_block_colors[4];
5562 decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5563
5564 blk.m_endpoints[2] = static_cast<uint8_t>(alpha_block_colors[alpha_low_selector]);
5565 blk.m_endpoints[3] = static_cast<uint8_t>(alpha_block_colors[alpha_high_selector]);
5566
5567 for (uint32_t i = 0; i < 16; i++)
5568 {
5569 uint32_t s = alpha_selectors.get_selector(i & 3, i >> 2);
5570 blk.m_weights[i * 2 + 1] = (s == alpha_high_selector) ? 3 : 0;
5571 }
5572 }
5573 else
5574 {
5575 // Convert ETC1S alpha
5576 const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
5577
5578 //[32][8][RANGES][MAPPING]
5579 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5580
5581 const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
5582
5583 blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
5584 blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
5585
5586 const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5587
5588 for (uint32_t y = 0; y < 4; y++)
5589 {
5590 for (uint32_t x = 0; x < 4; x++)
5591 {
5592 uint32_t s = alpha_selectors.get_selector(x, y);
5593 uint32_t as = pSelectors_xlat[s];
5594
5595 blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
5596 } // x
5597 } // y
5598 }
5599 }
5600 else
5601 {
5602 // No alpha slice - set output alpha to all 255's
5603 blk.m_endpoints[2] = 255;
5604 blk.m_endpoints[3] = 255;
5605
5606 for (uint32_t i = 0; i < 16; i++)
5607 blk.m_weights[i * 2 + 1] = 0;
5608 }
5609
5610 if (pSelector->m_num_unique_selectors <= 2)
5611 {
5612 // Simple color block with only 1 or 2 unique values, so use BTC. This is lossless.
5613 int block_colors[4];
5614 decoder_etc_block::get_block_colors5_g(block_colors, base_color, inten_table);
5615
5616 blk.m_endpoints[0] = static_cast<uint8_t>(block_colors[low_selector]);
5617 blk.m_endpoints[1] = static_cast<uint8_t>(block_colors[high_selector]);
5618
5619 for (uint32_t i = 0; i < 16; i++)
5620 {
5621 uint32_t s = pSelector->get_selector(i & 3, i >> 2);
5622 blk.m_weights[i * 2] = (s == high_selector) ? 3 : 0;
5623 }
5624 }
5625 else
5626 {
5627 // Convert ETC1S alpha
5628 const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5629
5630 //[32][8][RANGES][MAPPING]
5631 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5632
5633 const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table];
5634
5635 blk.m_endpoints[0] = pTable_g[best_mapping].m_lo;
5636 blk.m_endpoints[1] = pTable_g[best_mapping].m_hi;
5637
5638 const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5639
5640 for (uint32_t y = 0; y < 4; y++)
5641 {
5642 for (uint32_t x = 0; x < 4; x++)
5643 {
5644 uint32_t s = pSelector->get_selector(x, y);
5645 uint32_t as = pSelectors_xlat[s];
5646
5647 blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
5648 } // x
5649 } // y
5650 }
5651
5652 astc_pack_block_cem_4_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5653 return;
5654 }
5655
5656 // The block isn't grayscale and it uses > 2 unique selectors for opaque and/or alpha.
5657 // Check for fully opaque blocks, if so use 8-bit endpoints for slightly higher opaque quality (higher than BC1, but lower than BC7 mode 6 opaque).
5658 if ((num_unique_alpha_selectors == 1) && (constant_alpha_val == 255))
5659 {
5660 // Convert ETC1S color
5661 const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5662
5663 //[32][8][RANGES][MAPPING]
5664 const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5665 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5666 const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5667
5668 uint32_t best_err = UINT_MAX;
5669 uint32_t best_mapping = 0;
5670
5671 assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
5672#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
5673 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
5674 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
5675#undef DO_ITER
5676
5677 blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
5678 blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
5679
5680 blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
5681 blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
5682
5683 blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
5684 blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
5685
5686 int s0 = blk.m_endpoints[0] + blk.m_endpoints[2] + blk.m_endpoints[4];
5687 int s1 = blk.m_endpoints[1] + blk.m_endpoints[3] + blk.m_endpoints[5];
5688 bool invert = false;
5689
5690 if (s1 < s0)
5691 {
5692 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5693 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5694 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5695 invert = true;
5696 }
5697
5698 const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5699
5700 for (uint32_t y = 0; y < 4; y++)
5701 {
5702 for (uint32_t x = 0; x < 4; x++)
5703 {
5704 uint32_t s = pSelector->get_selector(x, y);
5705 uint32_t as = pSelectors_xlat[s];
5706 if (invert)
5707 as = 3 - as;
5708
5709 blk.m_weights[x + y * 4] = static_cast<uint8_t>(as);
5710 } // x
5711 } // y
5712
5713 // Now pack to ASTC
5714 astc_pack_block_cem_8_weight_range2(reinterpret_cast<uint32_t*>(pDst_block), &blk);
5715 return;
5716 }
5717#endif //#if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY
5718
5719 // Nothing else worked, so fall back to CEM Mode 12 (LDR RGBA Direct), [0,47] endpoints, weight range 2 (2-bit weights), dual planes.
5720 // This mode can handle everything, but at slightly less quality than BC1.
5721 if (transcode_alpha)
5722 {
5723 const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pDst_block)[0]];
5724 const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pDst_block)[1]];
5725
5726 const color32& alpha_base_color = alpha_endpoint.m_color5;
5727 const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;
5728
5729 const uint32_t alpha_low_selector = alpha_selectors.m_lo_selector;
5730 const uint32_t alpha_high_selector = alpha_selectors.m_hi_selector;
5731
5732 if (alpha_low_selector == alpha_high_selector)
5733 {
5734 // Solid alpha block - use precomputed tables.
5735 int alpha_block_colors[4];
5736 decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);
5737
5738 const uint32_t g = alpha_block_colors[alpha_low_selector];
5739
5740 blk.m_endpoints[6] = g_astc_single_color_encoding_1[g].m_lo;
5741 blk.m_endpoints[7] = g_astc_single_color_encoding_1[g].m_hi;
5742
5743 for (uint32_t i = 0; i < 16; i++)
5744 blk.m_weights[i * 2 + 1] = 1;
5745 }
5746 else if ((alpha_inten_table >= 7) && (alpha_selectors.m_num_unique_selectors == 2) && (alpha_low_selector == 0) && (alpha_high_selector == 3))
5747 {
5748 // Handle outlier case where only the two outer colors are used with inten table 7.
5749 color32 alpha_block_colors[4];
5750
5751 decoder_etc_block::get_block_colors5(alpha_block_colors, alpha_base_color, alpha_inten_table);
5752
5753 const uint32_t g0 = alpha_block_colors[0].g;
5754 const uint32_t g1 = alpha_block_colors[3].g;
5755
5756 blk.m_endpoints[6] = g_astc_single_color_encoding_0[g0];
5757 blk.m_endpoints[7] = g_astc_single_color_encoding_0[g1];
5758
5759 for (uint32_t y = 0; y < 4; y++)
5760 {
5761 for (uint32_t x = 0; x < 4; x++)
5762 {
5763 uint32_t s = alpha_selectors.get_selector(x, y);
5764 uint32_t as = (s == alpha_high_selector) ? 3 : 0;
5765
5766 blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
5767 } // x
5768 } // y
5769 }
5770 else
5771 {
5772 // Convert ETC1S alpha
5773 const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector];
5774
5775 //[32][8][RANGES][MAPPING]
5776 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5777
5778 const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping[alpha_base_color.g][alpha_inten_table][alpha_selector_range_table];
5779
5780 blk.m_endpoints[6] = pTable_g[best_mapping].m_lo;
5781 blk.m_endpoints[7] = pTable_g[best_mapping].m_hi;
5782
5783 const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5784
5785 for (uint32_t y = 0; y < 4; y++)
5786 {
5787 for (uint32_t x = 0; x < 4; x++)
5788 {
5789 uint32_t s = alpha_selectors.get_selector(x, y);
5790 uint32_t as = pSelectors_xlat[s];
5791
5792 blk.m_weights[(x + y * 4) * 2 + 1] = static_cast<uint8_t>(as);
5793 } // x
5794 } // y
5795 }
5796 }
5797 else
5798 {
5799 // No alpha slice - set output alpha to all 255's
5800 // 1 is 255 when dequantized
5801 blk.m_endpoints[6] = 1;
5802 blk.m_endpoints[7] = 1;
5803
5804 for (uint32_t i = 0; i < 16; i++)
5805 blk.m_weights[i * 2 + 1] = 0;
5806 }
5807
5808 if (low_selector == high_selector)
5809 {
5810 // Solid color block - use precomputed tables of optimal endpoints assuming selector weights are all 1.
5811 color32 block_colors[4];
5812
5813 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
5814
5815 const uint32_t r = block_colors[low_selector].r;
5816 const uint32_t g = block_colors[low_selector].g;
5817 const uint32_t b = block_colors[low_selector].b;
5818
5819 blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo;
5820 blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi;
5821
5822 blk.m_endpoints[2] = g_astc_single_color_encoding_1[g].m_lo;
5823 blk.m_endpoints[3] = g_astc_single_color_encoding_1[g].m_hi;
5824
5825 blk.m_endpoints[4] = g_astc_single_color_encoding_1[b].m_lo;
5826 blk.m_endpoints[5] = g_astc_single_color_encoding_1[b].m_hi;
5827
5828 int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
5829 int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
5830 bool invert = false;
5831
5832 if (s1 < s0)
5833 {
5834 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5835 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5836 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5837 invert = true;
5838 }
5839
5840 for (uint32_t i = 0; i < 16; i++)
5841 blk.m_weights[i * 2] = invert ? 2 : 1;
5842 }
5843 else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
5844 {
5845 // Handle outlier case where only the two outer colors are used with inten table 7.
5846 color32 block_colors[4];
5847
5848 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
5849
5850 const uint32_t r0 = block_colors[0].r;
5851 const uint32_t g0 = block_colors[0].g;
5852 const uint32_t b0 = block_colors[0].b;
5853
5854 const uint32_t r1 = block_colors[3].r;
5855 const uint32_t g1 = block_colors[3].g;
5856 const uint32_t b1 = block_colors[3].b;
5857
5858 blk.m_endpoints[0] = g_astc_single_color_encoding_0[r0];
5859 blk.m_endpoints[1] = g_astc_single_color_encoding_0[r1];
5860
5861 blk.m_endpoints[2] = g_astc_single_color_encoding_0[g0];
5862 blk.m_endpoints[3] = g_astc_single_color_encoding_0[g1];
5863
5864 blk.m_endpoints[4] = g_astc_single_color_encoding_0[b0];
5865 blk.m_endpoints[5] = g_astc_single_color_encoding_0[b1];
5866
5867 int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
5868 int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
5869 bool invert = false;
5870
5871 if (s1 < s0)
5872 {
5873 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5874 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5875 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5876 invert = true;
5877 }
5878
5879 for (uint32_t y = 0; y < 4; y++)
5880 {
5881 for (uint32_t x = 0; x < 4; x++)
5882 {
5883 uint32_t s = pSelector->get_selector(x, y);
5884 uint32_t as = (s == low_selector) ? 0 : 3;
5885
5886 if (invert)
5887 as = 3 - as;
5888
5889 blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
5890 } // x
5891 } // y
5892 }
5893 else
5894 {
5895 // Convert ETC1S color
5896 const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector];
5897
5898 //[32][8][RANGES][MAPPING]
5899 const etc1_to_astc_solution* pTable_r = &g_etc1_to_astc[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5900 const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5901 const etc1_to_astc_solution* pTable_b = &g_etc1_to_astc[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS];
5902
5903 uint32_t best_err = UINT_MAX;
5904 uint32_t best_mapping = 0;
5905
5906 assert(NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS == 10);
5907#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
5908 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
5909 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
5910#undef DO_ITER
5911
5912 blk.m_endpoints[0] = pTable_r[best_mapping].m_lo;
5913 blk.m_endpoints[1] = pTable_r[best_mapping].m_hi;
5914
5915 blk.m_endpoints[2] = pTable_g[best_mapping].m_lo;
5916 blk.m_endpoints[3] = pTable_g[best_mapping].m_hi;
5917
5918 blk.m_endpoints[4] = pTable_b[best_mapping].m_lo;
5919 blk.m_endpoints[5] = pTable_b[best_mapping].m_hi;
5920
5921 int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]];
5922 int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]];
5923 bool invert = false;
5924
5925 if (s1 < s0)
5926 {
5927 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
5928 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
5929 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
5930 invert = true;
5931 }
5932
5933 const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0];
5934
5935 for (uint32_t y = 0; y < 4; y++)
5936 {
5937 for (uint32_t x = 0; x < 4; x++)
5938 {
5939 uint32_t s = pSelector->get_selector(x, y);
5940 uint32_t as = pSelectors_xlat[s];
5941 if (invert)
5942 as = 3 - as;
5943
5944 blk.m_weights[(x + y * 4) * 2] = static_cast<uint8_t>(as);
5945 } // x
5946 } // y
5947 }
5948
5949 // Now pack to ASTC
5950 astc_pack_block_cem_12_weight_range2(reinterpret_cast<uint32_t *>(pDst_block), &blk);
5951 }
5952#endif
5953
5954#if BASISD_SUPPORT_ATC
5955 // ATC and PVRTC2 both use these tables.
	// One precomputed conversion solution for a single color component.
	// The .inc tables included below are lists of {lo, hi, err} triples that initialize arrays of this struct.
	struct etc1s_to_atc_solution
	{
		uint8_t m_lo;	// low endpoint component, passed to atc_block::set_low_color() by the ATC converter
		uint8_t m_hi;	// high endpoint component, passed to atc_block::set_high_color() by the ATC converter
		uint16_t m_err;	// error of this solution; the converter sums it across R, G and B to rank selector mappings
	};
5962
	// The (low, high) ETC1S selector ranges that have precomputed solutions in the conversion tables.
	// The position of a range in this list is the range index used when addressing those tables.
	static dxt_selector_range g_etc1s_to_atc_selector_ranges[] =
	{
		{ 0, 3 },
		{ 1, 3 },
		{ 0, 2 },
		{ 1, 2 },
		{ 2, 3 },
		{ 0, 1 },
	};

	// Number of entries in g_etc1s_to_atc_selector_ranges.
	const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_RANGES = sizeof(g_etc1s_to_atc_selector_ranges) / sizeof(g_etc1s_to_atc_selector_ranges[0]);

	// Reverse lookup: [low selector][high selector] -> index into g_etc1s_to_atc_selector_ranges.
	// Written by transcoder_init_atc(), which only touches the pairs listed above.
	static uint32_t g_etc1s_to_atc_selector_range_index[4][4];
5976
	// Candidate selector remappings. Each row translates an ETC1S selector value (the column, 0-3)
	// into the ATC selector value to emit. The converter picks the row with the lowest summed table error.
	const uint32_t NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS = 10;
	static const uint8_t g_etc1s_to_atc_selector_mappings[NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS][4] =
	{
		{ 0, 0, 1, 1 },
		{ 0, 0, 1, 2 },
		{ 0, 0, 1, 3 },
		{ 0, 0, 2, 3 },
		{ 0, 1, 1, 1 },
		{ 0, 1, 2, 2 },
		{ 0, 1, 2, 3 }, //6 - identity
		{ 0, 2, 3, 3 },
		{ 1, 2, 2, 2 },
		{ 1, 2, 3, 3 },
	};
	// Row of g_etc1s_to_atc_selector_mappings that maps every selector to itself; when it wins,
	// the ATC converter copies the source selector bytes unchanged instead of remapping them.
	const uint32_t ATC_IDENTITY_SELECTOR_MAPPING_INDEX = 6;
5992
#if BASISD_SUPPORT_PVRTC2
	// Precomputed ETC1S -> PVRTC2 solutions. The table generator searches low endpoints in [0,15] and
	// high endpoints in [0,31] for this table.
	// NOTE(review): presumably indexed the same way as the ATC tables below (the generator uses the same
	// loop nesting) - confirm at the PVRTC2 call site.
	static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_45.inc"
	};

#if 0
	static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_pvrtc2_alpha_33.inc"
	};
#endif

#endif

	// Precomputed ETC1S -> ATC solutions where both endpoints are searched in [0,31]. The ATC converter uses
	// this table for the red and blue components.
	// Entry index: (inten_table * 32 + base color component) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS)
	//              + selector range index * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS + mapping index.
	static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_55.inc"
	};

	// Same layout as g_etc1s_to_atc_55, but the high endpoint is searched in [0,63]. The ATC converter uses
	// this table for the green component.
	static const etc1s_to_atc_solution g_etc1s_to_atc_56[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = {
#include "basisu_transcoder_tables_atc_56.inc"
	};
6013
6014 struct atc_match_entry
6015 {
6016 uint8_t m_lo;
6017 uint8_t m_hi;
6018 };
6019 static atc_match_entry g_pvrtc2_match45_equals_1[256], g_atc_match55_equals_1[256], g_atc_match56_equals_1[256]; // selector 1
6020 static atc_match_entry g_pvrtc2_match4[256], g_atc_match5[256], g_atc_match6[256];
6021
6022 static void prepare_atc_single_color_table(atc_match_entry* pTable, int size0, int size1, int sel)
6023 {
6024 for (int i = 0; i < 256; i++)
6025 {
6026 int lowest_e = 256;
6027 for (int lo = 0; lo < size0; lo++)
6028 {
6029 int lo_e = lo;
6030 if (size0 == 16)
6031 {
6032 lo_e = (lo_e << 1) | (lo_e >> 3);
6033 lo_e = (lo_e << 3) | (lo_e >> 2);
6034 }
6035 else if (size0 == 32)
6036 lo_e = (lo_e << 3) | (lo_e >> 2);
6037 else
6038 lo_e = (lo_e << 2) | (lo_e >> 4);
6039
6040 for (int hi = 0; hi < size1; hi++)
6041 {
6042 int hi_e = hi;
6043 if (size1 == 16)
6044 {
6045 // This is only for PVRTC2 - expand to 5 then 8
6046 hi_e = (hi_e << 1) | (hi_e >> 3);
6047 hi_e = (hi_e << 3) | (hi_e >> 2);
6048 }
6049 else if (size1 == 32)
6050 hi_e = (hi_e << 3) | (hi_e >> 2);
6051 else
6052 hi_e = (hi_e << 2) | (hi_e >> 4);
6053
6054 int e;
6055
6056 if (sel == 1)
6057 {
6058 // Selector 1
6059 e = abs(((lo_e * 5 + hi_e * 3) / 8) - i);
6060 }
6061 else
6062 {
6063 assert(sel == 3);
6064
6065 // Selector 3
6066 e = abs(hi_e - i);
6067 }
6068
6069 if (e < lowest_e)
6070 {
6071 pTable[i].m_lo = static_cast<uint8_t>(lo);
6072 pTable[i].m_hi = static_cast<uint8_t>(hi);
6073
6074 lowest_e = e;
6075 }
6076
6077 } // hi
6078 } // lo
6079 } // i
6080 }
6081
6082 static void transcoder_init_atc()
6083 {
6084 prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1);
6085 prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1);
6086 prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1);
6087
6088 prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3);
6089 prepare_atc_single_color_table(g_atc_match5, 1, 32, 3);
6090 prepare_atc_single_color_table(g_atc_match6, 1, 64, 3);
6091
6092 for (uint32_t i = 0; i < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; i++)
6093 {
6094 uint32_t l = g_etc1s_to_atc_selector_ranges[i].m_low;
6095 uint32_t h = g_etc1s_to_atc_selector_ranges[i].m_high;
6096 g_etc1s_to_atc_selector_range_index[l][h] = i;
6097 }
6098 }
6099
6100 struct atc_block
6101 {
6102 uint8_t m_lo[2];
6103 uint8_t m_hi[2];
6104 uint8_t m_sels[4];
6105
6106 void set_low_color(uint32_t r, uint32_t g, uint32_t b)
6107 {
6108 assert((r < 32) && (g < 32) && (b < 32));
6109 uint32_t x = (r << 10) | (g << 5) | b;
6110 m_lo[0] = x & 0xFF;
6111 m_lo[1] = (x >> 8) & 0xFF;
6112 }
6113
6114 void set_high_color(uint32_t r, uint32_t g, uint32_t b)
6115 {
6116 assert((r < 32) && (g < 64) && (b < 32));
6117 uint32_t x = (r << 11) | (g << 5) | b;
6118 m_hi[0] = x & 0xFF;
6119 m_hi[1] = (x >> 8) & 0xFF;
6120 }
6121 };
6122
6123 static void convert_etc1s_to_atc(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
6124 {
6125 atc_block* pBlock = static_cast<atc_block*>(pDst);
6126
6127 const uint32_t low_selector = pSelector->m_lo_selector;
6128 const uint32_t high_selector = pSelector->m_hi_selector;
6129
6130 const color32& base_color = pEndpoints->m_color5;
6131 const uint32_t inten_table = pEndpoints->m_inten5;
6132
6133 if (low_selector == high_selector)
6134 {
6135 uint32_t r, g, b;
6136 decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
6137
6138 pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo);
6139 pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi);
6140
6141 pBlock->m_sels[0] = 0x55;
6142 pBlock->m_sels[1] = 0x55;
6143 pBlock->m_sels[2] = 0x55;
6144 pBlock->m_sels[3] = 0x55;
6145
6146 return;
6147 }
6148 else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6149 {
6150 color32 block_colors[4];
6151 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6152
6153 const uint32_t r0 = block_colors[0].r;
6154 const uint32_t g0 = block_colors[0].g;
6155 const uint32_t b0 = block_colors[0].b;
6156
6157 const uint32_t r1 = block_colors[3].r;
6158 const uint32_t g1 = block_colors[3].g;
6159 const uint32_t b1 = block_colors[3].b;
6160
6161 pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_atc_match5[b0].m_hi);
6162 pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match6[g1].m_hi, g_atc_match5[b1].m_hi);
6163
6164 pBlock->m_sels[0] = pSelector->m_selectors[0];
6165 pBlock->m_sels[1] = pSelector->m_selectors[1];
6166 pBlock->m_sels[2] = pSelector->m_selectors[2];
6167 pBlock->m_sels[3] = pSelector->m_selectors[3];
6168
6169 return;
6170 }
6171
6172 const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
6173
6174 //[32][8][RANGES][MAPPING]
6175 const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6176 const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_56[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6177 const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6178
6179 uint32_t best_err = UINT_MAX;
6180 uint32_t best_mapping = 0;
6181
6182 assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
6183#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6184 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6185 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6186#undef DO_ITER
6187
6188 pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
6189 pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
6190
6191 if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
6192 {
6193 pBlock->m_sels[0] = pSelector->m_selectors[0];
6194 pBlock->m_sels[1] = pSelector->m_selectors[1];
6195 pBlock->m_sels[2] = pSelector->m_selectors[2];
6196 pBlock->m_sels[3] = pSelector->m_selectors[3];
6197 }
6198 else
6199 {
6200 const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
6201
6202 const uint32_t sel_bits0 = pSelector->m_selectors[0];
6203 const uint32_t sel_bits1 = pSelector->m_selectors[1];
6204 const uint32_t sel_bits2 = pSelector->m_selectors[2];
6205 const uint32_t sel_bits3 = pSelector->m_selectors[3];
6206
6207 uint32_t atc_sels0 = 0, atc_sels1 = 0, atc_sels2 = 0, atc_sels3 = 0;
6208
6209#define DO_X(x) { \
6210 const uint32_t x_shift = (x) * 2; \
6211 atc_sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
6212 atc_sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
6213 atc_sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
6214 atc_sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
6215
6216 DO_X(0);
6217 DO_X(1);
6218 DO_X(2);
6219 DO_X(3);
6220#undef DO_X
6221
6222 pBlock->m_sels[0] = (uint8_t)atc_sels0;
6223 pBlock->m_sels[1] = (uint8_t)atc_sels1;
6224 pBlock->m_sels[2] = (uint8_t)atc_sels2;
6225 pBlock->m_sels[3] = (uint8_t)atc_sels3;
6226 }
6227 }
6228
#if BASISD_WRITE_NEW_ATC_TABLES
	// Endpoint expansion helpers for the table generator. Each converts a reduced-precision endpoint level
	// into the 8-bit value that is compared against the decoded ETC1S colors.
	typedef uint32_t (*atc_tablegen_expand_func)(uint32_t);

	// 5 bits -> 8 bits
	static uint32_t atc_tablegen_expand_5(uint32_t v) { return (v << 3) | (v >> 2); }
	// 6 bits -> 8 bits
	static uint32_t atc_tablegen_expand_6(uint32_t v) { return (v << 2) | (v >> 4); }
	// 4 bits -> 5 bits -> 8 bits
	static uint32_t atc_tablegen_expand_4(uint32_t v) { v = (v << 1) | (v >> 3); return (v << 3) | (v >> 2); }
	// 3 bits with an appended 0 bit -> 4 bits -> 8 bits (low endpoint of the "alpha 33" table)
	static uint32_t atc_tablegen_expand_alpha3_lo(uint32_t v) { v = (v << 1); return (v << 4) | v; }
	// 3 bits with an appended 1 bit -> 4 bits -> 8 bits (high endpoint of the "alpha 33" table)
	static uint32_t atc_tablegen_expand_alpha3_hi(uint32_t v) { v = (v << 1) | 1; return (v << 4) | v; }
#if 0
	// 3 bits -> 5 bits -> 8 bits (only needed by the disabled "PVRTC2 34" table)
	static uint32_t atc_tablegen_expand_3(uint32_t v) { v = (v << 2) | (v >> 1); return (v << 3) | (v >> 2); }
#endif

	// Writes one conversion table to pFilename as a flat list of "{lo,hi,err}," triples.
	// For every (intensity table, 5-bit grayscale base, selector range, selector mapping) it exhaustively searches
	// lo in [0, num_lo) and hi in [0, num_hi) for the endpoint pair with the lowest squared error; the first pair
	// found wins ties (hi is the outer loop, lo the inner loop).
	//   pExpand_lo / pExpand_hi - expand a low/high endpoint level to 8 bits.
	// Returns false, after reporting to stderr, if the file cannot be opened.
	static bool write_etc1s_to_atc_table(const char* pFilename, uint32_t num_lo, uint32_t num_hi, atc_tablegen_expand_func pExpand_lo, atc_tablegen_expand_func pExpand_hi)
	{
		FILE* pFile = nullptr;
		if ((fopen_s(&pFile, pFilename, "w") != 0) || (!pFile))
		{
			fprintf(stderr, "create_etc1s_to_atc_conversion_tables: unable to open \"%s\" for writing\n", pFilename);
			return false;
		}

		uint32_t n = 0;

		for (int inten = 0; inten < 8; inten++)
		{
			for (uint32_t g = 0; g < 32; g++)
			{
				color32 block_colors[4];
				decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

				for (uint32_t sr = 0; sr < NUM_ETC1S_TO_ATC_SELECTOR_RANGES; sr++)
				{
					const uint32_t low_selector = g_etc1s_to_atc_selector_ranges[sr].m_low;
					const uint32_t high_selector = g_etc1s_to_atc_selector_ranges[sr].m_high;

					for (uint32_t m = 0; m < NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS; m++)
					{
						uint32_t best_lo = 0;
						uint32_t best_hi = 0;
						uint64_t best_err = UINT64_MAX;

						for (uint32_t hi = 0; hi < num_hi; hi++)
						{
							for (uint32_t lo = 0; lo < num_lo; lo++)
							{
								uint32_t colors[4];

								colors[0] = pExpand_lo(lo);
								colors[3] = pExpand_hi(hi);

								colors[1] = (colors[0] * 5 + colors[3] * 3) / 8;
								colors[2] = (colors[3] * 5 + colors[0] * 3) / 8;

								uint64_t total_err = 0;

								for (uint32_t s = low_selector; s <= high_selector; s++)
								{
									int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]];

									int err_scale = 1;
									// Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor
									// the low/high selectors which are clamping to either 0 or 255.
									if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3)))
										err_scale = 5;

									total_err += (err * err) * err_scale;
								}

								if (total_err < best_err)
								{
									best_err = total_err;
									best_lo = lo;
									best_hi = hi;
								}
							}
						}

						// The table stores the error in 16 bits, so clamp it. The clamp is done in 64 bits so the
						// value is never truncated before the comparison.
						//assert(best_err <= 0xFFFF);
						best_err = basisu::minimum<uint64_t>(best_err, 0xFFFF);

						fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, (uint32_t)best_err);
						n++;
						if ((n & 31) == 31)
							fprintf(pFile, "\n");
					} // m
				} // sr
			} // g
		} // inten

		fclose(pFile);
		return true;
	}

	// Regenerates the .inc conversion tables in the current directory. Stops at the first table whose
	// output file cannot be opened.
	static void create_etc1s_to_atc_conversion_tables()
	{
		// ATC 55
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_atc_55.inc", 32, 32, atc_tablegen_expand_5, atc_tablegen_expand_5))
			return;

		// ATC 56
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_atc_56.inc", 32, 64, atc_tablegen_expand_5, atc_tablegen_expand_6))
			return;

		// PVRTC2 45
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_pvrtc2_45.inc", 16, 32, atc_tablegen_expand_4, atc_tablegen_expand_5))
			return;

#if 0
		// PVRTC2 34
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_pvrtc2_34.inc", 8, 16, atc_tablegen_expand_3, atc_tablegen_expand_4))
			return;
#endif
#if 0
		// PVRTC2 44
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_pvrtc2_44.inc", 16, 16, atc_tablegen_expand_4, atc_tablegen_expand_4))
			return;
#endif

		// PVRTC2 alpha 33
		if (!write_etc1s_to_atc_table("basisu_transcoder_tables_pvrtc2_alpha_33.inc", 8, 8, atc_tablegen_expand_alpha3_lo, atc_tablegen_expand_alpha3_hi))
			return;
	}
#endif // BASISD_WRITE_NEW_ATC_TABLES
6687
6688#endif // BASISD_SUPPORT_ATC
6689
6690#if BASISD_SUPPORT_PVRTC2
// One 8-byte PVRTC2 block: four bytes of per-row modulation data followed by a 32-bit word of packed endpoint colors and flags.
// The 32-bit word can be viewed three ways (all overlapping the same storage):
//   m_opaque_color_data - layout used when color A is RGB 554 and color B is RGB 555
//   m_trans_color_data  - layout used when color A is RGBA 4433 and color B is RGBA 4443
//   m_color_data_bits   - the raw 32 bits
// In both bitfield layouts the mod flag, hard flag and opaque flag fall at the same bit positions (the field widths before each flag sum to the same totals).
struct pvrtc2_block
{
	uint8_t m_modulation[4];

	union
	{
		// Opaque mode: RGB colora=554 and colorb=555
		struct
		{
			uint32_t m_mod_flag : 1;
			uint32_t m_blue_a : 4;
			uint32_t m_green_a : 5;
			uint32_t m_red_a : 5;
			uint32_t m_hard_flag : 1;
			uint32_t m_blue_b : 5;
			uint32_t m_green_b : 5;
			uint32_t m_red_b : 5;
			uint32_t m_opaque_flag : 1;
		} m_opaque_color_data;

		// Transparent mode: RGBA colora=4433 and colorb=4443
		struct
		{
			uint32_t m_mod_flag : 1;
			uint32_t m_blue_a : 3;
			uint32_t m_green_a : 4;
			uint32_t m_red_a : 4;
			uint32_t m_alpha_a : 3;
			uint32_t m_hard_flag : 1;
			uint32_t m_blue_b : 4;
			uint32_t m_green_b : 4;
			uint32_t m_red_b : 4;
			uint32_t m_alpha_b : 3;
			uint32_t m_opaque_flag : 1;
		} m_trans_color_data;

		uint32_t m_color_data_bits;
	};

	// Stores color A using the opaque layout (554): r and g must be < 32, b must be < 16.
	// Only the three color A fields are written; flags and color B are left untouched.
	void set_low_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 16));

		auto& fields = m_opaque_color_data;
		fields.m_red_a = r;
		fields.m_green_a = g;
		fields.m_blue_a = b;
	}

	// Stores color B using the opaque layout (555): every component must be < 32.
	void set_high_color(uint32_t r, uint32_t g, uint32_t b)
	{
		assert((r < 32) && (g < 32) && (b < 32));

		auto& fields = m_opaque_color_data;
		fields.m_red_b = r;
		fields.m_green_b = g;
		fields.m_blue_b = b;
	}

	// Stores color A using the transparent layout (4433): r and g must be < 16, b and a must be < 8.
	void set_trans_low_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert((r < 16) && (g < 16) && (b < 8) && (a < 8));

		auto& fields = m_trans_color_data;
		fields.m_red_a = r;
		fields.m_green_a = g;
		fields.m_blue_a = b;
		fields.m_alpha_a = a;
	}

	// Stores color B using the transparent layout (4443): r, g and b must be < 16, a must be < 8.
	void set_trans_high_color(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
	{
		assert((r < 16) && (g < 16) && (b < 16) && (a < 8));

		auto& fields = m_trans_color_data;
		fields.m_red_b = r;
		fields.m_green_b = g;
		fields.m_blue_b = b;
		fields.m_alpha_b = a;
	}
};
6773
// One entry of a PVRTC2 endpoint lookup table: the packed low (m_l) and high (m_h) endpoint values chosen for an 8-bit input value.
struct pvrtc2_endpoint_match
{
	uint8_t m_l;
	uint8_t m_h;
};

// All five tables are indexed by an 8-bit value and are filled in at runtime by transcoder_init_pvrtc2().

// Best 3-bit low / 4-bit high color pair whose (low*5 + high*3)/8 blend is closest to the value.
static pvrtc2_endpoint_match g_pvrtc2_trans_match34[256];

// Best 4-bit low / 4-bit high color pair whose (low*5 + high*3)/8 blend is closest to the value.
static pvrtc2_endpoint_match g_pvrtc2_trans_match44[256];

// Best 3-bit low / 3-bit high alpha pair whose (low*5 + high*3)/8 blend is closest to the value.
static pvrtc2_endpoint_match g_pvrtc2_alpha_match33[256];

// Best single 3-bit alpha when only the low endpoint is used (m_l and m_h hold the same value).
static pvrtc2_endpoint_match g_pvrtc2_alpha_match33_0[256];

// Best single 3-bit alpha when only the high endpoint is used (m_l and m_h hold the same value).
static pvrtc2_endpoint_match g_pvrtc2_alpha_match33_3[256];
6798
6799 // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity.
6800 static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector)
6801 {
6802 pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);
6803
6804 pBlock->m_opaque_color_data.m_hard_flag = 1;
6805 pBlock->m_opaque_color_data.m_mod_flag = 0;
6806 pBlock->m_opaque_color_data.m_opaque_flag = 1;
6807
6808 const uint32_t low_selector = pSelector->m_lo_selector;
6809 const uint32_t high_selector = pSelector->m_hi_selector;
6810
6811 const color32& base_color = pEndpoints->m_color5;
6812 const uint32_t inten_table = pEndpoints->m_inten5;
6813
6814 if (low_selector == high_selector)
6815 {
6816 uint32_t r, g, b;
6817 decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);
6818
6819 pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match55_equals_1[g].m_lo, g_pvrtc2_match45_equals_1[b].m_lo);
6820 pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match55_equals_1[g].m_hi, g_pvrtc2_match45_equals_1[b].m_hi);
6821
6822 pBlock->m_modulation[0] = 0x55;
6823 pBlock->m_modulation[1] = 0x55;
6824 pBlock->m_modulation[2] = 0x55;
6825 pBlock->m_modulation[3] = 0x55;
6826
6827 return;
6828 }
6829 else if ((inten_table >= 7) && (pSelector->m_num_unique_selectors == 2) && (pSelector->m_lo_selector == 0) && (pSelector->m_hi_selector == 3))
6830 {
6831 color32 block_colors[4];
6832 decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table);
6833
6834 const uint32_t r0 = block_colors[0].r;
6835 const uint32_t g0 = block_colors[0].g;
6836 const uint32_t b0 = block_colors[0].b;
6837
6838 const uint32_t r1 = block_colors[3].r;
6839 const uint32_t g1 = block_colors[3].g;
6840 const uint32_t b1 = block_colors[3].b;
6841
6842 pBlock->set_low_color(g_atc_match5[r0].m_hi, g_atc_match5[g0].m_hi, g_pvrtc2_match4[b0].m_hi);
6843 pBlock->set_high_color(g_atc_match5[r1].m_hi, g_atc_match5[g1].m_hi, g_atc_match5[b1].m_hi);
6844
6845 pBlock->m_modulation[0] = pSelector->m_selectors[0];
6846 pBlock->m_modulation[1] = pSelector->m_selectors[1];
6847 pBlock->m_modulation[2] = pSelector->m_selectors[2];
6848 pBlock->m_modulation[3] = pSelector->m_selectors[3];
6849
6850 return;
6851 }
6852
6853 const uint32_t selector_range_table = g_etc1s_to_atc_selector_range_index[low_selector][high_selector];
6854
6855 //[32][8][RANGES][MAPPING]
6856 const etc1s_to_atc_solution* pTable_r = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.r) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6857 const etc1s_to_atc_solution* pTable_g = &g_etc1s_to_atc_55[(inten_table * 32 + base_color.g) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6858 const etc1s_to_atc_solution* pTable_b = &g_etc1s_to_pvrtc2_45[(inten_table * 32 + base_color.b) * (NUM_ETC1S_TO_ATC_SELECTOR_RANGES * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS];
6859
6860 uint32_t best_err = UINT_MAX;
6861 uint32_t best_mapping = 0;
6862
6863 assert(NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS == 10);
6864#define DO_ITER(m) { uint32_t total_err = pTable_r[m].m_err + pTable_g[m].m_err + pTable_b[m].m_err; if (total_err < best_err) { best_err = total_err; best_mapping = m; } }
6865 DO_ITER(0); DO_ITER(1); DO_ITER(2); DO_ITER(3); DO_ITER(4);
6866 DO_ITER(5); DO_ITER(6); DO_ITER(7); DO_ITER(8); DO_ITER(9);
6867#undef DO_ITER
6868
6869 pBlock->set_low_color(pTable_r[best_mapping].m_lo, pTable_g[best_mapping].m_lo, pTable_b[best_mapping].m_lo);
6870 pBlock->set_high_color(pTable_r[best_mapping].m_hi, pTable_g[best_mapping].m_hi, pTable_b[best_mapping].m_hi);
6871
6872 if (ATC_IDENTITY_SELECTOR_MAPPING_INDEX == best_mapping)
6873 {
6874 pBlock->m_modulation[0] = pSelector->m_selectors[0];
6875 pBlock->m_modulation[1] = pSelector->m_selectors[1];
6876 pBlock->m_modulation[2] = pSelector->m_selectors[2];
6877 pBlock->m_modulation[3] = pSelector->m_selectors[3];
6878 }
6879 else
6880 {
6881 // TODO: We could make this faster using several precomputed 256 entry tables, like ETC1S->BC1 does.
6882 const uint8_t* pSelectors_xlat = &g_etc1s_to_atc_selector_mappings[best_mapping][0];
6883
6884 const uint32_t sel_bits0 = pSelector->m_selectors[0];
6885 const uint32_t sel_bits1 = pSelector->m_selectors[1];
6886 const uint32_t sel_bits2 = pSelector->m_selectors[2];
6887 const uint32_t sel_bits3 = pSelector->m_selectors[3];
6888
6889 uint32_t sels0 = 0, sels1 = 0, sels2 = 0, sels3 = 0;
6890
6891#define DO_X(x) { \
6892 const uint32_t x_shift = (x) * 2; \
6893 sels0 |= (pSelectors_xlat[(sel_bits0 >> x_shift) & 3] << x_shift); \
6894 sels1 |= (pSelectors_xlat[(sel_bits1 >> x_shift) & 3] << x_shift); \
6895 sels2 |= (pSelectors_xlat[(sel_bits2 >> x_shift) & 3] << x_shift); \
6896 sels3 |= (pSelectors_xlat[(sel_bits3 >> x_shift) & 3] << x_shift); }
6897
6898 DO_X(0);
6899 DO_X(1);
6900 DO_X(2);
6901 DO_X(3);
6902#undef DO_X
6903
6904 pBlock->m_modulation[0] = (uint8_t)sels0;
6905 pBlock->m_modulation[1] = (uint8_t)sels1;
6906 pBlock->m_modulation[2] = (uint8_t)sels2;
6907 pBlock->m_modulation[3] = (uint8_t)sels3;
6908 }
6909 }
6910
6911 typedef struct { float c[4]; } vec4F;
6912
6913 static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; }
6914 static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; }
6915 static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; }
6916 static inline vec4F vec4F_saturate(const vec4F* pV) { vec4F res; res.c[0] = saturate(pV->c[0]); res.c[1] = saturate(pV->c[1]); res.c[2] = saturate(pV->c[2]); res.c[3] = saturate(pV->c[3]); return res; }
6917 static inline vec4F vec4F_from_color(const color32* pC) { vec4F res; vec4F_set(&res, pC->c[0], pC->c[1], pC->c[2], pC->c[3]); return res; }
6918 static inline vec4F vec4F_add(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] + pRHS->c[0], pLHS->c[1] + pRHS->c[1], pLHS->c[2] + pRHS->c[2], pLHS->c[3] + pRHS->c[3]); return res; }
6919 static inline vec4F vec4F_sub(const vec4F* pLHS, const vec4F* pRHS) { vec4F res; vec4F_set(&res, pLHS->c[0] - pRHS->c[0], pLHS->c[1] - pRHS->c[1], pLHS->c[2] - pRHS->c[2], pLHS->c[3] - pRHS->c[3]); return res; }
6920 static inline float vec4F_dot(const vec4F* pLHS, const vec4F* pRHS) { return pLHS->c[0] * pRHS->c[0] + pLHS->c[1] * pRHS->c[1] + pLHS->c[2] * pRHS->c[2] + pLHS->c[3] * pRHS->c[3]; }
6921 static inline vec4F vec4F_mul(const vec4F* pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->c[0] * s, pLHS->c[1] * s, pLHS->c[2] * s, pLHS->c[3] * s); return res; }
6922 static inline vec4F* vec4F_normalize_in_place(vec4F* pV) { float s = pV->c[0] * pV->c[0] + pV->c[1] * pV->c[1] + pV->c[2] * pV->c[2] + pV->c[3] * pV->c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->c[0] *= s; pV->c[1] *= s; pV->c[2] *= s; pV->c[3] *= s; } return pV; }
6923
6924 static color32 convert_rgba_5554_to_8888(const color32& col)
6925 {
6926 return color32((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);
6927 }
6928
// Returns x squared. The multiply is done in int, so the caller must keep |x| small enough that x*x does not overflow.
static inline int sq(int x)
{
	return x * x;
}
6930
// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0.
// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha!
// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
//
// Converts an ETC1S color block plus an ETC1S alpha block into one PVRTC2 block written to pDst.
//   pDst               - destination block. On entry its first two 16-bit words are read as the alpha block's endpoint and selector
//                        indices (into pEndpoint_codebook / pSelector_codebook); the block is then overwritten with the PVRTC2 output.
//   pEndpoints         - endpoint (5-bit base color + intensity table) of the color block.
//   pSelector          - selectors of the color block.
//   pEndpoint_codebook - endpoint codebook used to look up the alpha block's endpoint.
//   pSelector_codebook - selector codebook used to look up the alpha block's selectors.
// Alpha is taken from the green channel of the alpha block's four block colors.
// If the alpha block is constant and >= 250 the block is encoded by convert_etc1s_to_pvrtc2_rgb() instead.
// Otherwise the transparent (4433/4443) layout is used: constant color+alpha blocks pick the best of three fixed-modulation encodings,
// everything else derives min/max RGBA endpoints and then computes a 2-bit modulation value per pixel.
static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
{
	pvrtc2_block* pBlock = static_cast<pvrtc2_block*>(pDst);

	// The alpha block's codebook indices are fetched from the destination block itself, before any field of it is written.
	const endpoint& alpha_endpoint = pEndpoint_codebook[((uint16_t*)pBlock)[0]];
	const selector& alpha_selectors = pSelector_codebook[((uint16_t*)pBlock)[1]];

	// The three flag bits sit at the same bit positions in the opaque and transparent layouts, so writing them through the opaque view is fine.
	pBlock->m_opaque_color_data.m_hard_flag = 1;
	pBlock->m_opaque_color_data.m_mod_flag = 0;
	pBlock->m_opaque_color_data.m_opaque_flag = 0;

	const int num_unique_alpha_selectors = alpha_selectors.m_num_unique_selectors;

	const color32& alpha_base_color = alpha_endpoint.m_color5;
	const uint32_t alpha_inten_table = alpha_endpoint.m_inten5;

	// -1 means "alpha is not constant over the block"; otherwise this is the single alpha value used by every pixel.
	int constant_alpha_val = -1;

	int alpha_block_colors[4];
	decoder_etc_block::get_block_colors5_g(alpha_block_colors, alpha_base_color, alpha_inten_table);

	if (num_unique_alpha_selectors == 1)
	{
		constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];
	}
	else
	{
		// Several selectors are in use, but they may still all decode to the same alpha value. Check every block color in the used selector range.
		constant_alpha_val = alpha_block_colors[alpha_selectors.m_lo_selector];

		for (uint32_t i = alpha_selectors.m_lo_selector + 1; i <= alpha_selectors.m_hi_selector; i++)
		{
			if (constant_alpha_val != alpha_block_colors[i])
			{
				constant_alpha_val = -1;
				break;
			}
		}
	}

	if (constant_alpha_val >= 250)
	{
		// It's opaque enough, so don't bother trying to encode it as an alpha block.
		convert_etc1s_to_pvrtc2_rgb(pDst, pEndpoints, pSelector);
		return;
	}

	const color32& base_color = pEndpoints->m_color5;
	const uint32_t inten_table = pEndpoints->m_inten5;

	const uint32_t low_selector = pSelector->m_lo_selector;
	const uint32_t high_selector = pSelector->m_hi_selector;

	const int num_unique_color_selectors = pSelector->m_num_unique_selectors;

	// We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes.
	// Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values.
	// Expand the 5-bit base color components to 8 bits.
	const int br = (base_color.r << 3) | (base_color.r >> 2);
	const int bg = (base_color.g << 3) | (base_color.g >> 2);
	const int bb = (base_color.b << 3) | (base_color.b >> 2);

	// block_cols[i] holds the clamped RGB for color selector i in .r/.g/.b and the alpha for alpha selector i in .a.
	// Note the RGB and A parts are indexed independently (by the color and alpha selectors respectively) everywhere below.
	color32 block_cols[4];
	for (uint32_t i = 0; i < 4; i++)
	{
		const int ci = g_etc1_inten_tables[inten_table][i];
		block_cols[i].set_clamped(br + ci, bg + ci, bb + ci, alpha_block_colors[i]);
	}

	// The color plane is "solid" if only one selector is used, or if every block color in the used selector range is identical after clamping.
	bool solid_color_block = true;
	if (num_unique_color_selectors > 1)
	{
		for (uint32_t i = low_selector + 1; i <= high_selector; i++)
		{
			if ((block_cols[low_selector].r != block_cols[i].r) || (block_cols[low_selector].g != block_cols[i].g) || (block_cols[low_selector].b != block_cols[i].b))
			{
				solid_color_block = false;
				break;
			}
		}
	}

	if ((solid_color_block) && (constant_alpha_val >= 0))
	{
		// Constant color/alpha block.
		// This is more complex than it may seem because of the way color and alpha are packed in PVRTC2. We need to evaluate mod0, mod1 and mod3 encodings to find the best one.
		uint32_t r, g, b;
		decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b);

		// Mod 0
		// Every pixel uses modulation 0, i.e. only color A (4433) matters. Quantize the target to 4/4/3 bits plus the best 3-bit alpha.
		uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255;
		uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l;

		// Reconstruct the 8-bit color this encoding would produce: first widen to 5554 (color A alpha gets a 0 LSB), then to 8888.
		uint32_t cr0 = (lr0 << 1) | (lr0 >> 3);
		uint32_t cg0 = (lg0 << 1) | (lg0 >> 3);
		uint32_t cb0 = (lb0 << 2) | (lb0 >> 1);
		uint32_t ca0 = (la0 << 1);

		cr0 = (cr0 << 3) | (cr0 >> 2);
		cg0 = (cg0 << 3) | (cg0 >> 2);
		cb0 = (cb0 << 3) | (cb0 >> 2);
		ca0 = (ca0 << 4) | ca0;

		// Squared error, with alpha error weighted double.
		uint32_t err0 = sq(cr0 - r) + sq(cg0 - g) + sq(cb0 - b) + sq(ca0 - constant_alpha_val) * 2;

		// If the alpha is < 3 or so we're kinda screwed. It's better to have some RGB error than it is to turn a 100% transparent area slightly opaque.
		if ((err0 == 0) || (constant_alpha_val < 3))
		{
			pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
			pBlock->set_trans_high_color(0, 0, 0, 0);

			pBlock->m_modulation[0] = 0;
			pBlock->m_modulation[1] = 0;
			pBlock->m_modulation[2] = 0;
			pBlock->m_modulation[3] = 0;
			return;
		}

		// Mod 3
		// Every pixel uses modulation 3, i.e. only color B (4443) matters. Color B alpha gets a 1 LSB when widened.
		uint32_t lr3 = (r * 15 + 128) / 255, lg3 = (g * 15 + 128) / 255, lb3 = (b * 15 + 128) / 255;
		uint32_t la3 = g_pvrtc2_alpha_match33_3[constant_alpha_val].m_l;

		uint32_t cr3 = (lr3 << 1) | (lr3 >> 3);
		uint32_t cg3 = (lg3 << 1) | (lg3 >> 3);
		uint32_t cb3 = (lb3 << 1) | (lb3 >> 3);
		uint32_t ca3 = (la3 << 1) | 1;

		cr3 = (cr3 << 3) | (cr3 >> 2);
		cg3 = (cg3 << 3) | (cg3 >> 2);
		cb3 = (cb3 << 3) | (cb3 >> 2);
		ca3 = (ca3 << 4) | ca3;

		uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2;

		// Mod 1
		// Every pixel uses modulation 1, i.e. the (A*5 + B*3)/8 blend. The best A/B pair per channel comes from the precomputed match tables.
		uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l;
		uint32_t hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h;
		uint32_t la1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_l, ha1 = g_pvrtc2_alpha_match33[constant_alpha_val].m_h;

		// Reconstruct 8-bit color A.
		uint32_t clr1 = (lr1 << 1) | (lr1 >> 3);
		uint32_t clg1 = (lg1 << 1) | (lg1 >> 3);
		uint32_t clb1 = (lb1 << 2) | (lb1 >> 1);
		uint32_t cla1 = (la1 << 1);

		clr1 = (clr1 << 3) | (clr1 >> 2);
		clg1 = (clg1 << 3) | (clg1 >> 2);
		clb1 = (clb1 << 3) | (clb1 >> 2);
		cla1 = (cla1 << 4) | cla1;

		// Reconstruct 8-bit color B.
		uint32_t chr1 = (hr1 << 1) | (hr1 >> 3);
		uint32_t chg1 = (hg1 << 1) | (hg1 >> 3);
		uint32_t chb1 = (hb1 << 1) | (hb1 >> 3);
		uint32_t cha1 = (ha1 << 1) | 1;

		chr1 = (chr1 << 3) | (chr1 >> 2);
		chg1 = (chg1 << 3) | (chg1 >> 2);
		chb1 = (chb1 << 3) | (chb1 >> 2);
		cha1 = (cha1 << 4) | cha1;

		// The blended color actually produced by modulation value 1.
		uint32_t r1 = (clr1 * 5 + chr1 * 3) / 8;
		uint32_t g1 = (clg1 * 5 + chg1 * 3) / 8;
		uint32_t b1 = (clb1 * 5 + chb1 * 3) / 8;
		uint32_t a1 = (cla1 * 5 + cha1 * 3) / 8;

		uint32_t err1 = sq(r1 - r) + sq(g1 - g) + sq(b1 - b) + sq(a1 - constant_alpha_val) * 2;

		// Pick the lowest error encoding. Mod 1 must be strictly better than both others; mod 0 must be strictly better than mod 3.
		if ((err1 < err0) && (err1 < err3))
		{
			pBlock->set_trans_low_color(lr1, lg1, lb1, la1);
			pBlock->set_trans_high_color(hr1, hg1, hb1, ha1);

			pBlock->m_modulation[0] = 0x55;
			pBlock->m_modulation[1] = 0x55;
			pBlock->m_modulation[2] = 0x55;
			pBlock->m_modulation[3] = 0x55;
		}
		else if (err0 < err3)
		{
			pBlock->set_trans_low_color(lr0, lg0, lb0, la0);
			pBlock->set_trans_high_color(0, 0, 0, 0);

			pBlock->m_modulation[0] = 0;
			pBlock->m_modulation[1] = 0;
			pBlock->m_modulation[2] = 0;
			pBlock->m_modulation[3] = 0;
		}
		else
		{
			pBlock->set_trans_low_color(0, 0, 0, 0);
			pBlock->set_trans_high_color(lr3, lg3, lb3, la3);

			pBlock->m_modulation[0] = 0xFF;
			pBlock->m_modulation[1] = 0xFF;
			pBlock->m_modulation[2] = 0xFF;
			pBlock->m_modulation[3] = 0xFF;
		}

		return;
	}

	// It's a complex block with non-solid color and/or alpha pixels.
	// minColor/maxColor are the unquantized RGBA endpoints, each component normalized to [0,1].
	vec4F minColor, maxColor;

	if (solid_color_block)
	{
		// It's a solid color block.
		// RGB is identical at both endpoints; only alpha spans a range.
		uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a;
		uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a;

		const float S = 1.0f / 255.0f;
		vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S);
		vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S);
	}
	else if (constant_alpha_val >= 0)
	{
		// It's a solid alpha block.
		// Alpha is identical at both endpoints; only RGB spans a range.
		const float S = 1.0f / 255.0f;
		vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S);
		vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S);
	}
	// See if any of the block colors got clamped - if so the principal axis got distorted (it's no longer just the ETC1S luma axis).
	// To keep quality up we need to use full 4D PCA in this case.
	else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) ||
		(block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) ||
		(block_cols[low_selector].c[2] == 0) || (block_cols[high_selector].c[2] == 255) ||
		(block_cols[alpha_selectors.m_lo_selector].c[3] == 0) || (block_cols[alpha_selectors.m_hi_selector].c[3] == 255))
	{
		// Find principal component of RGBA colors treated as 4D vectors.
		color32 pixels[16];

		// Decode all 16 pixels (RGB from the color plane, A from the alpha plane) and accumulate component sums for the mean.
		uint32_t sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
		for (uint32_t i = 0; i < 16; i++)
		{
			color32 rgb(block_cols[pSelector->get_selector(i & 3, i >> 2)]);
			uint32_t a = block_cols[alpha_selectors.get_selector(i & 3, i >> 2)].a;

			pixels[i].set(rgb.r, rgb.g, rgb.b, a);

			sum_r += rgb.r;
			sum_g += rgb.g;
			sum_b += rgb.b;
			sum_a += a;
		}

		// meanColorScaled is the mean in 0..255 units (used against the raw pixels); meanColor is the same mean normalized to [0,1].
		vec4F meanColor;
		vec4F_set(&meanColor, (float)sum_r, (float)sum_g, (float)sum_b, (float)sum_a);
		vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / 16.0f);

		meanColor = vec4F_mul(&meanColor, 1.0f / (float)(16.0f * 255.0f));
		vec4F_saturate_in_place(&meanColor);

		vec4F axis;
		vec4F_set_scalar(&axis, 0.0f);
		// Why this incremental method? Because it's stable and predictable. Covar+power method can require a lot of iterations to converge in 4D.
		for (uint32_t i = 0; i < 16; i++)
		{
			vec4F color = vec4F_from_color(&pixels[i]);
			color = vec4F_sub(&color, &meanColorScaled);
			// a..d are the four rows of this pixel's outer product (color * color^T).
			vec4F a = vec4F_mul(&color, color.c[0]);
			vec4F b = vec4F_mul(&color, color.c[1]);
			vec4F c = vec4F_mul(&color, color.c[2]);
			vec4F d = vec4F_mul(&color, color.c[3]);
			// Current axis estimate (normalized); the first pixel seeds it with its own direction.
			vec4F n = i ? axis : color;
			vec4F_normalize_in_place(&n);
			axis.c[0] += vec4F_dot(&a, &n);
			axis.c[1] += vec4F_dot(&b, &n);
			axis.c[2] += vec4F_dot(&c, &n);
			axis.c[3] += vec4F_dot(&d, &n);
		}

		vec4F_normalize_in_place(&axis);

		// vec4F_normalize_in_place() leaves an all-zero vector untouched; in that case fall back to the (.5,.5,.5,.5) diagonal, which has unit length.
		if (vec4F_dot(&axis, &axis) < .5f)
			vec4F_set_scalar(&axis, .5f);

		// Project every pixel onto the axis and record the extreme projections.
		float l = 1e+9f, h = -1e+9f;

		for (uint32_t i = 0; i < 16; i++)
		{
			vec4F color = vec4F_from_color(&pixels[i]);

			vec4F q = vec4F_sub(&color, &meanColorScaled);
			float d = vec4F_dot(&q, &axis);

			l = basisu::minimum(l, d);
			h = basisu::maximum(h, d);
		}

		// Projections were measured in 0..255 units; bring them into the [0,1] space of meanColor.
		l *= (1.0f / 255.0f);
		h *= (1.0f / 255.0f);

		// Endpoints are the mean displaced along the axis by the extreme projections, clamped to [0,1].
		vec4F b0 = vec4F_mul(&axis, l);
		vec4F b1 = vec4F_mul(&axis, h);
		vec4F c0 = vec4F_add(&meanColor, &b0);
		vec4F c1 = vec4F_add(&meanColor, &b1);
		minColor = vec4F_saturate(&c0);
		maxColor = vec4F_saturate(&c1);
		// Keep the endpoint with the lower alpha as minColor.
		if (minColor.c[3] > maxColor.c[3])
		{
			// VS 2019 release Code Generator issue
			//std::swap(minColor, maxColor);

			// Manual swap via temporaries.
			// NOTE(review): the two following statements are identical; the second copy just repeats the same assignments
			// and looks like a copy/paste leftover. It is harmless, but confirm before removing it given the codegen workaround above.
			float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3];
			minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
			minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3];
			maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d;
		}
	}
	else
	{
		// We know the RGB axis is luma, because it's an ETC1S block and none of the block colors got clamped. So we only need to use 2D PCA.
		// We project each LA vector onto two 2D lines with axes (1,1) and (1,-1) and find the largest projection to determine if axis A is flipped relative to L.
		// L is r+g+b and A is scaled by 3 so both share the same 0..765 range.
		uint32_t block_cols_l[4], block_cols_a[4];
		for (uint32_t i = 0; i < 4; i++)
		{
			block_cols_l[i] = block_cols[i].r + block_cols[i].g + block_cols[i].b;
			block_cols_a[i] = block_cols[i].a * 3;
		}

		// p0 = projection onto (1,1), p1 = projection onto (1,-1). Track the min/max of each over all 16 pixels.
		int p0_min = INT_MAX, p0_max = INT_MIN;
		int p1_min = INT_MAX, p1_max = INT_MIN;
		for (uint32_t y = 0; y < 4; y++)
		{
			// One byte per row, four 2-bit selectors per byte (pixel x uses bits 2x..2x+1). The four scopes below handle x = 0..3.
			const uint32_t cs = pSelector->m_selectors[y];
			const uint32_t as = alpha_selectors.m_selectors[y];

			{
				const int l = block_cols_l[cs & 3];
				const int a = block_cols_a[as & 3];
				const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
				const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
			}
			{
				const int l = block_cols_l[(cs >> 2) & 3];
				const int a = block_cols_a[(as >> 2) & 3];
				const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
				const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
			}
			{
				const int l = block_cols_l[(cs >> 4) & 3];
				const int a = block_cols_a[(as >> 4) & 3];
				const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
				const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
			}
			{
				const int l = block_cols_l[cs >> 6];
				const int a = block_cols_a[as >> 6];
				const int p0 = l + a; p0_min = basisu::minimum(p0_min, p0); p0_max = basisu::maximum(p0_max, p0);
				const int p1 = l - a; p1_min = basisu::minimum(p1_min, p1); p1_max = basisu::maximum(p1_max, p1);
			}
		}

		// Spread of the pixels along each candidate axis.
		int dist0 = p0_max - p0_min;
		int dist1 = p1_max - p1_min;

		const float S = 1.0f / 255.0f;

		// Start by pairing low RGB with low alpha and high RGB with high alpha.
		vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, block_cols[alpha_selectors.m_lo_selector].a * S);
		vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, block_cols[alpha_selectors.m_hi_selector].a * S);

		// See if the A component of the principal axis is flipped relative to L. If so, we need to flip either RGB or A bounds.
		if (dist1 > dist0)
		{
			std::swap(minColor.c[0], maxColor.c[0]);
			std::swap(minColor.c[1], maxColor.c[1]);
			std::swap(minColor.c[2], maxColor.c[2]);
		}
	}

	// 4433 4443
	// Quantize the [0,1] endpoints to the packed bit depths (round to nearest).
	color32 trialMinColor, trialMaxColor;

	trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f));
	trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f));

	pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a);
	pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a);

	// Reconstruct the endpoints as they were just packed: first widen to 5554 (color A alpha gets a 0 LSB, color B alpha a 1 LSB), then to 8888.
	color32 color_a((trialMinColor.r << 1) | (trialMinColor.r >> 3), (trialMinColor.g << 1) | (trialMinColor.g >> 3), (trialMinColor.b << 2) | (trialMinColor.b >> 1), trialMinColor.a << 1);
	color32 color_b((trialMaxColor.r << 1) | (trialMaxColor.r >> 3), (trialMaxColor.g << 1) | (trialMaxColor.g >> 3), (trialMaxColor.b << 1) | (trialMaxColor.b >> 3), (trialMaxColor.a << 1) | 1);

	color32 color0(convert_rgba_5554_to_8888(color_a));
	color32 color3(convert_rgba_5554_to_8888(color_b));

	const int lr = color0.r;
	const int lg = color0.g;
	const int lb = color0.b;
	const int la = color0.a;

	// Vector from the reconstructed low endpoint to the reconstructed high endpoint, and its squared length.
	const int axis_r = color3.r - lr;
	const int axis_g = color3.g - lg;
	const int axis_b = color3.b - lb;
	const int axis_a = color3.a - la;
	const int len_a = (axis_r * axis_r) + (axis_g * axis_g) + (axis_b * axis_b) + (axis_a * axis_a);

	// Decision thresholds on the (unnormalized) projection, at 3/16, 1/2 and 13/16 of the squared axis length.
	// These are the midpoints between the blend positions 0, 3/8, 5/8 and 1 (the 5/8-3/8 blend is the one used for modulation 1 above).
	const int thresh01 = (len_a * 3) / 16;
	const int thresh12 = len_a >> 1;
	const int thresh23 = (len_a * 13) / 16;

	if ((axis_r | axis_g | axis_b) == 0)
	{
		// The endpoints only differ in alpha, so the modulation value depends on the alpha selector alone.
		// Precompute the modulation value for each of the four alpha block colors.
		int ca_sel[4];

		for (uint32_t i = 0; i < 4; i++)
		{
			int ca = (block_cols[i].a - la) * axis_a;
			ca_sel[i] = (ca >= thresh23) + (ca >= thresh12) + (ca >= thresh01);
		}

		for (uint32_t y = 0; y < 4; y++)
		{
			const uint32_t a_sels = alpha_selectors.m_selectors[y];

			uint32_t sel = ca_sel[a_sels & 3] | (ca_sel[(a_sels >> 2) & 3] << 2) | (ca_sel[(a_sels >> 4) & 3] << 4) | (ca_sel[a_sels >> 6] << 6);

			pBlock->m_modulation[y] = (uint8_t)sel;
		}
	}
	else
	{
		// General case. The projection of a pixel splits into an RGB part (depends only on the color selector)
		// and an alpha part (depends only on the alpha selector), so precompute both per selector value.
		int cy[4], ca[4];

		for (uint32_t i = 0; i < 4; i++)
		{
			cy[i] = (block_cols[i].r - lr) * axis_r + (block_cols[i].g - lg) * axis_g + (block_cols[i].b - lb) * axis_b;
			ca[i] = (block_cols[i].a - la) * axis_a;
		}

		for (uint32_t y = 0; y < 4; y++)
		{
			const uint32_t c_sels = pSelector->m_selectors[y];
			const uint32_t a_sels = alpha_selectors.m_selectors[y];

			// Full projections of the four pixels in this row.
			const int d0 = cy[c_sels & 3] + ca[a_sels & 3];
			const int d1 = cy[(c_sels >> 2) & 3] + ca[(a_sels >> 2) & 3];
			const int d2 = cy[(c_sels >> 4) & 3] + ca[(a_sels >> 4) & 3];
			const int d3 = cy[c_sels >> 6] + ca[a_sels >> 6];

			// Each pixel's modulation value is the number of thresholds its projection reaches (0..3).
			uint32_t sel = ((d0 >= thresh23) + (d0 >= thresh12) + (d0 >= thresh01)) |
				(((d1 >= thresh23) + (d1 >= thresh12) + (d1 >= thresh01)) << 2) |
				(((d2 >= thresh23) + (d2 >= thresh12) + (d2 >= thresh01)) << 4) |
				(((d3 >= thresh23) + (d3 >= thresh12) + (d3 >= thresh01)) << 6);

			pBlock->m_modulation[y] = (uint8_t)sel;
		}
	}
}
7379
7380 static void transcoder_init_pvrtc2()
7381 {
7382 for (uint32_t v = 0; v < 256; v++)
7383 {
7384 int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7385
7386 for (uint32_t l = 0; l < 8; l++)
7387 {
7388 uint32_t le = (l << 1);
7389 le = (le << 4) | le;
7390
7391 for (uint32_t h = 0; h < 8; h++)
7392 {
7393 uint32_t he = (h << 1) | 1;
7394 he = (he << 4) | he;
7395
7396 uint32_t m = (le * 5 + he * 3) / 8;
7397
7398 int err = (int)labs((int)v - (int)m);
7399 if (err < lowest_err)
7400 {
7401 lowest_err = err;
7402 best_l = l;
7403 best_h = h;
7404 }
7405 }
7406 }
7407
7408 g_pvrtc2_alpha_match33[v].m_l = (uint8_t)best_l;
7409 g_pvrtc2_alpha_match33[v].m_h = (uint8_t)best_h;
7410 }
7411
7412 for (uint32_t v = 0; v < 256; v++)
7413 {
7414 int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7415
7416 for (uint32_t l = 0; l < 8; l++)
7417 {
7418 uint32_t le = (l << 1);
7419 le = (le << 4) | le;
7420
7421 int err = (int)labs((int)v - (int)le);
7422 if (err < lowest_err)
7423 {
7424 lowest_err = err;
7425 best_l = l;
7426 best_h = l;
7427 }
7428 }
7429
7430 g_pvrtc2_alpha_match33_0[v].m_l = (uint8_t)best_l;
7431 g_pvrtc2_alpha_match33_0[v].m_h = (uint8_t)best_h;
7432 }
7433
7434 for (uint32_t v = 0; v < 256; v++)
7435 {
7436 int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7437
7438 for (uint32_t h = 0; h < 8; h++)
7439 {
7440 uint32_t he = (h << 1) | 1;
7441 he = (he << 4) | he;
7442
7443 int err = (int)labs((int)v - (int)he);
7444 if (err < lowest_err)
7445 {
7446 lowest_err = err;
7447 best_l = h;
7448 best_h = h;
7449 }
7450 }
7451
7452 g_pvrtc2_alpha_match33_3[v].m_l = (uint8_t)best_l;
7453 g_pvrtc2_alpha_match33_3[v].m_h = (uint8_t)best_h;
7454 }
7455
7456 for (uint32_t v = 0; v < 256; v++)
7457 {
7458 int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7459
7460 for (uint32_t l = 0; l < 8; l++)
7461 {
7462 uint32_t le = (l << 2) | (l >> 1);
7463 le = (le << 3) | (le >> 2);
7464
7465 for (uint32_t h = 0; h < 16; h++)
7466 {
7467 uint32_t he = (h << 1) | (h >> 3);
7468 he = (he << 3) | (he >> 2);
7469
7470 uint32_t m = (le * 5 + he * 3) / 8;
7471
7472 int err = (int)labs((int)v - (int)m);
7473 if (err < lowest_err)
7474 {
7475 lowest_err = err;
7476 best_l = l;
7477 best_h = h;
7478 }
7479 }
7480 }
7481
7482 g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l;
7483 g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h;
7484 }
7485
7486 for (uint32_t v = 0; v < 256; v++)
7487 {
7488 int best_l = 0, best_h = 0, lowest_err = INT_MAX;
7489
7490 for (uint32_t l = 0; l < 16; l++)
7491 {
7492 uint32_t le = (l << 1) | (l >> 3);
7493 le = (le << 3) | (le >> 2);
7494
7495 for (uint32_t h = 0; h < 16; h++)
7496 {
7497 uint32_t he = (h << 1) | (h >> 3);
7498 he = (he << 3) | (he >> 2);
7499
7500 uint32_t m = (le * 5 + he * 3) / 8;
7501
7502 int err = (int)labs((int)v - (int)m);
7503 if (err < lowest_err)
7504 {
7505 lowest_err = err;
7506 best_l = l;
7507 best_h = h;
7508 }
7509 }
7510 }
7511
7512 g_pvrtc2_trans_match44[v].m_l = (uint8_t)best_l;
7513 g_pvrtc2_trans_match44[v].m_h = (uint8_t)best_h;
7514 }
7515 }
7516#endif // BASISD_SUPPORT_PVRTC2
7517
7518 basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
7519 m_pGlobal_codebook(nullptr),
7520 m_selector_history_buf_size(0)
7521 {
7522 }
7523
7524 bool basisu_lowlevel_etc1s_transcoder::decode_palettes(
7525 uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size,
7526 uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size)
7527 {
7528 if (m_pGlobal_codebook)
7529 {
7530 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n");
7531 return false;
7532 }
7533 bitwise_decoder sym_codec;
7534
7535 huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model;
7536
7537 if (!sym_codec.init(pEndpoints_data, endpoints_data_size))
7538 {
7539 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n");
7540 return false;
7541 }
7542
7543 if (!sym_codec.read_huffman_table(color5_delta_model0))
7544 {
7545 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n");
7546 return false;
7547 }
7548
7549 if (!sym_codec.read_huffman_table(color5_delta_model1))
7550 {
7551 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n");
7552 return false;
7553 }
7554
7555 if (!sym_codec.read_huffman_table(color5_delta_model2))
7556 {
7557 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n");
7558 return false;
7559 }
7560
7561 if (!sym_codec.read_huffman_table(inten_delta_model))
7562 {
7563 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
7564 return false;
7565 }
7566
7567 if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid())
7568 {
7569 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n");
7570 return false;
7571 }
7572
7573 const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0;
7574
7575 m_local_endpoints.resize(num_endpoints);
7576
7577 color32 prev_color5(16, 16, 16, 0);
7578 uint32_t prev_inten = 0;
7579
7580 for (uint32_t i = 0; i < num_endpoints; i++)
7581 {
7582 uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model);
7583 m_local_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7);
7584 prev_inten = m_local_endpoints[i].m_inten5;
7585
7586 for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++)
7587 {
7588 int delta;
7589 if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI)
7590 delta = sym_codec.decode_huffman(color5_delta_model0);
7591 else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI)
7592 delta = sym_codec.decode_huffman(color5_delta_model1);
7593 else
7594 delta = sym_codec.decode_huffman(color5_delta_model2);
7595
7596 int v = (prev_color5[c] + delta) & 31;
7597
7598 m_local_endpoints[i].m_color5[c] = static_cast<uint8_t>(v);
7599
7600 prev_color5[c] = static_cast<uint8_t>(v);
7601 }
7602
7603 if (endpoints_are_grayscale)
7604 {
7605 m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0];
7606 m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0];
7607 }
7608 }
7609
7610 sym_codec.stop();
7611
7612 m_local_selectors.resize(num_selectors);
7613
7614 if (!sym_codec.init(pSelectors_data, selectors_data_size))
7615 {
7616 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n");
7617 return false;
7618 }
7619
7620 basist::huffman_decoding_table delta_selector_pal_model;
7621
7622 const bool used_global_selector_cb = (sym_codec.get_bits(1) == 1);
7623
7624 if (used_global_selector_cb)
7625 {
7626 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: global selector codebooks are unsupported\n");
7627 return false;
7628 }
7629 else
7630 {
7631 const bool used_hybrid_selector_cb = (sym_codec.get_bits(1) == 1);
7632
7633 if (used_hybrid_selector_cb)
7634 {
7635 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n");
7636 return false;
7637 }
7638
7639 const bool used_raw_encoding = (sym_codec.get_bits(1) == 1);
7640
7641 if (used_raw_encoding)
7642 {
7643 for (uint32_t i = 0; i < num_selectors; i++)
7644 {
7645 for (uint32_t j = 0; j < 4; j++)
7646 {
7647 uint32_t cur_byte = sym_codec.get_bits(8);
7648
7649 for (uint32_t k = 0; k < 4; k++)
7650 m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
7651 }
7652
7653 m_local_selectors[i].init_flags();
7654 }
7655 }
7656 else
7657 {
7658 if (!sym_codec.read_huffman_table(delta_selector_pal_model))
7659 {
7660 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n");
7661 return false;
7662 }
7663
7664 if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid()))
7665 {
7666 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n");
7667 return false;
7668 }
7669
7670 uint8_t prev_bytes[4] = { 0, 0, 0, 0 };
7671
7672 for (uint32_t i = 0; i < num_selectors; i++)
7673 {
7674 if (!i)
7675 {
7676 for (uint32_t j = 0; j < 4; j++)
7677 {
7678 uint32_t cur_byte = sym_codec.get_bits(8);
7679 prev_bytes[j] = static_cast<uint8_t>(cur_byte);
7680
7681 for (uint32_t k = 0; k < 4; k++)
7682 m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
7683 }
7684 m_local_selectors[i].init_flags();
7685 continue;
7686 }
7687
7688 for (uint32_t j = 0; j < 4; j++)
7689 {
7690 int delta_byte = sym_codec.decode_huffman(delta_selector_pal_model);
7691
7692 uint32_t cur_byte = delta_byte ^ prev_bytes[j];
7693 prev_bytes[j] = static_cast<uint8_t>(cur_byte);
7694
7695 for (uint32_t k = 0; k < 4; k++)
7696 m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3);
7697 }
7698 m_local_selectors[i].init_flags();
7699 }
7700 }
7701 }
7702
7703 sym_codec.stop();
7704
7705 return true;
7706 }
7707
7708 bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size)
7709 {
7710 basist::bitwise_decoder sym_codec;
7711 if (!sym_codec.init(pTable_data, table_data_size))
7712 {
7713 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n");
7714 return false;
7715 }
7716
7717 if (!sym_codec.read_huffman_table(m_endpoint_pred_model))
7718 {
7719 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n");
7720 return false;
7721 }
7722
7723 if (m_endpoint_pred_model.get_code_sizes().size() == 0)
7724 {
7725 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n");
7726 return false;
7727 }
7728
7729 if (!sym_codec.read_huffman_table(m_delta_endpoint_model))
7730 {
7731 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n");
7732 return false;
7733 }
7734
7735 if (m_delta_endpoint_model.get_code_sizes().size() == 0)
7736 {
7737 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n");
7738 return false;
7739 }
7740
7741 if (!sym_codec.read_huffman_table(m_selector_model))
7742 {
7743 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n");
7744 return false;
7745 }
7746
7747 if (m_selector_model.get_code_sizes().size() == 0)
7748 {
7749 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n");
7750 return false;
7751 }
7752
7753 if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model))
7754 {
7755 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n");
7756 return false;
7757 }
7758
7759 if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0)
7760 {
7761 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n");
7762 return false;
7763 }
7764
7765 m_selector_history_buf_size = sym_codec.get_bits(13);
7766 // Check for bogus values.
7767 if (!m_selector_history_buf_size)
7768 {
7769 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 5\n");
7770 return false;
7771 }
7772
7773 sym_codec.stop();
7774
7775 return true;
7776 }
7777
7778 bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
7779 uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
7780 basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels)
7781 {
7782 // 'pDst_blocks' unused when disabling *all* hardware transcode options
7783 // (and 'bc1_allow_threecolor_blocks' when disabling DXT)
7784 BASISU_NOTE_UNUSED(pDst_blocks);
7785 BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
7786 BASISU_NOTE_UNUSED(transcode_alpha);
7787 BASISU_NOTE_UNUSED(pAlpha_blocks);
7788
7789 assert(g_transcoder_initialized);
7790 if (!g_transcoder_initialized)
7791 {
7792 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n");
7793 return false;
7794 }
7795
7796 if (!pState)
7797 pState = &m_def_state;
7798
7799 const uint32_t total_blocks = num_blocks_x * num_blocks_y;
7800
7801 if (!output_row_pitch_in_blocks_or_pixels)
7802 {
7803 if (basis_block_format_is_uncompressed(fmt))
7804 output_row_pitch_in_blocks_or_pixels = orig_width;
7805 else
7806 {
7807 if (fmt == block_format::cFXT1_RGB)
7808 output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
7809 else
7810 output_row_pitch_in_blocks_or_pixels = num_blocks_x;
7811 }
7812 }
7813
7814 if (basis_block_format_is_uncompressed(fmt))
7815 {
7816 if (!output_rows_in_pixels)
7817 output_rows_in_pixels = orig_height;
7818 }
7819
7820 basisu::vector<uint32_t>* pPrev_frame_indices = nullptr;
7821 if (is_video)
7822 {
7823 // TODO: Add check to make sure the caller hasn't tried skipping past p-frames
7824 //const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
7825 //const uint32_t level_index = slice_desc.m_level_index;
7826
7827 if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels)
7828 {
7829 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n");
7830 return false;
7831 }
7832
7833 pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index];
7834 if (pPrev_frame_indices->size() < total_blocks)
7835 pPrev_frame_indices->resize(total_blocks);
7836 }
7837
7838 basist::bitwise_decoder sym_codec;
7839
7840 if (!sym_codec.init(pImage_data, image_data_size))
7841 {
7842 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n");
7843 return false;
7844 }
7845
7846 approx_move_to_front selector_history_buf(m_selector_history_buf_size);
7847
7848 uint32_t cur_selector_rle_count = 0;
7849
7850 decoder_etc_block block;
7851 memset(&block, 0, sizeof(block));
7852
7853 //block.set_flip_bit(true);
7854 // Setting the flip bit to false to be compatible with the Khronos KDFS.
7855 block.set_flip_bit(false);
7856
7857 block.set_diff_bit(true);
7858
7859 void* pPVRTC_work_mem = nullptr;
7860 uint32_t* pPVRTC_endpoints = nullptr;
7861 if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
7862 {
7863 pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t)));
7864 if (!pPVRTC_work_mem)
7865 {
7866 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n");
7867 return false;
7868 }
7869 pPVRTC_endpoints = (uint32_t*) & ((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y];
7870 }
7871
7872 if (pState->m_block_endpoint_preds[0].size() < num_blocks_x)
7873 {
7874 pState->m_block_endpoint_preds[0].resize(num_blocks_x);
7875 pState->m_block_endpoint_preds[1].resize(num_blocks_x);
7876 }
7877
7878 uint32_t cur_pred_bits = 0;
7879 int prev_endpoint_pred_sym = 0;
7880 int endpoint_pred_repeat_count = 0;
7881 uint32_t prev_endpoint_index = 0;
7882 const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints;
7883 const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors;
7884 if (!endpoints.size() || !selectors.size())
7885 {
7886 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n");
7887 return false;
7888 }
7889
7890 const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size();
7891 const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX;
7892
7893 for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
7894 {
7895 const uint32_t cur_block_endpoint_pred_array = block_y & 1;
7896
7897 for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
7898 {
7899 // Decode endpoint index predictor symbols
7900 if ((block_x & 1) == 0)
7901 {
7902 if ((block_y & 1) == 0)
7903 {
7904 if (endpoint_pred_repeat_count)
7905 {
7906 endpoint_pred_repeat_count--;
7907 cur_pred_bits = prev_endpoint_pred_sym;
7908 }
7909 else
7910 {
7911 cur_pred_bits = sym_codec.decode_huffman(m_endpoint_pred_model);
7912 if (cur_pred_bits == ENDPOINT_PRED_REPEAT_LAST_SYMBOL)
7913 {
7914 endpoint_pred_repeat_count = sym_codec.decode_vlc(ENDPOINT_PRED_COUNT_VLC_BITS) + ENDPOINT_PRED_MIN_REPEAT_COUNT - 1;
7915
7916 cur_pred_bits = prev_endpoint_pred_sym;
7917 }
7918 else
7919 {
7920 prev_endpoint_pred_sym = cur_pred_bits;
7921 }
7922 }
7923
7924 pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4);
7925 }
7926 else
7927 {
7928 cur_pred_bits = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits;
7929 }
7930 }
7931
7932 // Decode endpoint index
7933 uint32_t endpoint_index, selector_index = 0;
7934
7935 const uint32_t pred = cur_pred_bits & 3;
7936 cur_pred_bits >>= 2;
7937
7938 if (pred == 0)
7939 {
7940 // Left
7941 if (!block_x)
7942 {
7943 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n");
7944 if (pPVRTC_work_mem)
7945 free(pPVRTC_work_mem);
7946 return false;
7947 }
7948
7949 endpoint_index = prev_endpoint_index;
7950 }
7951 else if (pred == 1)
7952 {
7953 // Upper
7954 if (!block_y)
7955 {
7956 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n");
7957 if (pPVRTC_work_mem)
7958 free(pPVRTC_work_mem);
7959 return false;
7960 }
7961
7962 endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_endpoint_index;
7963 }
7964 else if (pred == 2)
7965 {
7966 if (is_video)
7967 {
7968 assert(pred == CR_ENDPOINT_PRED_INDEX);
7969 endpoint_index = (*pPrev_frame_indices)[block_x + block_y * num_blocks_x];
7970 selector_index = endpoint_index >> 16;
7971 endpoint_index &= 0xFFFFU;
7972 }
7973 else
7974 {
7975 // Upper left
7976 if ((!block_x) || (!block_y))
7977 {
7978 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n");
7979 if (pPVRTC_work_mem)
7980 free(pPVRTC_work_mem);
7981 return false;
7982 }
7983
7984 endpoint_index = pState->m_block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x - 1].m_endpoint_index;
7985 }
7986 }
7987 else
7988 {
7989 // Decode and apply delta
7990 const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model);
7991
7992 endpoint_index = delta_sym + prev_endpoint_index;
7993 if (endpoint_index >= endpoints.size())
7994 endpoint_index -= (int)endpoints.size();
7995 }
7996
7997 pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index;
7998
7999 prev_endpoint_index = endpoint_index;
8000
8001 // Decode selector index
8002 if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX))
8003 {
8004 int selector_sym;
8005 if (cur_selector_rle_count > 0)
8006 {
8007 cur_selector_rle_count--;
8008
8009 selector_sym = (int)selectors.size();
8010 }
8011 else
8012 {
8013 selector_sym = sym_codec.decode_huffman(m_selector_model);
8014
8015 if (selector_sym == static_cast<int>(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX))
8016 {
8017 int run_sym = sym_codec.decode_huffman(m_selector_history_buf_rle_model);
8018
8019 if (run_sym == (SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
8020 cur_selector_rle_count = sym_codec.decode_vlc(7) + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8021 else
8022 cur_selector_rle_count = run_sym + SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
8023
8024 if (cur_selector_rle_count > total_blocks)
8025 {
8026 // The file is corrupted or we've got a bug.
8027 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n");
8028 if (pPVRTC_work_mem)
8029 free(pPVRTC_work_mem);
8030 return false;
8031 }
8032
8033 selector_sym = (int)selectors.size();
8034
8035 cur_selector_rle_count--;
8036 }
8037 }
8038
8039 if (selector_sym >= (int)selectors.size())
8040 {
8041 assert(m_selector_history_buf_size > 0);
8042
8043 int history_buf_index = selector_sym - (int)selectors.size();
8044
8045 if (history_buf_index >= (int)selector_history_buf.size())
8046 {
8047 // The file is corrupted or we've got a bug.
8048 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n");
8049 if (pPVRTC_work_mem)
8050 free(pPVRTC_work_mem);
8051 return false;
8052 }
8053
8054 selector_index = selector_history_buf[history_buf_index];
8055
8056 if (history_buf_index != 0)
8057 selector_history_buf.use(history_buf_index);
8058 }
8059 else
8060 {
8061 selector_index = selector_sym;
8062
8063 if (m_selector_history_buf_size)
8064 selector_history_buf.add(selector_index);
8065 }
8066 }
8067
8068 if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size()))
8069 {
8070 // The file is corrupted or we've got a bug.
8071 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n");
8072 if (pPVRTC_work_mem)
8073 free(pPVRTC_work_mem);
8074 return false;
8075 }
8076
8077 if (is_video)
8078 (*pPrev_frame_indices)[block_x + block_y * num_blocks_x] = endpoint_index | (selector_index << 16);
8079
8080#if BASISD_ENABLE_DEBUG_FLAGS
8081 if ((g_debug_flags & cDebugFlagVisCRs) && ((fmt == block_format::cETC1) || (fmt == block_format::cBC1)))
8082 {
8083 if ((is_video) && (pred == 2))
8084 {
8085 decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8086 memset(pDst_block, 0xFF, 8);
8087 continue;
8088 }
8089 }
8090#endif
8091
8092 const endpoint* pEndpoints = &endpoints[endpoint_index];
8093 const selector* pSelector = &selectors[selector_index];
8094
8095 switch (fmt)
8096 {
8097 case block_format::cETC1:
8098 {
8099 decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8100
8101 block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8102 block.set_inten_table(0, pEndpoints->m_inten5);
8103 block.set_inten_table(1, pEndpoints->m_inten5);
8104
8105 pDst_block->m_uint32[0] = block.m_uint32[0];
8106 pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8107
8108 break;
8109 }
8110 case block_format::cBC1:
8111 {
8112#if BASISD_SUPPORT_DXT1
8113 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8114#if BASISD_ENABLE_DEBUG_FLAGS
8115 if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints))
8116 convert_etc1s_to_dxt1_vis(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8117 else
8118#endif
8119 convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks);
8120#else
8121 assert(0);
8122#endif
8123 break;
8124 }
8125 case block_format::cBC4:
8126 {
8127#if BASISD_SUPPORT_DXT5A
8128 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8129 convert_etc1s_to_dxt5a(static_cast<dxt5a_block*>(pDst_block), pEndpoints, pSelector);
8130#else
8131 assert(0);
8132#endif
8133 break;
8134 }
8135 case block_format::cPVRTC1_4_RGB:
8136 {
8137#if BASISD_SUPPORT_PVRTC1
8138 block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8139 block.set_inten_table(0, pEndpoints->m_inten5);
8140 block.set_inten_table(1, pEndpoints->m_inten5);
8141 block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
8142
8143 ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8144
8145 const color32& base_color = pEndpoints->m_color5;
8146 const uint32_t inten_table = pEndpoints->m_inten5;
8147
8148 const uint32_t low_selector = pSelector->m_lo_selector;
8149 const uint32_t high_selector = pSelector->m_hi_selector;
8150
8151 // Get block's RGB bounding box
8152 color32 block_colors[2];
8153 decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8154
8155 assert(block_colors[0][0] <= block_colors[1][0]);
8156 assert(block_colors[0][1] <= block_colors[1][1]);
8157 assert(block_colors[0][2] <= block_colors[1][2]);
8158
8159 // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8160 pvrtc4_block temp;
8161 temp.set_opaque_endpoint_floor(0, block_colors[0]);
8162 temp.set_opaque_endpoint_ceil(1, block_colors[1]);
8163
8164 pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8165#else
8166 assert(0);
8167#endif
8168
8169 break;
8170 }
8171 case block_format::cPVRTC1_4_RGBA:
8172 {
8173#if BASISD_SUPPORT_PVRTC1
8174 assert(pAlpha_blocks);
8175
8176 block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false));
8177 block.set_inten_table(0, pEndpoints->m_inten5);
8178 block.set_inten_table(1, pEndpoints->m_inten5);
8179 block.set_raw_selector_bits(pSelector->m_selectors[0], pSelector->m_selectors[1], pSelector->m_selectors[2], pSelector->m_selectors[3]);
8180
8181 ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block;
8182
8183 // Get block's RGBA bounding box
8184 const color32& base_color = pEndpoints->m_color5;
8185 const uint32_t inten_table = pEndpoints->m_inten5;
8186 const uint32_t low_selector = pSelector->m_lo_selector;
8187 const uint32_t high_selector = pSelector->m_hi_selector;
8188 color32 block_colors[2];
8189 decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector);
8190
8191 assert(block_colors[0][0] <= block_colors[1][0]);
8192 assert(block_colors[0][1] <= block_colors[1][1]);
8193 assert(block_colors[0][2] <= block_colors[1][2]);
8194
8195 const uint16_t* pAlpha_block = reinterpret_cast<uint16_t*>(static_cast<uint8_t*>(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t));
8196
8197 const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]];
8198 const selector* pAlpha_selector = &selectors[pAlpha_block[1]];
8199
8200 const color32& alpha_base_color = pAlpha_endpoints->m_color5;
8201 const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5;
8202 const uint32_t alpha_low_selector = pAlpha_selector->m_lo_selector;
8203 const uint32_t alpha_high_selector = pAlpha_selector->m_hi_selector;
8204 uint32_t alpha_block_colors[2];
8205 decoder_etc_block::get_block_colors5_bounds_g(alpha_block_colors, alpha_base_color, alpha_inten_table, alpha_low_selector, alpha_high_selector);
8206 assert(alpha_block_colors[0] <= alpha_block_colors[1]);
8207 block_colors[0].a = (uint8_t)alpha_block_colors[0];
8208 block_colors[1].a = (uint8_t)alpha_block_colors[1];
8209
8210 // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
8211 pvrtc4_block temp;
8212 temp.set_endpoint_floor(0, block_colors[0]);
8213 temp.set_endpoint_ceil(1, block_colors[1]);
8214
8215 pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints;
8216#else
8217 assert(0);
8218#endif
8219
8220 break;
8221 }
8222 case block_format::cBC7: // for more consistency with UASTC
8223 case block_format::cBC7_M5_COLOR:
8224 {
8225#if BASISD_SUPPORT_BC7_MODE5
8226 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8227 convert_etc1s_to_bc7_m5_color(pDst_block, pEndpoints, pSelector);
8228#else
8229 assert(0);
8230#endif
8231 break;
8232 }
8233 case block_format::cBC7_M5_ALPHA:
8234 {
8235#if BASISD_SUPPORT_BC7_MODE5
8236 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8237 convert_etc1s_to_bc7_m5_alpha(pDst_block, pEndpoints, pSelector);
8238#else
8239 assert(0);
8240#endif
8241 break;
8242 }
8243 case block_format::cETC2_EAC_A8:
8244 {
8245#if BASISD_SUPPORT_ETC2_EAC_A8
8246 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8247 convert_etc1s_to_etc2_eac_a8(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
8248#else
8249 assert(0);
8250#endif
8251 break;
8252 }
8253 case block_format::cASTC_4x4:
8254 {
8255#if BASISD_SUPPORT_ASTC
8256 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8257 convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]);
8258#else
8259 assert(0);
8260#endif
8261 break;
8262 }
8263 case block_format::cATC_RGB:
8264 {
8265#if BASISD_SUPPORT_ATC
8266 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8267 convert_etc1s_to_atc(pDst_block, pEndpoints, pSelector);
8268#else
8269 assert(0);
8270#endif
8271 break;
8272 }
8273 case block_format::cFXT1_RGB:
8274 {
8275#if BASISD_SUPPORT_FXT1
8276 const uint32_t fxt1_block_x = block_x >> 1;
8277 const uint32_t fxt1_block_y = block_y;
8278 const uint32_t fxt1_subblock = block_x & 1;
8279
8280 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (fxt1_block_x + fxt1_block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8281
8282 convert_etc1s_to_fxt1(pDst_block, pEndpoints, pSelector, fxt1_subblock);
8283#else
8284 assert(0);
8285#endif
8286 break;
8287 }
8288 case block_format::cPVRTC2_4_RGB:
8289 {
8290#if BASISD_SUPPORT_PVRTC2
8291 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8292 convert_etc1s_to_pvrtc2_rgb(pDst_block, pEndpoints, pSelector);
8293#endif
8294 break;
8295 }
8296 case block_format::cPVRTC2_4_RGBA:
8297 {
8298#if BASISD_SUPPORT_PVRTC2
8299 assert(transcode_alpha);
8300
8301 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8302
8303 convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]);
8304#endif
8305 break;
8306 }
8307 case block_format::cIndices:
8308 {
8309 uint16_t* pDst_block = reinterpret_cast<uint16_t *>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes);
8310 pDst_block[0] = static_cast<uint16_t>(endpoint_index);
8311 pDst_block[1] = static_cast<uint16_t>(selector_index);
8312 break;
8313 }
8314 case block_format::cA32:
8315 {
8316 assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
8317 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
8318
8319 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8320 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8321
8322 int colors[4];
8323 decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8324
8325 if (max_x == 4)
8326 {
8327 for (uint32_t y = 0; y < max_y; y++)
8328 {
8329 const uint32_t s = pSelector->m_selectors[y];
8330
8331 pDst_pixels[3] = static_cast<uint8_t>(colors[s & 3]);
8332 pDst_pixels[3+4] = static_cast<uint8_t>(colors[(s >> 2) & 3]);
8333 pDst_pixels[3+8] = static_cast<uint8_t>(colors[(s >> 4) & 3]);
8334 pDst_pixels[3+12] = static_cast<uint8_t>(colors[(s >> 6) & 3]);
8335
8336 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
8337 }
8338 }
8339 else
8340 {
8341 for (uint32_t y = 0; y < max_y; y++)
8342 {
8343 const uint32_t s = pSelector->m_selectors[y];
8344
8345 for (uint32_t x = 0; x < max_x; x++)
8346 pDst_pixels[3 + 4 * x] = static_cast<uint8_t>(colors[(s >> (x * 2)) & 3]);
8347
8348 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
8349 }
8350 }
8351
8352 break;
8353 }
8354 case block_format::cRGB32:
8355 {
8356 assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
8357 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
8358
8359 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8360 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8361
8362 color32 colors[4];
8363 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8364
8365 for (uint32_t y = 0; y < max_y; y++)
8366 {
8367 const uint32_t s = pSelector->m_selectors[y];
8368
8369 for (uint32_t x = 0; x < max_x; x++)
8370 {
8371 const color32& c = colors[(s >> (x * 2)) & 3];
8372
8373 pDst_pixels[0 + 4 * x] = c.r;
8374 pDst_pixels[1 + 4 * x] = c.g;
8375 pDst_pixels[2 + 4 * x] = c.b;
8376 }
8377
8378 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
8379 }
8380
8381 break;
8382 }
8383 case block_format::cRGBA32:
8384 {
8385 assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
8386 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
8387
8388 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8389 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8390
8391 color32 colors[4];
8392 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8393
8394 for (uint32_t y = 0; y < max_y; y++)
8395 {
8396 const uint32_t s = pSelector->m_selectors[y];
8397
8398 for (uint32_t x = 0; x < max_x; x++)
8399 {
8400 const color32& c = colors[(s >> (x * 2)) & 3];
8401
8402 pDst_pixels[0 + 4 * x] = c.r;
8403 pDst_pixels[1 + 4 * x] = c.g;
8404 pDst_pixels[2 + 4 * x] = c.b;
8405 pDst_pixels[3 + 4 * x] = 255;
8406 }
8407
8408 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
8409 }
8410
8411 break;
8412 }
8413 case block_format::cRGB565:
8414 case block_format::cBGR565:
8415 {
8416 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
8417 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
8418
8419 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8420 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8421
8422 color32 colors[4];
8423 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8424
8425 uint16_t packed_colors[4];
8426 if (fmt == block_format::cRGB565)
8427 {
8428 for (uint32_t i = 0; i < 4; i++)
8429 {
8430 packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
8431 if (BASISD_IS_BIG_ENDIAN)
8432 packed_colors[i] = byteswap_uint16(packed_colors[i]);
8433 }
8434 }
8435 else
8436 {
8437 for (uint32_t i = 0; i < 4; i++)
8438 {
8439 packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
8440 if (BASISD_IS_BIG_ENDIAN)
8441 packed_colors[i] = byteswap_uint16(packed_colors[i]);
8442 }
8443 }
8444
8445 for (uint32_t y = 0; y < max_y; y++)
8446 {
8447 const uint32_t s = pSelector->m_selectors[y];
8448
8449 for (uint32_t x = 0; x < max_x; x++)
8450 reinterpret_cast<uint16_t *>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
8451
8452 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
8453 }
8454
8455 break;
8456 }
8457 case block_format::cRGBA4444_COLOR:
8458 {
8459 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
8460 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
8461
8462 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8463 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8464
8465 color32 colors[4];
8466 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8467
8468 uint16_t packed_colors[4];
8469 for (uint32_t i = 0; i < 4; i++)
8470 {
8471 packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4));
8472 }
8473
8474 for (uint32_t y = 0; y < max_y; y++)
8475 {
8476 const uint32_t s = pSelector->m_selectors[y];
8477
8478 for (uint32_t x = 0; x < max_x; x++)
8479 {
8480 uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
8481 if (BASISD_IS_BIG_ENDIAN)
8482 cur = byteswap_uint16(cur);
8483
8484 cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
8485
8486 if (BASISD_IS_BIG_ENDIAN)
8487 cur = byteswap_uint16(cur);
8488
8489 reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
8490 }
8491
8492 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
8493 }
8494
8495 break;
8496 }
8497 case block_format::cRGBA4444_COLOR_OPAQUE:
8498 {
8499 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
8500 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
8501
8502 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8503 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8504
8505 color32 colors[4];
8506 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8507
8508 uint16_t packed_colors[4];
8509 for (uint32_t i = 0; i < 4; i++)
8510 {
8511 packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
8512 if (BASISD_IS_BIG_ENDIAN)
8513 packed_colors[i] = byteswap_uint16(packed_colors[i]);
8514 }
8515
8516 for (uint32_t y = 0; y < max_y; y++)
8517 {
8518 const uint32_t s = pSelector->m_selectors[y];
8519
8520 for (uint32_t x = 0; x < max_x; x++)
8521 reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
8522
8523 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
8524 }
8525
8526 break;
8527 }
8528 case block_format::cRGBA4444_ALPHA:
8529 {
8530 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
8531 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
8532
8533 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
8534 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
8535
8536 color32 colors[4];
8537 decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5);
8538
8539 uint16_t packed_colors[4];
8540 for (uint32_t i = 0; i < 4; i++)
8541 {
8542 packed_colors[i] = mul_8(colors[i].g, 15);
8543 if (BASISD_IS_BIG_ENDIAN)
8544 packed_colors[i] = byteswap_uint16(packed_colors[i]);
8545 }
8546
8547 for (uint32_t y = 0; y < max_y; y++)
8548 {
8549 const uint32_t s = pSelector->m_selectors[y];
8550
8551 for (uint32_t x = 0; x < max_x; x++)
8552 {
8553 reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3];
8554 }
8555
8556 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
8557 }
8558
8559 break;
8560 }
8561 case block_format::cETC2_EAC_R11:
8562 {
8563#if BASISD_SUPPORT_ETC2_EAC_RG11
8564 void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes;
8565 convert_etc1s_to_etc2_eac_r11(static_cast<eac_block*>(pDst_block), pEndpoints, pSelector);
8566#else
8567 assert(0);
8568#endif
8569 break;
8570 }
8571 default:
8572 {
8573 assert(0);
8574 break;
8575 }
8576 }
8577
8578 } // block_x
8579
8580 } // block-y
8581
8582 if (endpoint_pred_repeat_count != 0)
8583 {
8584 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n");
8585 return false;
8586 }
8587
8588 //assert(endpoint_pred_repeat_count == 0);
8589
8590#if BASISD_SUPPORT_PVRTC1
8591 // PVRTC post process - create per-pixel modulation values.
8592 if (fmt == block_format::cPVRTC1_4_RGB)
8593 fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y);
8594 else if (fmt == block_format::cPVRTC1_4_RGBA)
8595 fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]);
8596#endif // BASISD_SUPPORT_PVRTC1
8597
8598 if (pPVRTC_work_mem)
8599 free(pPVRTC_work_mem);
8600
8601 return true;
8602 }
8603
8604 bool basis_validate_output_buffer_size(transcoder_texture_format target_format,
8605 uint32_t output_blocks_buf_size_in_blocks_or_pixels,
8606 uint32_t orig_width, uint32_t orig_height,
8607 uint32_t output_row_pitch_in_blocks_or_pixels,
8608 uint32_t output_rows_in_pixels,
8609 uint32_t total_slice_blocks)
8610 {
8611 if (basis_transcoder_format_is_uncompressed(target_format))
8612 {
8613 // Assume the output buffer is orig_width by orig_height
8614 if (!output_row_pitch_in_blocks_or_pixels)
8615 output_row_pitch_in_blocks_or_pixels = orig_width;
8616
8617 if (!output_rows_in_pixels)
8618 output_rows_in_pixels = orig_height;
8619
8620 // Now make sure the output buffer is large enough, or we'll overwrite memory.
8621 if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
8622 {
8623 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
8624 return false;
8625 }
8626 }
8627 else if (target_format == transcoder_texture_format::cTFFXT1_RGB)
8628 {
8629 const uint32_t num_blocks_fxt1_x = (orig_width + 7) / 8;
8630 const uint32_t num_blocks_fxt1_y = (orig_height + 3) / 4;
8631 const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
8632
8633 if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
8634 {
8635 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
8636 return false;
8637 }
8638 }
8639 else
8640 {
8641 if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
8642 {
8643 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
8644 return false;
8645 }
8646 }
8647 return true;
8648 }
8649
8650 bool basisu_lowlevel_etc1s_transcoder::transcode_image(
8651 transcoder_texture_format target_format,
8652 void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
8653 const uint8_t* pCompressed_data, uint32_t compressed_data_length,
8654 uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
8655 uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length,
8656 uint32_t decode_flags,
8657 bool basis_file_has_alpha_slices,
8658 bool is_video,
8659 uint32_t output_row_pitch_in_blocks_or_pixels,
8660 basisu_transcoder_state* pState,
8661 uint32_t output_rows_in_pixels)
8662 {
8663 if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length)
8664 {
8665 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n");
8666 return false;
8667 }
8668
8669 if (alpha_length)
8670 {
8671 if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length)
8672 {
8673 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n");
8674 return false;
8675 }
8676 }
8677 else
8678 {
8679 assert(!basis_file_has_alpha_slices);
8680 }
8681
8682 if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
8683 {
8684 if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
8685 {
8686 // PVRTC1 only supports power of 2 dimensions
8687 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
8688 return false;
8689 }
8690 }
8691
8692 if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
8693 {
8694 // Switch to PVRTC1 RGB if the input doesn't have alpha.
8695 target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
8696 }
8697
8698 const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
8699 const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
8700 const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
8701
8702 if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
8703 {
8704 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n");
8705 return false;
8706 }
8707
8708 bool status = false;
8709
8710 const uint8_t* pData = pCompressed_data + rgb_offset;
8711 uint32_t data_len = rgb_length;
8712 bool is_alpha_slice = false;
8713
8714 // If the caller wants us to transcode the mip level's alpha data, then use the next slice.
8715 if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
8716 {
8717 pData = pCompressed_data + alpha_offset;
8718 data_len = alpha_length;
8719 is_alpha_slice = true;
8720 }
8721
8722 switch (target_format)
8723 {
8724 case transcoder_texture_format::cTFETC1_RGB:
8725 {
8726 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8727 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8728
8729 if (!status)
8730 {
8731 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
8732 }
8733 break;
8734 }
8735 case transcoder_texture_format::cTFBC1_RGB:
8736 {
8737#if !BASISD_SUPPORT_DXT1
8738 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n");
8739 return false;
8740#else
8741 // status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8742 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8743 if (!status)
8744 {
8745 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
8746 }
8747 break;
8748#endif
8749 }
8750 case transcoder_texture_format::cTFBC4_R:
8751 {
8752#if !BASISD_SUPPORT_DXT5A
8753 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n");
8754 return false;
8755#else
8756 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8757 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8758 if (!status)
8759 {
8760 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
8761 }
8762 break;
8763#endif
8764 }
8765 case transcoder_texture_format::cTFPVRTC1_4_RGB:
8766 {
8767#if !BASISD_SUPPORT_PVRTC1
8768 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
8769 return false;
8770#else
8771 // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
8772 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8773 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8774 if (!status)
8775 {
8776 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n");
8777 }
8778 break;
8779#endif
8780 }
8781 case transcoder_texture_format::cTFPVRTC1_4_RGBA:
8782 {
8783#if !BASISD_SUPPORT_PVRTC1
8784 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n");
8785 return false;
8786#else
8787 assert(basis_file_has_alpha_slices);
8788 assert(alpha_length);
8789
8790 // Temp buffer to hold alpha block endpoint/selector indices
8791 basisu::vector<uint32_t> temp_block_indices(total_slice_blocks);
8792
8793 // First transcode alpha data to temp buffer
8794 //status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState);
8795 status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0);
8796 if (!status)
8797 {
8798 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n");
8799 }
8800 else
8801 {
8802 // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
8803 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]);
8804 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0);
8805 if (!status)
8806 {
8807 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n");
8808 }
8809 }
8810
8811 break;
8812#endif
8813 }
8814 case transcoder_texture_format::cTFBC7_RGBA:
8815 case transcoder_texture_format::cTFBC7_ALT:
8816 {
8817#if !BASISD_SUPPORT_BC7_MODE5
8818 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n");
8819 return false;
8820#else
8821 assert(bytes_per_block_or_pixel == 16);
8822 // We used to support transcoding just alpha to BC7 - but is that useful at all?
8823
8824 // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks.
8825 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8826 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8827
8828 if ((status) && (basis_file_has_alpha_slices))
8829 {
8830 // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha.
8831 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8832 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8833 }
8834
8835 if (!status)
8836 {
8837 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n");
8838 }
8839
8840 break;
8841#endif
8842 }
8843 case transcoder_texture_format::cTFETC2_RGBA:
8844 {
8845#if !BASISD_SUPPORT_ETC2_EAC_A8
8846 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n");
8847 return false;
8848#else
8849 assert(bytes_per_block_or_pixel == 16);
8850
8851 if (basis_file_has_alpha_slices)
8852 {
8853 // First decode the alpha data
8854 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8855 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8856 }
8857 else
8858 {
8859 //write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
8860 basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels);
8861 status = true;
8862 }
8863
8864 if (status)
8865 {
8866 // Now decode the color data
8867 //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8868 status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8869 if (!status)
8870 {
8871 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n");
8872 }
8873 }
8874 else
8875 {
8876 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n");
8877 }
8878 break;
8879#endif
8880 }
8881 case transcoder_texture_format::cTFBC3_RGBA:
8882 {
8883#if !BASISD_SUPPORT_DXT1
8884 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n");
8885 return false;
8886#elif !BASISD_SUPPORT_DXT5A
8887 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
8888 return false;
8889#else
8890 assert(bytes_per_block_or_pixel == 16);
8891
8892 // First decode the alpha data
8893 if (basis_file_has_alpha_slices)
8894 {
8895 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8896 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8897 }
8898 else
8899 {
8900 basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
8901 status = true;
8902 }
8903
8904 if (status)
8905 {
8906 // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
8907 //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState);
8908 status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8909 if (!status)
8910 {
8911 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n");
8912 }
8913 }
8914 else
8915 {
8916 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n");
8917 }
8918
8919 break;
8920#endif
8921 }
8922 case transcoder_texture_format::cTFBC5_RG:
8923 {
8924#if !BASISD_SUPPORT_DXT5A
8925 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
8926 return false;
8927#else
8928 assert(bytes_per_block_or_pixel == 16);
8929
8930 //bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
8931 // uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
8932 // basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0);
8933
8934 // Decode the R data (actually the green channel of the color data slice in the basis file)
8935 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8936 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8937 if (status)
8938 {
8939 if (basis_file_has_alpha_slices)
8940 {
8941 // Decode the G data (actually the green channel of the alpha data slice in the basis file)
8942 //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8943 status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8944 if (!status)
8945 {
8946 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n");
8947 }
8948 }
8949 else
8950 {
8951 basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
8952 status = true;
8953 }
8954 }
8955 else
8956 {
8957 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n");
8958 }
8959 break;
8960#endif
8961 }
8962 case transcoder_texture_format::cTFASTC_4x4_RGBA:
8963 {
8964#if !BASISD_SUPPORT_ASTC
8965 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n");
8966 return false;
8967#else
8968 assert(bytes_per_block_or_pixel == 16);
8969
8970 if (basis_file_has_alpha_slices)
8971 {
8972 // First decode the alpha data to the output (we're using the output texture as a temp buffer here).
8973 //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8974 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8975 if (status)
8976 {
8977 // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and
8978 // transcode both the alpha and color data at the same time to ASTC.
8979 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
8980 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels);
8981 }
8982 }
8983 else
8984 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
8985 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
8986
8987 if (!status)
8988 {
8989 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n");
8990 }
8991
8992 break;
8993#endif
8994 }
8995 case transcoder_texture_format::cTFATC_RGB:
8996 {
8997#if !BASISD_SUPPORT_ATC
8998 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
8999 return false;
9000#else
9001 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9002 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9003 if (!status)
9004 {
9005 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n");
9006 }
9007 break;
9008#endif
9009 }
9010 case transcoder_texture_format::cTFATC_RGBA:
9011 {
9012#if !BASISD_SUPPORT_ATC
9013 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n");
9014 return false;
9015#elif !BASISD_SUPPORT_DXT5A
9016 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n");
9017 return false;
9018#else
9019 assert(bytes_per_block_or_pixel == 16);
9020
9021 // First decode the alpha data
9022 if (basis_file_has_alpha_slices)
9023 {
9024 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9025 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9026 }
9027 else
9028 {
9029 basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels);
9030 status = true;
9031 }
9032
9033 if (status)
9034 {
9035 //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9036 status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9037 if (!status)
9038 {
9039 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n");
9040 }
9041 }
9042 else
9043 {
9044 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n");
9045 }
9046 break;
9047#endif
9048 }
9049 case transcoder_texture_format::cTFPVRTC2_4_RGB:
9050 {
9051#if !BASISD_SUPPORT_PVRTC2
9052 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9053 return false;
9054#else
9055 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9056 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9057 if (!status)
9058 {
9059 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n");
9060 }
9061 break;
9062#endif
9063 }
9064 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
9065 {
9066#if !BASISD_SUPPORT_PVRTC2
9067 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n");
9068 return false;
9069#else
9070 if (basis_file_has_alpha_slices)
9071 {
9072 // First decode the alpha data to the output (we're using the output texture as a temp buffer here).
9073 //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9074 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9075 if (!status)
9076 {
9077 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n");
9078 }
9079 else
9080 {
9081 // Now decode the color data and transcode to PVRTC2 RGBA.
9082 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState);
9083 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels);
9084 }
9085 }
9086 else
9087 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9088 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9089
9090 if (!status)
9091 {
9092 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n");
9093 }
9094
9095 break;
9096#endif
9097 }
9098 case transcoder_texture_format::cTFRGBA32:
9099 {
9100 // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9101
9102 // First decode the alpha data
9103 if (basis_file_has_alpha_slices)
9104 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9105 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9106 else
9107 status = true;
9108
9109 if (status)
9110 {
9111 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9112 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9113 if (!status)
9114 {
9115 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n");
9116 }
9117 }
9118 else
9119 {
9120 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n");
9121 }
9122
9123 break;
9124 }
9125 case transcoder_texture_format::cTFRGB565:
9126 case transcoder_texture_format::cTFBGR565:
9127 {
9128 // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9129
9130 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9131 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9132 if (!status)
9133 {
9134 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n");
9135 }
9136
9137 break;
9138 }
9139 case transcoder_texture_format::cTFRGBA4444:
9140 {
9141 // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory.
9142
9143 // First decode the alpha data
9144 if (basis_file_has_alpha_slices)
9145 //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9146 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9147 else
9148 status = true;
9149
9150 if (status)
9151 {
9152 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels);
9153 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9154 if (!status)
9155 {
9156 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n");
9157 }
9158 }
9159 else
9160 {
9161 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n");
9162 }
9163
9164 break;
9165 }
9166 case transcoder_texture_format::cTFFXT1_RGB:
9167 {
9168#if !BASISD_SUPPORT_FXT1
9169 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n");
9170 return false;
9171#else
9172 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9173 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9174 if (!status)
9175 {
9176 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n");
9177 }
9178 break;
9179#endif
9180 }
9181 case transcoder_texture_format::cTFETC2_EAC_R11:
9182 {
9183#if !BASISD_SUPPORT_ETC2_EAC_RG11
9184 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9185 return false;
9186#else
9187 //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9188 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9189 if (!status)
9190 {
9191 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n");
9192 }
9193
9194 break;
9195#endif
9196 }
9197 case transcoder_texture_format::cTFETC2_EAC_RG11:
9198 {
9199#if !BASISD_SUPPORT_ETC2_EAC_RG11
9200 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n");
9201 return false;
9202#else
9203 assert(bytes_per_block_or_pixel == 16);
9204
9205 if (basis_file_has_alpha_slices)
9206 {
9207 // First decode the alpha data to G
9208 //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9209 status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9210 }
9211 else
9212 {
9213 basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels);
9214 status = true;
9215 }
9216
9217 if (status)
9218 {
9219 // Now decode the color data to R
9220 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9221 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels);
9222 if (!status)
9223 {
9224 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n");
9225 }
9226 }
9227 else
9228 {
9229 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n");
9230 }
9231
9232 break;
9233#endif
9234 }
9235 default:
9236 {
9237 assert(0);
9238 BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n");
9239 break;
9240 }
9241 }
9242
9243 return status;
9244 }
9245
9246 basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
9247 {
9248 }
9249
9250 bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
9251 uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
9252 basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
9253 {
9254 BASISU_NOTE_UNUSED(pState);
9255 BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
9256
9257 assert(g_transcoder_initialized);
9258 if (!g_transcoder_initialized)
9259 {
9260 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder not globally initialized.\n");
9261 return false;
9262 }
9263
9264#if BASISD_SUPPORT_UASTC
9265 const uint32_t total_blocks = num_blocks_x * num_blocks_y;
9266
9267 if (!output_row_pitch_in_blocks_or_pixels)
9268 {
9269 if (basis_block_format_is_uncompressed(fmt))
9270 output_row_pitch_in_blocks_or_pixels = orig_width;
9271 else
9272 {
9273 if (fmt == block_format::cFXT1_RGB)
9274 output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8;
9275 else
9276 output_row_pitch_in_blocks_or_pixels = num_blocks_x;
9277 }
9278 }
9279
9280 if (basis_block_format_is_uncompressed(fmt))
9281 {
9282 if (!output_rows_in_pixels)
9283 output_rows_in_pixels = orig_height;
9284 }
9285
9286 uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks;
9287 if (image_data_size < total_expected_block_bytes)
9288 {
9289 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
9290 return false;
9291 }
9292
9293 const uastc_block* pSource_block = reinterpret_cast<const uastc_block *>(pImage_data);
9294
9295 const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0;
9296 const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
9297
9298 bool status = false;
9299 if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
9300 {
9301 if (fmt == block_format::cPVRTC1_4_RGBA)
9302 transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality);
9303 else
9304 transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha);
9305 }
9306 else
9307 {
9308 for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
9309 {
9310 void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
9311
9312 for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes)
9313 {
9314 switch (fmt)
9315 {
9316 case block_format::cUASTC_4x4:
9317 {
9318 memcpy(pDst_block, pSource_block, sizeof(uastc_block));
9319 status = true;
9320 break;
9321 }
9322 case block_format::cETC1:
9323 {
9324 if (from_alpha)
9325 status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3);
9326 else
9327 status = transcode_uastc_to_etc1(*pSource_block, pDst_block);
9328 break;
9329 }
9330 case block_format::cETC2_RGBA:
9331 {
9332 status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block);
9333 break;
9334 }
9335 case block_format::cBC1:
9336 {
9337 status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality);
9338 break;
9339 }
9340 case block_format::cBC3:
9341 {
9342 status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality);
9343 break;
9344 }
9345 case block_format::cBC4:
9346 {
9347 if (channel0 < 0)
9348 channel0 = 0;
9349 status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0);
9350 break;
9351 }
9352 case block_format::cBC5:
9353 {
9354 if (channel0 < 0)
9355 channel0 = 0;
9356 if (channel1 < 0)
9357 channel1 = 3;
9358 status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1);
9359 break;
9360 }
9361 case block_format::cBC7:
9362 case block_format::cBC7_M5_COLOR: // for consistently with ETC1S
9363 {
9364 status = transcode_uastc_to_bc7(*pSource_block, pDst_block);
9365 break;
9366 }
9367 case block_format::cASTC_4x4:
9368 {
9369 status = transcode_uastc_to_astc(*pSource_block, pDst_block);
9370 break;
9371 }
9372 case block_format::cETC2_EAC_R11:
9373 {
9374 if (channel0 < 0)
9375 channel0 = 0;
9376 status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0);
9377 break;
9378 }
9379 case block_format::cETC2_EAC_RG11:
9380 {
9381 if (channel0 < 0)
9382 channel0 = 0;
9383 if (channel1 < 0)
9384 channel1 = 3;
9385 status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1);
9386 break;
9387 }
9388 case block_format::cRGBA32:
9389 {
9390 color32 block_pixels[4][4];
9391 status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false);
9392
9393 assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes);
9394 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t);
9395
9396 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9397 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9398
9399 for (uint32_t y = 0; y < max_y; y++)
9400 {
9401 for (uint32_t x = 0; x < max_x; x++)
9402 {
9403 const color32& c = block_pixels[y][x];
9404
9405 pDst_pixels[0 + 4 * x] = c.r;
9406 pDst_pixels[1 + 4 * x] = c.g;
9407 pDst_pixels[2 + 4 * x] = c.b;
9408 pDst_pixels[3 + 4 * x] = c.a;
9409 }
9410
9411 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t);
9412 }
9413
9414 break;
9415 }
9416 case block_format::cRGB565:
9417 case block_format::cBGR565:
9418 {
9419 color32 block_pixels[4][4];
9420 status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
9421
9422 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9423 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9424
9425 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9426 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9427
9428 for (uint32_t y = 0; y < max_y; y++)
9429 {
9430 for (uint32_t x = 0; x < max_x; x++)
9431 {
9432 const color32& c = block_pixels[y][x];
9433
9434 const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) :
9435 static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31));
9436
9437 pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
9438 pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
9439 }
9440
9441 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9442 }
9443
9444 break;
9445 }
9446 case block_format::cRGBA4444:
9447 {
9448 color32 block_pixels[4][4];
9449 status = unpack_uastc(*pSource_block, (color32*)block_pixels, false);
9450
9451 assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes);
9452 uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t);
9453
9454 const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
9455 const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
9456
9457 for (uint32_t y = 0; y < max_y; y++)
9458 {
9459 for (uint32_t x = 0; x < max_x; x++)
9460 {
9461 const color32& c = block_pixels[y][x];
9462
9463 const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15));
9464
9465 pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF);
9466 pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF);
9467 }
9468
9469 pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t);
9470 }
9471 break;
9472 }
9473 default:
9474 assert(0);
9475 break;
9476
9477 }
9478
9479 if (!status)
9480 {
9481 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n");
9482 return false;
9483 }
9484
9485 } // block_x
9486
9487 } // block_y
9488 }
9489
9490 return true;
9491#else
9492 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: UASTC is unsupported\n");
9493
9494 BASISU_NOTE_UNUSED(decode_flags);
9495 BASISU_NOTE_UNUSED(channel0);
9496 BASISU_NOTE_UNUSED(channel1);
9497 BASISU_NOTE_UNUSED(output_rows_in_pixels);
9498 BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
9499 BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
9500 BASISU_NOTE_UNUSED(fmt);
9501 BASISU_NOTE_UNUSED(image_data_size);
9502 BASISU_NOTE_UNUSED(pImage_data);
9503 BASISU_NOTE_UNUSED(num_blocks_x);
9504 BASISU_NOTE_UNUSED(num_blocks_y);
9505 BASISU_NOTE_UNUSED(pDst_blocks);
9506
9507 return false;
9508#endif
9509 }
9510
9511 bool basisu_lowlevel_uastc_transcoder::transcode_image(
9512 transcoder_texture_format target_format,
9513 void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
9514 const uint8_t* pCompressed_data, uint32_t compressed_data_length,
9515 uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
9516 uint32_t slice_offset, uint32_t slice_length,
9517 uint32_t decode_flags,
9518 bool has_alpha,
9519 bool is_video,
9520 uint32_t output_row_pitch_in_blocks_or_pixels,
9521 basisu_transcoder_state* pState,
9522 uint32_t output_rows_in_pixels,
9523 int channel0, int channel1)
9524 {
9525 BASISU_NOTE_UNUSED(is_video);
9526 BASISU_NOTE_UNUSED(level_index);
9527
9528 if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
9529 {
9530 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n");
9531 return false;
9532 }
9533
9534 if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA))
9535 {
9536 if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4)))
9537 {
9538 // PVRTC1 only supports power of 2 dimensions
9539 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n");
9540 return false;
9541 }
9542 }
9543
9544 if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha))
9545 {
9546 // Switch to PVRTC1 RGB if the input doesn't have alpha.
9547 target_format = transcoder_texture_format::cTFPVRTC1_4_RGB;
9548 }
9549
9550 const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
9551 const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
9552 const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
9553
9554 if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
9555 {
9556 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n");
9557 return false;
9558 }
9559
9560 bool status = false;
9561
9562 // UASTC4x4
9563 switch (target_format)
9564 {
9565 case transcoder_texture_format::cTFETC1_RGB:
9566 {
9567 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9568 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1,
9569 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
9570
9571 if (!status)
9572 {
9573 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n");
9574 }
9575 break;
9576 }
9577 case transcoder_texture_format::cTFETC2_RGBA:
9578 {
9579 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9580 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA,
9581 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
9582 if (!status)
9583 {
9584 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC2 failed\n");
9585 }
9586 break;
9587 }
9588 case transcoder_texture_format::cTFBC1_RGB:
9589 {
9590 // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though.
9591 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9592 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1,
9593 bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
9594 if (!status)
9595 {
9596 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC1 failed\n");
9597 }
9598 break;
9599 }
9600 case transcoder_texture_format::cTFBC3_RGBA:
9601 {
9602 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9603 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3,
9604 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
9605 if (!status)
9606 {
9607 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC3 failed\n");
9608 }
9609 break;
9610 }
9611 case transcoder_texture_format::cTFBC4_R:
9612 {
9613 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
9614 // nullptr, 0,
9615 // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
9616 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4,
9617 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
9618 ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
9619 if (!status)
9620 {
9621 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC4 failed\n");
9622 }
9623 break;
9624 }
9625 case transcoder_texture_format::cTFBC5_RG:
9626 {
9627 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
9628 // nullptr, 0,
9629 // 0, 3);
9630 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5,
9631 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
9632 0, 3);
9633 if (!status)
9634 {
9635 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC5 failed\n");
9636 }
9637 break;
9638 }
9639 case transcoder_texture_format::cTFBC7_RGBA:
9640 case transcoder_texture_format::cTFBC7_ALT:
9641 {
9642 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9643 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7,
9644 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9645 if (!status)
9646 {
9647 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC7 failed\n");
9648 }
9649 break;
9650 }
9651 case transcoder_texture_format::cTFPVRTC1_4_RGB:
9652 {
9653 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9654 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB,
9655 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9656 if (!status)
9657 {
9658 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n");
9659 }
9660 break;
9661 }
9662 case transcoder_texture_format::cTFPVRTC1_4_RGBA:
9663 {
9664 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9665 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA,
9666 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9667 if (!status)
9668 {
9669 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n");
9670 }
9671 break;
9672 }
9673 case transcoder_texture_format::cTFASTC_4x4_RGBA:
9674 {
9675 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9676 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4,
9677 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9678 if (!status)
9679 {
9680 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n");
9681 }
9682 break;
9683 }
9684 case transcoder_texture_format::cTFATC_RGB:
9685 case transcoder_texture_format::cTFATC_RGBA:
9686 {
9687 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->ATC currently unsupported\n");
9688 return false;
9689 }
9690 case transcoder_texture_format::cTFFXT1_RGB:
9691 {
9692 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n");
9693 return false;
9694 }
9695 case transcoder_texture_format::cTFPVRTC2_4_RGB:
9696 {
9697 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
9698 return false;
9699 }
9700 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
9701 {
9702 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n");
9703 return false;
9704 }
9705 case transcoder_texture_format::cTFETC2_EAC_R11:
9706 {
9707 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
9708 // nullptr, 0,
9709 // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
9710 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11,
9711 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
9712 ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0);
9713 if (!status)
9714 {
9715 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n");
9716 }
9717 break;
9718 }
9719 case transcoder_texture_format::cTFETC2_EAC_RG11:
9720 {
9721 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState,
9722 // nullptr, 0,
9723 // 0, 3);
9724 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11,
9725 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels,
9726 0, 3);
9727 if (!status)
9728 {
9729 BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n");
9730 }
9731 break;
9732 }
9733 case transcoder_texture_format::cTFRGBA32:
9734 {
9735 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9736 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32,
9737 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9738 if (!status)
9739 {
9740 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n");
9741 }
9742 break;
9743 }
9744 case transcoder_texture_format::cTFRGB565:
9745 {
9746 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9747 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565,
9748 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9749 if (!status)
9750 {
9751 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
9752 }
9753 break;
9754 }
9755 case transcoder_texture_format::cTFBGR565:
9756 {
9757 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9758 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565,
9759 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9760 if (!status)
9761 {
9762 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n");
9763 }
9764 break;
9765 }
9766 case transcoder_texture_format::cTFRGBA4444:
9767 {
9768 //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState);
9769 status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444,
9770 bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
9771 if (!status)
9772 {
9773 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n");
9774 }
9775 break;
9776 }
9777 default:
9778 {
9779 assert(0);
9780 BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: Invalid format\n");
9781 break;
9782 }
9783 }
9784
9785 return status;
9786 }
9787
9788 basisu_transcoder::basisu_transcoder() :
9789 m_ready_to_transcode(false)
9790 {
9791 }
9792
9793 bool basisu_transcoder::validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const
9794 {
9795 if (!validate_header(pData, data_size))
9796 return false;
9797
9798 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
9799
9800#if !BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS
9801 if (crc16(&pHeader->m_data_size, sizeof(basis_file_header) - BASISU_OFFSETOF(basis_file_header, m_data_size), 0) != pHeader->m_header_crc16)
9802 {
9803 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header CRC check failed\n");
9804 return false;
9805 }
9806
9807 if (full_validation)
9808 {
9809 if (crc16(reinterpret_cast<const uint8_t*>(pData) + sizeof(basis_file_header), pHeader->m_data_size, 0) != pHeader->m_data_crc16)
9810 {
9811 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: data CRC check failed\n");
9812 return false;
9813 }
9814 }
9815#endif
9816
9817 return true;
9818 }
9819
9820 bool basisu_transcoder::validate_header_quick(const void* pData, uint32_t data_size) const
9821 {
9822 if (data_size <= sizeof(basis_file_header))
9823 return false;
9824
9825 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
9826
9827 if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
9828 {
9829 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
9830 return false;
9831 }
9832
9833 uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
9834 if (data_size < expected_file_size)
9835 {
9836 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: source buffer is too small\n");
9837 return false;
9838 }
9839
9840 if ((!pHeader->m_total_slices) || (!pHeader->m_total_images))
9841 {
9842 BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header is invalid\n");
9843 return false;
9844 }
9845
9846 if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
9847 ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
9848 )
9849 {
9850 BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
9851 return false;
9852 }
9853
9854 return true;
9855 }
9856
9857 bool basisu_transcoder::validate_header(const void* pData, uint32_t data_size) const
9858 {
9859 if (data_size <= sizeof(basis_file_header))
9860 {
9861 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small\n");
9862 return false;
9863 }
9864
9865 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
9866
9867 if ((pHeader->m_sig != basis_file_header::cBASISSigValue) || (pHeader->m_ver != BASISD_SUPPORTED_BASIS_VERSION) || (pHeader->m_header_size != sizeof(basis_file_header)))
9868 {
9869 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header has an invalid signature, or file version is unsupported\n");
9870 return false;
9871 }
9872
9873 uint32_t expected_file_size = sizeof(basis_file_header) + pHeader->m_data_size;
9874 if (data_size < expected_file_size)
9875 {
9876 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: input source buffer is too small, or header is corrupted\n");
9877 return false;
9878 }
9879
9880 if ((!pHeader->m_total_images) || (!pHeader->m_total_slices))
9881 {
9882 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (total images or slices are 0)\n");
9883 return false;
9884 }
9885
9886 if (pHeader->m_total_images > pHeader->m_total_slices)
9887 {
9888 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (too many images)\n");
9889 return false;
9890 }
9891
9892 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
9893 {
9894 if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
9895 {
9896 if (pHeader->m_total_slices & 1)
9897 {
9898 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n");
9899 return false;
9900 }
9901 }
9902
9903 // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too.
9904 if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0)
9905 {
9906 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
9907 return false;
9908 }
9909 }
9910 else
9911 {
9912 if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0)
9913 {
9914 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n");
9915 return false;
9916 }
9917 }
9918
9919 if ((pHeader->m_slice_desc_file_ofs >= data_size) ||
9920 ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices))
9921 )
9922 {
9923 BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: passed in buffer is too small or data is corrupted\n");
9924 return false;
9925 }
9926
9927 return true;
9928 }
9929
9930 basis_texture_type basisu_transcoder::get_texture_type(const void* pData, uint32_t data_size) const
9931 {
9932 if (!validate_header_quick(pData, data_size))
9933 {
9934 BASISU_DEVEL_ERROR("basisu_transcoder::get_texture_type: header validation failed\n");
9935 return cBASISTexType2DArray;
9936 }
9937
9938 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
9939
9940 basis_texture_type btt = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
9941
9942 if (btt >= cBASISTexTypeTotal)
9943 {
9944 BASISU_DEVEL_ERROR("basisu_transcoder::validate_header_quick: header's texture type field is invalid\n");
9945 return cBASISTexType2DArray;
9946 }
9947
9948 return btt;
9949 }
9950
9951 bool basisu_transcoder::get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const
9952 {
9953 if (!validate_header_quick(pData, data_size))
9954 {
9955 BASISU_DEVEL_ERROR("basisu_transcoder::get_userdata: header validation failed\n");
9956 return false;
9957 }
9958
9959 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
9960
9961 userdata0 = pHeader->m_userdata0;
9962 userdata1 = pHeader->m_userdata1;
9963 return true;
9964 }
9965
9966 uint32_t basisu_transcoder::get_total_images(const void* pData, uint32_t data_size) const
9967 {
9968 if (!validate_header_quick(pData, data_size))
9969 {
9970 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
9971 return 0;
9972 }
9973
9974 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
9975
9976 return pHeader->m_total_images;
9977 }
9978
9979 basis_tex_format basisu_transcoder::get_tex_format(const void* pData, uint32_t data_size) const
9980 {
9981 if (!validate_header_quick(pData, data_size))
9982 {
9983 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n");
9984 return basis_tex_format::cETC1S;
9985 }
9986
9987 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
9988
9989 return (basis_tex_format)(uint32_t)pHeader->m_tex_format;
9990 }
9991
9992 bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const
9993 {
9994 if (!validate_header_quick(pData, data_size))
9995 {
9996 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: header validation failed\n");
9997 return false;
9998 }
9999
10000 int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
10001 if (slice_index < 0)
10002 {
10003 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid slice index\n");
10004 return false;
10005 }
10006
10007 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
10008
10009 if (image_index >= pHeader->m_total_images)
10010 {
10011 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
10012 return false;
10013 }
10014
10015 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
10016
10017 uint32_t total_levels = 1;
10018 for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
10019 if (pSlice_descs[i].m_image_index == image_index)
10020 total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
10021 else
10022 break;
10023
10024 if (total_levels > 16)
10025 {
10026 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_info: invalid image_index\n");
10027 return false;
10028 }
10029
10030 const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
10031
10032 image_info.m_image_index = image_index;
10033 image_info.m_total_levels = total_levels;
10034
10035 image_info.m_alpha_flag = false;
10036
10037 // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
10038 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10039 image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
10040 else
10041 image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
10042
10043 image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
10044
10045 image_info.m_width = slice_desc.m_num_blocks_x * 4;
10046 image_info.m_height = slice_desc.m_num_blocks_y * 4;
10047 image_info.m_orig_width = slice_desc.m_orig_width;
10048 image_info.m_orig_height = slice_desc.m_orig_height;
10049 image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
10050 image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
10051 image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
10052 image_info.m_first_slice_index = slice_index;
10053
10054 return true;
10055 }
10056
10057 uint32_t basisu_transcoder::get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const
10058 {
10059 if (!validate_header_quick(pData, data_size))
10060 {
10061 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: header validation failed\n");
10062 return false;
10063 }
10064
10065 int slice_index = find_first_slice_index(pData, data_size, image_index, 0);
10066 if (slice_index < 0)
10067 {
10068 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: failed finding slice\n");
10069 return false;
10070 }
10071
10072 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
10073
10074 if (image_index >= pHeader->m_total_images)
10075 {
10076 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image_index\n");
10077 return false;
10078 }
10079
10080 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
10081
10082 uint32_t total_levels = 1;
10083 for (uint32_t i = slice_index + 1; i < pHeader->m_total_slices; i++)
10084 if (pSlice_descs[i].m_image_index == image_index)
10085 total_levels = basisu::maximum<uint32_t>(total_levels, pSlice_descs[i].m_level_index + 1);
10086 else
10087 break;
10088
10089 const uint32_t cMaxSupportedLevels = 16;
10090 if (total_levels > cMaxSupportedLevels)
10091 {
10092 BASISU_DEVEL_ERROR("basisu_transcoder::get_total_image_levels: invalid image levels!\n");
10093 return false;
10094 }
10095
10096 return total_levels;
10097 }
10098
10099 bool basisu_transcoder::get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const
10100 {
10101 if (!validate_header_quick(pData, data_size))
10102 {
10103 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: header validation failed\n");
10104 return false;
10105 }
10106
10107 int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
10108 if (slice_index < 0)
10109 {
10110 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: failed finding slice\n");
10111 return false;
10112 }
10113
10114 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
10115
10116 if (image_index >= pHeader->m_total_images)
10117 {
10118 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_desc: invalid image_index\n");
10119 return false;
10120 }
10121
10122 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
10123
10124 const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
10125
10126 orig_width = slice_desc.m_orig_width;
10127 orig_height = slice_desc.m_orig_height;
10128 total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
10129
10130 return true;
10131 }
10132
10133 bool basisu_transcoder::get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& image_info, uint32_t image_index, uint32_t level_index) const
10134 {
10135 if (!validate_header_quick(pData, data_size))
10136 {
10137 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: validate_file_checksums failed\n");
10138 return false;
10139 }
10140
10141 int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
10142 if (slice_index < 0)
10143 {
10144 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: failed finding slice\n");
10145 return false;
10146 }
10147
10148 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
10149
10150 if (image_index >= pHeader->m_total_images)
10151 {
10152 BASISU_DEVEL_ERROR("basisu_transcoder::get_image_level_info: invalid image_index\n");
10153 return false;
10154 }
10155
10156 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
10157
10158 const basis_slice_desc& slice_desc = pSlice_descs[slice_index];
10159
10160 image_info.m_image_index = image_index;
10161 image_info.m_level_index = level_index;
10162
10163 // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha.
10164 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10165 image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
10166 else
10167 image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
10168
10169 image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
10170 image_info.m_width = slice_desc.m_num_blocks_x * 4;
10171 image_info.m_height = slice_desc.m_num_blocks_y * 4;
10172 image_info.m_orig_width = slice_desc.m_orig_width;
10173 image_info.m_orig_height = slice_desc.m_orig_height;
10174 image_info.m_num_blocks_x = slice_desc.m_num_blocks_x;
10175 image_info.m_num_blocks_y = slice_desc.m_num_blocks_y;
10176 image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y;
10177 image_info.m_first_slice_index = slice_index;
10178
10179 image_info.m_rgb_file_ofs = slice_desc.m_file_ofs;
10180 image_info.m_rgb_file_len = slice_desc.m_file_size;
10181 image_info.m_alpha_file_ofs = 0;
10182 image_info.m_alpha_file_len = 0;
10183
10184 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10185 {
10186 if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices)
10187 {
10188 assert((slice_index + 1) < (int)pHeader->m_total_slices);
10189 image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs;
10190 image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size;
10191 }
10192 }
10193
10194 return true;
10195 }
10196
10197 bool basisu_transcoder::get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const
10198 {
10199 if (!validate_file_checksums(pData, data_size, false))
10200 {
10201 BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: validate_file_checksums failed\n");
10202 return false;
10203 }
10204
10205 const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData);
10206 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(static_cast<const uint8_t*>(pData) + pHeader->m_slice_desc_file_ofs);
10207
10208 file_info.m_version = pHeader->m_ver;
10209
10210 file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc);
10211
10212 file_info.m_total_selectors = pHeader->m_total_selectors;
10213 file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs;
10214 file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size;
10215
10216 file_info.m_total_endpoints = pHeader->m_total_endpoints;
10217 file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs;
10218 file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size;
10219
10220 file_info.m_tables_ofs = pHeader->m_tables_file_ofs;
10221 file_info.m_tables_size = pHeader->m_tables_file_size;
10222
10223 file_info.m_tex_format = static_cast<basis_tex_format>(static_cast<int>(pHeader->m_tex_format));
10224
10225 file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S);
10226
10227 file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0;
10228 file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
10229
10230 const uint32_t total_slices = pHeader->m_total_slices;
10231
10232 file_info.m_slice_info.resize(total_slices);
10233
10234 file_info.m_slices_size = 0;
10235
10236 file_info.m_tex_type = static_cast<basis_texture_type>(static_cast<uint8_t>(pHeader->m_tex_type));
10237
10238 if (file_info.m_tex_type > cBASISTexTypeTotal)
10239 {
10240 BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: invalid texture type, file is corrupted\n");
10241 return false;
10242 }
10243
10244 file_info.m_us_per_frame = pHeader->m_us_per_frame;
10245 file_info.m_userdata0 = pHeader->m_userdata0;
10246 file_info.m_userdata1 = pHeader->m_userdata1;
10247
10248 file_info.m_image_mipmap_levels.resize(0);
10249 file_info.m_image_mipmap_levels.resize(pHeader->m_total_images);
10250
10251 file_info.m_total_images = pHeader->m_total_images;
10252
10253 for (uint32_t i = 0; i < total_slices; i++)
10254 {
10255 file_info.m_slices_size += pSlice_descs[i].m_file_size;
10256
10257 basisu_slice_info& slice_info = file_info.m_slice_info[i];
10258
10259 slice_info.m_orig_width = pSlice_descs[i].m_orig_width;
10260 slice_info.m_orig_height = pSlice_descs[i].m_orig_height;
10261 slice_info.m_width = pSlice_descs[i].m_num_blocks_x * 4;
10262 slice_info.m_height = pSlice_descs[i].m_num_blocks_y * 4;
10263 slice_info.m_num_blocks_x = pSlice_descs[i].m_num_blocks_x;
10264 slice_info.m_num_blocks_y = pSlice_descs[i].m_num_blocks_y;
10265 slice_info.m_total_blocks = slice_info.m_num_blocks_x * slice_info.m_num_blocks_y;
10266 slice_info.m_compressed_size = pSlice_descs[i].m_file_size;
10267 slice_info.m_slice_index = i;
10268 slice_info.m_image_index = pSlice_descs[i].m_image_index;
10269 slice_info.m_level_index = pSlice_descs[i].m_level_index;
10270 slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16;
10271 slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0;
10272 slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0;
10273
10274 if (pSlice_descs[i].m_image_index >= pHeader->m_total_images)
10275 {
10276 BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice desc's image index is invalid\n");
10277 return false;
10278 }
10279
10280 file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] = basisu::maximum<uint32_t>(file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index], pSlice_descs[i].m_level_index + 1);
10281
10282 if (file_info.m_image_mipmap_levels[pSlice_descs[i].m_image_index] > 16)
10283 {
10284 BASISU_DEVEL_ERROR("basisu_transcoder::get_file_info: slice mipmap level is invalid\n");
10285 return false;
10286 }
10287 }
10288
10289 return true;
10290 }
10291
10292 bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size)
10293 {
10294 if (!validate_header_quick(pData, data_size))
10295 {
10296 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n");
10297 return false;
10298 }
10299
10300 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
10301 const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
10302
10303 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10304 {
10305 if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
10306 {
10307 m_lowlevel_etc1s_decoder.clear();
10308 }
10309
10310 if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook)
10311 {
10312 if (!m_lowlevel_etc1s_decoder.get_global_codebooks())
10313 {
10314 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n");
10315 return false;
10316 }
10317 if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size())
10318 {
10319 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n");
10320 return false;
10321 }
10322 if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) ||
10323 (m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors))
10324 {
10325 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n");
10326 return false;
10327 }
10328 if (!pHeader->m_tables_file_size)
10329 {
10330 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n");
10331 return false;
10332 }
10333 if (pHeader->m_tables_file_ofs > data_size)
10334 {
10335 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n");
10336 return false;
10337 }
10338 if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
10339 {
10340 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n");
10341 return false;
10342 }
10343 }
10344 else
10345 {
10346 if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size)
10347 {
10348 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n");
10349 return false;
10350 }
10351
10352 if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size))
10353 {
10354 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n");
10355 return false;
10356 }
10357
10358 if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs))
10359 {
10360 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n");
10361 return false;
10362 }
10363
10364 if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs))
10365 {
10366 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
10367 return false;
10368 }
10369
10370 if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs))
10371 {
10372 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n");
10373 return false;
10374 }
10375
10376 if (!m_lowlevel_etc1s_decoder.decode_palettes(
10377 pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size,
10378 pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size))
10379 {
10380 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n");
10381 return false;
10382 }
10383 }
10384
10385 if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size))
10386 {
10387 BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n");
10388 return false;
10389 }
10390 }
10391 else
10392 {
10393 // Nothing special to do for UASTC.
10394 if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
10395 {
10396 m_lowlevel_etc1s_decoder.clear();
10397 }
10398 }
10399
10400 m_ready_to_transcode = true;
10401
10402 return true;
10403 }
10404
	// Ends a transcoding session: calls clear() on the low-level ETC1S decoder and
	// marks this transcoder as not ready to transcode. Always returns true.
	bool basisu_transcoder::stop_transcoding()
	{
		m_lowlevel_etc1s_decoder.clear();

		// transcode_slice() and transcode_image_level() refuse to run while this flag is false.
		m_ready_to_transcode = false;

		return true;
	}
10413
10414 bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt,
10415 uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const
10416 {
10417 if (!m_ready_to_transcode)
10418 {
10419 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n");
10420 return false;
10421 }
10422
10423 if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
10424 {
10425 // TODO: Not yet supported
10426 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
10427 return false;
10428 }
10429
10430 if (!validate_header_quick(pData, data_size))
10431 {
10432 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: header validation failed\n");
10433 return false;
10434 }
10435
10436 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
10437
10438 const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
10439
10440 if (slice_index >= pHeader->m_total_slices)
10441 {
10442 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: slice_index >= pHeader->m_total_slices\n");
10443 return false;
10444 }
10445
10446 const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index];
10447
10448 uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
10449
10450 if (basis_block_format_is_uncompressed(fmt))
10451 {
10452 // Assume the output buffer is orig_width by orig_height
10453 if (!output_row_pitch_in_blocks_or_pixels)
10454 output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width;
10455
10456 if (!output_rows_in_pixels)
10457 output_rows_in_pixels = slice_desc.m_orig_height;
10458
10459 // Now make sure the output buffer is large enough, or we'll overwrite memory.
10460 if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
10461 {
10462 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
10463 return false;
10464 }
10465 }
10466 else if (fmt == block_format::cFXT1_RGB)
10467 {
10468 const uint32_t num_blocks_fxt1_x = (slice_desc.m_orig_width + 7) / 8;
10469 const uint32_t num_blocks_fxt1_y = (slice_desc.m_orig_height + 3) / 4;
10470 const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y;
10471
10472 if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
10473 {
10474 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
10475 return false;
10476 }
10477 }
10478 else
10479 {
10480 if (output_blocks_buf_size_in_blocks_or_pixels < total_4x4_blocks)
10481 {
10482 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: output_blocks_buf_size_in_blocks_or_pixels < total_blocks\n");
10483 return false;
10484 }
10485 }
10486
10487 if (fmt != block_format::cETC1)
10488 {
10489 if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA))
10490 {
10491 if ((!basisu::is_pow2(slice_desc.m_num_blocks_x * 4)) || (!basisu::is_pow2(slice_desc.m_num_blocks_y * 4)))
10492 {
10493 // PVRTC1 only supports power of 2 dimensions
10494 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: PVRTC1 only supports power of 2 dimensions\n");
10495 return false;
10496 }
10497 }
10498 }
10499
10500 if (slice_desc.m_file_ofs > data_size)
10501 {
10502 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_ofs, or passed in buffer too small\n");
10503 return false;
10504 }
10505
10506 const uint32_t data_size_left = data_size - slice_desc.m_file_ofs;
10507 if (data_size_left < slice_desc.m_file_size)
10508 {
10509 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n");
10510 return false;
10511 }
10512
10513 if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
10514 {
10515 return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
10516 pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
10517 fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
10518 output_rows_in_pixels, channel0, channel1, decode_flags);
10519 }
10520 else
10521 {
10522 return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
10523 pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
10524 fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
10525 (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels);
10526 }
10527 }
10528
10529 int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
10530 {
10531 BASISU_NOTE_UNUSED(data_size);
10532
10533 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
10534 const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
10535
10536 // For very large basis files this search could be painful
10537 // TODO: Binary search this
10538 for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
10539 {
10540 const basis_slice_desc& slice_desc = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_iter];
10541 if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
10542 return slice_iter;
10543 }
10544
10545 BASISU_DEVEL_ERROR("basisu_transcoder::find_first_slice_index: didn't find slice\n");
10546
10547 return -1;
10548 }
10549
10550 int basisu_transcoder::find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const
10551 {
10552 if (!validate_header_quick(pData, data_size))
10553 {
10554 BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: header validation failed\n");
10555 return false;
10556 }
10557
10558 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
10559 const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
10560 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
10561
10562 // For very large basis files this search could be painful
10563 // TODO: Binary search this
10564 for (uint32_t slice_iter = 0; slice_iter < pHeader->m_total_slices; slice_iter++)
10565 {
10566 const basis_slice_desc& slice_desc = pSlice_descs[slice_iter];
10567 if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index))
10568 {
10569 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10570 {
10571 const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0;
10572 if (slice_alpha == alpha_data)
10573 return slice_iter;
10574 }
10575 else
10576 {
10577 return slice_iter;
10578 }
10579 }
10580 }
10581
10582 BASISU_DEVEL_ERROR("basisu_transcoder::find_slice: didn't find slice\n");
10583
10584 return -1;
10585 }
10586
10587 void basisu_transcoder::write_opaque_alpha_blocks(
10588 uint32_t num_blocks_x, uint32_t num_blocks_y,
10589 void* pOutput_blocks, block_format fmt,
10590 uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels)
10591 {
10592 // 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused
10593 // when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A
10594 BASISU_NOTE_UNUSED(num_blocks_y);
10595 BASISU_NOTE_UNUSED(pOutput_blocks);
10596 BASISU_NOTE_UNUSED(block_stride_in_bytes);
10597
10598 if (!output_row_pitch_in_blocks_or_pixels)
10599 output_row_pitch_in_blocks_or_pixels = num_blocks_x;
10600
10601 if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11))
10602 {
10603#if BASISD_SUPPORT_ETC2_EAC_A8
10604 eac_block blk;
10605 blk.m_base = 255;
10606 blk.m_multiplier = 1;
10607 blk.m_table = 13;
10608
10609 // Selectors are all 4's
10610 memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
10611
10612 for (uint32_t y = 0; y < num_blocks_y; y++)
10613 {
10614 uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
10615 for (uint32_t x = 0; x < num_blocks_x; x++)
10616 {
10617 memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
10618 dst_ofs += block_stride_in_bytes;
10619 }
10620 }
10621#endif
10622 }
10623 else if (fmt == block_format::cBC4)
10624 {
10625#if BASISD_SUPPORT_DXT5A
10626 dxt5a_block blk;
10627 blk.m_endpoints[0] = 255;
10628 blk.m_endpoints[1] = 255;
10629 memset(blk.m_selectors, 0, sizeof(blk.m_selectors));
10630
10631 for (uint32_t y = 0; y < num_blocks_y; y++)
10632 {
10633 uint32_t dst_ofs = y * output_row_pitch_in_blocks_or_pixels * block_stride_in_bytes;
10634 for (uint32_t x = 0; x < num_blocks_x; x++)
10635 {
10636 memcpy((uint8_t*)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
10637 dst_ofs += block_stride_in_bytes;
10638 }
10639 }
10640#endif
10641 }
10642 }
10643
10644 bool basisu_transcoder::transcode_image_level(
10645 const void* pData, uint32_t data_size,
10646 uint32_t image_index, uint32_t level_index,
10647 void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
10648 transcoder_texture_format fmt,
10649 uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const
10650 {
10651 const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt);
10652
10653 if (!m_ready_to_transcode)
10654 {
10655 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n");
10656 return false;
10657 }
10658
10659 //const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
10660
10661 if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
10662 {
10663 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
10664 // TODO: Not yet supported
10665 return false;
10666 }
10667
10668 if (!validate_header_quick(pData, data_size))
10669 {
10670 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: header validation failed\n");
10671 return false;
10672 }
10673
10674 const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData);
10675
10676 const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData);
10677
10678 const basis_slice_desc* pSlice_descs = reinterpret_cast<const basis_slice_desc*>(pDataU8 + pHeader->m_slice_desc_file_ofs);
10679
10680 const bool basis_file_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0;
10681
10682 int slice_index = find_first_slice_index(pData, data_size, image_index, level_index);
10683 if (slice_index < 0)
10684 {
10685 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n");
10686 // Unable to find the requested image/level
10687 return false;
10688 }
10689
10690 if ((fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices))
10691 {
10692 // Switch to PVRTC1 RGB if the input doesn't have alpha.
10693 fmt = transcoder_texture_format::cTFPVRTC1_4_RGB;
10694 }
10695
10696 if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S)
10697 {
10698 if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha)
10699 {
10700 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n");
10701
10702 // The first slice shouldn't have alpha data in a properly formed basis file
10703 return false;
10704 }
10705
10706 if (basis_file_has_alpha_slices)
10707 {
10708 // The alpha data should immediately follow the color data, and have the same resolution.
10709 if ((slice_index + 1U) >= pHeader->m_total_slices)
10710 {
10711 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n");
10712 // basis file is missing the alpha slice
10713 return false;
10714 }
10715
10716 // Basic sanity checks
10717 if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0)
10718 {
10719 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n");
10720 // This slice should have alpha data
10721 return false;
10722 }
10723
10724 if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y))
10725 {
10726 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n");
10727 // Alpha slice should have been the same res as the color slice
10728 return false;
10729 }
10730 }
10731 }
10732
10733 bool status = false;
10734
10735 const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y;
10736
10737 if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks))
10738 {
10739 // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves.
10740 // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8.
10741 // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory.
10742 memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
10743 }
10744
10745 if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
10746 {
10747 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
10748
10749 // Use the container independent image transcode method.
10750 status = m_lowlevel_uastc_decoder.transcode_image(fmt,
10751 pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
10752 (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
10753 pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
10754 decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
10755 }
10756 else
10757 {
10758 // ETC1S
10759 const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
10760 const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr;
10761
10762 assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0);
10763
10764 if (pAlpha_slice_desc)
10765 {
10766 // Basic sanity checks
10767 assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0);
10768 assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x);
10769 assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y);
10770 assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index);
10771 }
10772
10773 // Use the container independent image transcode method.
10774 status = m_lowlevel_etc1s_decoder.transcode_image(fmt,
10775 pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
10776 (const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
10777 pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
10778 (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U,
10779 decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
10780
10781 } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
10782
10783 if (!status)
10784 {
10785 BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n");
10786 }
10787 else
10788 {
10789 //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n");
10790 }
10791
10792 return status;
10793 }
10794
10795 uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt)
10796 {
10797 switch (fmt)
10798 {
10799 case transcoder_texture_format::cTFETC1_RGB:
10800 case transcoder_texture_format::cTFBC1_RGB:
10801 case transcoder_texture_format::cTFBC4_R:
10802 case transcoder_texture_format::cTFPVRTC1_4_RGB:
10803 case transcoder_texture_format::cTFPVRTC1_4_RGBA:
10804 case transcoder_texture_format::cTFATC_RGB:
10805 case transcoder_texture_format::cTFPVRTC2_4_RGB:
10806 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
10807 case transcoder_texture_format::cTFETC2_EAC_R11:
10808 return 8;
10809 case transcoder_texture_format::cTFBC7_RGBA:
10810 case transcoder_texture_format::cTFBC7_ALT:
10811 case transcoder_texture_format::cTFETC2_RGBA:
10812 case transcoder_texture_format::cTFBC3_RGBA:
10813 case transcoder_texture_format::cTFBC5_RG:
10814 case transcoder_texture_format::cTFASTC_4x4_RGBA:
10815 case transcoder_texture_format::cTFATC_RGBA:
10816 case transcoder_texture_format::cTFFXT1_RGB:
10817 case transcoder_texture_format::cTFETC2_EAC_RG11:
10818 return 16;
10819 case transcoder_texture_format::cTFRGBA32:
10820 return sizeof(uint32_t);
10821 case transcoder_texture_format::cTFRGB565:
10822 case transcoder_texture_format::cTFBGR565:
10823 case transcoder_texture_format::cTFRGBA4444:
10824 return sizeof(uint16_t);
10825 default:
10826 assert(0);
10827 BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
10828 break;
10829 }
10830 return 0;
10831 }
10832
10833 const char* basis_get_format_name(transcoder_texture_format fmt)
10834 {
10835 switch (fmt)
10836 {
10837 case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB";
10838 case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB";
10839 case transcoder_texture_format::cTFBC4_R: return "BC4_R";
10840 case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB";
10841 case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
10842 case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA";
10843 case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA";
10844 case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA";
10845 case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
10846 case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
10847 case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
10848 case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
10849 case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
10850 case transcoder_texture_format::cTFRGBA32: return "RGBA32";
10851 case transcoder_texture_format::cTFRGB565: return "RGB565";
10852 case transcoder_texture_format::cTFBGR565: return "BGR565";
10853 case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
10854 case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
10855 case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
10856 case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
10857 case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
10858 case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
10859 default:
10860 assert(0);
10861 BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
10862 break;
10863 }
10864 return "";
10865 }
10866
10867 const char* basis_get_block_format_name(block_format fmt)
10868 {
10869 switch (fmt)
10870 {
10871 case block_format::cETC1: return "ETC1";
10872 case block_format::cBC1: return "BC1";
10873 case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB";
10874 case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA";
10875 case block_format::cBC7: return "BC7";
10876 case block_format::cETC2_RGBA: return "ETC2_RGBA";
10877 case block_format::cBC3: return "BC3";
10878 case block_format::cASTC_4x4: return "ASTC_4x4";
10879 case block_format::cATC_RGB: return "ATC_RGB";
10880 case block_format::cRGBA32: return "RGBA32";
10881 case block_format::cRGB565: return "RGB565";
10882 case block_format::cBGR565: return "BGR565";
10883 case block_format::cRGBA4444: return "RGBA4444";
10884 case block_format::cUASTC_4x4: return "UASTC_4x4";
10885 case block_format::cFXT1_RGB: return "FXT1_RGB";
10886 case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
10887 case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
10888 case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11";
10889 case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11";
10890 default:
10891 assert(0);
10892 BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
10893 break;
10894 }
10895 return "";
10896 }
10897
10898 const char* basis_get_texture_type_name(basis_texture_type tex_type)
10899 {
10900 switch (tex_type)
10901 {
10902 case cBASISTexType2D: return "2D";
10903 case cBASISTexType2DArray: return "2D array";
10904 case cBASISTexTypeCubemapArray: return "cubemap array";
10905 case cBASISTexTypeVideoFrames: return "video";
10906 case cBASISTexTypeVolume: return "3D";
10907 default:
10908 assert(0);
10909 BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n");
10910 break;
10911 }
10912 return "";
10913 }
10914
10915 bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
10916 {
10917 switch (fmt)
10918 {
10919 case transcoder_texture_format::cTFETC2_RGBA:
10920 case transcoder_texture_format::cTFBC3_RGBA:
10921 case transcoder_texture_format::cTFASTC_4x4_RGBA:
10922 case transcoder_texture_format::cTFBC7_RGBA:
10923 case transcoder_texture_format::cTFBC7_ALT:
10924 case transcoder_texture_format::cTFPVRTC1_4_RGBA:
10925 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
10926 case transcoder_texture_format::cTFATC_RGBA:
10927 case transcoder_texture_format::cTFRGBA32:
10928 case transcoder_texture_format::cTFRGBA4444:
10929 return true;
10930 default:
10931 break;
10932 }
10933 return false;
10934 }
10935
10936 basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt)
10937 {
10938 switch (fmt)
10939 {
10940 case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1;
10941 case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1;
10942 case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4;
10943 case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB;
10944 case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA;
10945 case transcoder_texture_format::cTFBC7_RGBA: return basisu::texture_format::cBC7;
10946 case transcoder_texture_format::cTFBC7_ALT: return basisu::texture_format::cBC7;
10947 case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
10948 case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
10949 case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
10950 case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
10951 case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
10952 case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
10953 case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
10954 case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
10955 case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
10956 case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
10957 case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
10958 case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
10959 case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
10960 case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC;
10961 case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC;
10962 default:
10963 assert(0);
10964 BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
10965 break;
10966 }
10967 return basisu::texture_format::cInvalidTextureFormat;
10968 }
10969
10970 bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type)
10971 {
10972 switch (tex_type)
10973 {
10974 case transcoder_texture_format::cTFRGBA32:
10975 case transcoder_texture_format::cTFRGB565:
10976 case transcoder_texture_format::cTFBGR565:
10977 case transcoder_texture_format::cTFRGBA4444:
10978 return true;
10979 default:
10980 break;
10981 }
10982 return false;
10983 }
10984
10985 bool basis_block_format_is_uncompressed(block_format blk_fmt)
10986 {
10987 switch (blk_fmt)
10988 {
10989 case block_format::cRGB32:
10990 case block_format::cRGBA32:
10991 case block_format::cA32:
10992 case block_format::cRGB565:
10993 case block_format::cBGR565:
10994 case block_format::cRGBA4444:
10995 case block_format::cRGBA4444_COLOR:
10996 case block_format::cRGBA4444_ALPHA:
10997 case block_format::cRGBA4444_COLOR_OPAQUE:
10998 return true;
10999 default:
11000 break;
11001 }
11002 return false;
11003 }
11004
11005 uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt)
11006 {
11007 switch (fmt)
11008 {
11009 case transcoder_texture_format::cTFRGBA32:
11010 return sizeof(uint32_t);
11011 case transcoder_texture_format::cTFRGB565:
11012 case transcoder_texture_format::cTFBGR565:
11013 case transcoder_texture_format::cTFRGBA4444:
11014 return sizeof(uint16_t);
11015 default:
11016 break;
11017 }
11018 return 0;
11019 }
11020
11021 uint32_t basis_get_block_width(transcoder_texture_format tex_type)
11022 {
11023 switch (tex_type)
11024 {
11025 case transcoder_texture_format::cTFFXT1_RGB:
11026 return 8;
11027 default:
11028 break;
11029 }
11030 return 4;
11031 }
11032
11033 uint32_t basis_get_block_height(transcoder_texture_format tex_type)
11034 {
11035 BASISU_NOTE_UNUSED(tex_type);
11036 return 4;
11037 }
11038
11039 bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
11040 {
11041 if (fmt == basis_tex_format::cUASTC4x4)
11042 {
11043#if BASISD_SUPPORT_UASTC
11044 switch (tex_type)
11045 {
11046 // These niche formats aren't currently supported for UASTC - everything else is.
11047 case transcoder_texture_format::cTFPVRTC2_4_RGB:
11048 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
11049 case transcoder_texture_format::cTFATC_RGB:
11050 case transcoder_texture_format::cTFATC_RGBA:
11051 case transcoder_texture_format::cTFFXT1_RGB:
11052 return false;
11053 default:
11054 return true;
11055 }
11056#endif
11057 }
11058 else
11059 {
11060 switch (tex_type)
11061 {
11062 // ETC1 and uncompressed are always supported.
11063 case transcoder_texture_format::cTFETC1_RGB:
11064 case transcoder_texture_format::cTFRGBA32:
11065 case transcoder_texture_format::cTFRGB565:
11066 case transcoder_texture_format::cTFBGR565:
11067 case transcoder_texture_format::cTFRGBA4444:
11068 return true;
11069#if BASISD_SUPPORT_DXT1
11070 case transcoder_texture_format::cTFBC1_RGB:
11071 return true;
11072#endif
11073#if BASISD_SUPPORT_DXT5A
11074 case transcoder_texture_format::cTFBC4_R:
11075 case transcoder_texture_format::cTFBC5_RG:
11076 return true;
11077#endif
11078#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A
11079 case transcoder_texture_format::cTFBC3_RGBA:
11080 return true;
11081#endif
11082#if BASISD_SUPPORT_PVRTC1
11083 case transcoder_texture_format::cTFPVRTC1_4_RGB:
11084 case transcoder_texture_format::cTFPVRTC1_4_RGBA:
11085 return true;
11086#endif
11087#if BASISD_SUPPORT_BC7_MODE5
11088 case transcoder_texture_format::cTFBC7_RGBA:
11089 case transcoder_texture_format::cTFBC7_ALT:
11090 return true;
11091#endif
11092#if BASISD_SUPPORT_ETC2_EAC_A8
11093 case transcoder_texture_format::cTFETC2_RGBA:
11094 return true;
11095#endif
11096#if BASISD_SUPPORT_ASTC
11097 case transcoder_texture_format::cTFASTC_4x4_RGBA:
11098 return true;
11099#endif
11100#if BASISD_SUPPORT_ATC
11101 case transcoder_texture_format::cTFATC_RGB:
11102 case transcoder_texture_format::cTFATC_RGBA:
11103 return true;
11104#endif
11105#if BASISD_SUPPORT_FXT1
11106 case transcoder_texture_format::cTFFXT1_RGB:
11107 return true;
11108#endif
11109#if BASISD_SUPPORT_PVRTC2
11110 case transcoder_texture_format::cTFPVRTC2_4_RGB:
11111 case transcoder_texture_format::cTFPVRTC2_4_RGBA:
11112 return true;
11113#endif
11114#if BASISD_SUPPORT_ETC2_EAC_RG11
11115 case transcoder_texture_format::cTFETC2_EAC_R11:
11116 case transcoder_texture_format::cTFETC2_EAC_RG11:
11117 return true;
11118#endif
11119 default:
11120 break;
11121 }
11122 }
11123
11124 return false;
11125 }
11126
11127 // ------------------------------------------------------------------------------------------------------
11128 // UASTC
11129 // ------------------------------------------------------------------------------------------------------
11130
11131#if BASISD_SUPPORT_UASTC
	// Descriptor table for the two-subset partitions that, per its name, are common to ASTC and BC7.
	// The initializer lists 30 entries in ascending order of their first field.
	// NOTE(review): the fields look like { BC7 partition index, ASTC partition seed, invert flag } -
	// confirm against the declaration of astc_bc7_common_partition2_desc.
	const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] =
	{
		{ 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false },
		{ 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true },
		{ 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true },
		{ 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true },
		{ 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false },
		{ 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false },
		{ 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true },
		{ 33, 694, true }, { 52, 524, true }
	};
11143
	// Descriptor table that, per its name, pairs BC7 three-subset partitions with ASTC two-subset partitions.
	// The initializer lists 19 entries in ascending order of their second field; the third field is always in 0..5.
	// NOTE(review): the fields look like { BC7 partition index, ASTC partition seed, conversion index k } -
	// confirm against the declaration of bc73_astc2_common_partition_desc.
	const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] =
	{
		{ 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 },
		{ 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 },
		{ 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 },
		{ 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 },
		{ 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 }
	};
11152
	// Descriptor table for the three-subset partitions that, per its name, are common to ASTC and BC7.
	// The initializer lists 11 entries in ascending order of their first field; the third field is always in 0..5.
	// NOTE(review): the fields look like { BC7 partition index, ASTC partition seed, permutation table row } -
	// confirm against the declaration of astc_bc7_common_partition3_desc.
	const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] =
	{
		{ 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 },
		{ 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 },
		{ 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 }
	};
11159
11160 const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
11161
11162 const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } };
11163
// Collapses a 3-subset index p (0, 1 or 2) to a 2-subset index (0 or 1).
// k must be < 6: (k >> 1) picks which two of the three subsets are merged, and the low bit of k
// inverts the resulting index.
//   k >> 1 == 0: subsets 0 and 1 merge to 0, subset 2 becomes 1
//   k >> 1 == 1: subset 0 stays 0, subsets 1 and 2 merge to 1
//   k >> 1 == 2: subsets 0 and 2 merge to 0, subset 1 becomes 1
uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k)
{
	assert(k < 6);

	const uint32_t merge_rule = k >> 1;
	if (merge_rule == 0)
		p = (p > 1) ? 1 : 0;
	else if (merge_rule == 1)
		p = (p != 0) ? 1 : 0;
	else if (merge_rule == 2)
		p = ((p != 0) && (p != 2)) ? 1 : 0;

	// Odd k flips the two output subsets.
	return (k & 1) ? (1 - p) : p;
}
11192
// All-zero 16-entry pattern (every texel in subset 0).
static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

// Per-texel subset index (0 or 1) for each common ASTC/BC7 2-subset partition, 16 entries per pattern.
// Rows are in the same order as g_astc_bc7_common_partitions2; rows whose descriptor has the third field set
// hold the BC7 pattern with the two subsets swapped.
const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] =
{
	{ 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 },
	{ 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 },
	{ 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 },
	{ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 },
	{ 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 },
	{ 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 },
	{ 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 },
	{ 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 }
};

// Per-texel subset index (0, 1 or 2) for each common ASTC/BC7 3-subset partition, 16 entries per pattern.
// Rows are in the same order as g_astc_bc7_common_partitions3.
const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] =
{
	{ 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 },
	{ 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 },
	{ 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 }
};

// Per-texel 2-subset index for each entry of g_bc7_3_astc2_common_partitions (BC7 3-subset partitions collapsed
// to two subsets), 16 entries per pattern.
const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] =
{
	{ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 },
	{ 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 },
	{ 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 },
	{ 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 },
	{ 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }
};

// Anchor texels for g_astc_bc7_patterns2: entry [i][s] is the index of the first texel of pattern i that belongs to
// subset s (e.g. pattern 8 is all 0 until texel 11, so its anchors are { 0, 11 }).
// Rows are declared with 3 entries but only 2 are initialized; the third is zero.
const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] =
{
	{ 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 },
	{ 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 },
	{ 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 },
	{ 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 }
};

// Anchor texels for g_astc_bc7_patterns3: entry [i][s] is the index of the first texel of pattern i in subset s
// (e.g. pattern 0 -> { 0, 8, 10 }).
const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] =
{
	{ 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 }
};

// Anchor texels for g_bc7_3_astc2_patterns2, same layout as g_astc_bc7_pattern2_anchors (third entry of each row is zero).
const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] =
{
	{ 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 }
};
11240
// Prefix code for the UASTC mode index: { code value, code length in bits }, one row per mode plus one spare.
// Codes are matched against the low bits of the block (least-significant bit first); see g_uastc_huff_modes below.
const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] =
{
	{ 0x1, 4 },
	{ 0x35, 6 },
	{ 0x1D, 5 },
	{ 0x3, 5 },

	{ 0x13, 5 },
	{ 0xB, 5 },
	{ 0x1B, 5 },
	{ 0x7, 5 },

	{ 0x17, 5 },
	{ 0xF, 5 },
	{ 0x2, 3 },
	{ 0x0, 2 },

	{ 0x6, 3 },
	{ 0x1F, 5 },
	{ 0xD, 5 },
	{ 0x5, 7 },

	{ 0x15, 6 },
	{ 0x25, 6 },
	{ 0x9, 4 },
	{ 0x45, 7 } // future expansion
};

// If g_uastc_mode_huff_codes[] changes this table must be updated!
// Decode table: indexed by 7 bits (the longest code length above), yields the mode whose code matches the low bits
// of the index (e.g. index 0x35 -> mode 1, every index with low bits 00 -> mode 11).
static const uint8_t g_uastc_huff_modes[128] =
{
	11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,
	19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13
};

// Per-mode properties, each indexed by UASTC mode number (0 .. TOTAL_UASTC_MODES - 1).
// Mode 8 has 0 in the weight/endpoint/subset/plane/CEM tables.
// NOTE(review): weight/endpoint "ranges" look like indices into g_astc_bise_range_table, and "cem" like the ASTC
// color endpoint mode - confirm against the UASTC specification.
const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 };
const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 };
const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 };
const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 };
const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 };
const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 };
const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 };
const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 };
const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 };
const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 };
11289
// bits, trits, quints
// One row per ASTC BISE range. The number of levels in a range is (1 + 2 * trits + 4 * quints) << bits
// (see astc_get_levels()). The trailing comments give the value span and the range index.
const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] =
{
	{ 1, 0, 0 }, // 0-1 0
	{ 0, 1, 0 }, // 0-2 1
	{ 2, 0, 0 }, // 0-3 2
	{ 0, 0, 1 }, // 0-4 3

	{ 1, 1, 0 }, // 0-5 4
	{ 3, 0, 0 }, // 0-7 5
	{ 1, 0, 1 }, // 0-9 6
	{ 2, 1, 0 }, // 0-11 7

	{ 4, 0, 0 }, // 0-15 8
	{ 2, 0, 1 }, // 0-19 9
	{ 3, 1, 0 }, // 0-23 10
	{ 5, 0, 0 }, // 0-31 11

	{ 3, 0, 1 }, // 0-39 12
	{ 4, 1, 0 }, // 0-47 13
	{ 6, 0, 0 }, // 0-63 14
	{ 4, 0, 1 }, // 0-79 15

	{ 5, 1, 0 }, // 0-95 16
	{ 7, 0, 0 }, // 0-127 17
	{ 5, 0, 1 }, // 0-159 18
	{ 6, 1, 0 }, // 0-191 19

	{ 8, 0, 0 }, // 0-255 20
};
11320
11321 int astc_get_levels(int range)
11322 {
11323 assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES);
11324 return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0];
11325 }
11326
// g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[]
// No initializer here; NOTE(review): presumably filled in by an init routine elsewhere in the file - confirm.
astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index]

// Taken right from the ASTC spec.
// Endpoint unquantization parameters, one row per BISE range, consumed by unquant_astc_endpoint():
//   m_pB_str - 9-character template for the B term, most significant bit first. '0' is a constant zero bit,
//              a letter selects bit (letter - 'a') of the packed bits.
//   m_c      - the multiplier C applied to the trit/quint value.
// Rows holding { "", 0 } are either bit-only ranges (which do not use these parameters) or ranges that
// astc_is_valid_endpoint_range() rejects (ranges 1 and 3).
static struct
{
	const char* m_pB_str;
	uint32_t m_c;
} g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] =
{
	{ "", 0 },
	{ "", 0 },
	{ "", 0 },
	{ "", 0 },
	{ "000000000", 204, }, // 0-5
	{ "", 0 },
	{ "000000000", 113, }, // 0-9
	{ "b000b0bb0", 93 }, // 0-11
	{ "", 0 },
	{ "b0000bb00", 54 }, // 0-19
	{ "cb000cbcb", 44 }, // 0-23
	{ "", 0 },
	{ "cb0000cbc", 26 }, // 0-39
	{ "dcb000dcb", 22 }, // 0-47
	{ "", 0 },
	{ "dcb0000dc", 13 }, // 0-79
	{ "edcb000ed", 11 }, // 0-95
	{ "", 0 },
	{ "edcb0000e", 6 }, // 0-159
	{ "fedcb000f", 5 }, // 0-191
	{ "", 0 },
};
11359
11360 bool astc_is_valid_endpoint_range(uint32_t range)
11361 {
11362 if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0))
11363 return true;
11364
11365 return g_astc_endpoint_unquant_params[range].m_c != 0;
11366 }
11367
11368 uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range)
11369 {
11370 assert(range < BC7ENC_TOTAL_ASTC_RANGES);
11371
11372 const uint32_t bits = g_astc_bise_range_table[range][0];
11373 const uint32_t trits = g_astc_bise_range_table[range][1];
11374 const uint32_t quints = g_astc_bise_range_table[range][2];
11375
11376 uint32_t val = 0;
11377 if ((!trits) && (!quints))
11378 {
11379 assert(!packed_trits && !packed_quints);
11380
11381 int bits_left = 8;
11382 while (bits_left > 0)
11383 {
11384 uint32_t v = packed_bits;
11385
11386 int n = basisu::minimumi(bits_left, bits);
11387 if (n < (int)bits)
11388 v >>= (bits - n);
11389
11390 assert(v < (1U << n));
11391
11392 val |= (v << (bits_left - n));
11393 bits_left -= n;
11394 }
11395 }
11396 else
11397 {
11398 const uint32_t A = (packed_bits & 1) ? 511 : 0;
11399 const uint32_t C = g_astc_endpoint_unquant_params[range].m_c;
11400 const uint32_t D = trits ? packed_trits : packed_quints;
11401
11402 assert(C);
11403
11404 uint32_t B = 0;
11405 for (uint32_t i = 0; i < 9; i++)
11406 {
11407 B <<= 1;
11408
11409 char c = g_astc_endpoint_unquant_params[range].m_pB_str[i];
11410 if (c != '0')
11411 {
11412 c -= 'a';
11413 B |= ((packed_bits >> c) & 1);
11414 }
11415 }
11416
11417 val = D * C + B;
11418 val = val ^ A;
11419 val = (A & 0x80) | (val >> 2);
11420 }
11421
11422 return val;
11423 }
11424
11425 uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range)
11426 {
11427 assert(range < BC7ENC_TOTAL_ASTC_RANGES);
11428 assert(packed_val < (uint32_t)astc_get_levels(range));
11429
11430 const uint32_t bits = g_astc_bise_range_table[range][0];
11431 const uint32_t trits = g_astc_bise_range_table[range][1];
11432 const uint32_t quints = g_astc_bise_range_table[range][2];
11433
11434 if ((!trits) && (!quints))
11435 return unquant_astc_endpoint(packed_val, 0, 0, range);
11436 else if (trits)
11437 return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range);
11438 else
11439 return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range);
11440 }
11441
// BC7 - Various BC7 tables/helpers
// Interpolation weights on a 0..64 scale. The array size is the number of selector levels
// (1-, 2-, 3- and 4-bit indices for BC7; 16, 32 and 3 levels for ASTC).
const uint32_t g_bc7_weights1[2] = { 0, 64 };
const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 };
const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
const uint32_t g_astc_weights4[16] = { 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 };
const uint32_t g_astc_weights5[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 };
const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 };

// Partition table for single-subset modes: all 16 texels belong to subset 0.
const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
11452
// BC7 2-subset partition table: 64 patterns of 16 texels each.
// Entry [partition * 16 + texel] is the subset index (0 or 1) of that texel; encode_bc7_block() indexes it this way.
const uint8_t g_bc7_partition2[64 * 16] =
{
	0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
	0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
	0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
	0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
	0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
	0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
	0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
	0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
};
11464
// BC7 3-subset partition table: 64 patterns of 16 texels each.
// Entry [partition * 16 + texel] is the subset index (0, 1 or 2) of that texel; encode_bc7_block() indexes it this way.
const uint8_t g_bc7_partition3[64 * 16] =
{
	0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
	0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
	0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
	0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
	0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
	0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
	0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
	0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
};
11476
// Anchor texel index of subset 1 for each of the 64 two-subset partitions (subset 0's anchor is always texel 0).
// encode_bc7_block() indexes this by partition number.
const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 };

// Anchor texel index of subset 1 for each three-subset partition.
const uint8_t g_bc7_table_anchor_index_third_subset_1[64] =
{
	3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3
};

// Anchor texel index of subset 2 for each three-subset partition.
const uint8_t g_bc7_table_anchor_index_third_subset_2[64] =
{
	15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8
};

// Per-mode BC7 properties, each indexed by BC7 mode number (0-7).
const uint8_t g_bc7_num_subsets[8] = { 3, 2, 3, 2, 1, 1, 1, 2 };
const uint8_t g_bc7_partition_bits[8] = { 4, 6, 6, 6, 0, 0, 0, 6 };
const uint8_t g_bc7_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };

const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 };
const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 };
// Endpoint component widths in bits, as written by encode_bc7_block().
const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 };
const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 };

const uint8_t g_bc7_alpha_index_bitcount[8] = { 0, 0, 0, 0, 3, 2, 4, 2 };

// No initializers here; NOTE(review): presumably filled in by an init routine elsewhere in the file - confirm.
endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit]
endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c]
11502
11503 static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs)
11504 {
11505 assert((num_bits <= 32) && (val < (1ULL << num_bits)));
11506 while (num_bits)
11507 {
11508 const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits);
11509 pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7));
11510 val >>= n;
11511 num_bits -= n;
11512 *pCur_ofs += n;
11513 }
11514 assert(*pCur_ofs <= 128);
11515 }
11516
// TODO: Optimize this.
// Packs the BC7 block described by pResults into the 128-bit block at pBlock (BC7ENC_BLOCK_SIZE bytes are cleared
// and then filled).
// The inputs are copied locally and normalized first: for every subset whose anchor texel has the top selector bit
// set, that subset's selectors are inverted and its endpoints (and non-shared p-bits) are swapped, so the anchor's
// top bit is always 0 and can be omitted from the bitstream.
// Fields are then written in this order: mode, rotation (modes 4/5), index selector (mode 4), partition,
// endpoints (component-major), p-bits, primary indices, and secondary indices for modes with separate alpha selectors.
void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults)
{
	const uint32_t best_mode = pResults->m_mode;

	const uint32_t total_subsets = g_bc7_num_subsets[best_mode];
	const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode];
	//const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode];
	//const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1;

	// Per-texel subset assignment for the chosen partition.
	const uint8_t* pPartition;
	if (total_subsets == 1)
		pPartition = &g_bc7_partition1[0];
	else if (total_subsets == 2)
		pPartition = &g_bc7_partition2[pResults->m_partition * 16];
	else
		pPartition = &g_bc7_partition3[pResults->m_partition * 16];

	// Work on local copies so the caller's results are not modified by the anchor normalization below.
	uint8_t color_selectors[16];
	memcpy(color_selectors, pResults->m_selectors, 16);

	uint8_t alpha_selectors[16];
	memcpy(alpha_selectors, pResults->m_alpha_selectors, 16);

	color_quad_u8 low[3], high[3];
	memcpy(low, pResults->m_low, sizeof(low));
	memcpy(high, pResults->m_high, sizeof(high));

	uint32_t pbits[3][2];
	memcpy(pbits, pResults->m_pbits, sizeof(pbits));

	// Anchor texel index of each subset; -1 for subsets this mode does not use.
	int anchor[3] = { -1, -1, -1 };

	for (uint32_t k = 0; k < total_subsets; k++)
	{
		// Subset 0 is always anchored at texel 0; other subsets look their anchor up by partition.
		uint32_t anchor_index = 0;
		if (k)
		{
			if ((total_subsets == 3) && (k == 1))
				anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition];
			else if ((total_subsets == 3) && (k == 2))
				anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition];
			else
				anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition];
		}

		anchor[k] = anchor_index;

		const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector);
		const uint32_t num_color_indices = 1 << color_index_bits;

		// Anchor selector has its top bit set: invert this subset's selectors and swap its endpoints.
		if (color_selectors[anchor_index] & (num_color_indices >> 1))
		{
			for (uint32_t i = 0; i < 16; i++)
				if (pPartition[i] == k)
					color_selectors[i] = (uint8_t)((num_color_indices - 1) - color_selectors[i]);

			if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
			{
				// Alpha has its own selectors in these modes, so only the RGB endpoints are swapped here.
				for (uint32_t q = 0; q < 3; q++)
				{
					uint8_t t = low[k].m_c[q];
					low[k].m_c[q] = high[k].m_c[q];
					high[k].m_c[q] = t;
				}
			}
			else
			{
				color_quad_u8 tmp = low[k];
				low[k] = high[k];
				high[k] = tmp;
			}

			// Per-endpoint p-bits follow their endpoints; a shared p-bit stays where it is.
			if (!g_bc7_mode_has_shared_p_bits[best_mode])
			{
				uint32_t t = pbits[k][0];
				pbits[k][0] = pbits[k][1];
				pbits[k][1] = t;
			}
		}

		if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
		{
			const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);
			const uint32_t num_alpha_indices = 1 << alpha_index_bits;

			// Same normalization for the independent alpha selectors and alpha endpoints.
			if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1))
			{
				for (uint32_t i = 0; i < 16; i++)
					if (pPartition[i] == k)
						alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]);

				uint8_t t = low[k].m_c[3];
				low[k].m_c[3] = high[k].m_c[3];
				high[k].m_c[3] = t;
			}
		}
	}

	// bc7_set_block_bits() ORs bits in, so the block must start out cleared.
	uint8_t* pBlock_bytes = (uint8_t*)(pBlock);
	memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE);

	// Mode field: best_mode zero bits followed by a single 1 bit.
	uint32_t cur_bit_ofs = 0;
	bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs);

	if ((best_mode == 4) || (best_mode == 5))
		bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs);

	if (best_mode == 4)
		bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs);

	if (total_partitions > 1)
		bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs);

	// Endpoints are stored component-major: every subset's low/high for one component, then the next component.
	// Modes 0-3 store no alpha endpoints.
	const uint32_t total_comps = (best_mode >= 4) ? 4 : 3;
	for (uint32_t comp = 0; comp < total_comps; comp++)
	{
		for (uint32_t subset = 0; subset < total_subsets; subset++)
		{
			bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
			bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs);
		}
	}

	if (g_bc7_mode_has_p_bits[best_mode])
	{
		for (uint32_t subset = 0; subset < total_subsets; subset++)
		{
			// One p-bit per subset when shared, otherwise one per endpoint.
			bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs);
			if (!g_bc7_mode_has_shared_p_bits[best_mode])
				bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs);
		}
	}

	// Primary index set. When m_index_selector is set the alpha selectors are written here instead of the color ones.
	for (uint32_t y = 0; y < 4; y++)
	{
		for (uint32_t x = 0; x < 4; x++)
		{
			int idx = x + y * 4;

			uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector);

			// Anchor texels are stored with one bit less (their top bit is known to be 0).
			if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
				n--;

			bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs);
		}
	}

	// Secondary index set, only for modes with separate alpha selectors; holds whichever selectors were not written above.
	if (get_bc7_mode_has_seperate_alpha_selectors(best_mode))
	{
		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				int idx = x + y * 4;

				uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector);

				if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2]))
					n--;

				bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs);
			}
		}
	}

	// Every mode must fill the block exactly.
	assert(cur_bit_ofs == 128);
}
11686
11687 // ASTC
// ORs a code of at most 9 bits into the byte buffer at pDst, starting at bit position bit_offset
// (least-significant bit first), then advances bit_offset by codesize. A codesize of 0 is a no-op.
// The destination bits must already be zero. At most two consecutive bytes are touched.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize)
{
	assert(codesize <= 9);
	if (!codesize)
		return;

	uint8_t* const pBytes = reinterpret_cast<uint8_t*>(pDst);

	const uint32_t shift = bit_offset & 7;
	const uint32_t byte_index = bit_offset >> 3;
	const uint32_t shifted = code << shift;

	pBytes[byte_index] |= (uint8_t)shifted;

	// Spill the remaining high bits into the next byte if the code crosses a byte boundary.
	if (codesize > (8 - shift))
		pBytes[byte_index + 1] |= (uint8_t)(shifted >> 8);

	bit_offset += codesize;
}
11707
11708 void pack_astc_solid_block(void* pDst_block, const color32& color)
11709 {
11710 uint32_t r = color[0], g = color[1], b = color[2];
11711 uint32_t a = color[3];
11712
11713 uint32_t* pOutput = static_cast<uint32_t*>(pDst_block);
11714 uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block);
11715
11716 pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff;
11717
11718 pOutput[1] = 0xffffffff;
11719 pOutput[2] = 0;
11720 pOutput[3] = 0;
11721
11722 int bit_pos = 64;
11723 astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, r | (r << 8), 16);
11724 astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, g | (g << 8), 16);
11725 astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, b | (b << 8), 16);
11726 astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, a | (a << 8), 16);
11727 }
11728
11729 // See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation
11730#ifdef _DEBUG
// 32-bit integer mixing function used by the ASTC partition pattern generator.
// All arithmetic is modulo 2^32.
static inline uint32_t astc_hash52(uint32_t v)
{
	uint32_t h = v;

	h ^= h >> 15;
	h -= h << 17;
	h += h << 7;
	h += h << 4;

	h ^= h >> 5;
	h += h << 16;
	h ^= h >> 7;
	h ^= h >> 3;

	h ^= h << 6;
	h ^= h >> 17;

	return h;
}
11739
11740 int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
11741 {
11742 if (small_block)
11743 {
11744 x <<= 1; y <<= 1; z <<= 1;
11745 }
11746 seed += (partitioncount - 1) * 1024;
11747 uint32_t rnum = astc_hash52(seed);
11748 uint8_t seed1 = rnum & 0xF;
11749 uint8_t seed2 = (rnum >> 4) & 0xF;
11750 uint8_t seed3 = (rnum >> 8) & 0xF;
11751 uint8_t seed4 = (rnum >> 12) & 0xF;
11752 uint8_t seed5 = (rnum >> 16) & 0xF;
11753 uint8_t seed6 = (rnum >> 20) & 0xF;
11754 uint8_t seed7 = (rnum >> 24) & 0xF;
11755 uint8_t seed8 = (rnum >> 28) & 0xF;
11756 uint8_t seed9 = (rnum >> 18) & 0xF;
11757 uint8_t seed10 = (rnum >> 22) & 0xF;
11758 uint8_t seed11 = (rnum >> 26) & 0xF;
11759 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
11760
11761 seed1 *= seed1; seed2 *= seed2;
11762 seed3 *= seed3; seed4 *= seed4;
11763 seed5 *= seed5; seed6 *= seed6;
11764 seed7 *= seed7; seed8 *= seed8;
11765 seed9 *= seed9; seed10 *= seed10;
11766 seed11 *= seed11; seed12 *= seed12;
11767
11768 int sh1, sh2, sh3;
11769 if (seed & 1)
11770 {
11771 sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5);
11772 }
11773 else
11774 {
11775 sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5);
11776 }
11777 sh3 = (seed & 0x10) ? sh1 : sh2;
11778
11779 seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
11780 seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
11781 seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
11782
11783 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
11784 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
11785 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
11786 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
11787
11788 a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
11789
11790 if (partitioncount < 4) d = 0;
11791 if (partitioncount < 3) c = 0;
11792
11793 if (a >= b && a >= c && a >= d)
11794 return 0;
11795 else if (b >= c && b >= d)
11796 return 1;
11797 else if (c >= d)
11798 return 2;
11799 else
11800 return 3;
11801 }
11802#endif
11803
// Maps three quints packed as q0 + 5 * q1 + 25 * q2 (0..124) to the 7-bit code T that astc_encode_quints()
// interleaves with the plain bits.
static const uint8_t g_astc_quint_encode[125] =
{
	0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
	58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
	105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
	126, 127, 94, 95, 62, 39, 47, 55, 63, 31
};
11811
11812 // Encodes 3 values to output, usable for any range that uses quints and bits
11813 static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
11814 {
11815 // First extract the trits and the bits from the 5 input values
11816 int quints = 0, bits[3];
11817 const uint32_t bit_mask = (1 << n) - 1;
11818 for (int i = 0; i < 3; i++)
11819 {
11820 static const int s_muls[3] = { 1, 5, 25 };
11821
11822 const int t = pValues[i] >> n;
11823
11824 quints += t * s_muls[i];
11825 bits[i] = pValues[i] & bit_mask;
11826 }
11827
11828 // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
11829 // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
11830
11831 assert(quints < 125);
11832 const int T = g_astc_quint_encode[quints];
11833
11834 // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
11835 astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
11836 (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
11837 }
11838
11839 // Packs values using ASTC's BISE to output buffer.
11840 static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range)
11841 {
11842 uint32_t temp[5] = { 0, 0, 0, 0, 0 };
11843
11844 const int num_bits = g_astc_bise_range_table[range][0];
11845
11846 int group_size = 0;
11847 if (g_astc_bise_range_table[range][1])
11848 group_size = 5;
11849 else if (g_astc_bise_range_table[range][2])
11850 group_size = 3;
11851
11852 if (group_size)
11853 {
11854 // Range has trits or quints - pack each group of 5 or 3 values
11855 const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
11856
11857 for (int group_index = 0; group_index < total_groups; group_index++)
11858 {
11859 uint8_t vals[5] = { 0, 0, 0, 0, 0 };
11860
11861 const int limit = basisu::minimum(group_size, num_vals - group_index * group_size);
11862 for (int i = 0; i < limit; i++)
11863 vals[i] = pSrc_vals[group_index * group_size + i];
11864
11865 if (group_size == 5)
11866 astc_encode_trits(temp, vals, bit_pos, num_bits);
11867 else
11868 astc_encode_quints(temp, vals, bit_pos, num_bits);
11869 }
11870 }
11871 else
11872 {
11873 for (int i = 0; i < num_vals; i++)
11874 astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
11875 }
11876
11877 pDst[0] |= temp[0]; pDst[1] |= temp[1];
11878 pDst[2] |= temp[2]; pDst[3] |= temp[3];
11879 }
11880
	// Bit widths of the ASTC block header fields written by pack_astc_block().
	// Block mode field; pack_astc_block() starts writing the remaining fields at this bit offset.
	const uint32_t ASTC_BLOCK_MODE_BITS = 11;
	// Partition count field (stored as subsets - 1).
	const uint32_t ASTC_PART_BITS = 2;
	// Color endpoint mode (CEM) field. Multi-subset blocks write ASTC_CEM_BITS + 2 bits instead.
	const uint32_t ASTC_CEM_BITS = 4;
	// Partition index (seed) field, only written for multi-subset blocks.
	const uint32_t ASTC_PARTITION_INDEX_BITS = 10;
	// Color component selector field, only written for dual plane blocks.
	const uint32_t ASTC_CCS_BITS = 2;

	// ASTC block mode value for each UASTC mode, indexed by UASTC mode. pack_astc_block() stores it in the first two bytes of the block.
	// NOTE(review): entry 8 is 0 - presumably the solid color mode, which would not be packed through pack_astc_block(); confirm.
	const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 };
11888
11889 bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode)
11890 {
11891 assert(uastc_mode < TOTAL_UASTC_MODES);
11892 uint8_t* pDst_bytes = reinterpret_cast<uint8_t*>(pDst);
11893
11894 const int total_weights = pBlock->m_dual_plane ? 32 : 16;
11895
11896 // Set mode bits - see Table 146-147
11897 uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode];
11898 pDst_bytes[0] = (uint8_t)mode;
11899 pDst_bytes[1] = (uint8_t)(mode >> 8);
11900
11901 memset(pDst_bytes + 2, 0, 16 - 2);
11902
11903 int bit_pos = ASTC_BLOCK_MODE_BITS;
11904
11905 // We only support 1-5 bit weight indices
11906 assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]);
11907 const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0];
11908
11909 // See table 143 - PART
11910 astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS);
11911
11912 if (pBlock->m_subsets == 1)
11913 astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS);
11914 else
11915 {
11916 // See table 145
11917 astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS);
11918
11919 // Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM
11920 astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2);
11921 }
11922
11923 if (pBlock->m_dual_plane)
11924 {
11925 const int total_weight_bits = total_weights * bits_per_weight;
11926
11927 // See Illegal Encodings 23.24
11928 // https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings
11929 assert((total_weight_bits >= 24) && (total_weight_bits <= 96));
11930
11931 int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS;
11932 astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS);
11933 }
11934
11935 const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets;
11936 assert(num_cem_pairs <= 9);
11937
11938 astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]);
11939
11940 // Write the weight bits in reverse bit order.
11941 switch (bits_per_weight)
11942 {
11943 case 1:
11944 {
11945 const uint32_t N = 1;
11946 for (int i = 0; i < total_weights; i++)
11947 {
11948 const uint32_t ofs = 128 - N - i;
11949 assert((ofs >> 3) < 16);
11950 pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7));
11951 }
11952 break;
11953 }
11954 case 2:
11955 {
11956 const uint32_t N = 2;
11957 for (int i = 0; i < total_weights; i++)
11958 {
11959 static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 };
11960 const uint32_t ofs = 128 - N - (i * N);
11961 assert((ofs >> 3) < 16);
11962 pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7));
11963 }
11964 break;
11965 }
11966 case 3:
11967 {
11968 const uint32_t N = 3;
11969 for (int i = 0; i < total_weights; i++)
11970 {
11971 static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
11972
11973 const uint32_t ofs = 128 - N - (i * N);
11974 const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7);
11975
11976 uint32_t index = ofs >> 3;
11977 assert(index < 16);
11978 pDst_bytes[index++] |= rev & 0xFF;
11979 if (index < 16)
11980 pDst_bytes[index++] |= (rev >> 8);
11981 }
11982 break;
11983 }
11984 case 4:
11985 {
11986 const uint32_t N = 4;
11987 for (int i = 0; i < total_weights; i++)
11988 {
11989 static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
11990 const int ofs = 128 - N - (i * N);
11991 assert(ofs >= 0 && (ofs >> 3) < 16);
11992 pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7));
11993 }
11994 break;
11995 }
11996 case 5:
11997 {
11998 const uint32_t N = 5;
11999 for (int i = 0; i < total_weights; i++)
12000 {
12001 static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 };
12002
12003 const uint32_t ofs = 128 - N - (i * N);
12004 const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7);
12005
12006 uint32_t index = ofs >> 3;
12007 assert(index < 16);
12008 pDst_bytes[index++] |= rev & 0xFF;
12009 if (index < 16)
12010 pDst_bytes[index++] |= (rev >> 8);
12011 }
12012
12013 break;
12014 }
12015 default:
12016 assert(0);
12017 break;
12018 }
12019
12020 return true;
12021 }
12022
12023 const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern)
12024 {
12025 const uint8_t* pSubset_anchor_indices = g_zero_pattern;
12026 pPartition_pattern = g_zero_pattern;
12027
12028 if (subsets >= 2)
12029 {
12030 if (subsets == 3)
12031 {
12032 pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
12033 pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0];
12034 }
12035 else if (mode == 7)
12036 {
12037 pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
12038 pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0];
12039 }
12040 else
12041 {
12042 pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
12043 pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0];
12044 }
12045 }
12046
12047 return pSubset_anchor_indices;
12048 }
12049
12050 static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset)
12051 {
12052 uint32_t byte_bits = pBuf[bit_offset >> 3] >> (bit_offset & 7);
12053 bit_offset += 1;
12054 return byte_bits & 1;
12055 }
12056
12057 static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
12058 {
12059 assert(codesize <= 9);
12060 if (!codesize)
12061 return 0;
12062
12063 if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
12064 {
12065 const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
12066
12067 uint32_t byte_bit_offset = bit_offset & 7U;
12068
12069 uint32_t bits = pBytes[0] >> byte_bit_offset;
12070 uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
12071
12072 uint32_t bits_remaining = codesize - bits_read;
12073 if (bits_remaining)
12074 bits |= ((uint32_t)pBytes[1]) << bits_read;
12075
12076 bit_offset += codesize;
12077
12078 return bits & ((1U << codesize) - 1U);
12079 }
12080
12081 uint32_t byte_bit_offset = bit_offset & 7U;
12082 const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]);
12083 bit_offset += codesize;
12084 return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
12085 }
12086
12087 inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
12088 {
12089 assert(codesize <= 64U);
12090 uint64_t bits = 0;
12091 uint32_t total_bits = 0;
12092
12093 while (total_bits < codesize)
12094 {
12095 uint32_t byte_bit_offset = bit_offset & 7U;
12096 uint32_t bits_to_read = basisu::minimum<int>(codesize - total_bits, 8U - byte_bit_offset);
12097
12098 uint32_t byte_bits = pBuf[bit_offset >> 3U] >> byte_bit_offset;
12099 byte_bits &= ((1U << bits_to_read) - 1U);
12100
12101 bits |= ((uint64_t)(byte_bits) << total_bits);
12102
12103 total_bits += bits_to_read;
12104 bit_offset += bits_to_read;
12105 }
12106
12107 return bits;
12108 }
12109
12110 static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
12111 {
12112 assert(codesize <= 9);
12113 if (!codesize)
12114 return 0;
12115 assert(bit_offset < 112);
12116
12117 if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
12118 {
12119 const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
12120
12121 uint32_t byte_bit_offset = bit_offset & 7U;
12122
12123 uint32_t bits = pBytes[0] >> byte_bit_offset;
12124 uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
12125
12126 uint32_t bits_remaining = codesize - bits_read;
12127 if (bits_remaining)
12128 bits |= ((uint32_t)pBytes[1]) << bits_read;
12129
12130 bit_offset += codesize;
12131
12132 return bits & ((1U << codesize) - 1U);
12133 }
12134
12135 uint32_t byte_bit_offset = bit_offset & 7U;
12136 const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
12137 bit_offset += codesize;
12138 return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
12139 }
12140
	// Unpacks the bit fields of a 128-bit UASTC block into its logical form.
	//  blk                 - the packed UASTC block.
	//  unpacked            - receives the mode, optional transcoder hints, solid color or ASTC-style description
	//                        (subsets, partition seed, CEM, CCS, endpoints, weights). It is not cleared first, so
	//                        only the fields relevant to the decoded mode are written.
	//  blue_contract_check - if true (and the mode has at least 3 components), endpoints/weights are reordered so
	//                        that the low endpoint sum never exceeds the high one (see the end of the function).
	//  read_hints          - if true the transcoder hint fields are decoded, otherwise their bits are skipped.
	// Returns false if the mode code or the common pattern index stored in the block is out of range.
	bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
	{
		//memset(&unpacked, 0, sizeof(unpacked));

		// Disabled one-off generator that printed the contents of a 128 entry mode lookup table.
#if 0
		uint8_t table[128];
		memset(table, 0xFF, sizeof(table));

		{
			for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++)
			{
				const uint32_t code = g_uastc_mode_huff_codes[mode][0];
				const uint32_t codesize = g_uastc_mode_huff_codes[mode][1];

				table[code] = mode;

				uint32_t bits_left = 7 - codesize;
				for (uint32_t i = 0; i < (1 << bits_left); i++)
					table[code | (i << codesize)] = mode;
			}

			for (uint32_t i = 0; i < 128; i++)
				printf("%u,", table[i]);
			exit(0);
		}
#endif

		// The low 7 bits of the first byte select the mode through a lookup table.
		const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127];
		if (mode >= (int)TOTAL_UASTC_MODES)
			return false;

		unpacked.m_mode = mode;

		// Continue reading right after the variable length mode code.
		uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];

		if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
		{
			// Solid color blocks: 8 bits each of R, G, B, A, optionally followed by ETC1 hints.
			unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
			unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
			unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
			unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);

			if (read_hints)
			{
				unpacked.m_etc1_flip = false;
				unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
				unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
				unpacked.m_etc1_inten1 = 0;
				unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
				unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
				unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
				unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
				unpacked.m_etc1_bias = 0;
				unpacked.m_etc2_hints = 0;
			}

			return true;
		}

		// Transcoder hints. Which hint fields exist depends on the mode; absent ones are reported as false/0.
		if (read_hints)
		{
			if (g_uastc_mode_has_bc1_hint0[mode])
				unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0;
			else
				unpacked.m_bc1_hint0 = false;

			if (g_uastc_mode_has_bc1_hint1[mode])
				unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0;
			else
				unpacked.m_bc1_hint1 = false;

			unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0;
			unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0;
			unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);
			unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3);

			if (g_uastc_mode_has_etc1_bias[mode])
				unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
			else
				unpacked.m_etc1_bias = 0;

			if (g_uastc_mode_has_alpha[mode])
			{
				unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8);
				//assert(unpacked.m_etc2_hints > 0);
			}
			else
				unpacked.m_etc2_hints = 0;
		}
		else
			// Hints not wanted: skip over all of this mode's hint bits in one step.
			bit_ofs += g_uastc_mode_total_hint_bits[mode];

		// Multi-subset modes store a common pattern index: 5 bits for the 2-subset modes, 4 bits for the 3-subset mode.
		// NOTE(review): unpacked.m_common_pattern is not written for single-subset modes, yet it is passed to
		// get_anchor_indices() below for every mode (which only uses it when subsets >= 2) - confirm callers
		// do not rely on it for single-subset blocks.
		uint32_t subsets = 1;
		switch (mode)
		{
		case 2:
		case 4:
		case 7:
		case 9:
		case 16:
			unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5);
			subsets = 2;
			break;
		case 3:
			unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4);
			subsets = 3;
			break;
		default:
			break;
		}

		// Validate the pattern index against the table used by this mode and translate it into an ASTC partition seed.
		uint32_t part_seed = 0;
		switch (mode)
		{
		case 2:
		case 4:
		case 9:
		case 16:
			if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2)
				return false;

			part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc;
			break;
		case 3:
			if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3)
				return false;

			part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc;
			break;
		case 7:
			if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS)
				return false;

			part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2;
			break;
		default:
			break;
		}

		// Dual plane modes: modes 6, 11 and 13 store a 2-bit color component selector (CCS), mode 17 always uses CCS 3.
		uint32_t total_planes = 1;
		switch (mode)
		{
		case 6:
		case 11:
		case 13:
			unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2);
			total_planes = 2;
			break;
		case 17:
			unpacked.m_astc.m_ccs = 3;
			total_planes = 2;
			break;
		default:
			break;
		}

		unpacked.m_astc.m_dual_plane = (total_planes == 2);

		unpacked.m_astc.m_subsets = subsets;
		unpacked.m_astc.m_partition_seed = part_seed;

		// The remaining per-mode parameters come from lookup tables.
		const uint32_t total_comps = g_uastc_mode_comps[mode];

		const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];

		unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode];

		// Two endpoint values (low/high) per component per subset.
		const uint32_t total_values = total_comps * 2 * subsets;
		const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];

		const uint32_t cem = g_uastc_mode_cem[mode];
		unpacked.m_astc.m_cem = cem;

		// Endpoint range description: plain bits per value, and whether each value also carries a trit or a quint.
		const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
		const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
		const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];

		// Trits are grouped 5 per bundle (digits in base 3), quints 3 per bundle (digits in base 5).
		uint32_t total_tqs = 0;
		uint32_t bundle_size = 0, mul = 0;
		if (ep_trits)
		{
			total_tqs = (total_values + 4) / 5;
			bundle_size = 5;
			mul = 3;
		}
		else if (ep_quints)
		{
			total_tqs = (total_values + 2) / 3;
			bundle_size = 3;
			mul = 5;
		}

		// All trit/quint bundles are stored ahead of the plain endpoint bits. A full bundle takes 8 bits (trits)
		// or 7 bits (quints); a partial final bundle takes fewer bits, depending on how many values it holds.
		uint32_t tq_values[8];
		for (uint32_t i = 0; i < total_tqs; i++)
		{
			uint32_t num_bits = ep_trits ? 8 : 7;
			if (i == (total_tqs - 1))
			{
				uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size;
				if (ep_trits)
				{
					switch (num_remaining)
					{
					case 1: num_bits = 2; break;
					case 2: num_bits = 4; break;
					case 3: num_bits = 5; break;
					case 4: num_bits = 7; break;
					default: break;
					}
				}
				else if (ep_quints)
				{
					switch (num_remaining)
					{
					case 1: num_bits = 3; break;
					case 2: num_bits = 5; break;
					default: break;
					}
				}
			}

			tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits);
		} // i

		// Rebuild each endpoint value: its plain bits form the low part, and the next base-3/base-5 digit
		// peeled off the current bundle (least significant digit first) is placed above them.
		uint32_t accum = 0;
		uint32_t accum_remaining = 0;
		uint32_t next_tq_index = 0;

		for (uint32_t i = 0; i < total_values; i++)
		{
			uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits);

			if (total_tqs)
			{
				if (!accum_remaining)
				{
					// Current bundle exhausted - move on to the next one.
					assert(next_tq_index < total_tqs);
					accum = tq_values[next_tq_index++];
					accum_remaining = bundle_size;
				}

				// TODO: Optimize with tables
				uint32_t v = accum % mul;
				accum /= mul;
				accum_remaining--;

				value |= (v << ep_bits);
			}

			unpacked.m_astc.m_endpoints[i] = (uint8_t)value;
		}

		// Partition pattern and anchor texels for this block. Anchor weights are stored with one bit less.
		const uint8_t* pPartition_pattern;
		const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern);

		// Debug-only cross checks: the table pattern must match the pattern computed from the ASTC seed, and each
		// subset's anchor must be the first texel that belongs to that subset.
#ifdef _DEBUG
		for (uint32_t i = 0; i < 16; i++)
			assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));

		for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
		{
			uint32_t anchor_index = 0;

			for (uint32_t i = 0; i < 16; i++)
			{
				if (pPartition_pattern[i] == subset_index)
				{
					anchor_index = i;
					break;
				}
			}

			assert(pSubset_anchor_indices[subset_index] == anchor_index);
		}
#endif

		// Disabled generic (slower) weight reader, kept for reference.
#if 0
		const uint32_t total_planes_shift = total_planes - 1;
		for (uint32_t i = 0; i < 16 * total_planes; i++)
		{
			uint32_t num_bits = weight_bits;
			for (uint32_t s = 0; s < subsets; s++)
			{
				if (pSubset_anchor_indices[s] == (i >> total_planes_shift))
				{
					num_bits--;
					break;
				}
			}

			unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits);
		}
#endif

		if (mode == 18)
		{
			// Mode 18 is the only mode with more than 64 weight bits.
			// Its 16 weights are read one by one; the first (anchor) weight has one bit less.
			for (uint32_t i = 0; i < 16; i++)
				unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1));
		}
		else
		{
			// All other modes have <= 64 weight bits.
			uint64_t bits;

			// Read the weight bits
			if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
				bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum<int>(64, 128 - (int)bit_ofs));
			else
			{
				// Start from the upper 64 bits of the block (dwords 2 and 3).
				bits = blk.m_dwords[2];
				bits |= (((uint64_t)blk.m_dwords[3]) << 32U);

				if (bit_ofs >= 64U)
					// The weights begin inside the upper half: drop the bits below them.
					bits >>= (bit_ofs - 64U);
				else
				{
					// The weights begin up to 8 bits below bit 64: make room and pull the missing bits from the top of byte 7.
					assert(bit_ofs >= 56U);

					uint32_t bits_needed = 64U - bit_ofs;
					bits <<= bits_needed;
					bits |= (blk.m_bytes[7] >> (8U - bits_needed));
				}
			}

			// From here on bit_ofs indexes into the gathered weight bits rather than into the block.
			bit_ofs = 0;

			const uint32_t mask = (1U << weight_bits) - 1U;
			const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U;

			if (total_planes == 2)
			{
				// Dual plane modes always have a single subset, and the first 2 weights are anchors.

				unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
				bit_ofs += (weight_bits - 1);

				unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
				bit_ofs += (weight_bits - 1);

				for (uint32_t i = 2; i < 32; i++)
				{
					unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
					bit_ofs += weight_bits;
				}
			}
			else
			{
				if (subsets == 1)
				{
					// Specialize the single subset case.
					if (weight_bits == 4)
					{
						assert(bit_ofs == 0);

						// Specialize the most common case: 4-bit weights.
						// Weight 0 is the 3-bit anchor; the 15 others are 4 bits each at fixed offsets.
						unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7);
						unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15);
						unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15);
						unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15);

						unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15);
						unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15);
						unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15);
						unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15);

						unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15);
						unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15);
						unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15);
						unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15);

						unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15);
						unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15);
						unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15);
						unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15);
					}
					else
					{
						// First weight is always an anchor.
						unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
						bit_ofs += (weight_bits - 1);

						for (uint32_t i = 1; i < 16; i++)
						{
							unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
							bit_ofs += weight_bits;
						}
					}
				}
				else
				{
					// Multiple subsets: any texel listed as an anchor is stored with one bit less.
					const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2];

					for (uint32_t i = 0; i < 16; i++)
					{
						if ((i == a0) || (i == a1) || (i == a2))
						{
							unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask);
							bit_ofs += (weight_bits - 1);
						}
						else
						{
							unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask);
							bit_ofs += weight_bits;
						}
					}
				}
			}
		}

		if ((blue_contract_check) && (total_comps >= 3))
		{
			// We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care.
			bool invert_subset[3] = { false, false, false };
			bool any_flag = false;

			for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
			{
				// s0/s1: sums of the unquantized low/high endpoints of the subset's first three components.
				const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant +
					g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant +
					g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant;

				const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant +
					g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant +
					g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant;

				if (s1 < s0)
				{
					// Swap the low/high endpoint of every component; the subset's weights are inverted below to compensate.
					for (uint32_t c = 0; c < total_comps; c++)
						std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]);

					invert_subset[subset_index] = true;
					any_flag = true;
				}
			}

			if (any_flag)
			{
				const uint32_t weight_mask = (1 << weight_bits) - 1;

				for (uint32_t i = 0; i < 16; i++)
				{
					uint32_t subset = pPartition_pattern[i];

					if (invert_subset[subset])
					{
						// Invert the weight(s) of every texel in a swapped subset (both planes in dual plane modes).
						unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]);

						if (total_planes == 2)
							unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]);
					}
				}
			}
		}

		return true;
	}
12598
	// Interpolation weight tables, indexed by the number of weight bits (1-5); entry 0 is unused.
	// 1-3 bit weights use the g_bc7_weights* tables, 4-5 bit weights use the g_astc_weights* tables.
	static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 };
12600
12601 bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb)
12602 {
12603 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
12604 {
12605 for (uint32_t i = 0; i < 16; i++)
12606 pPixels[i] = solid_color;
12607 return true;
12608 }
12609
12610 color32 endpoints[3][2];
12611
12612 const uint32_t total_subsets = g_uastc_mode_subsets[mode];
12613 const uint32_t total_comps = basisu::minimum<uint32_t>(4U, g_uastc_mode_comps[mode]);
12614 const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
12615 const uint32_t total_planes = g_uastc_mode_planes[mode];
12616 const uint32_t weight_bits = g_uastc_mode_weight_bits[mode];
12617 const uint32_t weight_levels = 1 << weight_bits;
12618
12619 for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
12620 {
12621 if (total_comps == 2)
12622 {
12623 const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant;
12624 const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant;
12625
12626 const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant;
12627 const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 1]].m_unquant;
12628
12629 endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al);
12630 endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah);
12631 }
12632 else
12633 {
12634 for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++)
12635 {
12636 endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant;
12637 endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant;
12638 }
12639 for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++)
12640 {
12641 endpoints[subset_index][0][comp_index] = 255;
12642 endpoints[subset_index][1][comp_index] = 255;
12643 }
12644 }
12645 }
12646
12647 color32 block_colors[3][32];
12648
12649 const uint32_t* pWeights = g_astc_weight_tables[weight_bits];
12650
12651 for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++)
12652 {
12653 for (uint32_t l = 0; l < weight_levels; l++)
12654 {
12655 if (total_comps == 2)
12656 {
12657 const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb);
12658 const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb);
12659
12660 block_colors[subset_index][l].set(lc, lc, lc, ac);
12661 }
12662 else
12663 {
12664 uint32_t comp_index;
12665 for (comp_index = 0; comp_index < total_comps; comp_index++)
12666 block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb);
12667
12668 for (; comp_index < 4; comp_index++)
12669 block_colors[subset_index][l][comp_index] = 255;
12670 }
12671 }
12672 }
12673
12674 const uint8_t* pPartition_pattern = g_zero_pattern;
12675
12676 if (total_subsets >= 2)
12677 {
12678 if (total_subsets == 3)
12679 pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0];
12680 else if (mode == 7)
12681 pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0];
12682 else
12683 pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0];
12684
12685#ifdef _DEBUG
12686 for (uint32_t i = 0; i < 16; i++)
12687 {
12688 assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true));
12689 }
12690#endif
12691 }
12692
12693 if (total_planes == 1)
12694 {
12695 if (total_subsets == 1)
12696 {
12697 for (uint32_t i = 0; i < 16; i++)
12698 {
12699 assert(astc.m_weights[i] < weight_levels);
12700 pPixels[i] = block_colors[0][astc.m_weights[i]];
12701 }
12702 }
12703 else
12704 {
12705 for (uint32_t i = 0; i < 16; i++)
12706 {
12707 assert(astc.m_weights[i] < weight_levels);
12708 pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]];
12709 }
12710 }
12711 }
12712 else
12713 {
12714 assert(total_subsets == 1);
12715
12716 for (uint32_t i = 0; i < 16; i++)
12717 {
12718 const uint32_t subset_index = 0; // pPartition_pattern[i];
12719
12720 const uint32_t weight_index0 = astc.m_weights[i * 2];
12721 const uint32_t weight_index1 = astc.m_weights[i * 2 + 1];
12722
12723 assert(weight_index0 < weight_levels && weight_index1 < weight_levels);
12724
12725 color32& c = pPixels[i];
12726 for (uint32_t comp = 0; comp < 4; comp++)
12727 {
12728 if ((int)comp == astc.m_ccs)
12729 c[comp] = block_colors[subset_index][weight_index1][comp];
12730 else
12731 c[comp] = block_colors[subset_index][weight_index0][comp];
12732 }
12733 }
12734 }
12735
12736 return true;
12737 }
12738
12739 bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb)
12740 {
12741 return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb);
12742 }
12743
12744 bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb)
12745 {
12746 unpacked_uastc_block unpacked_blk;
12747
12748 if (!unpack_uastc(blk, unpacked_blk, false, false))
12749 return false;
12750
12751 return unpack_uastc(unpacked_blk, pPixels, srgb);
12752 }
12753
12754 // Determines the best shared pbits to use to encode xl/xh
12755 static void determine_shared_pbits(
12756 uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
12757 color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
12758 {
12759 const uint32_t total_bits = comp_bits + 1;
12760 assert(total_bits >= 4 && total_bits <= 8);
12761
12762 const int iscalep = (1 << total_bits) - 1;
12763 const float scalep = (float)iscalep;
12764
12765 float best_err = 1e+9f;
12766
12767 for (int p = 0; p < 2; p++)
12768 {
12769 color_quad_u8 xMinColor, xMaxColor;
12770 for (uint32_t c = 0; c < 4; c++)
12771 {
12772 xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
12773 xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
12774 }
12775
12776 color_quad_u8 scaledLow, scaledHigh;
12777
12778 for (uint32_t i = 0; i < 4; i++)
12779 {
12780 scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
12781 scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
12782 assert(scaledLow.m_c[i] <= 255);
12783
12784 scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
12785 scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
12786 assert(scaledHigh.m_c[i] <= 255);
12787 }
12788
12789 float err = 0;
12790 for (uint32_t i = 0; i < total_comps; i++)
12791 err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]);
12792
12793 if (err < best_err)
12794 {
12795 best_err = err;
12796 best_pbits[0] = p;
12797 best_pbits[1] = p;
12798 for (uint32_t j = 0; j < 4; j++)
12799 {
12800 bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1;
12801 bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1;
12802 }
12803 }
12804 }
12805 }
12806
12807 // Determines the best unique pbits to use to encode xl/xh
12808 static void determine_unique_pbits(
12809 uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4],
12810 color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2])
12811 {
12812 const uint32_t total_bits = comp_bits + 1;
12813 const int iscalep = (1 << total_bits) - 1;
12814 const float scalep = (float)iscalep;
12815
12816 float best_err0 = 1e+9f;
12817 float best_err1 = 1e+9f;
12818
12819 for (int p = 0; p < 2; p++)
12820 {
12821 color_quad_u8 xMinColor, xMaxColor;
12822
12823 for (uint32_t c = 0; c < 4; c++)
12824 {
12825 xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
12826 xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p));
12827 }
12828
12829 color_quad_u8 scaledLow, scaledHigh;
12830 for (uint32_t i = 0; i < 4; i++)
12831 {
12832 scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits));
12833 scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits);
12834 assert(scaledLow.m_c[i] <= 255);
12835
12836 scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits));
12837 scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits);
12838 assert(scaledHigh.m_c[i] <= 255);
12839 }
12840
12841 float err0 = 0, err1 = 0;
12842 for (uint32_t i = 0; i < total_comps; i++)
12843 {
12844 err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f);
12845 err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f);
12846 }
12847
12848 if (err0 < best_err0)
12849 {
12850 best_err0 = err0;
12851 best_pbits[0] = p;
12852
12853 bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1;
12854 bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1;
12855 bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1;
12856 bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1;
12857 }
12858
12859 if (err1 < best_err1)
12860 {
12861 best_err1 = err1;
12862 best_pbits[1] = p;
12863
12864 bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1;
12865 bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1;
12866 bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1;
12867 bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1;
12868 }
12869 }
12870 }
12871
12872 bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst)
12873 {
12874 unpacked_uastc_block unpacked_src_blk;
12875 if (!unpack_uastc(src_blk, unpacked_src_blk, true, false))
12876 return false;
12877
12878 bool success = false;
12879 if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
12880 {
12881 pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color);
12882 success = true;
12883 }
12884 else
12885 {
12886 success = pack_astc_block(static_cast<uint32_t*>(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode);
12887 }
12888
12889 return success;
12890 }
12891
// Converts an already-unpacked UASTC block into the logical BC7 description stored in dst_blk
// (BC7 mode, partition, rotation, endpoints, p-bits and selectors). Nothing is bit-packed here.
//
// unpacked_src_blk - source block; m_mode selects the conversion path, m_astc supplies the
//                    endpoints/weights, m_common_pattern the partition, m_solid_color the solid color.
// dst_blk          - fully overwritten; it is zeroed first, so fields a path does not set remain 0.
//
// Returns true on success, false if m_mode is not one of the modes handled by the switch.
bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk)
{
	// Zero everything up front: fields a given path never writes (partition, rotation, p-bits, alpha selectors, ...) stay 0.
	memset(&dst_blk, 0, sizeof(dst_blk));

	const uint32_t mode = unpacked_src_blk.m_mode;

	// NOTE(review): both tables are indexed before the switch rejects unknown modes; presumably
	// callers only pass blocks whose mode was validated during unpacking - confirm.
	const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
	const uint32_t total_comps = g_uastc_mode_comps[mode];

	switch (mode)
	{
	case 0:
	case 5:
	case 10:
	case 12:
	case 14:
	case 15:
	case 18:
	{
		// MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB
		// MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB
		// MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6
		// MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6
		// MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6
		// MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6
		// MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6
		dst_blk.m_mode = 6;

		// Dequantize the ASTC endpoints to normalized floats (xl = low endpoint, xh = high endpoint).
		float xl[4], xh[4];
		if (total_comps == 2)
		{
			// Two-component block: endpoints 0/1 are replicated into R, G and B, endpoints 2/3 feed alpha.
			xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
			xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;

			xl[1] = xl[0];
			xh[1] = xh[0];

			xl[2] = xl[0];
			xh[2] = xh[0];

			xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
			xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
		}
		else
		{
			// Endpoints are stored interleaved per channel: even index = low, odd index = high.
			xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f;
			xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f;
			xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f;

			xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f;
			xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f;
			xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f;

			if (total_comps == 4)
			{
				xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f;
				xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f;
			}
			else
			{
				// No alpha in the source: treat it as fully opaque.
				xl[3] = 1.0f;
				xh[3] = 1.0f;
			}
		}

		// Quantize to 7 bits + one p-bit per endpoint. The two-component case is scored over all 4 expanded channels.
		uint32_t best_pbits[2];
		color_quad_u8 bestMinColor, bestMaxColor;
		determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);

		dst_blk.m_low[0] = bestMinColor;
		dst_blk.m_high[0] = bestMaxColor;

		if (total_comps == 3)
		{
			// RGB-only source: force alpha to the largest 7-bit value regardless of the chosen p-bits.
			dst_blk.m_low[0].m_c[3] = 127;
			dst_blk.m_high[0].m_c[3] = 127;
		}

		dst_blk.m_pbits[0][0] = best_pbits[0];
		dst_blk.m_pbits[0][1] = best_pbits[1];

		// The destination uses 16 selector levels (0..15); remap source weights that use a different number of levels.
		if (mode == 18)
		{
			// 32 weight levels -> 16 selector levels.
			const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 };
			for (uint32_t i = 0; i < 16; i++)
				dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]];
		}
		else if (mode == 14)
		{
			// 4 weight levels -> 16 selector levels.
			const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 };
			for (uint32_t i = 0; i < 16; i++)
				dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]];
		}
		else if ((mode == 5) || (mode == 12))
		{
			// 8 weight levels -> 16 selector levels.
			const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 };
			for (uint32_t i = 0; i < 16; i++)
				dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]];
		}
		else
		{
			// Already 16 levels: copy the weights straight through.
			for (uint32_t i = 0; i < 16; i++)
				dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];
		}

		break;
	}
	case 1:
	{
		// DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3
		// Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables.
		dst_blk.m_mode = 3;

		float xl[4], xh[4];
		xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f;
		xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f;
		xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f;
		xl[3] = 1.0f;

		xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f;
		xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f;
		xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f;
		xh[3] = 1.0f;

		uint32_t best_pbits[2];
		color_quad_u8 bestMinColor, bestMaxColor;
		memset(&bestMinColor, 0, sizeof(bestMinColor));
		memset(&bestMaxColor, 0, sizeof(bestMaxColor));
		determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);

		// The source has a single subset, so the same endpoints and p-bits are written to both
		// destination subsets (m_partition is left at 0 by the memset above).
		for (uint32_t i = 0; i < 3; i++)
		{
			dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i];
			dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i];
			dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i];
			dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i];
		}
		dst_blk.m_pbits[0][0] = best_pbits[0];
		dst_blk.m_pbits[0][1] = best_pbits[1];
		dst_blk.m_pbits[1][0] = best_pbits[0];
		dst_blk.m_pbits[1][1] = best_pbits[1];

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	case 2:
	{
		// 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1
		dst_blk.m_mode = 1;
		dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;

		// When set, the two subsets are swapped between the source and destination partition patterns.
		const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;

		float xl[4], xh[4];
		xl[3] = 1.0f;
		xh[3] = 1.0f;

		for (uint32_t subset = 0; subset < 2; subset++)
		{
			for (uint32_t i = 0; i < 3; i++)
			{
				// Expand each endpoint from 4 to 8 bits by replicating the nibble (6 endpoint values per subset).
				uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6];
				v = (v << 4) | v;
				xl[i] = v / 255.0f;

				v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1];
				v = (v << 4) | v;
				xh[i] = v / 255.0f;
			}

			// 6-bit endpoints with one p-bit shared by the low and high endpoint of the subset.
			uint32_t best_pbits[2] = { 0, 0 };
			color_quad_u8 bestMinColor, bestMaxColor;
			memset(&bestMinColor, 0, sizeof(bestMinColor));
			memset(&bestMaxColor, 0, sizeof(bestMaxColor));
			determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits);

			const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;

			for (uint32_t i = 0; i < 3; i++)
			{
				dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
				dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
			}

			// Only slot [0] is written because the p-bit is shared within the subset.
			dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
		} // subset

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	case 3:
	{
		// DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2
		dst_blk.m_mode = 2;
		dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7;

		// Index of the permutation that maps source subset numbers to destination subset numbers.
		const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm;

		for (uint32_t subset = 0; subset < 3; subset++)
		{
			for (uint32_t comp = 0; comp < 3; comp++)
			{
				uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant;
				uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant;

				// Rescale the 8-bit value to 5 bits (0..31) with rounding; no p-bits are set on this path.
				// TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion.
				lo = (lo * 31 + 127) / 255;
				hi = (hi * 31 + 127) / 255;

				const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset];

				dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo;
				dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi;
			}
		}

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	case 4:
	{
		// 4. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3
		dst_blk.m_mode = 3;
		dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;

		// When set, the two subsets are swapped between the source and destination partition patterns.
		const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;

		float xl[4], xh[4];
		xl[3] = 1.0f;
		xh[3] = 1.0f;

		for (uint32_t subset = 0; subset < 2; subset++)
		{
			for (uint32_t i = 0; i < 3; i++)
			{
				xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f;
				xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f;
			}

			// 7-bit endpoints with an independent p-bit for each endpoint.
			uint32_t best_pbits[2] = { 0, 0 };
			color_quad_u8 bestMinColor, bestMaxColor;
			memset(&bestMinColor, 0, sizeof(bestMinColor));
			memset(&bestMaxColor, 0, sizeof(bestMaxColor));
			determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits);

			const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset;

			for (uint32_t i = 0; i < 3; i++)
			{
				dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i];
				dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i];
			}
			dst_blk.m_low[bc7_subset_index].m_c[3] = 127;
			dst_blk.m_high[bc7_subset_index].m_c[3] = 127;

			dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
			dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];

		} // subset

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	case 6:
	case 11:
	case 13:
	case 17:
	{
		// MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB
		// MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5
		// MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5
		// MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5
		dst_blk.m_mode = 5;
		// Derive the rotation from the source's second-plane channel: ccs 0,1,2 -> rotation 1,2,3 and ccs 3 -> rotation 0.
		dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3;

		if (total_comps == 2)
		{
			assert(unpacked_src_blk.m_astc.m_ccs == 3);

			// Color channels are rescaled to 7 bits (0..127) with rounding; alpha keeps the full 8-bit value.
			dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255);
			dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255);

			dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0];
			dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0];

			dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0];
			dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0];

			dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant);
			dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant);
		}
		else
		{
			for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++)
			{
				uint32_t bc7_comp = astc_comp;
				// ASTC and BC7 handle dual plane component rotations differently:
				// ASTC: 2nd plane separately interpolates the CCS channel.
				// BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel.
				if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs)
					bc7_comp = 3;
				else if (astc_comp == 3)
					bc7_comp = unpacked_src_blk.m_astc.m_ccs;

				// Channels the source does not carry default to 255.
				uint32_t l = 255, h = 255;
				if (astc_comp < total_comps)
				{
					l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant;
					h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant;
				}

				// Destination slots 0..2 hold 7-bit values; slot 3 keeps the full 8-bit value.
				if (bc7_comp < 3)
				{
					l = (l * 127 + 127) / 255;
					h = (h * 127 + 127) / 255;
				}

				dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l;
				dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h;
			}
		}

		// Dual-plane weights are interleaved: even indices belong to the first plane, odd indices to the second.
		if (mode == 13)
		{
			// 2 weight levels -> 4 selector levels: map {0, 1} to {0, 3}.
			for (uint32_t i = 0; i < 16; i++)
			{
				dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0;
				dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 3 : 0;
			}
		}
		else
		{
			for (uint32_t i = 0; i < 16; i++)
			{
				dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2];
				dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1];
			}
		}

		break;
	}
	case 7:
	{
		// DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2
		dst_blk.m_mode = 2;
		dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73;

		const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k;

		// The destination has 3 subsets while the source has 2: each destination subset takes the
		// endpoints of the source subset it maps to.
		for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
		{
			const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k);

			for (uint32_t c = 0; c < 3; c++)
			{
				// Rescale the 8-bit value to 5 bits (0..31) with rounding.
				dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255;
				dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255;
			}
		}

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	case UASTC_MODE_INDEX_SOLID_COLOR:
	{
		// Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6)
		const color32& solid_color = unpacked_src_blk.m_solid_color;

		// Total table error over all four channels for each of the two possible mode 6 p-bit values.
		uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error +
			g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error;

		uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error +
			g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error;

		if (best_err0 > 0 && best_err1 > 0)
		{
			// Neither p-bit gives zero error in mode 6, so use mode 5 instead.
			dst_blk.m_mode = 5;

			for (uint32_t c = 0; c < 3; c++)
			{
				dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo;
				dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi;
			}

			memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16);

			// Alpha is stored directly; both endpoints are equal, and the alpha selectors stay 0 from the initial memset.
			dst_blk.m_low[0].m_c[3] = solid_color.c[3];
			dst_blk.m_high[0].m_c[3] = solid_color.c[3];

			//memset(dst_blk.m_alpha_selectors, 0, 16);
		}
		else
		{
			// At least one p-bit has zero table error in mode 6: use mode 6 with the lower-error p-bit (ties pick 0).
			dst_blk.m_mode = 6;

			uint32_t best_p = 0;
			if (best_err1 < best_err0)
				best_p = 1;

			for (uint32_t c = 0; c < 4; c++)
			{
				dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo;
				dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi;
			}

			dst_blk.m_pbits[0][0] = best_p;
			dst_blk.m_pbits[0][1] = best_p;
			memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16);
		}

		break;
	}
	case 9:
	case 16:
	{
		// 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7
		// 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7

		dst_blk.m_mode = 7;
		dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7;

		// When set, the two subsets are swapped between the source and destination partition patterns.
		const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert;

		for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++)
		{
			float xl[4], xh[4];

			if (total_comps == 2)
			{
				// Two-component block: 4 endpoint values per subset; the first pair is replicated into R, G and B.
				xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f;
				xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f;

				xl[1] = xl[0];
				xh[1] = xh[0];

				xl[2] = xl[0];
				xh[2] = xh[0];

				xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f;
				xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f;
			}
			else
			{
				// Four-component block: 8 endpoint values per subset, interleaved low/high per channel.
				xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f;
				xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f;
				xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f;
				xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f;

				xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f;
				xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f;
				xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f;
				xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f;
			}

			// 5-bit RGBA endpoints with an independent p-bit for each endpoint.
			uint32_t best_pbits[2] = { 0, 0 };
			color_quad_u8 bestMinColor, bestMaxColor;
			memset(&bestMinColor, 0, sizeof(bestMinColor));
			memset(&bestMaxColor, 0, sizeof(bestMaxColor));
			determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits);

			const uint32_t bc7_subset_index = invert_partition ? (1 - astc_subset) : astc_subset;

			dst_blk.m_low[bc7_subset_index] = bestMinColor;
			dst_blk.m_high[bc7_subset_index] = bestMaxColor;

			dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0];
			dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1];
		} // astc_subset

		for (uint32_t i = 0; i < 16; i++)
			dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i];

		break;
	}
	default:
		// Unknown or unsupported mode.
		return false;
	}

	return true;
}
13382
13383 bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk)
13384 {
13385 unpacked_uastc_block unpacked_src_blk;
13386 if (!unpack_uastc(src_blk, unpacked_src_blk, false, false))
13387 return false;
13388
13389 return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk);
13390 }
13391
13392 bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst)
13393 {
13394 bc7_optimization_results temp;
13395 if (!transcode_uastc_to_bc7(src_blk, temp))
13396 return false;
13397
13398 encode_bc7_block(pDst, &temp);
13399 return true;
13400 }
13401
13402 color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock)
13403 {
13404 color32 result;
13405
13406 for (uint32_t c = 0; c < 3; c++)
13407 {
13408 static const int s_divs[3] = { 1, 3, 9 };
13409
13410 int delta = 0;
13411
13412 switch (bias)
13413 {
13414 case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break;
13415 case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break;
13416 case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break;
13417
13418 case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break;
13419 case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break;
13420 case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break;
13421
13422 case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break;
13423 case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break;
13424 case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break;
13425
13426 case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break;
13427 case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break;
13428 case 8: delta = subblock ? ((c == 2) ? 1 : 0) : 0; break;
13429
13430 case 10: delta = -2; break;
13431
13432 case 27: delta = subblock ? 0 : -1; break;
13433 case 28: delta = subblock ? -1 : 1; break;
13434 case 29: delta = subblock ? 1 : 0; break;
13435 case 30: delta = subblock ? -1 : 0; break;
13436 case 31: delta = subblock ? 0 : 1; break;
13437
13438 default:
13439 delta = ((bias / s_divs[c]) % 3) - 1;
13440 break;
13441 }
13442
13443 int v = block_color[c];
13444 if (v == 0)
13445 {
13446 if (delta == -2)
13447 v += 3;
13448 else
13449 v += delta + 1;
13450 }
13451 else if (v == (int)limit)
13452 {
13453 v += (delta - 1);
13454 }
13455 else
13456 {
13457 v += delta;
13458 if ((v < 0) || (v > (int)limit))
13459 v = (v - delta) - delta;
13460 }
13461
13462 assert(v >= 0);
13463 assert(v <= (int)limit);
13464
13465 result[c] = (uint8_t)v;
13466 }
13467
13468 return result;
13469 }
13470
13471 static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock)
13472 {
13473 static const uint8_t s_tran[4] = { 1, 0, 2, 3 };
13474
13475 uint16_t l_bitmask = 0;
13476 uint16_t h_bitmask = 0;
13477
13478 for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++)
13479 {
13480 color32 block_colors[4];
13481 dst_blk.get_block_colors(block_colors, subblock);
13482
13483 uint32_t block_y[4];
13484 for (uint32_t i = 0; i < 4; i++)
13485 block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19;
13486
13487 const uint32_t block_y01 = block_y[0] + block_y[1];
13488 const uint32_t block_y12 = block_y[1] + block_y[2];
13489 const uint32_t block_y23 = block_y[2] + block_y[3];
13490
13491 // X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3
13492 // Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3
13493
13494 if (dst_blk.get_flip_bit())
13495 {
13496 uint32_t ofs = subblock * 2;
13497
13498 for (uint32_t y = 0; y < 2; y++)
13499 {
13500 for (uint32_t x = 0; x < 4; x++)
13501 {
13502 const color32& c = pSource_pixels[x + (subblock * 2 + y) * 4];
13503 const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38;
13504
13505 uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)];
13506
13507 assert(ofs < 16);
13508 l_bitmask |= ((t & 1) << ofs);
13509 h_bitmask |= ((t >> 1) << ofs);
13510 ofs += 4;
13511 }
13512
13513 ofs = (int)ofs + 1 - 4 * 4;
13514 }
13515 }
13516 else
13517 {
13518 uint32_t ofs = (subblock * 2) * 4;
13519 for (uint32_t x = 0; x < 2; x++)
13520 {
13521 for (uint32_t y = 0; y < 4; y++)
13522 {
13523 const color32& c = pSource_pixels[subblock * 2 + x + y * 4];
13524 const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38;
13525
13526 uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)];
13527
13528 assert(ofs < 16);
13529 l_bitmask |= ((t & 1) << ofs);
13530 h_bitmask |= ((t >> 1) << ofs);
13531 ++ofs;
13532 }
13533 }
13534 }
13535 }
13536
13537 dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
13538 dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
13539 dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
13540 dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
13541 }
13542
// Pre-built selector bytes for solid-color ETC1 blocks, indexed as [selector][byte].
// A row is copied verbatim into bytes 4..7 of the output block, so every pixel gets the
// same selector: bytes 0-1 of a row form the high selector bit plane and bytes 2-3 the low one.
static const uint8_t s_etc1_solid_selectors[4][4] =
{
	{ 0xFF, 0xFF, 0xFF, 0xFF },	// high bits set, low bits set
	{ 0xFF, 0xFF, 0x00, 0x00 },	// high bits set, low bits clear
	{ 0x00, 0x00, 0x00, 0x00 },	// high bits clear, low bits clear
	{ 0x00, 0x00, 0xFF, 0xFF }	// high bits clear, low bits set
};
13544
// A pixel position inside a 4x4 block.
struct etc_coord2
{
	uint8_t m_x;	// column
	uint8_t m_y;	// row
};
13549
13550 // [flip][subblock][pixel_index]
13551 const etc_coord2 g_etc1_pixel_coords[2][2][8] =
13552 {
13553 {
13554 {
13555 { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
13556 { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
13557 },
13558 {
13559 { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
13560 { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
13561 }
13562 },
13563 {
13564 {
13565 { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
13566 { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
13567 },
13568 {
13569 { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
13570 { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
13571 },
13572 }
13573 };
13574
13575 void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
13576 {
13577 decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
13578
13579 if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
13580 {
13581 dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2));
13582
13583 if (unpacked_src_blk.m_etc1_diff)
13584 {
13585 dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3);
13586 dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3);
13587 dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3);
13588 }
13589 else
13590 {
13591 dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4));
13592 dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4));
13593 dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4));
13594 }
13595
13596 memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4);
13597
13598 return;
13599 }
13600
13601 const bool flip = unpacked_src_blk.m_etc1_flip != 0;
13602 const bool diff = unpacked_src_blk.m_etc1_diff != 0;
13603
13604 dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2));
13605
13606 const uint32_t limit = diff ? 31 : 15;
13607
13608 color32 block_colors[2];
13609
13610 for (uint32_t subset = 0; subset < 2; subset++)
13611 {
13612 uint32_t avg_color[3];
13613 memset(avg_color, 0, sizeof(avg_color));
13614
13615 for (uint32_t j = 0; j < 8; j++)
13616 {
13617 const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j];
13618
13619 avg_color[0] += block_pixels[c.m_y][c.m_x].r;
13620 avg_color[1] += block_pixels[c.m_y][c.m_x].g;
13621 avg_color[2] += block_pixels[c.m_y][c.m_x].b;
13622 } // j
13623
13624 block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255));
13625 block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255));
13626 block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255));
13627 block_colors[subset][3] = 0;
13628
13629 if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode])
13630 {
13631 block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset);
13632 }
13633
13634 } // subset
13635
13636 if (diff)
13637 {
13638 int dr = block_colors[1].r - block_colors[0].r;
13639 int dg = block_colors[1].g - block_colors[0].g;
13640 int db = block_colors[1].b - block_colors[0].b;
13641
13642 dr = basisu::clamp<int>(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
13643 dg = basisu::clamp<int>(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
13644 db = basisu::clamp<int>(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
13645
13646 if (dr < 0) dr += 8;
13647 if (dg < 0) dg += 8;
13648 if (db < 0) db += 8;
13649
13650 dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr);
13651 dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg);
13652 dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db);
13653 }
13654 else
13655 {
13656 dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4));
13657 dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4));
13658 dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4));
13659 }
13660
13661 etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2);
13662 }
13663
13664 bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst)
13665 {
13666 unpacked_uastc_block unpacked_src_blk;
13667 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
13668 return false;
13669
13670 color32 block_pixels[4][4];
13671 if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
13672 {
13673 const bool unpack_srgb = false;
13674 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
13675 return false;
13676 }
13677
13678 transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst);
13679
13680 return true;
13681 }
13682
// Returns the squared difference between an 8-bit gray value c and a reference level y.
static inline int gray_distance2(const uint8_t c, int y)
{
	const int diff = static_cast<int>(c) - y;
	return diff * diff;
}
13688
13689 static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels,
13690 int& upper_avg, int& lower_avg, int& left_avg, int& right_avg)
13691 {
13692 int sums[2][2];
13693
13694#define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)]
13695
13696 sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1);
13697 sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1);
13698 sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3);
13699 sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3);
13700
13701 upper_avg = (sums[0][0] + sums[1][0] + 4) / 8;
13702 lower_avg = (sums[0][1] + sums[1][1] + 4) / 8;
13703 left_avg = (sums[0][0] + sums[0][1] + 4) / 8;
13704 right_avg = (sums[1][0] + sums[1][1] + 4) / 8;
13705
13706#undef GET_XY
13707#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a)
13708
13709 int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0;
13710 for (uint32_t i = 0; i < 4; i++)
13711 {
13712 for (uint32_t j = 0; j < 2; j++)
13713 {
13714 upper_gray_dist += GET_XY(i, j, upper_avg);
13715 lower_gray_dist += GET_XY(i, 2 + j, lower_avg);
13716 left_gray_dist += GET_XY(j, i, left_avg);
13717 right_gray_dist += GET_XY(2 + j, i, right_avg);
13718 }
13719 }
13720
13721#undef GET_XY
13722
13723 int upper_lower_sum = upper_gray_dist + lower_gray_dist;
13724 int left_right_sum = left_gray_dist + right_gray_dist;
13725
13726 return upper_lower_sum < left_right_sum;
13727 }
13728
// ETC1 configuration for a solid single-channel block, indexed by the 8-bit channel value
// (see the solid-color path of the single-channel transcode_uastc_to_etc1()).
// Each entry is packed starting at the least significant bit:
//   bits 0-4 : base (5 bits; the transcoder writes it as base << 3 into color bytes 0-2)
//   bits 5-6 : selector (index into s_etc1_solid_selectors)
//   bits 7+  : intensity table (used for both subblocks)
// The field order below is therefore listed LSB first:
// Base Sel Table
// XXXXX XX XXX
static const uint16_t g_etc1_y_solid_block_configs[256] =
{
	0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167,
	355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361,
	204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210,
	51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57,
	155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126
};
13739
// ETC1 individual-mode (4-bit base) configuration for a subblock whose values span a
// range of 2 or 3, indexed by the subblock's minimum value (low).
// Each entry is packed starting at the least significant bit:
//   bits 0-2   : intensity table
//   bits 3-7   : base (the transcoder asserts base <= 15)
//   bits 8-9   : sel0, selector used for a pixel equal to low + 0
//   bits 10-11 : sel1, selector used for a pixel equal to low + 1
//   bits 12-13 : sel2, selector used for a pixel equal to low + 2
//   bits 14-15 : sel3, selector used for a pixel equal to low + 3
// The selectors are mapped through g_selector_index_to_etc1 before being written.
// individual
// table base sel0 sel1 sel2 sel3
static const uint16_t g_etc1_y_solid_block_4i_configs[256] =
{
	0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511,
	0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15,
	0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928,
	0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33,
	0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40,
	0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B,
	0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560,
	0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D,
	0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278
};
13754
// ETC1 individual-mode configuration for a subblock whose values span a range of exactly 1
// (the transcoder selects this table when the range is non-zero and < 2), indexed by the
// subblock's minimum value (low).
// Entry packing, starting at the least significant bit: intensity table (bits 0-2),
// base (bits 3-7), then 2-bit selectors at bits 8-9 (pixel == low) and 10-11 (pixel == low + 1).
static const uint16_t g_etc1_y_solid_block_2i_configs[256] =
{
	0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09,
	0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19,
	0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29,
	0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39,
	0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49,
	0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59,
	0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69,
	0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278
};
13766
// ETC1 individual-mode configuration for a subblock whose values are all identical
// (range == 0), indexed by that single value (low).
// Entry packing, starting at the least significant bit: intensity table (bits 0-2),
// base (bits 3-7), then the 2-bit selector for the value at bits 8-9.
static const uint16_t g_etc1_y_solid_block_1i_configs[256] =
{
	0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309,
	0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C,
	0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230,
	0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240,
	0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B,
	0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261,
	0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271,
	0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278
};
13778
13779 // We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch.
13780 bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel)
13781 {
13782 unpacked_uastc_block unpacked_src_blk;
13783 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
13784 return false;
13785
13786#if 0
13787 for (uint32_t individ = 0; individ < 2; individ++)
13788 {
13789 uint32_t overall_error = 0;
13790
13791 for (uint32_t c = 0; c < 256; c++)
13792 {
13793 uint32_t best_err = UINT32_MAX;
13794 uint32_t best_individ = 0;
13795 uint32_t best_base = 0;
13796 uint32_t best_sels[4] = { 0,0,0,0 };
13797 uint32_t best_table = 0;
13798
13799 const uint32_t limit = individ ? 16 : 32;
13800
13801 for (uint32_t table = 0; table < 8; table++)
13802 {
13803 for (uint32_t base = 0; base < limit; base++)
13804 {
13805 uint32_t total_e = 0;
13806 uint32_t sels[4] = { 0,0,0,0 };
13807
13808 const uint32_t N = 4;
13809 for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
13810 {
13811 uint32_t best_sel_e = UINT32_MAX;
13812 uint32_t best_sel = 0;
13813
13814 for (uint32_t sel = 0; sel < 4; sel++)
13815 {
13816 int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
13817 val = clamp255(val + g_etc1_inten_tables[table][sel]);
13818
13819 int e = iabs(val - clamp255(c + i));
13820 if (e < best_sel_e)
13821 {
13822 best_sel_e = e;
13823 best_sel = sel;
13824 }
13825
13826 } // sel
13827
13828 sels[i] = best_sel;
13829 total_e += best_sel_e * best_sel_e;
13830
13831 } // i
13832
13833 if (total_e < best_err)
13834 {
13835 best_err = total_e;
13836 best_individ = individ;
13837 best_base = base;
13838 memcpy(best_sels, sels, sizeof(best_sels));
13839 best_table = table;
13840 }
13841
13842 } // base
13843 } // table
13844
13845 //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
13846
13847 uint32_t encoded = best_table | (best_base << 3) |
13848 (best_sels[0] << 8) |
13849 (best_sels[1] << 10) |
13850 (best_sels[2] << 12) |
13851 (best_sels[3] << 14);
13852
13853 printf("0x%X,", encoded);
13854
13855 overall_error += best_err;
13856 } // c
13857
13858 printf("\n");
13859 printf("Overall error: %u\n", overall_error);
13860
13861 } // individ
13862
13863 exit(0);
13864#endif
13865
13866#if 0
13867 for (uint32_t individ = 0; individ < 2; individ++)
13868 {
13869 uint32_t overall_error = 0;
13870
13871 for (uint32_t c = 0; c < 256; c++)
13872 {
13873 uint32_t best_err = UINT32_MAX;
13874 uint32_t best_individ = 0;
13875 uint32_t best_base = 0;
13876 uint32_t best_sels[4] = { 0,0,0,0 };
13877 uint32_t best_table = 0;
13878
13879 const uint32_t limit = individ ? 16 : 32;
13880
13881 for (uint32_t table = 0; table < 8; table++)
13882 {
13883 for (uint32_t base = 0; base < limit; base++)
13884 {
13885 uint32_t total_e = 0;
13886 uint32_t sels[4] = { 0,0,0,0 };
13887
13888 const uint32_t N = 1;
13889 for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++)
13890 {
13891 uint32_t best_sel_e = UINT32_MAX;
13892 uint32_t best_sel = 0;
13893
13894 for (uint32_t sel = 0; sel < 4; sel++)
13895 {
13896 int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2));
13897 val = clamp255(val + g_etc1_inten_tables[table][sel]);
13898
13899 int e = iabs(val - clamp255(c + i));
13900 if (e < best_sel_e)
13901 {
13902 best_sel_e = e;
13903 best_sel = sel;
13904 }
13905
13906 } // sel
13907
13908 sels[i] = best_sel;
13909 total_e += best_sel_e * best_sel_e;
13910
13911 } // i
13912
13913 if (total_e < best_err)
13914 {
13915 best_err = total_e;
13916 best_individ = individ;
13917 best_base = base;
13918 memcpy(best_sels, sels, sizeof(best_sels));
13919 best_table = table;
13920 }
13921
13922 } // base
13923 } // table
13924
13925 //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]);
13926
13927 uint32_t encoded = best_table | (best_base << 3) |
13928 (best_sels[0] << 8) |
13929 (best_sels[1] << 10) |
13930 (best_sels[2] << 12) |
13931 (best_sels[3] << 14);
13932
13933 printf("0x%X,", encoded);
13934
13935 overall_error += best_err;
13936 } // c
13937
13938 printf("\n");
13939 printf("Overall error: %u\n", overall_error);
13940
13941 } // individ
13942
13943 exit(0);
13944#endif
13945
13946 decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst);
13947
13948 if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)
13949 {
13950 const uint32_t y = unpacked_src_blk.m_solid_color[channel];
13951 const uint32_t encoded_config = g_etc1_y_solid_block_configs[y];
13952
13953 const uint32_t base = encoded_config & 31;
13954 const uint32_t sel = (encoded_config >> 5) & 3;
13955 const uint32_t table = encoded_config >> 7;
13956
13957 dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2));
13958
13959 dst_blk.m_bytes[0] = (uint8_t)(base << 3);
13960 dst_blk.m_bytes[1] = (uint8_t)(base << 3);
13961 dst_blk.m_bytes[2] = (uint8_t)(base << 3);
13962
13963 memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4);
13964 return true;
13965 }
13966
13967 color32 block_pixels[4][4];
13968 const bool unpack_srgb = false;
13969 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
13970 return false;
13971
13972 uint8_t block_y[4][4];
13973 for (uint32_t i = 0; i < 16; i++)
13974 ((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel];
13975
13976 int upper_avg, lower_avg, left_avg, right_avg;
13977 bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg);
13978
13979 // non-flipped: | |
13980 // vs.
13981 // flipped: --
13982 // --
13983
13984 uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 };
13985
13986 if (flip)
13987 {
13988 for (uint32_t y = 0; y < 2; y++)
13989 {
13990 for (uint32_t x = 0; x < 4; x++)
13991 {
13992 const uint32_t v = block_y[y][x];
13993 low[0] = basisu::minimum(low[0], v);
13994 high[0] = basisu::maximum(high[0], v);
13995 }
13996 }
13997 for (uint32_t y = 2; y < 4; y++)
13998 {
13999 for (uint32_t x = 0; x < 4; x++)
14000 {
14001 const uint32_t v = block_y[y][x];
14002 low[1] = basisu::minimum(low[1], v);
14003 high[1] = basisu::maximum(high[1], v);
14004 }
14005 }
14006 }
14007 else
14008 {
14009 for (uint32_t y = 0; y < 4; y++)
14010 {
14011 for (uint32_t x = 0; x < 2; x++)
14012 {
14013 const uint32_t v = block_y[y][x];
14014 low[0] = basisu::minimum(low[0], v);
14015 high[0] = basisu::maximum(high[0], v);
14016 }
14017 }
14018 for (uint32_t y = 0; y < 4; y++)
14019 {
14020 for (uint32_t x = 2; x < 4; x++)
14021 {
14022 const uint32_t v = block_y[y][x];
14023 low[1] = basisu::minimum(low[1], v);
14024 high[1] = basisu::maximum(high[1], v);
14025 }
14026 }
14027 }
14028
14029 const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] };
14030
14031 dst_blk.m_bytes[3] = (uint8_t)((int)flip);
14032
14033 if ((range[0] <= 3) && (range[1] <= 3))
14034 {
14035 // This is primarily for better gradients.
14036 dst_blk.m_bytes[0] = 0;
14037 dst_blk.m_bytes[1] = 0;
14038 dst_blk.m_bytes[2] = 0;
14039
14040 uint16_t l_bitmask = 0, h_bitmask = 0;
14041
14042 for (uint32_t subblock = 0; subblock < 2; subblock++)
14043 {
14044 const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
14045
14046 const uint32_t table = encoded & 7;
14047 const uint32_t base = (encoded >> 3) & 31;
14048 assert(base <= 15);
14049 const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
14050
14051 dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
14052
14053 const uint32_t sv = base << (subblock ? 0 : 4);
14054 dst_blk.m_bytes[0] |= (uint8_t)(sv);
14055 dst_blk.m_bytes[1] |= (uint8_t)(sv);
14056 dst_blk.m_bytes[2] |= (uint8_t)(sv);
14057
14058 if (flip)
14059 {
14060 uint32_t ofs = subblock * 2;
14061 for (uint32_t y = 0; y < 2; y++)
14062 {
14063 for (uint32_t x = 0; x < 4; x++)
14064 {
14065 uint32_t t = block_y[y + subblock * 2][x];
14066 assert(t >= low[subblock] && t <= high[subblock]);
14067 t -= low[subblock];
14068 assert(t <= 3);
14069
14070 t = g_selector_index_to_etc1[sels[t]];
14071
14072 assert(ofs < 16);
14073 l_bitmask |= ((t & 1) << ofs);
14074 h_bitmask |= ((t >> 1) << ofs);
14075 ofs += 4;
14076 }
14077
14078 ofs = (int)ofs + 1 - 4 * 4;
14079 }
14080 }
14081 else
14082 {
14083 uint32_t ofs = (subblock * 2) * 4;
14084 for (uint32_t x = 0; x < 2; x++)
14085 {
14086 for (uint32_t y = 0; y < 4; y++)
14087 {
14088 uint32_t t = block_y[y][x + subblock * 2];
14089 assert(t >= low[subblock] && t <= high[subblock]);
14090 t -= low[subblock];
14091 assert(t <= 3);
14092
14093 t = g_selector_index_to_etc1[sels[t]];
14094
14095 assert(ofs < 16);
14096 l_bitmask |= ((t & 1) << ofs);
14097 h_bitmask |= ((t >> 1) << ofs);
14098 ++ofs;
14099 }
14100 }
14101 }
14102 } // subblock
14103
14104 dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
14105 dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
14106 dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
14107 dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
14108
14109 return true;
14110 }
14111
14112 uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255;
14113 uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255;
14114
14115 bool diff = true;
14116
14117 int dy = y1 - y0;
14118
14119 if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax))
14120 {
14121 diff = false;
14122
14123 y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255;
14124 y1 = ((flip ? lower_avg : right_avg) * 15 + 127) / 255;
14125
14126 dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4));
14127 dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4));
14128 dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4));
14129 }
14130 else
14131 {
14132 dy = basisu::clamp<int>(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax);
14133
14134 y1 = y0 + dy;
14135
14136 if (dy < 0) dy += 8;
14137
14138 dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy);
14139 dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy);
14140 dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy);
14141
14142 dst_blk.m_bytes[3] |= 2;
14143 }
14144
14145 const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) };
14146
14147 uint32_t enc_range[2];
14148 for (uint32_t subset = 0; subset < 2; subset++)
14149 {
14150 const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]);
14151 const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]);
14152
14153 enc_range[subset] = basisu::maximum(pos, neg);
14154 }
14155
14156 uint16_t l_bitmask = 0, h_bitmask = 0;
14157 for (uint32_t subblock = 0; subblock < 2; subblock++)
14158 {
14159 if ((!diff) && (range[subblock] <= 3))
14160 {
14161 const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]);
14162
14163 const uint32_t table = encoded & 7;
14164 const uint32_t base = (encoded >> 3) & 31;
14165 assert(base <= 15);
14166 const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 };
14167
14168 dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5));
14169
14170 const uint32_t mask = ~(0xF << (subblock ? 0 : 4));
14171
14172 dst_blk.m_bytes[0] &= mask;
14173 dst_blk.m_bytes[1] &= mask;
14174 dst_blk.m_bytes[2] &= mask;
14175
14176 const uint32_t sv = base << (subblock ? 0 : 4);
14177 dst_blk.m_bytes[0] |= (uint8_t)(sv);
14178 dst_blk.m_bytes[1] |= (uint8_t)(sv);
14179 dst_blk.m_bytes[2] |= (uint8_t)(sv);
14180
14181 if (flip)
14182 {
14183 uint32_t ofs = subblock * 2;
14184 for (uint32_t y = 0; y < 2; y++)
14185 {
14186 for (uint32_t x = 0; x < 4; x++)
14187 {
14188 uint32_t t = block_y[y + subblock * 2][x];
14189 assert(t >= low[subblock] && t <= high[subblock]);
14190 t -= low[subblock];
14191 assert(t <= 3);
14192
14193 t = g_selector_index_to_etc1[sels[t]];
14194
14195 assert(ofs < 16);
14196 l_bitmask |= ((t & 1) << ofs);
14197 h_bitmask |= ((t >> 1) << ofs);
14198 ofs += 4;
14199 }
14200
14201 ofs = (int)ofs + 1 - 4 * 4;
14202 }
14203 }
14204 else
14205 {
14206 uint32_t ofs = (subblock * 2) * 4;
14207 for (uint32_t x = 0; x < 2; x++)
14208 {
14209 for (uint32_t y = 0; y < 4; y++)
14210 {
14211 uint32_t t = block_y[y][x + subblock * 2];
14212 assert(t >= low[subblock] && t <= high[subblock]);
14213 t -= low[subblock];
14214 assert(t <= 3);
14215
14216 t = g_selector_index_to_etc1[sels[t]];
14217
14218 assert(ofs < 16);
14219 l_bitmask |= ((t & 1) << ofs);
14220 h_bitmask |= ((t >> 1) << ofs);
14221 ++ofs;
14222 }
14223 }
14224 }
14225
14226 continue;
14227 } // if
14228
14229 uint32_t best_err = UINT32_MAX;
14230 uint8_t best_sels[8];
14231 uint32_t best_inten = 0;
14232
14233 const int base = base_y[subblock];
14234
14235 const int low_limit = -base;
14236 const int high_limit = 255 - base;
14237
14238 assert(low_limit <= 0 && high_limit >= 0);
14239
14240 uint32_t inten_table_mask = 0xFF;
14241 const uint32_t er = enc_range[subblock];
14242 // Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful.
14243 if (er <= 51)
14244 {
14245 inten_table_mask = 0xF;
14246
14247 if (er > 22)
14248 inten_table_mask &= ~(1 << 0);
14249
14250 if ((er < 4) || (er > 39))
14251 inten_table_mask &= ~(1 << 1);
14252
14253 if (er < 9)
14254 inten_table_mask &= ~(1 << 2);
14255
14256 if (er < 12)
14257 inten_table_mask &= ~(1 << 3);
14258 }
14259 else
14260 {
14261 inten_table_mask &= ~((1 << 0) | (1 << 1));
14262
14263 if (er > 60)
14264 inten_table_mask &= ~(1 << 2);
14265
14266 if (er > 89)
14267 inten_table_mask &= ~(1 << 3);
14268
14269 if (er > 120)
14270 inten_table_mask &= ~(1 << 4);
14271
14272 if (er > 136)
14273 inten_table_mask &= ~(1 << 5);
14274
14275 if (er > 174)
14276 inten_table_mask &= ~(1 << 6);
14277 }
14278
14279 for (uint32_t inten = 0; inten < 8; inten++)
14280 {
14281 if ((inten_table_mask & (1 << inten)) == 0)
14282 continue;
14283
14284 const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]);
14285 const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]);
14286 const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]);
14287 const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]);
14288 assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3));
14289
14290 const int tv[4] = { t2, t3, t1, t0 };
14291
14292 const int thresh01 = t0 + t1;
14293 const int thresh12 = t1 + t2;
14294 const int thresh23 = t2 + t3;
14295
14296 assert(thresh01 <= thresh12 && thresh12 <= thresh23);
14297
14298 static const uint8_t s_table[4] = { 1, 0, 2, 3 };
14299
14300 uint32_t total_err = 0;
14301 uint8_t sels[8];
14302
14303 if (flip)
14304 {
14305 if (((int)high[subblock] - base) * 2 < thresh01)
14306 {
14307 memset(sels, 3, 8);
14308
14309 for (uint32_t y = 0; y < 2; y++)
14310 {
14311 for (uint32_t x = 0; x < 4; x++)
14312 {
14313 const int delta = (int)block_y[y + subblock * 2][x] - base;
14314
14315 const uint32_t c = 3;
14316
14317 uint32_t e = basisu::iabs(tv[c] - delta);
14318 total_err += e * e;
14319 }
14320 if (total_err >= best_err)
14321 break;
14322 }
14323 }
14324 else if (((int)low[subblock] - base) * 2 >= thresh23)
14325 {
14326 memset(sels, 1, 8);
14327
14328 for (uint32_t y = 0; y < 2; y++)
14329 {
14330 for (uint32_t x = 0; x < 4; x++)
14331 {
14332 const int delta = (int)block_y[y + subblock * 2][x] - base;
14333
14334 const uint32_t c = 1;
14335
14336 uint32_t e = basisu::iabs(tv[c] - delta);
14337 total_err += e * e;
14338 }
14339 if (total_err >= best_err)
14340 break;
14341 }
14342 }
14343 else
14344 {
14345 for (uint32_t y = 0; y < 2; y++)
14346 {
14347 for (uint32_t x = 0; x < 4; x++)
14348 {
14349 const int delta = (int)block_y[y + subblock * 2][x] - base;
14350 const int delta2 = delta * 2;
14351
14352 uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
14353 sels[y * 4 + x] = (uint8_t)c;
14354
14355 uint32_t e = basisu::iabs(tv[c] - delta);
14356 total_err += e * e;
14357 }
14358 if (total_err >= best_err)
14359 break;
14360 }
14361 }
14362 }
14363 else
14364 {
14365 if (((int)high[subblock] - base) * 2 < thresh01)
14366 {
14367 memset(sels, 3, 8);
14368
14369 for (uint32_t y = 0; y < 4; y++)
14370 {
14371 for (uint32_t x = 0; x < 2; x++)
14372 {
14373 const int delta = (int)block_y[y][x + subblock * 2] - base;
14374
14375 const uint32_t c = 3;
14376
14377 uint32_t e = basisu::iabs(tv[c] - delta);
14378 total_err += e * e;
14379 }
14380 if (total_err >= best_err)
14381 break;
14382 }
14383 }
14384 else if (((int)low[subblock] - base) * 2 >= thresh23)
14385 {
14386 memset(sels, 1, 8);
14387
14388 for (uint32_t y = 0; y < 4; y++)
14389 {
14390 for (uint32_t x = 0; x < 2; x++)
14391 {
14392 const int delta = (int)block_y[y][x + subblock * 2] - base;
14393
14394 const uint32_t c = 1;
14395
14396 uint32_t e = basisu::iabs(tv[c] - delta);
14397 total_err += e * e;
14398 }
14399 if (total_err >= best_err)
14400 break;
14401 }
14402 }
14403 else
14404 {
14405 for (uint32_t y = 0; y < 4; y++)
14406 {
14407 for (uint32_t x = 0; x < 2; x++)
14408 {
14409 const int delta = (int)block_y[y][x + subblock * 2] - base;
14410 const int delta2 = delta * 2;
14411
14412 uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)];
14413 sels[y * 2 + x] = (uint8_t)c;
14414
14415 uint32_t e = basisu::iabs(tv[c] - delta);
14416 total_err += e * e;
14417 }
14418 if (total_err >= best_err)
14419 break;
14420 }
14421 }
14422 }
14423
14424 if (total_err < best_err)
14425 {
14426 best_err = total_err;
14427 best_inten = inten;
14428 memcpy(best_sels, sels, 8);
14429 }
14430
14431 } // inten
14432
14433 //g_inten_hist[best_inten][enc_range[subblock]]++;
14434
14435 dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 2 : 5));
14436
14437 if (flip)
14438 {
14439 uint32_t ofs = subblock * 2;
14440 for (uint32_t y = 0; y < 2; y++)
14441 {
14442 for (uint32_t x = 0; x < 4; x++)
14443 {
14444 uint32_t t = best_sels[y * 4 + x];
14445
14446 assert(ofs < 16);
14447 l_bitmask |= ((t & 1) << ofs);
14448 h_bitmask |= ((t >> 1) << ofs);
14449 ofs += 4;
14450 }
14451
14452 ofs = (int)ofs + 1 - 4 * 4;
14453 }
14454 }
14455 else
14456 {
14457 uint32_t ofs = (subblock * 2) * 4;
14458 for (uint32_t x = 0; x < 2; x++)
14459 {
14460 for (uint32_t y = 0; y < 4; y++)
14461 {
14462 uint32_t t = best_sels[y * 2 + x];
14463
14464 assert(ofs < 16);
14465 l_bitmask |= ((t & 1) << ofs);
14466 h_bitmask |= ((t >> 1) << ofs);
14467 ++ofs;
14468 }
14469 }
14470 }
14471
14472 } // subblock
14473
14474 dst_blk.m_bytes[7] = (uint8_t)(l_bitmask);
14475 dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8);
14476 dst_blk.m_bytes[5] = (uint8_t)(h_bitmask);
14477 dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8);
14478
14479 return true;
14480 }
14481
14482 const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7;
14483
14484 void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst)
14485 {
14486 eac_block& dst = *static_cast<eac_block*>(pDst);
14487 const color32* pSrc_pixels = &block_pixels[0][0];
14488
14489 if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR))
14490 {
14491 const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ? unpacked_src_blk.m_solid_color[3] : 255;
14492
14493 dst.m_base = a;
14494 dst.m_table = 13;
14495 dst.m_multiplier = 1;
14496
14497 memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
14498
14499 return;
14500 }
14501
14502 uint32_t min_a = 255, max_a = 0;
14503 for (uint32_t i = 0; i < 16; i++)
14504 {
14505 min_a = basisu::minimum<uint32_t>(min_a, pSrc_pixels[i].a);
14506 max_a = basisu::maximum<uint32_t>(max_a, pSrc_pixels[i].a);
14507 }
14508
14509 if (min_a == max_a)
14510 {
14511 dst.m_base = min_a;
14512 dst.m_table = 13;
14513 dst.m_multiplier = 1;
14514
14515 memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
14516 return;
14517 }
14518
14519 const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF;
14520 const int multiplier = unpacked_src_blk.m_etc2_hints >> 4;
14521
14522 assert(multiplier >= 1);
14523
14524 dst.m_multiplier = multiplier;
14525 dst.m_table = table;
14526
14527 const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]);
14528 const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));
14529
14530 dst.m_base = center;
14531
14532 const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0];
14533
14534 uint32_t vals[8];
14535 for (uint32_t j = 0; j < 8; j++)
14536 vals[j] = clamp255(center + (pTable[j] * multiplier));
14537
14538 uint64_t sels = 0;
14539 for (uint32_t i = 0; i < 16; i++)
14540 {
14541 const uint32_t a = block_pixels[i & 3][i >> 2].a;
14542
14543 const uint32_t err0 = (basisu::iabs(vals[0] - a) << 3) | 0;
14544 const uint32_t err1 = (basisu::iabs(vals[1] - a) << 3) | 1;
14545 const uint32_t err2 = (basisu::iabs(vals[2] - a) << 3) | 2;
14546 const uint32_t err3 = (basisu::iabs(vals[3] - a) << 3) | 3;
14547 const uint32_t err4 = (basisu::iabs(vals[4] - a) << 3) | 4;
14548 const uint32_t err5 = (basisu::iabs(vals[5] - a) << 3) | 5;
14549 const uint32_t err6 = (basisu::iabs(vals[6] - a) << 3) | 6;
14550 const uint32_t err7 = (basisu::iabs(vals[7] - a) << 3) | 7;
14551
14552 const uint32_t min_err = basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(err0, err1, err2), err3), err4), err5), err6), err7);
14553
14554 const uint64_t best_index = min_err & 7;
14555 sels |= (best_index << (45 - i * 3));
14556 }
14557
14558 dst.set_selector_bits(sels);
14559 }
14560
14561 bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst)
14562 {
14563 eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst);
14564 decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1];
14565
14566 unpacked_uastc_block unpacked_src_blk;
14567 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
14568 return false;
14569
14570 color32 block_pixels[4][4];
14571 if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR)
14572 {
14573 const bool unpack_srgb = false;
14574 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
14575 return false;
14576 }
14577
14578 transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk);
14579
14580 transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk);
14581
14582 return true;
14583 }
14584
// Lookup tables that map an N-bit index (N = 5, 4, 3, 2, 1; each table has 2^N entries)
// to a value in [0,3]. Going by the names these convert UASTC weight indices to BC1
// selectors - NOTE(review): confirm against the BC1 transcoder that consumes them.
static const uint8_t s_uastc5_to_bc1[32] = { 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1 };
static const uint8_t s_uastc4_to_bc1[16] = { 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1 };
static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 };
static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 };
static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 };
// Indexed by bit count (1..5); entry 0 is nullptr because there is no 0-bit table.
const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 };
14591
14592 void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride)
14593 {
14594 uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v;
14595
14596 {
14597 min0_v = max0_v = pPixels[0 * stride];
14598 min1_v = max1_v = pPixels[1 * stride];
14599 min2_v = max2_v = pPixels[2 * stride];
14600 min3_v = max3_v = pPixels[3 * stride];
14601 }
14602
14603 {
14604 uint32_t v0 = pPixels[4 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
14605 uint32_t v1 = pPixels[5 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
14606 uint32_t v2 = pPixels[6 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
14607 uint32_t v3 = pPixels[7 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
14608 }
14609
14610 {
14611 uint32_t v0 = pPixels[8 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
14612 uint32_t v1 = pPixels[9 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
14613 uint32_t v2 = pPixels[10 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
14614 uint32_t v3 = pPixels[11 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
14615 }
14616
14617 {
14618 uint32_t v0 = pPixels[12 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0);
14619 uint32_t v1 = pPixels[13 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1);
14620 uint32_t v2 = pPixels[14 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2);
14621 uint32_t v3 = pPixels[15 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3);
14622 }
14623
14624 const uint32_t min_v = basisu::minimum(min0_v, min1_v, min2_v, min3_v);
14625 const uint32_t max_v = basisu::maximum(max0_v, max1_v, max2_v, max3_v);
14626
14627 uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst);
14628 pDst_bytes[0] = (uint8_t)max_v;
14629 pDst_bytes[1] = (uint8_t)min_v;
14630
14631 if (max_v == min_v)
14632 {
14633 memset(pDst_bytes + 2, 0, 6);
14634 return;
14635 }
14636
14637 const uint32_t delta = max_v - min_v;
14638
14639 // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
14640 const int t0 = delta * 13;
14641 const int t1 = delta * 11;
14642 const int t2 = delta * 9;
14643 const int t3 = delta * 7;
14644 const int t4 = delta * 5;
14645 const int t5 = delta * 3;
14646 const int t6 = delta * 1;
14647
14648 // BC4 floors in its divisions, which we compensate for with the 4 bias.
14649 // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
14650 const int bias = 4 - min_v * 14;
14651
14652 static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U };
14653 static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U };
14654 static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U };
14655 static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U };
14656
14657 uint64_t a0, a1, a2, a3;
14658 {
14659 const int v0 = pPixels[0 * stride] * 14 + bias;
14660 const int v1 = pPixels[1 * stride] * 14 + bias;
14661 const int v2 = pPixels[2 * stride] * 14 + bias;
14662 const int v3 = pPixels[3 * stride] * 14 + bias;
14663 a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
14664 a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
14665 a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
14666 a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
14667 }
14668
14669 {
14670 const int v0 = pPixels[4 * stride] * 14 + bias;
14671 const int v1 = pPixels[5 * stride] * 14 + bias;
14672 const int v2 = pPixels[6 * stride] * 14 + bias;
14673 const int v3 = pPixels[7 * stride] * 14 + bias;
14674 a0 |= (s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
14675 a1 |= (s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
14676 a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
14677 a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
14678 }
14679
14680 {
14681 const int v0 = pPixels[8 * stride] * 14 + bias;
14682 const int v1 = pPixels[9 * stride] * 14 + bias;
14683 const int v2 = pPixels[10 * stride] * 14 + bias;
14684 const int v3 = pPixels[11 * stride] * 14 + bias;
14685 a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
14686 a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
14687 a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
14688 a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
14689 }
14690
14691 {
14692 const int v0 = pPixels[12 * stride] * 14 + bias;
14693 const int v1 = pPixels[13 * stride] * 14 + bias;
14694 const int v2 = pPixels[14 * stride] * 14 + bias;
14695 const int v3 = pPixels[15 * stride] * 14 + bias;
14696 a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
14697 a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
14698 a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
14699 a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
14700 }
14701
14702 const uint64_t f = a0 | a1 | a2 | a3;
14703
14704 pDst_bytes[2] = (uint8_t)f;
14705 pDst_bytes[3] = (uint8_t)(f >> 8U);
14706 pDst_bytes[4] = (uint8_t)(f >> 16U);
14707 pDst_bytes[5] = (uint8_t)(f >> 24U);
14708 pDst_bytes[6] = (uint8_t)(f >> 32U);
14709 pDst_bytes[7] = (uint8_t)(f >> 40U);
14710 }
14711
14712 static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
14713 {
14714 uint32_t block_r[4], block_g[4], block_b[4];
14715
14716 block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
14717 block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
14718 block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
14719 block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
14720
14721 int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
14722
14723 int dots[4];
14724 for (uint32_t i = 0; i < 4; i++)
14725 dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
14726
14727 int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
14728
14729 ar *= 2; ag *= 2; ab *= 2;
14730
14731 for (uint32_t i = 0; i < 16; i++)
14732 {
14733 const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab;
14734 static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
14735
14736 // Rounding matters here!
14737 // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality.
14738 sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)];
14739 }
14740 }
14741
14742 static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16])
14743 {
14744 uint32_t block_r[4], block_g[4], block_b[4];
14745
14746 block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2);
14747 block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2);
14748 block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3;
14749 block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3;
14750
14751 int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0];
14752
14753 int dots[4];
14754 for (uint32_t i = 0; i < 4; i++)
14755 dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab;
14756
14757 int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
14758
14759 ar *= 2; ag *= 2; ab *= 2;
14760
14761 static const uint8_t s_sels[4] = { 3, 2, 1, 0 };
14762
14763 for (uint32_t i = 0; i < 16; i += 4)
14764 {
14765 const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab;
14766 const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab;
14767 const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab;
14768 const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab;
14769
14770 sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
14771 sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
14772 sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)];
14773 sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)];
14774 }
14775 }
14776
14777 struct vec3F { float c[3]; };
14778
14779 static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh)
14780 {
14781 // Derived from bc7enc16's LS function.
14782 // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
14783 // I did this in matrix form first, expanded out all the ops, then optimized it a bit.
14784 uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0;
14785
14786 // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
14787 // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
14788 static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 };
14789
14790 uint32_t weight_accum = 0;
14791 for (uint32_t i = 0; i < 16; i++)
14792 {
14793 const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
14794 const uint32_t sel = pSelectors[i];
14795 ut_r += r;
14796 ut_g += g;
14797 ut_b += b;
14798 weight_accum += s_weight_vals[sel];
14799 uq00_r += sel * r;
14800 uq00_g += sel * g;
14801 uq00_b += sel * b;
14802 }
14803
14804 float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r;
14805 float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g;
14806 float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b;
14807
14808 q10_r = t_r * 3.0f - q00_r;
14809 q10_g = t_g * 3.0f - q00_g;
14810 q10_b = t_b * 3.0f - q00_b;
14811
14812 float z00 = (float)((weight_accum >> 16) & 0xFF);
14813 float z10 = (float)((weight_accum >> 8) & 0xFF);
14814 float z11 = (float)(weight_accum & 0xFF);
14815 float z01 = z10;
14816
14817 float det = z00 * z11 - z01 * z10;
14818 if (fabs(det) < 1e-8f)
14819 return false;
14820
14821 det = 3.0f / det;
14822
14823 float iz00, iz01, iz10, iz11;
14824 iz00 = z11 * det;
14825 iz01 = -z01 * det;
14826 iz10 = -z10 * det;
14827 iz11 = z00 * det;
14828
14829 pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r;
14830 pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g;
14831 pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b;
14832
14833 // Check and fix channel singularities - might not be needed, but is in UASTC's encoder.
14834 for (uint32_t c = 0; c < 3; c++)
14835 {
14836 if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f))
14837 {
14838 uint32_t lo_v = UINT32_MAX, hi_v = 0;
14839 for (uint32_t i = 0; i < 16; i++)
14840 {
14841 lo_v = basisu::minimumu(lo_v, pColors[i].c[c]);
14842 hi_v = basisu::maximumu(hi_v, pColors[i].c[c]);
14843 }
14844
14845 if (lo_v == hi_v)
14846 {
14847 pXl->c[c] = (float)lo_v;
14848 pXh->c[c] = (float)hi_v;
14849 }
14850 }
14851 }
14852
14853 return true;
14854 }
14855
14856 void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb)
14857 {
14858 dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
14859
14860 uint32_t mask = 0xAA;
14861 uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi;
14862 uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo;
14863
14864 if (min16 == max16)
14865 {
14866 // Always forbid 3 color blocks
14867 // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's.
14868 mask = 0;
14869
14870 // Make l > h
14871 if (min16 > 0)
14872 min16--;
14873 else
14874 {
14875 // l = h = 0
14876 assert(min16 == max16 && max16 == 0);
14877
14878 max16 = 1;
14879 min16 = 0;
14880 mask = 0x55;
14881 }
14882
14883 assert(max16 > min16);
14884 }
14885
14886 if (max16 < min16)
14887 {
14888 std::swap(max16, min16);
14889 mask ^= 0x55;
14890 }
14891
14892 pDst_block->set_low_color(static_cast<uint16_t>(max16));
14893 pDst_block->set_high_color(static_cast<uint16_t>(min16));
14894 pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
14895 pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
14896 pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
14897 pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
14898 }
14899
14900 static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
14901 static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); }
14902
	// Good references: squish library, stb_dxt.
	//
	// Encodes 16 source pixels into the BC1 (DXT1) block at pDst.
	//
	// pDst:    destination dxt1_block. When cEncodeBC1UseSelectors is set, its current selector bytes are
	//          read first and used as the starting selectors.
	// pPixels: 16 pixels, reinterpreted as color32; only the r, g and b members are read.
	// flags:   cEncodeBC1UseSelectors - start from the selectors already stored in pDst.
	//          cEncodeBC1HighQuality / cEncodeBC1HigherQuality - run 2 / 3 least squares passes instead of 1.
	//
	// Overview: pick two initial endpoint colors along the block's approximate principal axis, assign
	// selectors, then alternate "least squares endpoints from selectors" and "selectors from endpoints".
	// The block is always written with its low color > its high color (see "Always forbid 3 color blocks").
	void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags)
	{
		const color32* pSrc_pixels = (const color32*)pPixels;
		dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);

		// avg_r == -1 is a sentinel: the block average hasn't been computed yet (the use_sels path skips it).
		int avg_r = -1, avg_g = 0, avg_b = 0;
		int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
		// Selectors in linear order: 0 = low endpoint, 1/2 = interpolated colors, 3 = high endpoint.
		uint8_t sels[16];

		const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
		if (use_sels)
		{
			// Caller is jamming in their own selectors for us to try.
			const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);

			// Converts stored BC1 selectors to linear order (inverse of s_sel_trans used when packing below).
			static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };

			for (uint32_t i = 0; i < 16; i++)
				sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
		}
		else
		{
			const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;

			// Solid color check: j reaches 16 only if every pixel matches the first one.
			uint32_t j;
			for (j = 1; j < 16; j++)
				if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
					break;

			if (j == 16)
			{
				encode_bc1_solid_block(pDst, fr, fg, fb);
				return;
			}

			// Select 2 colors along the principal axis. (There must be a faster/simpler way.)
			int total_r = fr, total_g = fg, total_b = fb;
			int max_r = fr, max_g = fg, max_b = fb;
			int min_r = fr, min_g = fg, min_b = fb;
			for (uint32_t i = 1; i < 16; i++)
			{
				const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
				max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
				min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
				total_r += r; total_g += g; total_b += b;
			}

			// Rounded average of the 16 pixels.
			avg_r = (total_r + 8) >> 4;
			avg_g = (total_g + 8) >> 4;
			avg_b = (total_b + 8) >> 4;

			// Upper triangle of the 3x3 covariance matrix: rr, rg, rb, gg, gb, bb.
			int icov[6] = { 0, 0, 0, 0, 0, 0 };
			for (uint32_t i = 0; i < 16; i++)
			{
				int r = (int)pSrc_pixels[i].r - avg_r;
				int g = (int)pSrc_pixels[i].g - avg_g;
				int b = (int)pSrc_pixels[i].b - avg_b;
				icov[0] += r * r;
				icov[1] += r * g;
				icov[2] += r * b;
				icov[3] += g * g;
				icov[4] += g * b;
				icov[5] += b * b;
			}

			// Scale down so the repeated multiplications below stay in a reasonable float range.
			float cov[6];
			for (uint32_t i = 0; i < 6; i++)
				cov[i] = static_cast<float>(icov[i])* (1.0f / 255.0f);

#if 0
			// Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference).
			// TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta
			int saxis_r = max_r - min_r;
			int saxis_g = max_g - min_g;
			int saxis_b = max_b - min_b;
#else
			// Power iteration: start from the bounding box diagonal and multiply by the covariance matrix
			// 4 times, which pulls the vector toward the principal axis.
			float xr = (float)(max_r - min_r);
			float xg = (float)(max_g - min_g);
			float xb = (float)(max_b - min_b);
			//float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's
			//float xg = (float)(max_g - avg_g);
			//float xb = (float)(max_b - avg_b);
			for (uint32_t power_iter = 0; power_iter < 4; power_iter++)
			{
				float r = xr * cov[0] + xg * cov[1] + xb * cov[2];
				float g = xr * cov[1] + xg * cov[3] + xb * cov[4];
				float b = xr * cov[2] + xg * cov[4] + xb * cov[5];
				xr = r; xg = g; xb = b;
			}

			// Normalize so the largest component is 1024 and convert to integers. If the vector is too
			// short (k < 2) the fixed default axis (306, 601, 117) is kept instead.
			float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb));
			int saxis_r = 306, saxis_g = 601, saxis_b = 117;
			if (k >= 2)
			{
				float m = 1024.0f / k;
				saxis_r = (int)(xr * m);
				saxis_g = (int)(xg * m);
				saxis_b = (int)(xb * m);
			}
#endif

			// Find the pixels with the smallest and largest projection onto the axis.
			// Strict comparisons mean the first pixel wins ties.
			int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0;
			for (uint32_t i = 0; i < 16; i++)
			{
				int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b;
				if (dot < low_dot)
				{
					low_dot = dot;
					low_c = i;
				}
				if (dot > high_dot)
				{
					high_dot = dot;
					high_c = i;
				}
			}

			// Quantize the two extreme pixels to 5:6:5 - these are the initial endpoints.
			lr = to_5(pSrc_pixels[low_c].r);
			lg = to_6(pSrc_pixels[low_c].g);
			lb = to_5(pSrc_pixels[low_c].b);

			hr = to_5(pSrc_pixels[high_c].r);
			hg = to_6(pSrc_pixels[high_c].g);
			hb = to_5(pSrc_pixels[high_c].b);

			bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
		} // if (use_sels)

		const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
		for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
		{
			// This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
			vec3F xl, xh;
			if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
			{
				// Lazily compute the block average if the use_sels path skipped it.
				if (avg_r < 0)
				{
					int total_r = 0, total_g = 0, total_b = 0;
					for (uint32_t i = 0; i < 16; i++)
					{
						total_r += pSrc_pixels[i].r;
						total_g += pSrc_pixels[i].g;
						total_b += pSrc_pixels[i].b;
					}

					avg_r = (total_r + 8) >> 4;
					avg_g = (total_g + 8) >> 4;
					avg_b = (total_b + 8) >> 4;
				}

				// All selectors equal - treat it as a solid block which should always be equal or better.
				lr = g_bc1_match5_equals_1[avg_r].m_hi;
				lg = g_bc1_match6_equals_1[avg_g].m_hi;
				lb = g_bc1_match5_equals_1[avg_b].m_hi;

				hr = g_bc1_match5_equals_1[avg_r].m_lo;
				hg = g_bc1_match6_equals_1[avg_g].m_lo;
				hb = g_bc1_match5_equals_1[avg_b].m_lo;

				// In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
			}
			else
			{
				// Round the fitted endpoints to 5:6:5 and clamp them into range.
				lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
				lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
				lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);

				hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
				hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
				hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
			}

			// Re-derive the selectors for the new endpoints.
			bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
		}

		uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
		uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);

		// Always forbid 3 color blocks
		if (lc16 == hc16)
		{
			// Both endpoints quantized to the same color: emit a constant-selector block.
			uint8_t mask = 0;

			// Make l > h
			if (hc16 > 0)
				hc16--;
			else
			{
				// lc16 = hc16 = 0
				assert(lc16 == hc16 && hc16 == 0);

				hc16 = 0;
				lc16 = 1;
				mask = 0x55; // select hc16
			}

			assert(lc16 > hc16);
			pDst_block->set_low_color(static_cast<uint16_t>(lc16));
			pDst_block->set_high_color(static_cast<uint16_t>(hc16));

			pDst_block->m_selectors[0] = mask;
			pDst_block->m_selectors[1] = mask;
			pDst_block->m_selectors[2] = mask;
			pDst_block->m_selectors[3] = mask;
		}
		else
		{
			// If the endpoints must be swapped to get l > h, XORing every 2-bit selector with 1
			// (0<->1, 2<->3) compensates so each texel still decodes to the same color.
			uint8_t invert_mask = 0;
			if (lc16 < hc16)
			{
				std::swap(lc16, hc16);
				invert_mask = 0x55;
			}

			assert(lc16 > hc16);
			pDst_block->set_low_color((uint16_t)lc16);
			pDst_block->set_high_color((uint16_t)hc16);

			// Convert linear selectors to the stored BC1 order and pack them, 2 bits per texel.
			uint32_t packed_sels = 0;
			static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
			for (uint32_t i = 0; i < 16; i++)
				packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));

			pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
			pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
			pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
			pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
		}
	}
15133
15134 void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags)
15135 {
15136 const color32* pSrc_pixels = (const color32*)pPixels;
15137 dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst);
15138
15139 int avg_r = -1, avg_g = 0, avg_b = 0;
15140 int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0;
15141 uint8_t sels[16];
15142
15143 const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0;
15144 if (use_sels)
15145 {
15146 // Caller is jamming in their own selectors for us to try.
15147 const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24);
15148
15149 static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 };
15150
15151 for (uint32_t i = 0; i < 16; i++)
15152 sels[i] = s_sel_tran[(s >> (i * 2)) & 3];
15153 }
15154 else
15155 {
15156 const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
15157
15158 uint32_t j;
15159 for (j = 1; j < 16; j++)
15160 if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb))
15161 break;
15162
15163 if (j == 16)
15164 {
15165 encode_bc1_solid_block(pDst, fr, fg, fb);
15166 return;
15167 }
15168
15169 // Select 2 colors along the principle axis. (There must be a faster/simpler way.)
15170 int total_r = fr, total_g = fg, total_b = fb;
15171 int max_r = fr, max_g = fg, max_b = fb;
15172 int min_r = fr, min_g = fg, min_b = fb;
15173 uint32_t grayscale_flag = (fr == fg) && (fr == fb);
15174 for (uint32_t i = 1; i < 16; i++)
15175 {
15176 const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
15177 grayscale_flag &= ((r == g) && (r == b));
15178 max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b);
15179 min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b);
15180 total_r += r; total_g += g; total_b += b;
15181 }
15182
15183 if (grayscale_flag)
15184 {
15185 // Grayscale blocks are a common enough case to specialize.
15186 if ((max_r - min_r) < 2)
15187 {
15188 lr = lb = hr = hb = to_5(fr);
15189 lg = hg = to_6(fr);
15190 }
15191 else
15192 {
15193 lr = lb = to_5(min_r);
15194 lg = to_6(min_r);
15195
15196 hr = hb = to_5(max_r);
15197 hg = to_6(max_r);
15198 }
15199 }
15200 else
15201 {
15202 avg_r = (total_r + 8) >> 4;
15203 avg_g = (total_g + 8) >> 4;
15204 avg_b = (total_b + 8) >> 4;
15205
15206 // Find the shortest vector from a AABB corner to the block's average color.
15207 // This is to help avoid outliers.
15208
15209 uint32_t dist[3][2];
15210 dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3;
15211 dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3;
15212 dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3;
15213
15214 uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]);
15215 uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4;
15216 min_d0 = basisu::minimum(min_d0, d4);
15217
15218 uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1;
15219 uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5;
15220 min_d1 = basisu::minimum(min_d1, d5);
15221
15222 uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2;
15223 min_d0 = basisu::minimum(min_d0, d2);
15224
15225 uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3;
15226 min_d1 = basisu::minimum(min_d1, d3);
15227
15228 uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6;
15229 min_d0 = basisu::minimum(min_d0, d6);
15230
15231 uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7;
15232 min_d1 = basisu::minimum(min_d1, d7);
15233
15234 uint32_t min_d = basisu::minimum(min_d0, min_d1);
15235 uint32_t best_i = min_d & 7;
15236
15237 int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r);
15238 int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g);
15239 int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b);
15240
15241 // Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in.
15242 uint32_t low_c = 0, high_c = 0;
15243 if ((delta_r | delta_g | delta_b) != 0)
15244 {
15245 // Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB.
15246 // Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection.
15247 // One of these vectors will be a decent approximation of the block's PCA.
15248 const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b;
15249
15250 int low_dot0 = INT_MAX, high_dot0 = INT_MIN;
15251 int low_dot1 = INT_MAX, high_dot1 = INT_MIN;
15252 int low_dot2 = INT_MAX, high_dot2 = INT_MIN;
15253 int low_dot3 = INT_MAX, high_dot3 = INT_MIN;
15254
15255 //int low_c0, low_c1, low_c2, low_c3;
15256 //int high_c0, high_c1, high_c2, high_c3;
15257
15258 for (uint32_t i = 0; i < 16; i++)
15259 {
15260 const int dotx = pSrc_pixels[i].r * saxis0_r;
15261 const int doty = pSrc_pixels[i].g * saxis0_g;
15262 const int dotz = pSrc_pixels[i].b * saxis0_b;
15263
15264 const int dot0 = ((dotz + dotx + doty) << 4) + i;
15265 const int dot1 = ((dotz - dotx - doty) << 4) + i;
15266 const int dot2 = ((dotz - dotx + doty) << 4) + i;
15267 const int dot3 = ((dotz + dotx - doty) << 4) + i;
15268
15269 if (dot0 < low_dot0)
15270 {
15271 low_dot0 = dot0;
15272 //low_c0 = i;
15273 }
15274 if ((dot0 ^ 15) > high_dot0)
15275 {
15276 high_dot0 = dot0 ^ 15;
15277 //high_c0 = i;
15278 }
15279
15280 if (dot1 < low_dot1)
15281 {
15282 low_dot1 = dot1;
15283 //low_c1 = i;
15284 }
15285 if ((dot1 ^ 15) > high_dot1)
15286 {
15287 high_dot1 = dot1 ^ 15;
15288 //high_c1 = i;
15289 }
15290
15291 if (dot2 < low_dot2)
15292 {
15293 low_dot2 = dot2;
15294 //low_c2 = i;
15295 }
15296 if ((dot2 ^ 15) > high_dot2)
15297 {
15298 high_dot2 = dot2 ^ 15;
15299 //high_c2 = i;
15300 }
15301
15302 if (dot3 < low_dot3)
15303 {
15304 low_dot3 = dot3;
15305 //low_c3 = i;
15306 }
15307 if ((dot3 ^ 15) > high_dot3)
15308 {
15309 high_dot3 = dot3 ^ 15;
15310 //high_c3 = i;
15311 }
15312 }
15313
15314 low_c = low_dot0 & 15;
15315 high_c = ~high_dot0 & 15;
15316 uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15);
15317
15318 uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15);
15319 if (tr > r) {
15320 low_c = low_dot1 & 15;
15321 high_c = ~high_dot1 & 15;
15322 r = tr;
15323 }
15324
15325 tr = (high_dot2 & ~15) - (low_dot2 & ~15);
15326 if (tr > r) {
15327 low_c = low_dot2 & 15;
15328 high_c = ~high_dot2 & 15;
15329 r = tr;
15330 }
15331
15332 tr = (high_dot3 & ~15) - (low_dot3 & ~15);
15333 if (tr > r) {
15334 low_c = low_dot3 & 15;
15335 high_c = ~high_dot3 & 15;
15336 }
15337 }
15338
15339 lr = to_5(pSrc_pixels[low_c].r);
15340 lg = to_6(pSrc_pixels[low_c].g);
15341 lb = to_5(pSrc_pixels[low_c].b);
15342
15343 hr = to_5(pSrc_pixels[high_c].r);
15344 hg = to_6(pSrc_pixels[high_c].g);
15345 hb = to_5(pSrc_pixels[high_c].b);
15346 }
15347
15348 bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
15349 } // if (use_sels)
15350
15351 const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1);
15352 for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++)
15353 {
15354 int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb;
15355
15356 // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors.
15357 vec3F xl, xh;
15358 if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh))
15359 {
15360 if (avg_r < 0)
15361 {
15362 int total_r = 0, total_g = 0, total_b = 0;
15363 for (uint32_t i = 0; i < 16; i++)
15364 {
15365 total_r += pSrc_pixels[i].r;
15366 total_g += pSrc_pixels[i].g;
15367 total_b += pSrc_pixels[i].b;
15368 }
15369
15370 avg_r = (total_r + 8) >> 4;
15371 avg_g = (total_g + 8) >> 4;
15372 avg_b = (total_b + 8) >> 4;
15373 }
15374
15375 // All selectors equal - treat it as a solid block which should always be equal or better.
15376 lr = g_bc1_match5_equals_1[avg_r].m_hi;
15377 lg = g_bc1_match6_equals_1[avg_g].m_hi;
15378 lb = g_bc1_match5_equals_1[avg_b].m_hi;
15379
15380 hr = g_bc1_match5_equals_1[avg_r].m_lo;
15381 hg = g_bc1_match6_equals_1[avg_g].m_lo;
15382 hb = g_bc1_match5_equals_1[avg_b].m_lo;
15383
15384 // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge.
15385 }
15386 else
15387 {
15388 lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
15389 lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
15390 lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
15391
15392 hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31);
15393 hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63);
15394 hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31);
15395 }
15396
15397 if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb))
15398 break;
15399
15400 bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels);
15401 }
15402
15403 uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb);
15404 uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb);
15405
15406 // Always forbid 3 color blocks
15407 if (lc16 == hc16)
15408 {
15409 uint8_t mask = 0;
15410
15411 // Make l > h
15412 if (hc16 > 0)
15413 hc16--;
15414 else
15415 {
15416 // lc16 = hc16 = 0
15417 assert(lc16 == hc16 && hc16 == 0);
15418
15419 hc16 = 0;
15420 lc16 = 1;
15421 mask = 0x55; // select hc16
15422 }
15423
15424 assert(lc16 > hc16);
15425 pDst_block->set_low_color(static_cast<uint16_t>(lc16));
15426 pDst_block->set_high_color(static_cast<uint16_t>(hc16));
15427
15428 pDst_block->m_selectors[0] = mask;
15429 pDst_block->m_selectors[1] = mask;
15430 pDst_block->m_selectors[2] = mask;
15431 pDst_block->m_selectors[3] = mask;
15432 }
15433 else
15434 {
15435 uint8_t invert_mask = 0;
15436 if (lc16 < hc16)
15437 {
15438 std::swap(lc16, hc16);
15439 invert_mask = 0x55;
15440 }
15441
15442 assert(lc16 > hc16);
15443 pDst_block->set_low_color((uint16_t)lc16);
15444 pDst_block->set_high_color((uint16_t)hc16);
15445
15446 uint32_t packed_sels = 0;
15447 static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 };
15448 for (uint32_t i = 0; i < 16; i++)
15449 packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
15450
15451 pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask;
15452 pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
15453 pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
15454 pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
15455 }
15456 }
15457
15458 // Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest.
15459 void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst)
15460 {
15461 const uint32_t mode = unpacked_src_blk.m_mode;
15462 const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
15463
15464 dxt1_block& b = *static_cast<dxt1_block*>(pDst);
15465
15466 const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode];
15467
15468 const uint32_t total_comps = g_uastc_mode_comps[mode];
15469
15470 if (total_comps == 2)
15471 {
15472 const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant;
15473 const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant;
15474
15475 b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127));
15476 b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127));
15477 }
15478 else
15479 {
15480 b.set_low_color(dxt1_block::pack_color(
15481 color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant,
15482 g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant,
15483 g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant,
15484 255), true, 127)
15485 );
15486
15487 b.set_high_color(dxt1_block::pack_color(
15488 color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant,
15489 g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant,
15490 g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant,
15491 255), true, 127)
15492 );
15493 }
15494
15495 if (b.get_low_color() == b.get_high_color())
15496 {
15497 // Always forbid 3 color blocks
15498 uint16_t lc16 = (uint16_t)b.get_low_color();
15499 uint16_t hc16 = (uint16_t)b.get_high_color();
15500
15501 uint8_t mask = 0;
15502
15503 // Make l > h
15504 if (hc16 > 0)
15505 hc16--;
15506 else
15507 {
15508 // lc16 = hc16 = 0
15509 assert(lc16 == hc16 && hc16 == 0);
15510
15511 hc16 = 0;
15512 lc16 = 1;
15513 mask = 0x55; // select hc16
15514 }
15515
15516 assert(lc16 > hc16);
15517 b.set_low_color(static_cast<uint16_t>(lc16));
15518 b.set_high_color(static_cast<uint16_t>(hc16));
15519
15520 b.m_selectors[0] = mask;
15521 b.m_selectors[1] = mask;
15522 b.m_selectors[2] = mask;
15523 b.m_selectors[3] = mask;
15524 }
15525 else
15526 {
15527 bool invert = false;
15528 if (b.get_low_color() < b.get_high_color())
15529 {
15530 std::swap(b.m_low_color[0], b.m_high_color[0]);
15531 std::swap(b.m_low_color[1], b.m_high_color[1]);
15532 invert = true;
15533 }
15534
15535 const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
15536
15537 const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
15538
15539 uint32_t sels = 0;
15540 for (int i = 15; i >= 0; --i)
15541 {
15542 uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]];
15543
15544 if (invert)
15545 s ^= 1;
15546
15547 sels = (sels << 2) | s;
15548 }
15549 b.m_selectors[0] = sels & 0xFF;
15550 b.m_selectors[1] = (sels >> 8) & 0xFF;
15551 b.m_selectors[2] = (sels >> 16) & 0xFF;
15552 b.m_selectors[3] = (sels >> 24) & 0xFF;
15553 }
15554 }
15555
15556 // Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed.
15557 void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality)
15558 {
15559 const uint32_t mode = unpacked_src_blk.m_mode;
15560
15561 const astc_block_desc& astc_blk = unpacked_src_blk.m_astc;
15562
15563 dxt1_block& b = *static_cast<dxt1_block*>(pDst);
15564
15565 b.set_low_color(1);
15566 b.set_high_color(0);
15567
15568 const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]];
15569
15570 const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1;
15571
15572 uint32_t sels = 0;
15573 for (int i = 15; i >= 0; --i)
15574 {
15575 sels <<= 2;
15576 sels |= pTran[astc_blk.m_weights[i << plane_shift]];
15577 }
15578
15579 b.m_selectors[0] = sels & 0xFF;
15580 b.m_selectors[1] = (sels >> 8) & 0xFF;
15581 b.m_selectors[2] = (sels >> 16) & 0xFF;
15582 b.m_selectors[3] = (sels >> 24) & 0xFF;
15583
15584 encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors);
15585 }
15586
15587 bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality)
15588 {
15589 unpacked_uastc_block unpacked_src_blk;
15590 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15591 return false;
15592
15593 const uint32_t mode = unpacked_src_blk.m_mode;
15594
15595 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
15596 {
15597 encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
15598 return true;
15599 }
15600
15601 if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
15602 transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst);
15603 else
15604 {
15605 color32 block_pixels[4][4];
15606 const bool unpack_srgb = false;
15607 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15608 return false;
15609
15610 if (unpacked_src_blk.m_bc1_hint1)
15611 transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality);
15612 else
15613 encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
15614 }
15615
15616 return true;
15617 }
15618
// Writes an 8-byte BC4 block whose two endpoint bytes are both (uint8_t)a and whose
// remaining six selector bytes are all zero.
static void write_bc4_solid_block(uint8_t* pDst, uint32_t a)
{
    const uint8_t endpoint = static_cast<uint8_t>(a);

    // Clear the whole block, then fill in the two endpoints.
    memset(pDst, 0, 8);
    pDst[0] = endpoint;
    pDst[1] = endpoint;
}
15625
15626 bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality)
15627 {
15628 unpacked_uastc_block unpacked_src_blk;
15629 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15630 return false;
15631
15632 const uint32_t mode = unpacked_src_blk.m_mode;
15633
15634 void* pBC4_block = pDst;
15635 dxt1_block* pBC1_block = &static_cast<dxt1_block*>(pDst)[1];
15636
15637 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
15638 {
15639 write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.a);
15640 encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b);
15641 return true;
15642 }
15643
15644 color32 block_pixels[4][4];
15645 const bool unpack_srgb = false;
15646 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15647 return false;
15648
15649 basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32));
15650
15651 if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0))
15652 transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block);
15653 else
15654 {
15655 if (unpacked_src_blk.m_bc1_hint1)
15656 transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality);
15657 else
15658 encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0);
15659 }
15660
15661 return true;
15662 }
15663
15664 bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
15665 {
15666 BASISU_NOTE_UNUSED(high_quality);
15667
15668 unpacked_uastc_block unpacked_src_blk;
15669 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15670 return false;
15671
15672 const uint32_t mode = unpacked_src_blk.m_mode;
15673
15674 void* pBC4_block = pDst;
15675
15676 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
15677 {
15678 write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]);
15679 return true;
15680 }
15681
15682 color32 block_pixels[4][4];
15683 const bool unpack_srgb = false;
15684 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15685 return false;
15686
15687 basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32));
15688
15689 return true;
15690 }
15691
15692 bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
15693 {
15694 BASISU_NOTE_UNUSED(high_quality);
15695
15696 unpacked_uastc_block unpacked_src_blk;
15697 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
15698 return false;
15699
15700 const uint32_t mode = unpacked_src_blk.m_mode;
15701
15702 void* pBC4_block0 = pDst;
15703 void* pBC4_block1 = (uint8_t*)pDst + 8;
15704
15705 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
15706 {
15707 write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]);
15708 write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]);
15709 return true;
15710 }
15711
15712 color32 block_pixels[4][4];
15713 const bool unpack_srgb = false;
15714 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
15715 return false;
15716
15717 basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32));
15718 basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32));
15719
15720 return true;
15721 }
15722
// Bit offset, inside the packed EAC selector word, of the 3-bit selector belonging to source
// pixel i (i is the order in which the EAC packers read the pixels: pPixels[i * stride]).
// Entry i equals 45 - 12 * (i & 3) - 3 * (i >> 2).
static const uint8_t s_etc2_eac_bit_ofs[16] =
{
    45, 33, 21,  9,
    42, 30, 18,  6,
    39, 27, 15,  3,
    36, 24, 12,  0
};
15724
15725 static void pack_eac_solid_block(eac_block& blk, uint32_t a)
15726 {
15727 blk.m_base = static_cast<uint8_t>(a);
15728 blk.m_table = 13;
15729 blk.m_multiplier = 0;
15730
15731 memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4));
15732
15733 return;
15734 }
15735
15736 // Only checks 4 tables.
15737 static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
15738 {
15739 uint32_t min_alpha = 255, max_alpha = 0;
15740 for (uint32_t i = 0; i < 16; i++)
15741 {
15742 const uint32_t a = pPixels[i * stride];
15743 if (a < min_alpha) min_alpha = a;
15744 if (a > max_alpha) max_alpha = a;
15745 }
15746
15747 if (min_alpha == max_alpha)
15748 {
15749 pack_eac_solid_block(blk, min_alpha);
15750 return;
15751 }
15752
15753 const uint32_t alpha_range = max_alpha - min_alpha;
15754
15755 const uint32_t SINGLE_TABLE_THRESH = 5;
15756 if (alpha_range <= SINGLE_TABLE_THRESH)
15757 {
15758 // If alpha_range <= 5 table 13 is lossless
15759 int base = clamp255((int)max_alpha - 2);
15760
15761 blk.m_base = base;
15762 blk.m_multiplier = 1;
15763 blk.m_table = 13;
15764
15765 base -= 3;
15766
15767 uint64_t packed_sels = 0;
15768 for (uint32_t i = 0; i < 16; i++)
15769 {
15770 const int a = pPixels[i * stride];
15771
15772 static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
15773
15774 int sel = a - base;
15775 assert(sel >= 0 && sel <= 5);
15776
15777 packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
15778 }
15779
15780 blk.set_selector_bits(packed_sels);
15781
15782 return;
15783 }
15784
15785 const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13;
15786 static const uint8_t s_tables[4] = { T0, T1, T2, T3 };
15787
15788 int base[4], mul[4];
15789 uint32_t mul_or = 0;
15790 for (uint32_t i = 0; i < 4; i++)
15791 {
15792 const uint32_t table = s_tables[i];
15793
15794 const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
15795
15796 base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
15797 mul[i] = clampi((int)roundf(alpha_range / range), 1, 15);
15798 mul_or |= mul[i];
15799 }
15800
15801 uint32_t total_err[4] = { 0, 0, 0, 0 };
15802 uint8_t sels[4][16];
15803
15804 for (uint32_t i = 0; i < 16; i++)
15805 {
15806 const int a = pPixels[i * stride];
15807
15808 uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX;
15809
15810 if ((a < 7) || (a > (255 - 7)))
15811 {
15812 for (uint32_t s = 0; s < 8; s++)
15813 {
15814 const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]);
15815 const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]);
15816 const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]);
15817 const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]);
15818
15819 l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s);
15820 l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s);
15821 l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s);
15822 l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s);
15823 }
15824 }
15825 else if (mul_or == 1)
15826 {
15827 const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
15828
15829 for (uint32_t s = 0; s < 8; s++)
15830 {
15831 const int v0 = g_eac_modifier_table[T0][s] + a0;
15832 const int v1 = g_eac_modifier_table[T1][s] + a1;
15833 const int v2 = g_eac_modifier_table[T2][s] + a2;
15834 const int v3 = g_eac_modifier_table[T3][s] + a3;
15835
15836 l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
15837 l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
15838 l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
15839 l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
15840 }
15841 }
15842 else
15843 {
15844 const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a;
15845
15846 for (uint32_t s = 0; s < 8; s++)
15847 {
15848 const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0;
15849 const int v1 = mul[1] * g_eac_modifier_table[T1][s] + a1;
15850 const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2;
15851 const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3;
15852
15853 l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s);
15854 l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s);
15855 l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s);
15856 l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s);
15857 }
15858 }
15859
15860 sels[0][i] = l0 & 7;
15861 sels[1][i] = l1 & 7;
15862 sels[2][i] = l2 & 7;
15863 sels[3][i] = l3 & 7;
15864
15865 total_err[0] += basisu::square<uint32_t>(l0 >> 3);
15866 total_err[1] += basisu::square<uint32_t>(l1 >> 3);
15867 total_err[2] += basisu::square<uint32_t>(l2 >> 3);
15868 total_err[3] += basisu::square<uint32_t>(l3 >> 3);
15869 }
15870
15871 uint32_t min_err = total_err[0], min_index = 0;
15872 for (uint32_t i = 1; i < 4; i++)
15873 {
15874 if (total_err[i] < min_err)
15875 {
15876 min_err = total_err[i];
15877 min_index = i;
15878 }
15879 }
15880
15881 blk.m_base = base[min_index];
15882 blk.m_multiplier = mul[min_index];
15883 blk.m_table = s_tables[min_index];
15884
15885 uint64_t packed_sels = 0;
15886 const uint8_t* pSels = &sels[min_index][0];
15887 for (uint32_t i = 0; i < 16; i++)
15888 packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
15889
15890 blk.set_selector_bits(packed_sels);
15891 }
15892
15893 // Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal.
15894 static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride)
15895 {
15896 uint32_t min_alpha = 255, max_alpha = 0;
15897 for (uint32_t i = 0; i < 16; i++)
15898 {
15899 const uint32_t a = pPixels[i * stride];
15900 if (a < min_alpha) min_alpha = a;
15901 if (a > max_alpha) max_alpha = a;
15902 }
15903
15904 if (min_alpha == max_alpha)
15905 {
15906 pack_eac_solid_block(blk, min_alpha);
15907 return;
15908 }
15909
15910 const uint32_t alpha_range = max_alpha - min_alpha;
15911
15912 const uint32_t SINGLE_TABLE_THRESH = 5;
15913 if (alpha_range <= SINGLE_TABLE_THRESH)
15914 {
15915 // If alpha_range <= 5 table 13 is lossless
15916 int base = clamp255((int)max_alpha - 2);
15917
15918 blk.m_base = base;
15919 blk.m_multiplier = 1;
15920 blk.m_table = 13;
15921
15922 base -= 3;
15923
15924 uint64_t packed_sels = 0;
15925 for (uint32_t i = 0; i < 16; i++)
15926 {
15927 const int a = pPixels[i * stride];
15928
15929 static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 };
15930
15931 int sel = a - base;
15932 assert(sel >= 0 && sel <= 5);
15933
15934 packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]);
15935 }
15936
15937 blk.set_selector_bits(packed_sels);
15938
15939 return;
15940 }
15941
15942 int base[16], mul[16];
15943 for (uint32_t table = 0; table < 16; table++)
15944 {
15945 const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
15946
15947 base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)));
15948 mul[table] = clampi((int)roundf(alpha_range / range), 1, 15);
15949 }
15950
15951 uint32_t total_err[16];
15952 memset(total_err, 0, sizeof(total_err));
15953
15954 uint8_t sels[16][16];
15955
15956 for (uint32_t table = 0; table < 16; table++)
15957 {
15958 const int8_t* pTable = &g_eac_modifier_table[table][0];
15959 const int m = mul[table], b = base[table];
15960
15961 uint32_t prev_l = 0, prev_a = UINT32_MAX;
15962
15963 for (uint32_t i = 0; i < 16; i++)
15964 {
15965 const int a = pPixels[i * stride];
15966
15967 if ((uint32_t)a == prev_a)
15968 {
15969 sels[table][i] = prev_l & 7;
15970 total_err[table] += basisu::square<uint32_t>(prev_l >> 3);
15971 }
15972 else
15973 {
15974 uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3;
15975 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1);
15976 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2);
15977 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3);
15978 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4);
15979 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5);
15980 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6);
15981 l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7);
15982
15983 sels[table][i] = l & 7;
15984 total_err[table] += basisu::square<uint32_t>(l >> 3);
15985
15986 prev_l = l;
15987 prev_a = a;
15988 }
15989 }
15990 }
15991
15992 uint32_t min_err = total_err[0], min_index = 0;
15993 for (uint32_t i = 1; i < 16; i++)
15994 {
15995 if (total_err[i] < min_err)
15996 {
15997 min_err = total_err[i];
15998 min_index = i;
15999 }
16000 }
16001
16002 blk.m_base = base[min_index];
16003 blk.m_multiplier = mul[min_index];
16004 blk.m_table = min_index;
16005
16006 uint64_t packed_sels = 0;
16007 const uint8_t* pSels = &sels[min_index][0];
16008 for (uint32_t i = 0; i < 16; i++)
16009 packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]);
16010
16011 blk.set_selector_bits(packed_sels);
16012 }
16013
16014 bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0)
16015 {
16016 unpacked_uastc_block unpacked_src_blk;
16017 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16018 return false;
16019
16020 const uint32_t mode = unpacked_src_blk.m_mode;
16021
16022 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
16023 {
16024 pack_eac_solid_block(*static_cast<eac_block*>(pDst), unpacked_src_blk.m_solid_color.c[chan0]);
16025 return true;
16026 }
16027
16028 color32 block_pixels[4][4];
16029 const bool unpack_srgb = false;
16030 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16031 return false;
16032
16033 if (chan0 == 3)
16034 transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst);
16035 else
16036 (high_quality ? pack_eac_high_quality : pack_eac)(*static_cast<eac_block*>(pDst), &block_pixels[0][0].c[chan0], sizeof(color32));
16037
16038 return true;
16039 }
16040
16041 bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1)
16042 {
16043 unpacked_uastc_block unpacked_src_blk;
16044 if (!unpack_uastc(src_blk, unpacked_src_blk, false))
16045 return false;
16046
16047 const uint32_t mode = unpacked_src_blk.m_mode;
16048
16049 if (mode == UASTC_MODE_INDEX_SOLID_COLOR)
16050 {
16051 pack_eac_solid_block(static_cast<eac_block*>(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]);
16052 pack_eac_solid_block(static_cast<eac_block*>(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]);
16053 return true;
16054 }
16055
16056 color32 block_pixels[4][4];
16057 const bool unpack_srgb = false;
16058 if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb))
16059 return false;
16060
16061 if (chan0 == 3)
16062 transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[0]);
16063 else
16064 (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32));
16065
16066 if (chan1 == 3)
16067 transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[1]);
16068 else
16069 (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32));
16070 return true;
16071 }
16072
16073 // PVRTC1
16074 static void fixup_pvrtc1_4_modulation_rgb(
16075 const uastc_block* pSrc_blocks,
16076 const uint32_t* pPVRTC_endpoints,
16077 void* pDst_blocks,
16078 uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha)
16079 {
16080 const uint32_t x_mask = num_blocks_x - 1;
16081 const uint32_t y_mask = num_blocks_y - 1;
16082 const uint32_t x_bits = basisu::total_bits(x_mask);
16083 const uint32_t y_bits = basisu::total_bits(y_mask);
16084 const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
16085 //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
16086 const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
16087
16088 uint32_t block_index = 0;
16089
16090 // really 3x3
16091 int e0[4][4], e1[4][4];
16092
16093 for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
16094 {
16095 const uint32_t* pE_rows[3];
16096
16097 for (int ey = 0; ey < 3; ey++)
16098 {
16099 int by = y + ey - 1;
16100
16101 const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
16102
16103 pE_rows[ey] = pE;
16104
16105 for (int ex = 0; ex < 3; ex++)
16106 {
16107 int bx = 0 + ex - 1;
16108
16109 const uint32_t e = pE[bx & x_mask];
16110
16111 e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31;
16112 e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31;
16113 }
16114 }
16115
16116 const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
16117
16118 for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
16119 {
16120 const uastc_block& src_block = pSrc_blocks[block_index];
16121
16122 color32 block_pixels[4][4];
16123 unpack_uastc(src_block, &block_pixels[0][0], false);
16124 if (from_alpha)
16125 {
16126 // Just set RGB to alpha to avoid adding complexity below.
16127 for (uint32_t i = 0; i < 16; i++)
16128 {
16129 const uint8_t a = ((color32*)block_pixels)[i].a;
16130 ((color32*)block_pixels)[i].set(a, a, a, 255);
16131 }
16132 }
16133
16134 const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
16135
16136 uint32_t swizzled = x_swizzle | y_swizzle;
16137 if (num_blocks_x != num_blocks_y)
16138 {
16139 swizzled &= swizzle_mask;
16140
16141 if (num_blocks_x > num_blocks_y)
16142 swizzled |= ((x >> min_bits) << (min_bits * 2));
16143 else
16144 swizzled |= ((y >> min_bits) << (min_bits * 2));
16145 }
16146
16147 pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
16148 pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
16149
16150 {
16151 const uint32_t ex = 2;
16152 int bx = x + ex - 1;
16153 bx &= x_mask;
16154
16155#define DO_ROW(ey) \
16156 { \
16157 const uint32_t e = pE_rows[ey][bx]; \
16158 e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \
16159 e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \
16160 }
16161
16162 DO_ROW(0);
16163 DO_ROW(1);
16164 DO_ROW(2);
16165#undef DO_ROW
16166 }
16167
16168 uint32_t mod = 0;
16169
16170#define DO_PIX(lx, ly, w0, w1, w2, w3) \
16171 { \
16172 int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
16173 int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
16174 int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \
16175 int dl = cb_l - ca_l; \
16176 int vl = cl - ca_l; \
16177 int p = vl * 16; \
16178 if (ca_l > cb_l) { p = -p; dl = -dl; } \
16179 uint32_t m = 0; \
16180 if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
16181 if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
16182 if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
16183 mod |= m; \
16184 }
16185
16186 {
16187 const uint32_t ex = 0, ey = 0;
16188 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16189 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16190 DO_PIX(0, 0, 4, 4, 4, 4);
16191 DO_PIX(1, 0, 2, 6, 2, 6);
16192 DO_PIX(0, 1, 2, 2, 6, 6);
16193 DO_PIX(1, 1, 1, 3, 3, 9);
16194 }
16195
16196 {
16197 const uint32_t ex = 1, ey = 0;
16198 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16199 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16200 DO_PIX(2, 0, 8, 0, 8, 0);
16201 DO_PIX(3, 0, 6, 2, 6, 2);
16202 DO_PIX(2, 1, 4, 0, 12, 0);
16203 DO_PIX(3, 1, 3, 1, 9, 3);
16204 }
16205
16206 {
16207 const uint32_t ex = 0, ey = 1;
16208 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16209 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16210 DO_PIX(0, 2, 8, 8, 0, 0);
16211 DO_PIX(1, 2, 4, 12, 0, 0);
16212 DO_PIX(0, 3, 6, 6, 2, 2);
16213 DO_PIX(1, 3, 3, 9, 1, 3);
16214 }
16215
16216 {
16217 const uint32_t ex = 1, ey = 1;
16218 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16219 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16220 DO_PIX(2, 2, 16, 0, 0, 0);
16221 DO_PIX(3, 2, 12, 4, 0, 0);
16222 DO_PIX(2, 3, 12, 0, 4, 0);
16223 DO_PIX(3, 3, 9, 3, 3, 1);
16224 }
16225#undef DO_PIX
16226
16227 pDst_block->m_modulation = mod;
16228
16229 e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
16230 e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
16231 e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
16232
16233 e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
16234 e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
16235 e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
16236
16237 } // x
16238 } // y
16239 }
16240
16241 static void fixup_pvrtc1_4_modulation_rgba(
16242 const uastc_block* pSrc_blocks,
16243 const uint32_t* pPVRTC_endpoints,
16244 void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y)
16245 {
16246 const uint32_t x_mask = num_blocks_x - 1;
16247 const uint32_t y_mask = num_blocks_y - 1;
16248 const uint32_t x_bits = basisu::total_bits(x_mask);
16249 const uint32_t y_bits = basisu::total_bits(y_mask);
16250 const uint32_t min_bits = basisu::minimum(x_bits, y_bits);
16251 //const uint32_t max_bits = basisu::maximum(x_bits, y_bits);
16252 const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1;
16253
16254 uint32_t block_index = 0;
16255
16256 // really 3x3
16257 int e0[4][4], e1[4][4];
16258
16259 for (int y = 0; y < static_cast<int>(num_blocks_y); y++)
16260 {
16261 const uint32_t* pE_rows[3];
16262
16263 for (int ey = 0; ey < 3; ey++)
16264 {
16265 int by = y + ey - 1;
16266
16267 const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x];
16268
16269 pE_rows[ey] = pE;
16270
16271 for (int ex = 0; ex < 3; ex++)
16272 {
16273 int bx = 0 + ex - 1;
16274
16275 const uint32_t e = pE[bx & x_mask];
16276
16277 e0[ex][ey] = get_endpoint_l8(e, 0);
16278 e1[ex][ey] = get_endpoint_l8(e, 1);
16279 }
16280 }
16281
16282 const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF];
16283
16284 for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++)
16285 {
16286 const uastc_block& src_block = pSrc_blocks[block_index];
16287
16288 color32 block_pixels[4][4];
16289 unpack_uastc(src_block, &block_pixels[0][0], false);
16290
16291 const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1);
16292
16293 uint32_t swizzled = x_swizzle | y_swizzle;
16294 if (num_blocks_x != num_blocks_y)
16295 {
16296 swizzled &= swizzle_mask;
16297
16298 if (num_blocks_x > num_blocks_y)
16299 swizzled |= ((x >> min_bits) << (min_bits * 2));
16300 else
16301 swizzled |= ((y >> min_bits) << (min_bits * 2));
16302 }
16303
16304 pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled;
16305 pDst_block->m_endpoints = pPVRTC_endpoints[block_index];
16306
16307 {
16308 const uint32_t ex = 2;
16309 int bx = x + ex - 1;
16310 bx &= x_mask;
16311
16312#define DO_ROW(ey) \
16313 { \
16314 const uint32_t e = pE_rows[ey][bx]; \
16315 e0[ex][ey] = get_endpoint_l8(e, 0); \
16316 e1[ex][ey] = get_endpoint_l8(e, 1); \
16317 }
16318
16319 DO_ROW(0);
16320 DO_ROW(1);
16321 DO_ROW(2);
16322#undef DO_ROW
16323 }
16324
16325 uint32_t mod = 0;
16326
16327#define DO_PIX(lx, ly, w0, w1, w2, w3) \
16328 { \
16329 int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \
16330 int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \
16331 int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \
16332 int dl = cb_l - ca_l; \
16333 int vl = cl - ca_l; \
16334 int p = vl * 16; \
16335 if (ca_l > cb_l) { p = -p; dl = -dl; } \
16336 uint32_t m = 0; \
16337 if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \
16338 if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \
16339 if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \
16340 mod |= m; \
16341 }
16342
16343 {
16344 const uint32_t ex = 0, ey = 0;
16345 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16346 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16347 DO_PIX(0, 0, 4, 4, 4, 4);
16348 DO_PIX(1, 0, 2, 6, 2, 6);
16349 DO_PIX(0, 1, 2, 2, 6, 6);
16350 DO_PIX(1, 1, 1, 3, 3, 9);
16351 }
16352
16353 {
16354 const uint32_t ex = 1, ey = 0;
16355 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16356 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16357 DO_PIX(2, 0, 8, 0, 8, 0);
16358 DO_PIX(3, 0, 6, 2, 6, 2);
16359 DO_PIX(2, 1, 4, 0, 12, 0);
16360 DO_PIX(3, 1, 3, 1, 9, 3);
16361 }
16362
16363 {
16364 const uint32_t ex = 0, ey = 1;
16365 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16366 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16367 DO_PIX(0, 2, 8, 8, 0, 0);
16368 DO_PIX(1, 2, 4, 12, 0, 0);
16369 DO_PIX(0, 3, 6, 6, 2, 2);
16370 DO_PIX(1, 3, 3, 9, 1, 3);
16371 }
16372
16373 {
16374 const uint32_t ex = 1, ey = 1;
16375 const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1];
16376 const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1];
16377 DO_PIX(2, 2, 16, 0, 0, 0);
16378 DO_PIX(3, 2, 12, 4, 0, 0);
16379 DO_PIX(2, 3, 12, 0, 4, 0);
16380 DO_PIX(3, 3, 9, 3, 3, 1);
16381 }
16382#undef DO_PIX
16383
16384 pDst_block->m_modulation = mod;
16385
16386 e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0];
16387 e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1];
16388 e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2];
16389
16390 e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0];
16391 e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1];
16392 e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2];
16393
16394 } // x
16395 } // y
16396 }
16397
16398 bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha)
16399 {
16400 BASISU_NOTE_UNUSED(high_quality);
16401
16402 if ((!num_blocks_x) || (!num_blocks_y))
16403 return false;
16404
16405 const uint32_t width = num_blocks_x * 4;
16406 const uint32_t height = num_blocks_y * 4;
16407 if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
16408 return false;
16409
16410 basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
16411
16412 for (uint32_t y = 0; y < num_blocks_y; y++)
16413 {
16414 for (uint32_t x = 0; x < num_blocks_x; x++)
16415 {
16416 color32 block_pixels[16];
16417 if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
16418 return false;
16419
16420 // Get block's RGB bounding box
16421 color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
16422
16423 if (from_alpha)
16424 {
16425 uint32_t low_a = 255, high_a = 0;
16426 for (uint32_t i = 0; i < 16; i++)
16427 {
16428 low_a = basisu::minimum<uint32_t>(low_a, block_pixels[i].a);
16429 high_a = basisu::maximum<uint32_t>(high_a, block_pixels[i].a);
16430 }
16431 low_color.set(low_a, low_a, low_a, 255);
16432 high_color.set(high_a, high_a, high_a, 255);
16433 }
16434 else
16435 {
16436 for (uint32_t i = 0; i < 16; i++)
16437 {
16438 low_color = color32::comp_min(low_color, block_pixels[i]);
16439 high_color = color32::comp_max(high_color, block_pixels[i]);
16440 }
16441 }
16442
16443 // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
16444 pvrtc4_block temp;
16445 temp.set_opaque_endpoint_floor(0, low_color);
16446 temp.set_opaque_endpoint_ceil(1, high_color);
16447
16448 temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
16449 }
16450 }
16451
16452 fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha);
16453
16454 return true;
16455 }
16456
16457 bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality)
16458 {
16459 BASISU_NOTE_UNUSED(high_quality);
16460
16461 if ((!num_blocks_x) || (!num_blocks_y))
16462 return false;
16463
16464 const uint32_t width = num_blocks_x * 4;
16465 const uint32_t height = num_blocks_y * 4;
16466 if (!basisu::is_pow2(width) || !basisu::is_pow2(height))
16467 return false;
16468
16469 basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y);
16470
16471 for (uint32_t y = 0; y < num_blocks_y; y++)
16472 {
16473 for (uint32_t x = 0; x < num_blocks_x; x++)
16474 {
16475 color32 block_pixels[16];
16476 if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false))
16477 return false;
16478
16479 // Get block's RGBA bounding box
16480 color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0);
16481
16482 for (uint32_t i = 0; i < 16; i++)
16483 {
16484 low_color = color32::comp_min(low_color, block_pixels[i]);
16485 high_color = color32::comp_max(high_color, block_pixels[i]);
16486 }
16487
16488 // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates.
16489 pvrtc4_block temp;
16490 temp.set_endpoint_floor(0, low_color);
16491 temp.set_endpoint_ceil(1, high_color);
16492
16493 temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints;
16494 }
16495 }
16496
16497 fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y);
16498
16499 return true;
16500 }
16501
16502 void uastc_init()
16503 {
16504 for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++)
16505 {
16506 if (!astc_is_valid_endpoint_range(range))
16507 continue;
16508
16509 const uint32_t levels = astc_get_levels(range);
16510
16511 uint32_t vals[256];
16512 for (uint32_t i = 0; i < levels; i++)
16513 vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i;
16514
16515 std::sort(vals, vals + levels);
16516
16517 for (uint32_t i = 0; i < levels; i++)
16518 {
16519 const uint32_t order = vals[i] & 0xFF;
16520 const uint32_t unq = vals[i] >> 8;
16521
16522 g_astc_unquant[range][order].m_unquant = (uint8_t)unq;
16523 g_astc_unquant[range][order].m_index = (uint8_t)i;
16524
16525 } // i
16526 }
16527
16528 // TODO: Precompute?
16529 // BC7 777.1
16530 for (int c = 0; c < 256; c++)
16531 {
16532 for (uint32_t lp = 0; lp < 2; lp++)
16533 {
16534 endpoint_err best;
16535 best.m_error = (uint16_t)UINT16_MAX;
16536
16537 for (uint32_t l = 0; l < 128; l++)
16538 {
16539 const uint32_t low = (l << 1) | lp;
16540
16541 for (uint32_t h = 0; h < 128; h++)
16542 {
16543 const uint32_t high = (h << 1) | lp;
16544
16545 const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6;
16546
16547 const int err = (k - c) * (k - c);
16548 if (err < best.m_error)
16549 {
16550 best.m_error = (uint16_t)err;
16551 best.m_lo = (uint8_t)l;
16552 best.m_hi = (uint8_t)h;
16553 }
16554 } // h
16555 } // l
16556
16557 g_bc7_mode_6_optimal_endpoints[c][lp] = best;
16558 } // lp
16559
16560 } // c
16561
16562 // BC7 777
16563 for (int c = 0; c < 256; c++)
16564 {
16565 endpoint_err best;
16566 best.m_error = (uint16_t)UINT16_MAX;
16567
16568 for (uint32_t l = 0; l < 128; l++)
16569 {
16570 const uint32_t low = (l << 1) | (l >> 6);
16571
16572 for (uint32_t h = 0; h < 128; h++)
16573 {
16574 const uint32_t high = (h << 1) | (h >> 6);
16575
16576 const int k = (low * (64 - g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6;
16577
16578 const int err = (k - c) * (k - c);
16579 if (err < best.m_error)
16580 {
16581 best.m_error = (uint16_t)err;
16582 best.m_lo = (uint8_t)l;
16583 best.m_hi = (uint8_t)h;
16584 }
16585 } // h
16586 } // l
16587
16588 g_bc7_mode_5_optimal_endpoints[c] = best;
16589
16590 } // c
16591 }
16592
16593#endif // #if BASISD_SUPPORT_UASTC
16594
16595// ------------------------------------------------------------------------------------------------------
16596// KTX2
16597// ------------------------------------------------------------------------------------------------------
16598
16599#if BASISD_SUPPORT_KTX2
// The 12-byte identifier that ktx2_transcoder::init() requires at the very start of the file data.
// Read as Latin-1 text the bytes are: 0xAB (left guillemet), "KTX 20", 0xBB (right guillemet), "\r\n", 0x1A, "\n".
const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
16601
16602 ktx2_transcoder::ktx2_transcoder() :
16603 m_etc1s_transcoder()
16604 {
16605 clear();
16606 }
16607
16608 void ktx2_transcoder::clear()
16609 {
16610 m_pData = nullptr;
16611 m_data_size = 0;
16612
16613 memset(&m_header, 0, sizeof(m_header));
16614 m_levels.clear();
16615 m_dfd.clear();
16616 m_key_values.clear();
16617 memset(&m_etc1s_header, 0, sizeof(m_etc1s_header));
16618 m_etc1s_image_descs.clear();
16619
16620 m_format = basist::basis_tex_format::cETC1S;
16621
16622 m_dfd_color_model = 0;
16623 m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED;
16624 m_dfd_transfer_func = 0;
16625 m_dfd_flags = 0;
16626 m_dfd_samples = 0;
16627 m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB;
16628 m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB;
16629
16630 m_etc1s_transcoder.clear();
16631
16632 m_def_transcoder_state.clear();
16633
16634 m_has_alpha = false;
16635 m_is_video = false;
16636 }
16637
16638 bool ktx2_transcoder::init(const void* pData, uint32_t data_size)
16639 {
16640 clear();
16641
16642 if (!pData)
16643 {
16644 BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n");
16645 assert(0);
16646 return false;
16647 }
16648
16649 if (data_size <= sizeof(ktx2_header))
16650 {
16651 BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n");
16652 return false;
16653 }
16654
16655 if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0)
16656 {
16657 BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n");
16658 return false;
16659 }
16660
16661 m_pData = static_cast<const uint8_t *>(pData);
16662 m_data_size = data_size;
16663
16664 memcpy(&m_header, pData, sizeof(m_header));
16665
16666 // We only support UASTC and ETC1S
16667 if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
16668 {
16669 BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
16670 return false;
16671 }
16672
16673 // 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1."
16674 if (m_header.m_type_size != 1)
16675 {
16676 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n");
16677 return false;
16678 }
16679
16680 // We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case.
16681 // The BasisU library does not support 1D or 3D textures at all.
16682 if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0))
16683 {
16684 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n");
16685 return false;
16686 }
16687
16688 // Face count must be 1 or 6
16689 if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6))
16690 {
16691 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n");
16692 return false;
16693 }
16694
16695 if (m_header.m_face_count > 1)
16696 {
16697 // 3.4: Make sure cubemaps are square.
16698 if (m_header.m_pixel_width != m_header.m_pixel_height)
16699 {
16700 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n");
16701 return false;
16702 }
16703 }
16704
16705 // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats"
16706 if (m_header.m_level_count < 1)
16707 {
16708 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n");
16709 return false;
16710 }
16711
16712 // Sanity check the level count.
16713 if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT)
16714 {
16715 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n");
16716 return false;
16717 }
16718
16719 if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD)
16720 {
16721 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n");
16722 return false;
16723 }
16724
16725 if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
16726 {
16727 if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header))
16728 {
16729 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n");
16730 return false;
16731 }
16732
16733 if (m_header.m_sgd_byte_offset < sizeof(ktx2_header))
16734 {
16735 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n");
16736 return false;
16737 }
16738
16739 if (m_header.m_sgd_byte_offset + m_header.m_sgd_byte_length > m_data_size)
16740 {
16741 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n");
16742 return false;
16743 }
16744 }
16745
16746 if (!m_levels.try_resize(m_header.m_level_count))
16747 {
16748 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
16749 return false;
16750 }
16751
16752 const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index);
16753
16754 if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size)
16755 {
16756 BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n");
16757 return false;
16758 }
16759
16760 memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes);
16761
16762 // Sanity check the level offsets and byte sizes
16763 for (uint32_t i = 0; i < m_levels.size(); i++)
16764 {
16765 if (m_levels[i].m_byte_offset < sizeof(ktx2_header))
16766 {
16767 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n");
16768 return false;
16769 }
16770
16771 if (!m_levels[i].m_byte_length)
16772 {
16773 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n");
16774 }
16775
16776 if ((m_levels[i].m_byte_offset + m_levels[i].m_byte_length) > m_data_size)
16777 {
16778 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n");
16779 return false;
16780 }
16781
16782 const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL;
16783
16784 if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE)
16785 {
16786 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n");
16787 return false;
16788 }
16789
16790 if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
16791 {
16792 if (m_levels[i].m_uncompressed_byte_length)
16793 {
16794 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n");
16795 return false;
16796 }
16797 }
16798 else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD)
16799 {
16800 if (!m_levels[i].m_uncompressed_byte_length)
16801 {
16802 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n");
16803 return false;
16804 }
16805 }
16806 }
16807
16808 const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60;
16809 if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE))
16810 {
16811 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n");
16812 return false;
16813 }
16814
16815 if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header)))
16816 {
16817 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n");
16818 return false;
16819 }
16820
16821 const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset;
16822
16823 if (!m_dfd.try_resize(m_header.m_dfd_byte_length))
16824 {
16825 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n");
16826 return false;
16827 }
16828
16829 memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length);
16830
16831 // This is all hard coded for only ETC1S and UASTC.
16832 uint32_t dfd_total_size = basisu::read_le_dword(pDFD);
16833
16834 // 3.10.3: Sanity check
16835 if (dfd_total_size != m_header.m_dfd_byte_length)
16836 {
16837 BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n");
16838 return false;
16839 }
16840
16841 // 3.10.3: More sanity checking
16842 if (m_header.m_kvd_byte_length)
16843 {
16844 if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset)
16845 {
16846 BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n");
16847 return false;
16848 }
16849 }
16850
16851 const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t));
16852 const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t));
16853
16854 m_dfd_color_model = dfd_bits & 255;
16855 m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255);
16856 m_dfd_transfer_func = (dfd_bits >> 16) & 255;
16857 m_dfd_flags = (dfd_bits >> 24) & 255;
16858
16859 // See 3.10.1.Restrictions
16860 if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB))
16861 {
16862 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n");
16863 return false;
16864 }
16865
16866 if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S)
16867 {
16868 m_format = basist::basis_tex_format::cETC1S;
16869
16870 // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count."
16871 // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that.
16872 m_has_alpha = (m_header.m_dfd_byte_length == 60);
16873
16874 m_dfd_samples = m_has_alpha ? 2 : 1;
16875 m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
16876
16877 if (m_has_alpha)
16878 {
16879 const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t));
16880 m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15);
16881 }
16882 }
16883 else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC)
16884 {
16885 m_format = basist::basis_tex_format::cUASTC4x4;
16886
16887 m_dfd_samples = 1;
16888 m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
16889
16890 // We're assuming "DATA" means RGBA so it has alpha.
16891 m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
16892 }
16893 else
16894 {
16895 // Unsupported DFD color model.
16896 BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n");
16897 return false;
16898 }
16899
16900 if (!read_key_values())
16901 {
16902 BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n");
16903 return false;
16904 }
16905
16906 // Check for a KTXanimData key
16907 for (uint32_t i = 0; i < m_key_values.size(); i++)
16908 {
16909 if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0)
16910 {
16911 m_is_video = true;
16912 break;
16913 }
16914 }
16915
16916 return true;
16917 }
16918
16919 uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
16920 {
16921 const uint32_t etc1s_image_index =
16922 (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
16923 layer_index * m_header.m_face_count +
16924 face_index;
16925
16926 if (etc1s_image_index >= get_etc1s_image_descs().size())
16927 {
16928 assert(0);
16929 return 0;
16930 }
16931
16932 return get_etc1s_image_descs()[etc1s_image_index].m_image_flags;
16933 }
16934
16935 const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const
16936 {
16937 for (uint32_t i = 0; i < m_key_values.size(); i++)
16938 if (strcmp((const char *)m_key_values[i].m_key.data(), key_name.c_str()) == 0)
16939 return &m_key_values[i].m_value;
16940
16941 return nullptr;
16942 }
16943
16944 bool ktx2_transcoder::start_transcoding()
16945 {
16946 if (!m_pData)
16947 {
16948 BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n");
16949 return false;
16950 }
16951
16952 if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ)
16953 {
16954 // Check if we've already decompressed the ETC1S global data. If so don't unpack it again.
16955 if (!m_etc1s_transcoder.get_endpoints().empty())
16956 return true;
16957
16958 if (!decompress_etc1s_global_data())
16959 {
16960 BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n");
16961 return false;
16962 }
16963
16964 if (!m_is_video)
16965 {
16966 // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key.
16967 // Video cannot be a cubemap, and it must be a texture array.
16968 if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1))
16969 {
16970 for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++)
16971 {
16972 if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME)
16973 {
16974 m_is_video = true;
16975 break;
16976 }
16977 }
16978 }
16979 }
16980 }
16981 else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
16982 {
16983#if !BASISD_SUPPORT_KTX2_ZSTD
16984 BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n");
16985 return false;
16986#endif
16987 }
16988
16989 return true;
16990 }
16991
16992 bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const
16993 {
16994 if (level_index >= m_levels.size())
16995 {
16996 BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n");
16997 return false;
16998 }
16999
17000 if (m_header.m_face_count > 1)
17001 {
17002 if (face_index >= 6)
17003 {
17004 BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n");
17005 return false;
17006 }
17007 }
17008 else if (face_index != 0)
17009 {
17010 BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n");
17011 return false;
17012 }
17013
17014 if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
17015 {
17016 BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
17017 return false;
17018 }
17019
17020 const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
17021 const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
17022 const uint32_t num_blocks_x = (level_width + 3) >> 2;
17023 const uint32_t num_blocks_y = (level_height + 3) >> 2;
17024
17025 level_info.m_face_index = face_index;
17026 level_info.m_layer_index = layer_index;
17027 level_info.m_level_index = level_index;
17028 level_info.m_orig_width = level_width;
17029 level_info.m_orig_height = level_height;
17030 level_info.m_width = num_blocks_x * 4;
17031 level_info.m_height = num_blocks_y * 4;
17032 level_info.m_num_blocks_x = num_blocks_x;
17033 level_info.m_num_blocks_y = num_blocks_y;
17034 level_info.m_total_blocks = num_blocks_x * num_blocks_y;
17035 level_info.m_alpha_flag = m_has_alpha;
17036 level_info.m_iframe_flag = false;
17037 if (m_etc1s_image_descs.size())
17038 {
17039 const uint32_t etc1s_image_index =
17040 (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
17041 layer_index * m_header.m_face_count +
17042 face_index;
17043
17044 level_info.m_iframe_flag = (m_etc1s_image_descs[etc1s_image_index].m_image_flags & KTX2_IMAGE_IS_P_FRAME) == 0;
17045 }
17046
17047 return true;
17048 }
17049
17050 bool ktx2_transcoder::transcode_image_level(
17051 uint32_t level_index, uint32_t layer_index, uint32_t face_index,
17052 void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
17053 basist::transcoder_texture_format fmt,
17054 uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1,
17055 ktx2_transcoder_state* pState)
17056 {
17057 if (!m_pData)
17058 {
17059 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n");
17060 return false;
17061 }
17062
17063 if (!pState)
17064 pState = &m_def_transcoder_state;
17065
17066 if (level_index >= m_levels.size())
17067 {
17068 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n");
17069 return false;
17070 }
17071
17072 if (m_header.m_face_count > 1)
17073 {
17074 if (face_index >= 6)
17075 {
17076 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n");
17077 return false;
17078 }
17079 }
17080 else if (face_index != 0)
17081 {
17082 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n");
17083 return false;
17084 }
17085
17086 if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1))
17087 {
17088 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n");
17089 return false;
17090 }
17091
17092 const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset;
17093 uint64_t comp_level_data_size = m_levels[level_index].m_byte_length;
17094
17095 const uint8_t* pUncomp_level_data = pComp_level_data;
17096 uint64_t uncomp_level_data_size = comp_level_data_size;
17097
17098 if (uncomp_level_data_size > UINT32_MAX)
17099 {
17100 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n");
17101 return false;
17102 }
17103
17104 if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
17105 {
17106 // Check if we've already decompressed this level's supercompressed data.
17107 if ((int)level_index != pState->m_uncomp_data_level_index)
17108 {
17109 // Uncompress the entire level's supercompressed data.
17110 if (!decompress_level_data(level_index, pState->m_level_uncomp_data))
17111 {
17112 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n");
17113 return false;
17114 }
17115 pState->m_uncomp_data_level_index = level_index;
17116 }
17117
17118 pUncomp_level_data = pState->m_level_uncomp_data.data();
17119 uncomp_level_data_size = pState->m_level_uncomp_data.size();
17120 }
17121
17122 const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1);
17123 const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1);
17124 const uint32_t num_blocks_x = (level_width + 3) >> 2;
17125 const uint32_t num_blocks_y = (level_height + 3) >> 2;
17126
17127 if (m_format == basist::basis_tex_format::cETC1S)
17128 {
17129 // Ensure start_transcoding() was called.
17130 if (m_etc1s_transcoder.get_endpoints().empty())
17131 {
17132 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n");
17133 return false;
17134 }
17135
17136 const uint32_t etc1s_image_index =
17137 (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) +
17138 layer_index * m_header.m_face_count +
17139 face_index;
17140
17141 // Sanity check
17142 if (etc1s_image_index >= m_etc1s_image_descs.size())
17143 {
17144 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n");
17145 assert(0);
17146 return false;
17147 }
17148
17149 if (static_cast<uint32_t>(m_data_size) != m_data_size)
17150 {
17151 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: File is too large\n");
17152 return false;
17153 }
17154
17155 const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index];
17156
17157 if (!m_etc1s_transcoder.transcode_image(fmt,
17158 pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, static_cast<uint32_t>(m_data_size),
17159 num_blocks_x, num_blocks_y, level_width, level_height,
17160 level_index,
17161 m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length,
17162 image_desc.m_alpha_slice_byte_length ? (m_levels[level_index].m_byte_offset + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length,
17163 decode_flags, m_has_alpha,
17164 m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels))
17165 {
17166 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
17167 return false;
17168 }
17169 }
17170 else if (m_format == basist::basis_tex_format::cUASTC4x4)
17171 {
17172 // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
17173 assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
17174 const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE;
17175
17176 const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size;
17177
17178 // Sanity checks
17179 if (uncomp_ofs >= uncomp_level_data_size)
17180 {
17181 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n");
17182 return false;
17183 }
17184
17185 if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size)
17186 {
17187 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n");
17188 return false;
17189 }
17190
17191 if (!m_uastc_transcoder.transcode_image(fmt,
17192 pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
17193 (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
17194 0, (uint32_t)total_2D_image_size,
17195 decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
17196 {
17197 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
17198 return false;
17199 }
17200 }
17201 else
17202 {
17203 // Shouldn't get here.
17204 BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n");
17205 assert(0);
17206 return false;
17207 }
17208
17209 return true;
17210 }
17211
17212 bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
17213 {
17214 const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
17215 const uint64_t comp_size = m_levels[level_index].m_byte_length;
17216
17217 const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length;
17218
17219 if (((size_t)comp_size) != comp_size)
17220 {
17221 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n");
17222 return false;
17223 }
17224 if (((size_t)uncomp_size) != uncomp_size)
17225 {
17226 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n");
17227 return false;
17228 }
17229
17230 if (!uncomp_data.try_resize((size_t)uncomp_size))
17231 {
17232 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n");
17233 return false;
17234 }
17235
17236 if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
17237 {
17238#if BASISD_SUPPORT_KTX2_ZSTD
17239 size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
17240 if (ZSTD_isError(actualUncompSize))
17241 {
17242 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n");
17243 return false;
17244 }
17245 if (actualUncompSize != uncomp_size)
17246 {
17247 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n");
17248 return false;
17249 }
17250#else
17251 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n");
17252 return false;
17253#endif
17254 }
17255
17256 return true;
17257 }
17258
17259 bool ktx2_transcoder::decompress_etc1s_global_data()
17260 {
17261 // Note: we don't actually support 3D textures in here yet
17262 //uint32_t layer_pixel_depth = basisu::maximum<uint32_t>(m_header.m_pixel_depth, 1);
17263 //for (uint32_t i = 1; i < m_header.m_level_count; i++)
17264 // layer_pixel_depth += basisu::maximum<uint32_t>(m_header.m_pixel_depth >> i, 1);
17265
17266 const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count;
17267 assert(image_count);
17268
17269 const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset;
17270
17271 memcpy(&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header));
17272 pSrc += sizeof(ktx2_etc1s_global_data_header);
17273
17274 if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length))
17275 {
17276 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n");
17277 return false;
17278 }
17279
17280 if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count))
17281 {
17282 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n");
17283 return false;
17284 }
17285
17286 // Sanity check the ETC1S header.
17287 if ((sizeof(ktx2_etc1s_global_data_header) +
17288 sizeof(ktx2_etc1s_image_desc) * image_count +
17289 m_etc1s_header.m_endpoints_byte_length +
17290 m_etc1s_header.m_selectors_byte_length +
17291 m_etc1s_header.m_tables_byte_length +
17292 m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length)
17293 {
17294 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n");
17295 return false;
17296 }
17297
17298 if (!m_etc1s_image_descs.try_resize(image_count))
17299 {
17300 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n");
17301 return false;
17302 }
17303
17304 memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count);
17305 pSrc += sizeof(ktx2_etc1s_image_desc) * image_count;
17306
17307 // Sanity check the ETC1S image descs
17308 for (uint32_t i = 0; i < image_count; i++)
17309 {
17310 // m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding.
17311
17312 if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length)
17313 {
17314 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n");
17315 return false;
17316 }
17317
17318 if (m_has_alpha)
17319 {
17320 if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length)
17321 {
17322 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n");
17323 return false;
17324 }
17325 }
17326 }
17327
17328 const uint8_t* pEndpoint_data = pSrc;
17329 const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length;
17330 const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length;
17331
17332 if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length))
17333 {
17334 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n");
17335 return false;
17336 }
17337
17338 if (!m_etc1s_transcoder.decode_palettes(
17339 m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length,
17340 m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length))
17341 {
17342 BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n");
17343 return false;
17344 }
17345
17346 return true;
17347 }
17348
17349 bool ktx2_transcoder::read_key_values()
17350 {
17351 if (!m_header.m_kvd_byte_length)
17352 {
17353 if (m_header.m_kvd_byte_offset)
17354 {
17355 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n");
17356 return false;
17357 }
17358
17359 return true;
17360 }
17361
17362 if (m_header.m_kvd_byte_offset < sizeof(ktx2_header))
17363 {
17364 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n");
17365 return false;
17366 }
17367
17368 if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size)
17369 {
17370 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n");
17371 return false;
17372 }
17373
17374 const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset;
17375 uint32_t src_left = m_header.m_kvd_byte_length;
17376
17377 if (!m_key_values.try_reserve(8))
17378 {
17379 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
17380 return false;
17381 }
17382
17383 while (src_left > sizeof(uint32_t))
17384 {
17385 uint32_t l = basisu::read_le_dword(pSrc);
17386
17387 pSrc += sizeof(uint32_t);
17388 src_left -= sizeof(uint32_t);
17389
17390 if (l < 2)
17391 {
17392 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n");
17393 return false;
17394 }
17395
17396 if (src_left < l)
17397 {
17398 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n");
17399 return false;
17400 }
17401
17402 if (!m_key_values.try_resize(m_key_values.size() + 1))
17403 {
17404 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
17405 return false;
17406 }
17407
17408 basisu::uint8_vec& key_data = m_key_values.back().m_key;
17409 basisu::uint8_vec& value_data = m_key_values.back().m_value;
17410
17411 do
17412 {
17413 if (!l)
17414 {
17415 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n");
17416 return false;
17417 }
17418
17419 if (!key_data.try_push_back(*pSrc++))
17420 {
17421 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
17422 return false;
17423 }
17424
17425 src_left--;
17426 l--;
17427
17428 } while (key_data.back());
17429
17430 if (!value_data.try_resize(l))
17431 {
17432 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n");
17433 return false;
17434 }
17435
17436 if (l)
17437 {
17438 memcpy(value_data.data(), pSrc, l);
17439 pSrc += l;
17440 src_left -= l;
17441 }
17442
17443 uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3;
17444 uint32_t alignment_bytes = (4 - ofs) & 3;
17445
17446 if (src_left < alignment_bytes)
17447 {
17448 BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n");
17449 return false;
17450 }
17451
17452 pSrc += alignment_bytes;
17453 src_left -= alignment_bytes;
17454 }
17455
17456 return true;
17457 }
17458
17459#endif // BASISD_SUPPORT_KTX2
17460
// Reports whether KTX2 support was enabled when this file was compiled.
// The answer is fixed at build time by the BASISD_SUPPORT_KTX2 macro.
bool basisu_transcoder_supports_ktx2()
{
#if !BASISD_SUPPORT_KTX2
	const bool ktx2_enabled = false;
#else
	const bool ktx2_enabled = true;
#endif

	return ktx2_enabled;
}
17469
// Reports whether Zstandard supercompression support for KTX2 files was enabled
// when this file was compiled. The answer is fixed at build time by the
// BASISD_SUPPORT_KTX2_ZSTD macro.
bool basisu_transcoder_supports_ktx2_zstd()
{
#if !BASISD_SUPPORT_KTX2_ZSTD
	const bool zstd_enabled = false;
#else
	const bool zstd_enabled = true;
#endif

	return zstd_enabled;
}
17478
17479} // namespace basist
17480