1// basisu_gpu_texture.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15#include "basisu_gpu_texture.h"
16#include "basisu_enc.h"
17#include "basisu_pvrtc1_4.h"
18#if BASISU_USE_ASTC_DECOMPRESS
19#include "basisu_astc_decomp.h"
20#endif
21#include "basisu_bc7enc.h"
22
23namespace basisu
24{
25 void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
26 {
27 static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
28
29 const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
30
31 const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
32
33 const uint64_t selector_bits = pBlock->get_selector_bits();
34
35 const int32_t base = pBlock->m_base;
36 const int32_t mul = pBlock->m_multiplier;
37
38 pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
39 pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
40 pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
41 pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
42
43 pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
44 pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
45 pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
46 pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
47
48 pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
49 pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
50 pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
51 pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
52
53 pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
54 pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
55 pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
56 pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
57 }
58
// Byte layout of an 8-byte BC1 block: two packed 16-bit endpoint colors (each stored as
// low byte, high byte) followed by four selector bytes, one byte per row of four 2-bit selectors.
struct bc1_block
{
    enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };

    uint8_t m_low_color[cTotalEndpointBytes];
    uint8_t m_high_color[cTotalEndpointBytes];
    uint8_t m_selectors[cTotalSelectorBytes];

    // Reassembles the second endpoint from its two bytes (byte 0 is the least significant).
    inline uint32_t get_high_color() const
    {
        return m_high_color[0] | (m_high_color[1] << 8U);
    }

    // Reassembles the first endpoint from its two bytes (byte 0 is the least significant).
    inline uint32_t get_low_color() const
    {
        return m_low_color[0] | (m_low_color[1] << 8U);
    }

    // Splits a packed 5:6:5 color (red in the top bits) and widens each channel to 8 bits
    // by replicating its high bits into the vacated low bits.
    static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
    {
        const uint32_t r5 = (c >> 11) & 31;
        const uint32_t g6 = (c >> 5) & 63;
        const uint32_t b5 = c & 31;

        r = (r5 << 3) | (r5 >> 2);
        g = (g6 << 2) | (g6 >> 4);
        b = (b5 << 3) | (b5 >> 2);
    }

    // Returns the 2-bit selector of pixel (x, y); x occupies bits [2x, 2x+1] of row byte y.
    inline uint32_t get_selector(uint32_t x, uint32_t y) const
    {
        assert((x < 4U) && (y < 4U));
        const uint32_t row_bits = m_selectors[y];
        return (row_bits >> (x * 2)) & 3;
    }
};
83
84 // Returns true if the block uses 3 color punchthrough alpha mode.
85 bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
86 {
87 static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
88
89 const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
90
91 const uint32_t l = pBlock->get_low_color();
92 const uint32_t h = pBlock->get_high_color();
93
94 color_rgba c[4];
95
96 uint32_t r0, g0, b0, r1, g1, b1;
97 bc1_block::unpack_color(l, r0, g0, b0);
98 bc1_block::unpack_color(h, r1, g1, b1);
99
100 c[0].set_noclamp_rgba(r0, g0, b0, 255);
101 c[1].set_noclamp_rgba(r1, g1, b1, 255);
102
103 bool used_punchthrough = false;
104
105 if (l > h)
106 {
107 c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
108 c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
109 }
110 else
111 {
112 c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
113 c[3].set_noclamp_rgba(0, 0, 0, 0);
114 used_punchthrough = true;
115 }
116
117 if (set_alpha)
118 {
119 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
120 {
121 pPixels[0] = c[pBlock->get_selector(0, y)];
122 pPixels[1] = c[pBlock->get_selector(1, y)];
123 pPixels[2] = c[pBlock->get_selector(2, y)];
124 pPixels[3] = c[pBlock->get_selector(3, y)];
125 }
126 }
127 else
128 {
129 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
130 {
131 pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
132 pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
133 pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
134 pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
135 }
136 }
137
138 return used_punchthrough;
139 }
140
141 bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
142 {
143 static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
144
145 const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
146
147 const uint32_t l = pBlock->get_low_color();
148 const uint32_t h = pBlock->get_high_color();
149
150 color_rgba c[4];
151
152 int r0 = (l >> 11) & 31;
153 int g0 = (l >> 5) & 63;
154 int b0 = l & 31;
155 int r1 = (h >> 11) & 31;
156 int g1 = (h >> 5) & 63;
157 int b1 = h & 31;
158
159 c[0].b = (uint8_t)((3 * b0 * 22) / 8);
160 c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
161 c[0].r = (uint8_t)((3 * r0 * 22) / 8);
162 c[0].a = 0xFF;
163
164 c[1].r = (uint8_t)((3 * r1 * 22) / 8);
165 c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
166 c[1].b = (uint8_t)((3 * b1 * 22) / 8);
167 c[1].a = 0xFF;
168
169 int gdiff = c[1].g - c[0].g;
170
171 bool used_punchthrough = false;
172
173 if (l > h)
174 {
175 c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
176 c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
177 c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
178 c[2].a = 0xFF;
179
180 c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
181 c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
182 c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
183 c[3].a = 0xFF;
184 }
185 else
186 {
187 c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
188 c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
189 c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
190 c[2].a = 0xFF;
191
192 c[3].set_noclamp_rgba(0, 0, 0, 0);
193 used_punchthrough = true;
194 }
195
196 if (set_alpha)
197 {
198 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
199 {
200 pPixels[0] = c[pBlock->get_selector(0, y)];
201 pPixels[1] = c[pBlock->get_selector(1, y)];
202 pPixels[2] = c[pBlock->get_selector(2, y)];
203 pPixels[3] = c[pBlock->get_selector(3, y)];
204 }
205 }
206 else
207 {
208 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
209 {
210 pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
211 pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
212 pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
213 pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
214 }
215 }
216
217 return used_punchthrough;
218 }
219
// Blends two 8-bit channel values with fixed weights 43/64 (c0) and 21/64 (c1), rounding to nearest.
static inline int interp_5_6_amd(int c0, int c1)
{
    assert(c0 < 256 && c1 < 256);
    const int weighted_sum = c0 * 43 + c1 * 21 + 32;
    return weighted_sum >> 6;
}
// Averages two 8-bit channel values, rounding halves up.
static inline int interp_half_5_6_amd(int c0, int c1)
{
    assert(c0 < 256 && c1 < 256);
    const int rounded_sum = c0 + c1 + 1;
    return rounded_sum >> 1;
}
222
223 bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
224 {
225 const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
226
227 const uint32_t l = pBlock->get_low_color();
228 const uint32_t h = pBlock->get_high_color();
229
230 color_rgba c[4];
231
232 uint32_t r0, g0, b0, r1, g1, b1;
233 bc1_block::unpack_color(l, r0, g0, b0);
234 bc1_block::unpack_color(h, r1, g1, b1);
235
236 c[0].set_noclamp_rgba(r0, g0, b0, 255);
237 c[1].set_noclamp_rgba(r1, g1, b1, 255);
238
239 bool used_punchthrough = false;
240
241 if (l > h)
242 {
243 c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
244 c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
245 }
246 else
247 {
248 c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
249 c[3].set_noclamp_rgba(0, 0, 0, 0);
250 used_punchthrough = true;
251 }
252
253 if (set_alpha)
254 {
255 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
256 {
257 pPixels[0] = c[pBlock->get_selector(0, y)];
258 pPixels[1] = c[pBlock->get_selector(1, y)];
259 pPixels[2] = c[pBlock->get_selector(2, y)];
260 pPixels[3] = c[pBlock->get_selector(3, y)];
261 }
262 }
263 else
264 {
265 for (uint32_t y = 0; y < 4; y++, pPixels += 4)
266 {
267 pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
268 pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
269 pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
270 pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
271 }
272 }
273
274 return used_punchthrough;
275 }
276
// Byte layout of an 8-byte BC4 block: two 8-bit endpoints followed by six bytes holding
// sixteen 3-bit selectors (pixel y*4+x occupies bits [3*(y*4+x), +3) of the 48-bit value).
struct bc4_block
{
    enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };

    uint8_t m_endpoints[2];
    uint8_t m_selectors[cTotalSelectorBytes];

    inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
    inline uint32_t get_high_alpha() const { return m_endpoints[1]; }

    // True when low <= high, which is the case where get_block_values() builds the 6-value palette.
    inline bool is_alpha6_block() const { return !(get_low_alpha() > get_high_alpha()); }

    // Assembles the six selector bytes into one 48-bit value (byte 0 is least significant).
    inline uint64_t get_selector_bits() const
    {
        uint64_t packed = 0;
        for (uint32_t i = 0; i < static_cast<uint32_t>(cTotalSelectorBytes); i++)
            packed |= static_cast<uint64_t>(m_selectors[i]) << (8U * i);
        return packed;
    }

    // Extracts the 3-bit selector of pixel (x, y) from a value returned by get_selector_bits().
    inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
    {
        assert((x < 4U) && (y < 4U));
        const uint32_t shift = ((y * 4) + x) * cBC4SelectorBits;
        return static_cast<uint32_t>((selector_bits >> shift) & (cMaxSelectorValues - 1));
    }

    // Fills pDst[0..7] with the 6-value palette: both endpoints, four interior blends in fifths,
    // then the constants 0 and 255. Returns 6.
    static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        pDst[0] = static_cast<uint8_t>(l);
        pDst[1] = static_cast<uint8_t>(h);
        for (uint32_t i = 1; i <= 4; i++)
            pDst[i + 1] = static_cast<uint8_t>((l * (5 - i) + h * i) / 5);
        pDst[6] = 0;
        pDst[7] = 255;
        return 6;
    }

    // Fills pDst[0..7] with the 8-value palette: both endpoints and six interior blends in sevenths.
    // Returns 8.
    static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        pDst[0] = static_cast<uint8_t>(l);
        pDst[1] = static_cast<uint8_t>(h);
        for (uint32_t i = 1; i <= 6; i++)
            pDst[i + 1] = static_cast<uint8_t>((l * (7 - i) + h * i) / 7);
        return 8;
    }

    // Picks the 8-value palette when l > h and the 6-value palette otherwise; returns the count used.
    static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
    {
        return (l > h) ? get_block_values8(pDst, l, h) : get_block_values6(pDst, l, h);
    }
};
335
336 void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
337 {
338 static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
339
340 const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
341
342 uint8_t sel_values[8];
343 bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
344
345 const uint64_t selector_bits = pBlock->get_selector_bits();
346
347 for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
348 {
349 pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
350 pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
351 pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
352 pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
353 }
354 }
355
356 // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.
357 bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
358 {
359 bool success = true;
360
361 if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(bc4_block), pPixels, true))
362 success = false;
363
364 unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));
365
366 return success;
367 }
368
369 // writes RG
370 void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
371 {
372 unpack_bc4(pBlock_bits, &pPixels[0].r, sizeof(color_rgba));
373 unpack_bc4((const uint8_t *)pBlock_bits + sizeof(bc4_block), &pPixels[0].g, sizeof(color_rgba));
374 }
375
376 // ATC isn't officially documented, so I'm assuming these references:
377 // http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
378 // https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
379 // The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
380 void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
381 {
382 const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);
383
384 const uint16_t color0 = pBytes[0] | (pBytes[1] << 8U);
385 const uint16_t color1 = pBytes[2] | (pBytes[3] << 8U);
386 uint32_t sels = pBytes[4] | (pBytes[5] << 8U) | (pBytes[6] << 16U) | (pBytes[7] << 24U);
387
388 const bool mode = (color0 & 0x8000) != 0;
389
390 color_rgba c[4];
391
392 c[0].set((color0 >> 10) & 31, (color0 >> 5) & 31, color0 & 31, 255);
393 c[0].r = (c[0].r << 3) | (c[0].r >> 2);
394 c[0].g = (c[0].g << 3) | (c[0].g >> 2);
395 c[0].b = (c[0].b << 3) | (c[0].b >> 2);
396
397 c[3].set((color1 >> 11) & 31, (color1 >> 5) & 63, color1 & 31, 255);
398 c[3].r = (c[3].r << 3) | (c[3].r >> 2);
399 c[3].g = (c[3].g << 2) | (c[3].g >> 4);
400 c[3].b = (c[3].b << 3) | (c[3].b >> 2);
401
402 if (mode)
403 {
404 c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255);
405 c[2] = c[0];
406 c[0].set(0, 0, 0, 255);
407 }
408 else
409 {
410 c[1].r = (c[0].r * 5 + c[3].r * 3) >> 3;
411 c[1].g = (c[0].g * 5 + c[3].g * 3) >> 3;
412 c[1].b = (c[0].b * 5 + c[3].b * 3) >> 3;
413
414 c[2].r = (c[0].r * 3 + c[3].r * 5) >> 3;
415 c[2].g = (c[0].g * 3 + c[3].g * 5) >> 3;
416 c[2].b = (c[0].b * 3 + c[3].b * 5) >> 3;
417 }
418
419 for (uint32_t i = 0; i < 16; i++)
420 {
421 const uint32_t s = sels & 3;
422
423 pPixels[i] = c[s];
424
425 sels >>= 2;
426 }
427 }
428
429 // BC7 mode 0-7 decompression.
430 // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
431
// Expands a val_bits-wide endpoint value plus its p-bit to 8 bits: the p-bit becomes the new LSB,
// the result is left-aligned in a byte and its top bits are replicated into the vacated low bits.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits)
{
    assert(val < (1U << val_bits));
    assert(pbit < 2);
    assert(val_bits >= 4 && val_bits <= 8);

    const uint32_t width = val_bits + 1;
    const uint32_t top_aligned = ((val << 1) | pbit) << (8 - width);
    const uint32_t expanded = top_aligned | (top_aligned >> width);

    assert(expanded <= 255);
    return expanded;
}

// Expands a val_bits-wide endpoint value (no p-bit) to 8 bits by left-aligning it in a byte and
// replicating its top bits into the vacated low bits.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits)
{
    assert(val < (1U << val_bits));
    assert(val_bits >= 4 && val_bits <= 8);

    const uint32_t top_aligned = val << (8 - val_bits);
    const uint32_t expanded = top_aligned | (top_aligned >> val_bits);

    assert(expanded <= 255);
    return expanded;
}
434
435 static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; }
436 static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; }
437 static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; }
438 static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
439 {
440 assert(l <= 255 && h <= 255);
441 switch (bits)
442 {
443 case 2: return bc7_interp2(l, h, w);
444 case 3: return bc7_interp3(l, h, w);
445 case 4: return bc7_interp4(l, h, w);
446 default:
447 break;
448 }
449 return 0;
450 }
451
452 bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
453 {
454 //const uint32_t SUBSETS = 3;
455 const uint32_t ENDPOINTS = 6;
456 const uint32_t COMPS = 3;
457 const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2;
458 const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5;
459 const uint32_t PBITS = (mode == 0) ? 6 : 0;
460 const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
461
462 uint32_t bit_offset = 0;
463 const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
464
465 if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
466
467 const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6);
468
469 color_rgba endpoints[ENDPOINTS];
470 for (uint32_t c = 0; c < COMPS; c++)
471 for (uint32_t e = 0; e < ENDPOINTS; e++)
472 endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
473
474 uint32_t pbits[6];
475 for (uint32_t p = 0; p < PBITS; p++)
476 pbits[p] = read_bits32(pBuf, bit_offset, 1);
477
478 uint32_t weights[16];
479 for (uint32_t i = 0; i < 16; i++)
480 weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
481
482 assert(bit_offset == 128);
483
484 for (uint32_t e = 0; e < ENDPOINTS; e++)
485 for (uint32_t c = 0; c < 4; c++)
486 endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
487
488 color_rgba block_colors[3][8];
489 for (uint32_t s = 0; s < 3; s++)
490 for (uint32_t i = 0; i < WEIGHT_VALS; i++)
491 {
492 for (uint32_t c = 0; c < 3; c++)
493 block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
494 block_colors[s][i][3] = 255;
495 }
496
497 for (uint32_t i = 0; i < 16; i++)
498 pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]];
499
500 return true;
501 }
502
503 bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
504 {
505 //const uint32_t SUBSETS = 2;
506 const uint32_t ENDPOINTS = 4;
507 const uint32_t COMPS = (mode == 7) ? 4 : 3;
508 const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2;
509 const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7);
510 const uint32_t PBITS = (mode == 1) ? 2 : 4;
511 const uint32_t SHARED_PBITS = (mode == 1) ? true : false;
512 const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
513
514 uint32_t bit_offset = 0;
515 const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
516
517 if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
518
519 const uint32_t part = read_bits32(pBuf, bit_offset, 6);
520
521 color_rgba endpoints[ENDPOINTS];
522 for (uint32_t c = 0; c < COMPS; c++)
523 for (uint32_t e = 0; e < ENDPOINTS; e++)
524 endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
525
526 uint32_t pbits[4];
527 for (uint32_t p = 0; p < PBITS; p++)
528 pbits[p] = read_bits32(pBuf, bit_offset, 1);
529
530 uint32_t weights[16];
531 for (uint32_t i = 0; i < 16; i++)
532 weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS);
533
534 assert(bit_offset == 128);
535
536 for (uint32_t e = 0; e < ENDPOINTS; e++)
537 for (uint32_t c = 0; c < 4; c++)
538 endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS));
539
540 color_rgba block_colors[2][8];
541 for (uint32_t s = 0; s < 2; s++)
542 for (uint32_t i = 0; i < WEIGHT_VALS; i++)
543 {
544 for (uint32_t c = 0; c < COMPS; c++)
545 block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS);
546 block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3];
547 }
548
549 for (uint32_t i = 0; i < 16; i++)
550 pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]];
551
552 return true;
553 }
554
555 bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
556 {
557 const uint32_t ENDPOINTS = 2;
558 const uint32_t COMPS = 4;
559 const uint32_t WEIGHT_BITS = 2;
560 const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2;
561 const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7;
562 const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8;
563 //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
564 //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
565
566 uint32_t bit_offset = 0;
567 const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
568
569 if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false;
570
571 const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2);
572 const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0;
573
574 color_rgba endpoints[ENDPOINTS];
575 for (uint32_t c = 0; c < COMPS; c++)
576 for (uint32_t e = 0; e < ENDPOINTS; e++)
577 endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
578
579 const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
580
581 uint32_t weights[16], a_weights[16];
582
583 for (uint32_t i = 0; i < 16; i++)
584 (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0));
585
586 for (uint32_t i = 0; i < 16; i++)
587 (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0));
588
589 assert(bit_offset == 128);
590
591 for (uint32_t e = 0; e < ENDPOINTS; e++)
592 for (uint32_t c = 0; c < 4; c++)
593 endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
594
595 color_rgba block_colors[8];
596 for (uint32_t i = 0; i < (1U << weight_bits[0]); i++)
597 for (uint32_t c = 0; c < 3; c++)
598 block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]);
599
600 for (uint32_t i = 0; i < (1U << weight_bits[1]); i++)
601 block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]);
602
603 for (uint32_t i = 0; i < 16; i++)
604 {
605 pPixels[i] = block_colors[weights[i]];
606 pPixels[i].a = block_colors[a_weights[i]].a;
607 if (comp_rot >= 1)
608 std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]);
609 }
610
611 return true;
612 }
613
// Bit-field overlay for a 16-byte BC7 mode 6 block, split into a low and a high 64-bit half.
// NOTE(review): bit-field placement is compiler- and endian-dependent; this presumably relies on
// LSB-first allocation on a little-endian host - confirm for every supported toolchain.
struct bc7_mode_6
{
    // Low half: 7-bit mode field, then the eight 7-bit endpoint components, then p-bit 0.
    struct
    {
        uint64_t m_mode : 7;

        uint64_t m_r0 : 7;
        uint64_t m_r1 : 7;

        uint64_t m_g0 : 7;
        uint64_t m_g1 : 7;

        uint64_t m_b0 : 7;
        uint64_t m_b1 : 7;

        uint64_t m_a0 : 7;
        uint64_t m_a1 : 7;

        uint64_t m_p0 : 1;
    } m_lo;

    // High half: p-bit 1 followed by the sixteen selectors m_sXY (x = column, y = row).
    // The first selector is 3 bits wide, the rest are 4 bits.
    union
    {
        struct
        {
            uint64_t m_p1 : 1;

            // row 0
            uint64_t m_s00 : 3;
            uint64_t m_s10 : 4;
            uint64_t m_s20 : 4;
            uint64_t m_s30 : 4;

            // row 1
            uint64_t m_s01 : 4;
            uint64_t m_s11 : 4;
            uint64_t m_s21 : 4;
            uint64_t m_s31 : 4;

            // row 2
            uint64_t m_s02 : 4;
            uint64_t m_s12 : 4;
            uint64_t m_s22 : 4;
            uint64_t m_s32 : 4;

            // row 3
            uint64_t m_s03 : 4;
            uint64_t m_s13 : 4;
            uint64_t m_s23 : 4;
            uint64_t m_s33 : 4;
        } m_hi;

        // Raw view of the high half.
        uint64_t m_hi_bits;
    };
};
660
661 bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
662 {
663 static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");
664
665 const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
666
667 if (block.m_lo.m_mode != (1 << 6))
668 return false;
669
670 const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0);
671 const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0);
672 const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0);
673 const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0);
674 const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1);
675 const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1);
676 const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1);
677 const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1);
678
679 color_rgba vals[16];
680 for (uint32_t i = 0; i < 16; i++)
681 {
682 const uint32_t w = basist::g_bc7_weights4[i];
683 const uint32_t iw = 64 - w;
684 vals[i].set_noclamp_rgba(
685 (r0 * iw + r1 * w + 32) >> 6,
686 (g0 * iw + g1 * w + 32) >> 6,
687 (b0 * iw + b1 * w + 32) >> 6,
688 (a0 * iw + a1 * w + 32) >> 6);
689 }
690
691 pPixels[0] = vals[block.m_hi.m_s00];
692 pPixels[1] = vals[block.m_hi.m_s10];
693 pPixels[2] = vals[block.m_hi.m_s20];
694 pPixels[3] = vals[block.m_hi.m_s30];
695
696 pPixels[4] = vals[block.m_hi.m_s01];
697 pPixels[5] = vals[block.m_hi.m_s11];
698 pPixels[6] = vals[block.m_hi.m_s21];
699 pPixels[7] = vals[block.m_hi.m_s31];
700
701 pPixels[8] = vals[block.m_hi.m_s02];
702 pPixels[9] = vals[block.m_hi.m_s12];
703 pPixels[10] = vals[block.m_hi.m_s22];
704 pPixels[11] = vals[block.m_hi.m_s32];
705
706 pPixels[12] = vals[block.m_hi.m_s03];
707 pPixels[13] = vals[block.m_hi.m_s13];
708 pPixels[14] = vals[block.m_hi.m_s23];
709 pPixels[15] = vals[block.m_hi.m_s33];
710
711 return true;
712 }
713
714 bool unpack_bc7(const void *pBlock, color_rgba *pPixels)
715 {
716 const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0];
717
718 for (uint32_t mode = 0; mode <= 7; mode++)
719 {
720 if (first_byte & (1U << mode))
721 {
722 switch (mode)
723 {
724 case 0:
725 case 2:
726 return unpack_bc7_mode0_2(mode, pBlock, pPixels);
727 case 1:
728 case 3:
729 case 7:
730 return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
731 case 4:
732 case 5:
733 return unpack_bc7_mode4_5(mode, pBlock, pPixels);
734 case 6:
735 return unpack_bc7_mode6(pBlock, pPixels);
736 default:
737 break;
738 }
739 }
740 }
741
742 return false;
743 }
744
// Bit-field overlay for a 16-byte FXT1 block: a low 64-bit half with thirty-two 2-bit texel
// selectors and a high 64-bit half with four 5:5:5 colors plus the alpha, glsb and mode fields.
// NOTE(review): bit-field placement is compiler- and endian-dependent; this presumably relies on
// LSB-first allocation on a little-endian host - confirm for every supported toolchain.
struct fxt1_block
{
    // Low half, viewable as named 2-bit fields, as one raw 64-bit value, or as eight bytes
    // (four selectors per byte).
    union
    {
        struct
        {
            uint64_t m_t00 : 2, m_t01 : 2, m_t02 : 2, m_t03 : 2;
            uint64_t m_t04 : 2, m_t05 : 2, m_t06 : 2, m_t07 : 2;
            uint64_t m_t08 : 2, m_t09 : 2, m_t10 : 2, m_t11 : 2;
            uint64_t m_t12 : 2, m_t13 : 2, m_t14 : 2, m_t15 : 2;
            uint64_t m_t16 : 2, m_t17 : 2, m_t18 : 2, m_t19 : 2;
            uint64_t m_t20 : 2, m_t21 : 2, m_t22 : 2, m_t23 : 2;
            uint64_t m_t24 : 2, m_t25 : 2, m_t26 : 2, m_t27 : 2;
            uint64_t m_t28 : 2, m_t29 : 2, m_t30 : 2, m_t31 : 2;
        } m_lo;

        uint64_t m_lo_bits;
        uint8_t m_sels[8];
    };

    // High half: four colors stored blue, green, red (5 bits each), then the flag fields.
    union
    {
        struct
        {
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
            // This is the format that 3DFX's DECOMP.EXE tool expects, which is assumed to be what the actual 3DFX hardware wanted.
            // Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!
            uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
            uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
            uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
            uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
#else
            // Intel's encoding, and the encoding in the OpenGL FXT1 spec.
            uint64_t m_b0 : 5, m_g0 : 5, m_r0 : 5;
            uint64_t m_b1 : 5, m_g1 : 5, m_r1 : 5;
            uint64_t m_b2 : 5, m_g2 : 5, m_r2 : 5;
            uint64_t m_b3 : 5, m_g3 : 5, m_r3 : 5;
#endif
            uint64_t m_alpha : 1;
            uint64_t m_glsb : 2;
            uint64_t m_mode : 1;
        } m_hi;

        uint64_t m_hi_bits;
    };
};
830
831 static color_rgba expand_565(const color_rgba& c)
832 {
833 return color_rgba((c.r << 3) | (c.r >> 2), (c.g << 2) | (c.g >> 4), (c.b << 3) | (c.b >> 2), 255);
834 }
835
836 // We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
837 bool unpack_fxt1(const void *p, color_rgba *pPixels)
838 {
839 const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);
840
841 if (pBlock->m_hi.m_mode == 0)
842 return false;
843 if (pBlock->m_hi.m_alpha == 1)
844 return false;
845
846 color_rgba colors[4];
847
848 colors[0].r = pBlock->m_hi.m_r0;
849 colors[0].g = (uint8_t)((pBlock->m_hi.m_g0 << 1) | ((pBlock->m_lo.m_t00 >> 1) ^ (pBlock->m_hi.m_glsb & 1)));
850 colors[0].b = pBlock->m_hi.m_b0;
851 colors[0].a = 255;
852
853 colors[1].r = pBlock->m_hi.m_r1;
854 colors[1].g = (uint8_t)((pBlock->m_hi.m_g1 << 1) | (pBlock->m_hi.m_glsb & 1));
855 colors[1].b = pBlock->m_hi.m_b1;
856 colors[1].a = 255;
857
858 colors[2].r = pBlock->m_hi.m_r2;
859 colors[2].g = (uint8_t)((pBlock->m_hi.m_g2 << 1) | ((pBlock->m_lo.m_t16 >> 1) ^ (pBlock->m_hi.m_glsb >> 1)));
860 colors[2].b = pBlock->m_hi.m_b2;
861 colors[2].a = 255;
862
863 colors[3].r = pBlock->m_hi.m_r3;
864 colors[3].g = (uint8_t)((pBlock->m_hi.m_g3 << 1) | (pBlock->m_hi.m_glsb >> 1));
865 colors[3].b = pBlock->m_hi.m_b3;
866 colors[3].a = 255;
867
868 for (uint32_t i = 0; i < 4; i++)
869 colors[i] = expand_565(colors[i]);
870
871 color_rgba block0_colors[4];
872 block0_colors[0] = colors[0];
873 block0_colors[1] = color_rgba((colors[0].r * 2 + colors[1].r + 1) / 3, (colors[0].g * 2 + colors[1].g + 1) / 3, (colors[0].b * 2 + colors[1].b + 1) / 3, 255);
874 block0_colors[2] = color_rgba((colors[1].r * 2 + colors[0].r + 1) / 3, (colors[1].g * 2 + colors[0].g + 1) / 3, (colors[1].b * 2 + colors[0].b + 1) / 3, 255);
875 block0_colors[3] = colors[1];
876
877 for (uint32_t i = 0; i < 16; i++)
878 {
879 const uint32_t sel = (pBlock->m_sels[i >> 2] >> ((i & 3) * 2)) & 3;
880
881 const uint32_t x = i & 3;
882 const uint32_t y = i >> 2;
883 pPixels[x + y * 8] = block0_colors[sel];
884 }
885
886 color_rgba block1_colors[4];
887 block1_colors[0] = colors[2];
888 block1_colors[1] = color_rgba((colors[2].r * 2 + colors[3].r + 1) / 3, (colors[2].g * 2 + colors[3].g + 1) / 3, (colors[2].b * 2 + colors[3].b + 1) / 3, 255);
889 block1_colors[2] = color_rgba((colors[3].r * 2 + colors[2].r + 1) / 3, (colors[3].g * 2 + colors[2].g + 1) / 3, (colors[3].b * 2 + colors[2].b + 1) / 3, 255);
890 block1_colors[3] = colors[3];
891
892 for (uint32_t i = 0; i < 16; i++)
893 {
894 const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3;
895
896 const uint32_t x = i & 3;
897 const uint32_t y = i >> 2;
898 pPixels[4 + x + y * 8] = block1_colors[sel];
899 }
900
901 return true;
902 }
903
// Overlay for an 8-byte PVRTC2 block: four modulation bytes followed by a 32-bit color word that
// can be viewed as opaque-mode fields, transparent-mode fields, or the raw bits.
// NOTE(review): bit-field placement is compiler- and endian-dependent; this presumably relies on
// LSB-first allocation on a little-endian host - confirm for every supported toolchain.
struct pvrtc2_block
{
    uint8_t m_modulation[4];

    union
    {
        // Opaque mode: RGB colora=554 and colorb=555
        struct
        {
            uint32_t m_mod_flag : 1;

            uint32_t m_blue_a : 4;
            uint32_t m_green_a : 5;
            uint32_t m_red_a : 5;

            uint32_t m_hard_flag : 1;

            uint32_t m_blue_b : 5;
            uint32_t m_green_b : 5;
            uint32_t m_red_b : 5;

            uint32_t m_opaque_flag : 1;
        } m_opaque_color_data;

        // Transparent mode: RGBA colora=4433 and colorb=4443
        struct
        {
            uint32_t m_mod_flag : 1;

            uint32_t m_blue_a : 3;
            uint32_t m_green_a : 4;
            uint32_t m_red_a : 4;
            uint32_t m_alpha_a : 3;

            uint32_t m_hard_flag : 1;

            uint32_t m_blue_b : 4;
            uint32_t m_green_b : 4;
            uint32_t m_red_b : 4;
            uint32_t m_alpha_b : 3;

            uint32_t m_opaque_flag : 1;
        } m_trans_color_data;

        // Raw view of the color word.
        uint32_t m_color_data_bits;
    };
};
948
949 static color_rgba convert_rgb_555_to_888(const color_rgba& col)
950 {
951 return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255);
952 }
953
954 static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)
955 {
956 return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]);
957 }
958
959 // PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
960 bool unpack_pvrtc2(const void *p, color_rgba *pPixels)
961 {
962 const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);
963
964 if ((!pBlock->m_opaque_color_data.m_hard_flag) || (pBlock->m_opaque_color_data.m_mod_flag))
965 {
966 // This mode isn't supported by the transcoder, so we aren't bothering with it here.
967 return false;
968 }
969
970 color_rgba colors[4];
971
972 if (pBlock->m_opaque_color_data.m_opaque_flag)
973 {
974 // colora=554
975 color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255);
976
977 // colora=555
978 color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255);
979
980 colors[0] = convert_rgb_555_to_888(color_a);
981 colors[3] = convert_rgb_555_to_888(color_b);
982
983 colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, 255);
984 colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, 255);
985 }
986 else
987 {
988 // colora=4433
989 color_rgba color_a(
990 (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3),
991 (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3),
992 (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1),
993 pBlock->m_trans_color_data.m_alpha_a << 1);
994
995 //colorb=4443
996 color_rgba color_b(
997 (pBlock->m_trans_color_data.m_red_b << 1) | (pBlock->m_trans_color_data.m_red_b >> 3),
998 (pBlock->m_trans_color_data.m_green_b << 1) | (pBlock->m_trans_color_data.m_green_b >> 3),
999 (pBlock->m_trans_color_data.m_blue_b << 1) | (pBlock->m_trans_color_data.m_blue_b >> 3),
1000 (pBlock->m_trans_color_data.m_alpha_b << 1) | 1);
1001
1002 colors[0] = convert_rgba_5554_to_8888(color_a);
1003 colors[3] = convert_rgba_5554_to_8888(color_b);
1004 }
1005
1006 colors[1].set((colors[0].r * 5 + colors[3].r * 3) / 8, (colors[0].g * 5 + colors[3].g * 3) / 8, (colors[0].b * 5 + colors[3].b * 3) / 8, (colors[0].a * 5 + colors[3].a * 3) / 8);
1007 colors[2].set((colors[0].r * 3 + colors[3].r * 5) / 8, (colors[0].g * 3 + colors[3].g * 5) / 8, (colors[0].b * 3 + colors[3].b * 5) / 8, (colors[0].a * 3 + colors[3].a * 5) / 8);
1008
1009 for (uint32_t i = 0; i < 16; i++)
1010 {
1011 const uint32_t sel = (pBlock->m_modulation[i >> 2] >> ((i & 3) * 2)) & 3;
1012 pPixels[i] = colors[sel];
1013 }
1014
1015 return true;
1016 }
1017
// Bitfield view of one 64-bit ETC2 EAC R11 block: base value, table index, multiplier, and
// 48 bits of selectors split over six 8-bit fields.
struct etc2_eac_r11
{
	uint64_t m_base : 8;
	uint64_t m_table : 4;
	uint64_t m_mul : 4;
	uint64_t m_sels_0 : 8;
	uint64_t m_sels_1 : 8;
	uint64_t m_sels_2 : 8;
	uint64_t m_sels_3 : 8;
	uint64_t m_sels_4 : 8;
	uint64_t m_sels_5 : 8;

	// Returns the 48 selector bits as one integer: m_sels_0 ends up in bits 40..47, m_sels_5 in bits 0..7.
	uint64_t get_sels() const
	{
		uint64_t packed = m_sels_0;
		packed = (packed << 8U) | m_sels_1;
		packed = (packed << 8U) | m_sels_2;
		packed = (packed << 8U) | m_sels_3;
		packed = (packed << 8U) | m_sels_4;
		packed = (packed << 8U) | m_sels_5;
		return packed;
	}

	// Stores the low 48 bits of v into the six selector fields (inverse of get_sels()).
	// Bits of v above bit 47 are ignored.
	void set_sels(uint64_t v)
	{
		m_sels_5 = v & 0xFF;
		v >>= 8U;
		m_sels_4 = v & 0xFF;
		v >>= 8U;
		m_sels_3 = v & 0xFF;
		v >>= 8U;
		m_sels_2 = v & 0xFF;
		v >>= 8U;
		m_sels_1 = v & 0xFF;
		v >>= 8U;
		m_sels_0 = v & 0xFF;
	}
};
1045
// Two consecutive EAC R11 blocks. unpack_etc2_eac_rg() decodes m_c[0] into pixel component 0
// and m_c[1] into pixel component 1.
struct etc2_eac_rg11
{
	etc2_eac_r11 m_c[2];
};
1050
1051 void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c)
1052 {
1053 const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);
1054 const uint64_t sels = pBlock->get_sels();
1055
1056 const int base = (int)pBlock->m_base * 8 + 4;
1057 const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1;
1058 const int table = (int)pBlock->m_table;
1059
1060 for (uint32_t y = 0; y < 4; y++)
1061 {
1062 for (uint32_t x = 0; x < 4; x++)
1063 {
1064 const uint32_t shift = 45 - ((y + x * 4) * 3);
1065
1066 const uint32_t sel = (uint32_t)((sels >> shift) & 7);
1067
1068 int val = base + g_etc2_eac_tables[table][sel] * mul;
1069 val = clamp<int>(val, 0, 2047);
1070
1071 // Convert to 8-bits with rounding
1072 //pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);
1073 pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047);
1074
1075 } // x
1076 } // y
1077 }
1078
1079 void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)
1080 {
1081 for (uint32_t c = 0; c < 2; c++)
1082 {
1083 const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];
1084
1085 unpack_etc2_eac_r(pBlock, pPixels, c);
1086 }
1087 }
1088
1089 void unpack_uastc(const void* p, color_rgba* pPixels)
1090 {
1091 basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false);
1092 }
1093
1094 // Unpacks to RGBA, R, RG, or A
1095 bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
1096 {
1097 switch (fmt)
1098 {
1099 case texture_format::cBC1:
1100 {
1101 unpack_bc1(pBlock, pPixels, true);
1102 break;
1103 }
1104 case texture_format::cBC1_NV:
1105 {
1106 unpack_bc1_nv(pBlock, pPixels, true);
1107 break;
1108 }
1109 case texture_format::cBC1_AMD:
1110 {
1111 unpack_bc1_amd(pBlock, pPixels, true);
1112 break;
1113 }
1114 case texture_format::cBC3:
1115 {
1116 return unpack_bc3(pBlock, pPixels);
1117 }
1118 case texture_format::cBC4:
1119 {
1120 // Unpack to R
1121 unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
1122 break;
1123 }
1124 case texture_format::cBC5:
1125 {
1126 unpack_bc5(pBlock, pPixels);
1127 break;
1128 }
1129 case texture_format::cBC7:
1130 {
1131 return unpack_bc7(pBlock, pPixels);
1132 }
1133 // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
1134 case texture_format::cETC2_RGB:
1135 case texture_format::cETC1:
1136 case texture_format::cETC1S:
1137 {
1138 return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
1139 }
1140 case texture_format::cETC2_RGBA:
1141 {
1142 if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
1143 return false;
1144 unpack_etc2_eac(pBlock, pPixels);
1145 break;
1146 }
1147 case texture_format::cETC2_ALPHA:
1148 {
1149 // Unpack to A
1150 unpack_etc2_eac(pBlock, pPixels);
1151 break;
1152 }
1153 case texture_format::cASTC4x4:
1154 {
1155#if BASISU_USE_ASTC_DECOMPRESS
1156 const bool astc_srgb = false;
1157 basisu_astc::astc::decompress(reinterpret_cast<uint8_t*>(pPixels), static_cast<const uint8_t*>(pBlock), astc_srgb, 4, 4);
1158#else
1159 memset(pPixels, 255, 16 * sizeof(color_rgba));
1160#endif
1161 break;
1162 }
1163 case texture_format::cATC_RGB:
1164 {
1165 unpack_atc(pBlock, pPixels);
1166 break;
1167 }
1168 case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1169 {
1170 unpack_atc(static_cast<const uint8_t*>(pBlock) + 8, pPixels);
1171 unpack_bc4(pBlock, &pPixels[0].a, sizeof(color_rgba));
1172 break;
1173 }
1174 case texture_format::cFXT1_RGB:
1175 {
1176 unpack_fxt1(pBlock, pPixels);
1177 break;
1178 }
1179 case texture_format::cPVRTC2_4_RGBA:
1180 {
1181 unpack_pvrtc2(pBlock, pPixels);
1182 break;
1183 }
1184 case texture_format::cETC2_R11_EAC:
1185 {
1186 unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, 0);
1187 break;
1188 }
1189 case texture_format::cETC2_RG11_EAC:
1190 {
1191 unpack_etc2_eac_rg(pBlock, pPixels);
1192 break;
1193 }
1194 case texture_format::cUASTC4x4:
1195 {
1196 unpack_uastc(pBlock, pPixels);
1197 break;
1198 }
1199 default:
1200 {
1201 assert(0);
1202 // TODO
1203 return false;
1204 }
1205 }
1206 return true;
1207 }
1208
1209 bool gpu_image::unpack(image& img) const
1210 {
1211 img.resize(get_pixel_width(), get_pixel_height());
1212 img.set_all(g_black_color);
1213
1214 if (!img.get_width() || !img.get_height())
1215 return true;
1216
1217 if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA))
1218 {
1219 pvrtc4_image pi(m_width, m_height);
1220
1221 if (get_total_blocks() != pi.get_total_blocks())
1222 return false;
1223
1224 memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes());
1225
1226 pi.deswizzle();
1227
1228 pi.unpack_all_pixels(img);
1229
1230 return true;
1231 }
1232
1233 assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
1234 color_rgba pixels[cMaxBlockSize * cMaxBlockSize];
1235 for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)
1236 pixels[i] = g_black_color;
1237
1238 bool success = true;
1239
1240 for (uint32_t by = 0; by < m_blocks_y; by++)
1241 {
1242 for (uint32_t bx = 0; bx < m_blocks_x; bx++)
1243 {
1244 const void* pBlock = get_block_ptr(bx, by);
1245
1246 if (!unpack_block(m_fmt, pBlock, pixels))
1247 success = false;
1248
1249 img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
1250 } // bx
1251 } // by
1252
1253 return success;
1254 }
1255
// 12-byte file identifier that create_ktx_texture_file() copies into ktx_header::m_identifier.
static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };
1257
// KTX/GL enums
// Constants written into the KTX header by create_ktx_texture_file(): the endianness marker,
// the base internal formats (KTX_RED/RG/RGB/RGBA), and one internal-format value per supported
// compressed texture format.
enum
{
	KTX_ENDIAN = 0x04030201,
	KTX_OPPOSITE_ENDIAN = 0x01020304,
	KTX_ETC1_RGB8_OES = 0x8D64,
	KTX_RED = 0x1903,
	KTX_RG = 0x8227,
	KTX_RGB = 0x1907,
	KTX_RGBA = 0x1908,
	KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
	KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
	KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
	KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,
	KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
	KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
	KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
	KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
	KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
	KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
	// Placeholder value (see TODO); used for texture_format::cUASTC4x4.
	KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!
	KTX_ATC_RGB_AMD = 0x8C92,
	KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,
	KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
	KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
	KTX_COMPRESSED_R11_EAC = 0x9270,
	KTX_COMPRESSED_RG11_EAC = 0x9272
};
1289
// Header record that create_ktx_texture_file() appends byte-for-byte at the start of the KTX output:
// a 12-byte identifier followed by thirteen 4-byte packed_uint fields.
struct ktx_header
{
	uint8_t m_identifier[12];
	packed_uint<4> m_endianness;
	packed_uint<4> m_glType;
	packed_uint<4> m_glTypeSize;
	packed_uint<4> m_glFormat;
	packed_uint<4> m_glInternalFormat;
	packed_uint<4> m_glBaseInternalFormat;
	packed_uint<4> m_pixelWidth;
	packed_uint<4> m_pixelHeight;
	packed_uint<4> m_pixelDepth;
	// Written as 0 (not 1) when the file holds a single, non-array texture.
	packed_uint<4> m_numberOfArrayElements;
	// 6 for cubemaps, otherwise 1.
	packed_uint<4> m_numberOfFaces;
	packed_uint<4> m_numberOfMipmapLevels;
	packed_uint<4> m_bytesOfKeyValueData;

	// Resets the whole header via clear_obj().
	void clear() { clear_obj(*this); }
};
1309
1310 // Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]
1311 bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
1312 {
1313 if (!gpu_images.size())
1314 {
1315 assert(0);
1316 return false;
1317 }
1318
1319 uint32_t width = 0, height = 0, total_levels = 0;
1320 basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
1321
1322 if (cubemap_flag)
1323 {
1324 if ((gpu_images.size() % 6) != 0)
1325 {
1326 assert(0);
1327 return false;
1328 }
1329 }
1330
1331 for (uint32_t array_index = 0; array_index < gpu_images.size(); array_index++)
1332 {
1333 const gpu_image_vec &levels = gpu_images[array_index];
1334
1335 if (!levels.size())
1336 {
1337 // Empty mip chain
1338 assert(0);
1339 return false;
1340 }
1341
1342 if (!array_index)
1343 {
1344 width = levels[0].get_pixel_width();
1345 height = levels[0].get_pixel_height();
1346 total_levels = (uint32_t)levels.size();
1347 fmt = levels[0].get_format();
1348 }
1349 else
1350 {
1351 if ((width != levels[0].get_pixel_width()) ||
1352 (height != levels[0].get_pixel_height()) ||
1353 (total_levels != levels.size()))
1354 {
1355 // All cubemap/texture array faces must be the same dimension
1356 assert(0);
1357 return false;
1358 }
1359 }
1360
1361 for (uint32_t level_index = 0; level_index < levels.size(); level_index++)
1362 {
1363 if (level_index)
1364 {
1365 if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
1366 (levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
1367 {
1368 // Malformed mipmap chain
1369 assert(0);
1370 return false;
1371 }
1372 }
1373
1374 if (fmt != levels[level_index].get_format())
1375 {
1376 // All input textures must use the same GPU format
1377 assert(0);
1378 return false;
1379 }
1380 }
1381 }
1382
1383 uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;
1384
1385 switch (fmt)
1386 {
1387 case texture_format::cBC1:
1388 case texture_format::cBC1_NV:
1389 case texture_format::cBC1_AMD:
1390 {
1391 internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
1392 break;
1393 }
1394 case texture_format::cBC3:
1395 {
1396 internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
1397 base_internal_fmt = KTX_RGBA;
1398 break;
1399 }
1400 case texture_format::cBC4:
1401 {
1402 internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
1403 base_internal_fmt = KTX_RED;
1404 break;
1405 }
1406 case texture_format::cBC5:
1407 {
1408 internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
1409 base_internal_fmt = KTX_RG;
1410 break;
1411 }
1412 case texture_format::cETC1:
1413 case texture_format::cETC1S:
1414 {
1415 internal_fmt = KTX_ETC1_RGB8_OES;
1416 break;
1417 }
1418 case texture_format::cETC2_RGB:
1419 {
1420 internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
1421 break;
1422 }
1423 case texture_format::cETC2_RGBA:
1424 {
1425 internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
1426 base_internal_fmt = KTX_RGBA;
1427 break;
1428 }
1429 case texture_format::cBC7:
1430 {
1431 internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
1432 base_internal_fmt = KTX_RGBA;
1433 break;
1434 }
1435 case texture_format::cPVRTC1_4_RGB:
1436 {
1437 internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
1438 break;
1439 }
1440 case texture_format::cPVRTC1_4_RGBA:
1441 {
1442 internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
1443 base_internal_fmt = KTX_RGBA;
1444 break;
1445 }
1446 case texture_format::cASTC4x4:
1447 {
1448 internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
1449 base_internal_fmt = KTX_RGBA;
1450 break;
1451 }
1452 case texture_format::cATC_RGB:
1453 {
1454 internal_fmt = KTX_ATC_RGB_AMD;
1455 break;
1456 }
1457 case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1458 {
1459 internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
1460 base_internal_fmt = KTX_RGBA;
1461 break;
1462 }
1463 case texture_format::cETC2_R11_EAC:
1464 {
1465 internal_fmt = KTX_COMPRESSED_R11_EAC;
1466 base_internal_fmt = KTX_RED;
1467 break;
1468 }
1469 case texture_format::cETC2_RG11_EAC:
1470 {
1471 internal_fmt = KTX_COMPRESSED_RG11_EAC;
1472 base_internal_fmt = KTX_RG;
1473 break;
1474 }
1475 case texture_format::cUASTC4x4:
1476 {
1477 internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;
1478 base_internal_fmt = KTX_RGBA;
1479 break;
1480 }
1481 case texture_format::cFXT1_RGB:
1482 {
1483 internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;
1484 break;
1485 }
1486 case texture_format::cPVRTC2_4_RGBA:
1487 {
1488 internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;
1489 base_internal_fmt = KTX_RGBA;
1490 break;
1491 }
1492 default:
1493 {
1494 // TODO
1495 assert(0);
1496 return false;
1497 }
1498 }
1499
1500 ktx_header header;
1501 header.clear();
1502 memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
1503 header.m_endianness = KTX_ENDIAN;
1504
1505 header.m_pixelWidth = width;
1506 header.m_pixelHeight = height;
1507
1508 header.m_glTypeSize = 1;
1509
1510 header.m_glInternalFormat = internal_fmt;
1511 header.m_glBaseInternalFormat = base_internal_fmt;
1512
1513 header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / 6) : gpu_images.size());
1514 if (header.m_numberOfArrayElements == 1)
1515 header.m_numberOfArrayElements = 0;
1516
1517 header.m_numberOfMipmapLevels = total_levels;
1518 header.m_numberOfFaces = cubemap_flag ? 6 : 1;
1519
1520 append_vector(ktx_data, (uint8_t *)&header, sizeof(header));
1521
1522 for (uint32_t level_index = 0; level_index < total_levels; level_index++)
1523 {
1524 uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes();
1525
1526 if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1))
1527 {
1528 img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(1, header.m_numberOfArrayElements);
1529 }
1530
1531 assert(img_size && ((img_size & 3) == 0));
1532
1533 packed_uint<4> packed_img_size(img_size);
1534 append_vector(ktx_data, (uint8_t *)&packed_img_size, sizeof(packed_img_size));
1535
1536 uint32_t bytes_written = 0;
1537
1538 for (uint32_t array_index = 0; array_index < maximum<uint32_t>(1, header.m_numberOfArrayElements); array_index++)
1539 {
1540 for (uint32_t face_index = 0; face_index < header.m_numberOfFaces; face_index++)
1541 {
1542 const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index];
1543
1544 append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());
1545
1546 bytes_written += img.get_size_in_bytes();
1547 }
1548
1549 } // array_index
1550
1551 } // level_index
1552
1553 return true;
1554 }
1555
1556 bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag)
1557 {
1558 std::string extension(string_tolower(string_get_extension(pFilename)));
1559
1560 uint8_vec filedata;
1561 if (extension == "ktx")
1562 {
1563 if (!create_ktx_texture_file(filedata, g, cubemap_flag))
1564 return false;
1565 }
1566 else if (extension == "pvr")
1567 {
1568 // TODO
1569 return false;
1570 }
1571 else if (extension == "dds")
1572 {
1573 // TODO
1574 return false;
1575 }
1576 else
1577 {
1578 // unsupported texture format
1579 assert(0);
1580 return false;
1581 }
1582
1583 return basisu::write_vec_to_file(pFilename, filedata);
1584 }
1585
1586 bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
1587 {
1588 basisu::vector<gpu_image_vec> v;
1589 enlarge_vector(v, 1)->push_back(g);
1590 return write_compressed_texture_file(pFilename, v, false);
1591 }
1592
// Header that write_3dfx_out_file() writes at the start of a .OUT file: four 4-byte packed_uint fields.
// The magic bytes are filled in individually by the writer instead of using this constant:
//const uint32_t OUT_FILE_MAGIC = 'TEXC';
struct out_file_header
{
	packed_uint<4> m_magic;
	packed_uint<4> m_pad;
	packed_uint<4> m_width;
	packed_uint<4> m_height;
};
1601
1602 // As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output.
1603 bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)
1604 {
1605 out_file_header hdr;
1606 //hdr.m_magic = OUT_FILE_MAGIC;
1607 hdr.m_magic.m_bytes[0] = 67;
1608 hdr.m_magic.m_bytes[1] = 88;
1609 hdr.m_magic.m_bytes[2] = 69;
1610 hdr.m_magic.m_bytes[3] = 84;
1611 hdr.m_pad = 0;
1612 hdr.m_width = gi.get_blocks_x() * 8;
1613 hdr.m_height = gi.get_blocks_y() * 4;
1614
1615 FILE* pFile = nullptr;
1616#ifdef _WIN32
1617 fopen_s(&pFile, pFilename, "wb");
1618#else
1619 pFile = fopen(pFilename, "wb");
1620#endif
1621 if (!pFile)
1622 return false;
1623
1624 fwrite(&hdr, sizeof(hdr), 1, pFile);
1625 fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile);
1626
1627 return fclose(pFile) != EOF;
1628 }
1629} // basisu
1630
1631