basisu_gpu_texture.cpp source code [Godot/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp]

1	// basisu_gpu_texture.cpp
2	// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3	//
4	// Licensed under the Apache License, Version 2.0 (the "License");
5	// you may not use this file except in compliance with the License.
6	// You may obtain a copy of the License at
7	//
8	// http://www.apache.org/licenses/LICENSE-2.0
9	//
10	// Unless required by applicable law or agreed to in writing, software
11	// distributed under the License is distributed on an "AS IS" BASIS,
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	// See the License for the specific language governing permissions and
14	// limitations under the License.
15	#include "basisu_gpu_texture.h"
16	#include "basisu_enc.h"
17	#include "basisu_pvrtc1_4.h"
18	#if BASISU_USE_ASTC_DECOMPRESS
19	#include "basisu_astc_decomp.h"
20	#endif
21	#include "basisu_bc7enc.h"
22
23	namespace basisu
24	{
25	void unpack_etc2_eac(const void pBlock_bits, color_rgba pPixels)
26	{
27	static_assert(sizeof(eac_a8_block) == `8`, "sizeof(eac_a8_block) == 8");
28
29	const eac_a8_block pBlock = static_cast<const* eac_a8_block *>(pBlock_bits);
30
31	const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
32
33	const uint64_t selector_bits = pBlock->get_selector_bits();
34
35	const int32_t base = pBlock->m_base;
36	const int32_t mul = pBlock->m_multiplier;
37
38	pPixels[`0`].a = clamp255(base + pTable[pBlock->get_selector(`0`, `0`, selector_bits)] * mul);
39	pPixels[`1`].a = clamp255(base + pTable[pBlock->get_selector(`1`, `0`, selector_bits)] * mul);
40	pPixels[`2`].a = clamp255(base + pTable[pBlock->get_selector(`2`, `0`, selector_bits)] * mul);
41	pPixels[`3`].a = clamp255(base + pTable[pBlock->get_selector(`3`, `0`, selector_bits)] * mul);
42
43	pPixels[`4`].a = clamp255(base + pTable[pBlock->get_selector(`0`, `1`, selector_bits)] * mul);
44	pPixels[`5`].a = clamp255(base + pTable[pBlock->get_selector(`1`, `1`, selector_bits)] * mul);
45	pPixels[`6`].a = clamp255(base + pTable[pBlock->get_selector(`2`, `1`, selector_bits)] * mul);
46	pPixels[`7`].a = clamp255(base + pTable[pBlock->get_selector(`3`, `1`, selector_bits)] * mul);
47
48	pPixels[`8`].a = clamp255(base + pTable[pBlock->get_selector(`0`, `2`, selector_bits)] * mul);
49	pPixels[`9`].a = clamp255(base + pTable[pBlock->get_selector(`1`, `2`, selector_bits)] * mul);
50	pPixels[`10`].a = clamp255(base + pTable[pBlock->get_selector(`2`, `2`, selector_bits)] * mul);
51	pPixels[`11`].a = clamp255(base + pTable[pBlock->get_selector(`3`, `2`, selector_bits)] * mul);
52
53	pPixels[`12`].a = clamp255(base + pTable[pBlock->get_selector(`0`, `3`, selector_bits)] * mul);
54	pPixels[`13`].a = clamp255(base + pTable[pBlock->get_selector(`1`, `3`, selector_bits)] * mul);
55	pPixels[`14`].a = clamp255(base + pTable[pBlock->get_selector(`2`, `3`, selector_bits)] * mul);
56	pPixels[`15`].a = clamp255(base + pTable[pBlock->get_selector(`3`, `3`, selector_bits)] * mul);
57	}
58
// In-memory layout of one 8-byte BC1 block: two 16-bit little-endian endpoint
// colors followed by four selector bytes (one byte per row, 2 bits per pixel).
struct bc1_block
{
	enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };

	uint8_t m_low_color[cTotalEndpointBytes];
	uint8_t m_high_color[cTotalEndpointBytes];
	uint8_t m_selectors[cTotalSelectorBytes];

	// Endpoint colors assembled from their two little-endian bytes.
	inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
	inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }

	// Splits a packed 5:6:5 color into r/g/b and expands each component to 8 bits
	// by replicating its high bits into the low bits.
	static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
	{
		r = (c >> 11) & 31;
		g = (c >> 5) & 63;
		b = c & 31;

		r = (r << 3) | (r >> 2);
		g = (g << 2) | (g >> 4);
		b = (b << 3) | (b >> 2);
	}

	// Returns the 2-bit selector for pixel (x, y); x and y must both be < 4.
	inline uint32_t get_selector(uint32_t x, uint32_t y) const
	{
		assert((x < 4U) && (y < 4U));
		return (m_selectors[y] >> (x * 2)) & 3;
	}
};
83
84	// Returns true if the block uses 3 color punchthrough alpha mode.
85	bool unpack_bc1(const void pBlock_bits, color_rgba pPixels, bool set_alpha)
86	{
87	static_assert(sizeof(bc1_block) == `8`, "sizeof(bc1_block) == 8");
88
89	const bc1_block pBlock = static_cast<const* bc1_block *>(pBlock_bits);
90
91	const uint32_t l = pBlock->get_low_color();
92	const uint32_t h = pBlock->get_high_color();
93
94	color_rgba c[`4`];
95
96	uint32_t r0, g0, b0, r1, g1, b1;
97	bc1_block::unpack_color(l, r0, g0, b0);
98	bc1_block::unpack_color(h, r1, g1, b1);
99
100	c[`0`].set_noclamp_rgba(r0, g0, b0, `255`);
101	c[`1`].set_noclamp_rgba(r1, g1, b1, `255`);
102
103	bool used_punchthrough = false;
104
105	if (l > h)
106	{
107	c[`2`].set_noclamp_rgba((r0 * `2` + r1) / `3`, (g0 * `2` + g1) / `3`, (b0 * `2` + b1) / `3`, `255`);
108	c[`3`].set_noclamp_rgba((r1 * `2` + r0) / `3`, (g1 * `2` + g0) / `3`, (b1 * `2` + b0) / `3`, `255`);
109	}
110	else
111	{
112	c[`2`].set_noclamp_rgba((r0 + r1) / `2`, (g0 + g1) / `2`, (b0 + b1) / `2`, `255`);
113	c[`3`].set_noclamp_rgba(`0`, `0`, `0`, `0`);
114	used_punchthrough = true;
115	}
116
117	if (set_alpha)
118	{
119	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
120	{
121	pPixels[`0`] = c[pBlock->get_selector(`0`, y)];
122	pPixels[`1`] = c[pBlock->get_selector(`1`, y)];
123	pPixels[`2`] = c[pBlock->get_selector(`2`, y)];
124	pPixels[`3`] = c[pBlock->get_selector(`3`, y)];
125	}
126	}
127	else
128	{
129	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
130	{
131	pPixels[`0`].set_rgb(c[pBlock->get_selector(`0`, y)]);
132	pPixels[`1`].set_rgb(c[pBlock->get_selector(`1`, y)]);
133	pPixels[`2`].set_rgb(c[pBlock->get_selector(`2`, y)]);
134	pPixels[`3`].set_rgb(c[pBlock->get_selector(`3`, y)]);
135	}
136	}
137
138	return used_punchthrough;
139	}
140
141	bool unpack_bc1_nv(const void pBlock_bits, color_rgba pPixels, bool set_alpha)
142	{
143	static_assert(sizeof(bc1_block) == `8`, "sizeof(bc1_block) == 8");
144
145	const bc1_block pBlock = static_cast<const* bc1_block *>(pBlock_bits);
146
147	const uint32_t l = pBlock->get_low_color();
148	const uint32_t h = pBlock->get_high_color();
149
150	color_rgba c[`4`];
151
152	int r0 = (l >> `11`) & `31`;
153	int g0 = (l >> `5`) & `63`;
154	int b0 = l & `31`;
155	int r1 = (h >> `11`) & `31`;
156	int g1 = (h >> `5`) & `63`;
157	int b1 = h & `31`;
158
159	c[`0`].b = (uint8_t)((`3` * b0 * `22`) / `8`);
160	c[`0`].g = (uint8_t)((g0 << `2`) \| (g0 >> `4`));
161	c[`0`].r = (uint8_t)((`3` * r0 * `22`) / `8`);
162	c[`0`].a = `0xFF`;
163
164	c[`1`].r = (uint8_t)((`3` * r1 * `22`) / `8`);
165	c[`1`].g = (uint8_t)((g1 << `2`) \| (g1 >> `4`));
166	c[`1`].b = (uint8_t)((`3` * b1 * `22`) / `8`);
167	c[`1`].a = `0xFF`;
168
169	int gdiff = c[`1`].g - c[`0`].g;
170
171	bool used_punchthrough = false;
172
173	if (l > h)
174	{
175	c[`2`].r = (uint8_t)(((`2` * r0 + r1) * `22`) / `8`);
176	c[`2`].g = (uint8_t)(((`256` * c[`0`].g + gdiff/`4` + `128` + gdiff * `80`) / `256`));
177	c[`2`].b = (uint8_t)(((`2` * b0 + b1) * `22`) / `8`);
178	c[`2`].a = `0xFF`;
179
180	c[`3`].r = (uint8_t)(((`2` * r1 + r0) * `22`) / `8`);
181	c[`3`].g = (uint8_t)((`256` * c[`1`].g - gdiff/`4` + `128` - gdiff * `80`) / `256`);
182	c[`3`].b = (uint8_t)(((`2` * b1 + b0) * `22`) / `8`);
183	c[`3`].a = `0xFF`;
184	}
185	else
186	{
187	c[`2`].r = (uint8_t)(((r0 + r1) * `33`) / `8`);
188	c[`2`].g = (uint8_t)((`256` * c[`0`].g + gdiff/`4` + `128` + gdiff * `128`) / `256`);
189	c[`2`].b = (uint8_t)(((b0 + b1) * `33`) / `8`);
190	c[`2`].a = `0xFF`;
191
192	c[`3`].set_noclamp_rgba(`0`, `0`, `0`, `0`);
193	used_punchthrough = true;
194	}
195
196	if (set_alpha)
197	{
198	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
199	{
200	pPixels[`0`] = c[pBlock->get_selector(`0`, y)];
201	pPixels[`1`] = c[pBlock->get_selector(`1`, y)];
202	pPixels[`2`] = c[pBlock->get_selector(`2`, y)];
203	pPixels[`3`] = c[pBlock->get_selector(`3`, y)];
204	}
205	}
206	else
207	{
208	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
209	{
210	pPixels[`0`].set_rgb(c[pBlock->get_selector(`0`, y)]);
211	pPixels[`1`].set_rgb(c[pBlock->get_selector(`1`, y)]);
212	pPixels[`2`].set_rgb(c[pBlock->get_selector(`2`, y)]);
213	pPixels[`3`].set_rgb(c[pBlock->get_selector(`3`, y)]);
214	}
215	}
216
217	return used_punchthrough;
218	}
219
// Weighted blend of two 8-bit values: (43 * c0 + 21 * c1 + 32) / 64, i.e. roughly
// 2/3 of c0 plus 1/3 of c1 with rounding. Both inputs must be < 256.
static inline int interp_5_6_amd(int c0, int c1)
{
	assert(c0 < 256 && c1 < 256);
	return (c0 * 43 + c1 * 21 + 32) >> 6;
}

// Rounded average of two 8-bit values. Both inputs must be < 256.
static inline int interp_half_5_6_amd(int c0, int c1)
{
	assert(c0 < 256 && c1 < 256);
	return (c0 + c1 + 1) >> 1;
}
222
223	bool unpack_bc1_amd(const void pBlock_bits, color_rgba pPixels, bool set_alpha)
224	{
225	const bc1_block pBlock = static_cast<const* bc1_block *>(pBlock_bits);
226
227	const uint32_t l = pBlock->get_low_color();
228	const uint32_t h = pBlock->get_high_color();
229
230	color_rgba c[`4`];
231
232	uint32_t r0, g0, b0, r1, g1, b1;
233	bc1_block::unpack_color(l, r0, g0, b0);
234	bc1_block::unpack_color(h, r1, g1, b1);
235
236	c[`0`].set_noclamp_rgba(r0, g0, b0, `255`);
237	c[`1`].set_noclamp_rgba(r1, g1, b1, `255`);
238
239	bool used_punchthrough = false;
240
241	if (l > h)
242	{
243	c[`2`].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), `255`);
244	c[`3`].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), `255`);
245	}
246	else
247	{
248	c[`2`].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), `255`);
249	c[`3`].set_noclamp_rgba(`0`, `0`, `0`, `0`);
250	used_punchthrough = true;
251	}
252
253	if (set_alpha)
254	{
255	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
256	{
257	pPixels[`0`] = c[pBlock->get_selector(`0`, y)];
258	pPixels[`1`] = c[pBlock->get_selector(`1`, y)];
259	pPixels[`2`] = c[pBlock->get_selector(`2`, y)];
260	pPixels[`3`] = c[pBlock->get_selector(`3`, y)];
261	}
262	}
263	else
264	{
265	for (uint32_t y = `0`; y < `4`; y++, pPixels += `4`)
266	{
267	pPixels[`0`].set_rgb(c[pBlock->get_selector(`0`, y)]);
268	pPixels[`1`].set_rgb(c[pBlock->get_selector(`1`, y)]);
269	pPixels[`2`].set_rgb(c[pBlock->get_selector(`2`, y)]);
270	pPixels[`3`].set_rgb(c[pBlock->get_selector(`3`, y)]);
271	}
272	}
273
274	return used_punchthrough;
275	}
276
// In-memory layout of one 8-byte BC4 block: two 8-bit endpoints followed by
// 48 bits of selectors (16 pixels x 3 bits, little-endian).
struct bc4_block
{
	enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
	uint8_t m_endpoints[2];

	uint8_t m_selectors[cTotalSelectorBytes];

	inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
	inline uint32_t get_high_alpha() const { return m_endpoints[1]; }

	// True when the endpoints select the 6-interpolated-value palette (low <= high).
	inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }

	// Assembles the six selector bytes into one 48-bit little-endian value.
	inline uint64_t get_selector_bits() const
	{
		const uint32_t lo32 = (uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) |
			((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U);

		return ((uint64_t)lo32) |
			(((uint64_t)m_selectors[4]) << 32U) |
			(((uint64_t)m_selectors[5]) << 40U);
	}

	// Extracts the 3-bit selector of pixel (x, y) from the value returned by
	// get_selector_bits(); x and y must both be < 4.
	inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
	{
		assert((x < 4U) && (y < 4U));
		return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
	}

	// Fills pDst[0..7] with the palette for the 6-value mode: the two endpoints,
	// four interpolated values, then the constants 0 and 255. Returns 6.
	static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		pDst[0] = static_cast<uint8_t>(l);
		pDst[1] = static_cast<uint8_t>(h);
		pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
		pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
		pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
		pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
		pDst[6] = 0;
		pDst[7] = 255;
		return 6;
	}

	// Fills pDst[0..7] with the palette for the 8-value mode: the two endpoints
	// followed by six interpolated values. Returns 8.
	static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		pDst[0] = static_cast<uint8_t>(l);
		pDst[1] = static_cast<uint8_t>(h);
		pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
		pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
		pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
		pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
		pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
		pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
		return 8;
	}

	// Picks the palette by endpoint order: l > h gives 8 values, otherwise 6.
	// pDst must have room for 8 bytes in either case.
	static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		if (l > h)
			return get_block_values8(pDst, l, h);
		else
			return get_block_values6(pDst, l, h);
	}
};
335
336	void unpack_bc4(const void pBlock_bits, uint8_t pPixels, uint32_t stride)
337	{
338	static_assert(sizeof(bc4_block) == `8`, "sizeof(bc4_block) == 8");
339
340	const bc4_block pBlock = static_cast<const* bc4_block *>(pBlock_bits);
341
342	uint8_t sel_values[`8`];
343	bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
344
345	const uint64_t selector_bits = pBlock->get_selector_bits();
346
347	for (uint32_t y = `0`; y < `4`; y++, pPixels += (stride * `4U`))
348	{
349	pPixels[`0`] = sel_values[pBlock->get_selector(`0`, y, selector_bits)];
350	pPixels[stride * `1`] = sel_values[pBlock->get_selector(`1`, y, selector_bits)];
351	pPixels[stride * `2`] = sel_values[pBlock->get_selector(`2`, y, selector_bits)];
352	pPixels[stride * `3`] = sel_values[pBlock->get_selector(`3`, y, selector_bits)];
353	}
354	}
355
356	// Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3.
357	bool unpack_bc3(const void pBlock_bits, color_rgba pPixels)
358	{
359	bool success = true;
360
361	if (unpack_bc1((const uint8_t )pBlock_bits + sizeof(bc4_block), pPixels, true*))
362	success = false;
363
364	unpack_bc4(pBlock_bits, &pPixels[`0`].a, sizeof(color_rgba));
365
366	return success;
367	}
368
369	// writes RG
370	void unpack_bc5(const void pBlock_bits, color_rgba pPixels)
371	{
372	unpack_bc4(pBlock_bits, &pPixels[`0`].r, sizeof(color_rgba));
373	unpack_bc4((const uint8_t )pBlock_bits + sizeof(bc4_block), &pPixels[`0`].g, sizeof*(color_rgba));
374	}
375
376	// ATC isn't officially documented, so I'm assuming these references:
377	// http://www.guildsoftware.com/papers/2012.Converting.DXTC.to.ATC.pdf
378	// https://github.com/Triang3l/S3TConv/blob/master/s3tconv_atitc.c
379	// The paper incorrectly says the ATC lerp factors are 1/3 and 2/3, but they are actually 3/8 and 5/8.
380	void unpack_atc(const void* pBlock_bits, color_rgba* pPixels)
381	{
382	const uint8_t* pBytes = static_cast<const uint8_t*>(pBlock_bits);
383
384	const uint16_t color0 = pBytes[`0`] \| (pBytes[`1`] << `8U`);
385	const uint16_t color1 = pBytes[`2`] \| (pBytes[`3`] << `8U`);
386	uint32_t sels = pBytes[`4`] \| (pBytes[`5`] << `8U`) \| (pBytes[`6`] << `16U`) \| (pBytes[`7`] << `24U`);
387
388	const bool mode = (color0 & `0x8000`) != `0`;
389
390	color_rgba c[`4`];
391
392	c[`0`].set((color0 >> `10`) & `31`, (color0 >> `5`) & `31`, color0 & `31`, `255`);
393	c[`0`].r = (c[`0`].r << `3`) \| (c[`0`].r >> `2`);
394	c[`0`].g = (c[`0`].g << `3`) \| (c[`0`].g >> `2`);
395	c[`0`].b = (c[`0`].b << `3`) \| (c[`0`].b >> `2`);
396
397	c[`3`].set((color1 >> `11`) & `31`, (color1 >> `5`) & `63`, color1 & `31`, `255`);
398	c[`3`].r = (c[`3`].r << `3`) \| (c[`3`].r >> `2`);
399	c[`3`].g = (c[`3`].g << `2`) \| (c[`3`].g >> `4`);
400	c[`3`].b = (c[`3`].b << `3`) \| (c[`3`].b >> `2`);
401
402	if (mode)
403	{
404	c[`1`].set(basisu::maximum(`0`, c[`0`].r - (c[`3`].r >> `2`)), basisu::maximum(`0`, c[`0`].g - (c[`3`].g >> `2`)), basisu::maximum(`0`, c[`0`].b - (c[`3`].b >> `2`)), `255`);
405	c[`2`] = c[`0`];
406	c[`0`].set(`0`, `0`, `0`, `255`);
407	}
408	else
409	{
410	c[`1`].r = (c[`0`].r * `5` + c[`3`].r * `3`) >> `3`;
411	c[`1`].g = (c[`0`].g * `5` + c[`3`].g * `3`) >> `3`;
412	c[`1`].b = (c[`0`].b * `5` + c[`3`].b * `3`) >> `3`;
413
414	c[`2`].r = (c[`0`].r * `3` + c[`3`].r * `5`) >> `3`;
415	c[`2`].g = (c[`0`].g * `3` + c[`3`].g * `5`) >> `3`;
416	c[`2`].b = (c[`0`].b * `3` + c[`3`].b * `5`) >> `3`;
417	}
418
419	for (uint32_t i = `0`; i < `16`; i++)
420	{
421	const uint32_t s = sels & `3`;
422
423	pPixels[i] = c[s];
424
425	sels >>= `2`;
426	}
427	}
428
429	// BC7 mode 0-7 decompression.
430	// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines.
431
// Expands a val_bits-wide endpoint value plus its p-bit to 8 bits.
// The p-bit becomes the new least significant bit; the (val_bits + 1)-bit result
// is left-aligned in a byte and its top bits are replicated into the low bits.
// Requires 4 <= val_bits <= 8, val < 2^val_bits and pbit < 2.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits)
{
	assert(val < (1U << val_bits));
	assert(pbit < 2);
	assert(val_bits >= 4 && val_bits <= 8);

	const uint32_t total_bits = val_bits + 1;
	val = (val << 1) | pbit;
	val <<= (8 - total_bits);
	val |= (val >> total_bits);

	assert(val <= 255);
	return val;
}

// Expands a val_bits-wide endpoint value (no p-bit) to 8 bits by left-aligning
// it in a byte and replicating its top bits into the low bits.
// Requires 4 <= val_bits <= 8 and val < 2^val_bits.
static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits)
{
	assert(val < (1U << val_bits));
	assert(val_bits >= 4 && val_bits <= 8);

	val <<= (8 - val_bits);
	val |= (val >> val_bits);

	assert(val <= 255);
	return val;
}
434
435	static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < `4`); return (l * (`64` - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + `32`) >> `6`; }
436	static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < `8`); return (l * (`64` - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + `32`) >> `6`; }
437	static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < `16`); return (l * (`64` - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + `32`) >> `6`; }
438	static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits)
439	{
440	assert(l <= `255` && h <= `255`);
441	switch (bits)
442	{
443	case `2`: return bc7_interp2(l, h, w);
444	case `3`: return bc7_interp3(l, h, w);
445	case `4`: return bc7_interp4(l, h, w);
446	default:
447	break;
448	}
449	return `0`;
450	}
451
452	bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
453	{
454	//const uint32_t SUBSETS = 3;
455	const uint32_t ENDPOINTS = `6`;
456	const uint32_t COMPS = `3`;
457	const uint32_t WEIGHT_BITS = (mode == `0`) ? `3` : `2`;
458	const uint32_t ENDPOINT_BITS = (mode == `0`) ? `4` : `5`;
459	const uint32_t PBITS = (mode == `0`) ? `6` : `0`;
460	const uint32_t WEIGHT_VALS = `1` << WEIGHT_BITS;
461
462	uint32_t bit_offset = `0`;
463	const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
464
465	if (read_bits32(pBuf, bit_offset, mode + `1`) != (`1U` << mode)) return false;
466
467	const uint32_t part = read_bits32(pBuf, bit_offset, (mode == `0`) ? `4` : `6`);
468
469	color_rgba endpoints[ENDPOINTS];
470	for (uint32_t c = `0`; c < COMPS; c++)
471	for (uint32_t e = `0`; e < ENDPOINTS; e++)
472	endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
473
474	uint32_t pbits[`6`];
475	for (uint32_t p = `0`; p < PBITS; p++)
476	pbits[p] = read_bits32(pBuf, bit_offset, `1`);
477
478	uint32_t weights[`16`];
479	for (uint32_t i = `0`; i < `16`; i++)
480	weights[i] = read_bits32(pBuf, bit_offset, ((!i) \|\| (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) \|\| (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - `1`) : WEIGHT_BITS);
481
482	assert(bit_offset == `128`);
483
484	for (uint32_t e = `0`; e < ENDPOINTS; e++)
485	for (uint32_t c = `0`; c < `4`; c++)
486	endpoints[e][c] = (uint8_t)((c == `3`) ? `255` : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS)));
487
488	color_rgba block_colors[`3`][`8`];
489	for (uint32_t s = `0`; s < `3`; s++)
490	for (uint32_t i = `0`; i < WEIGHT_VALS; i++)
491	{
492	for (uint32_t c = `0`; c < `3`; c++)
493	block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * `2` + `0`][c], endpoints[s * `2` + `1`][c], i, WEIGHT_BITS);
494	block_colors[s][i][`3`] = `255`;
495	}
496
497	for (uint32_t i = `0`; i < `16`; i++)
498	pPixels[i] = block_colors[basist::g_bc7_partition3[part * `16` + i]][weights[i]];
499
500	return true;
501	}
502
503	bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
504	{
505	//const uint32_t SUBSETS = 2;
506	const uint32_t ENDPOINTS = `4`;
507	const uint32_t COMPS = (mode == `7`) ? `4` : `3`;
508	const uint32_t WEIGHT_BITS = (mode == `1`) ? `3` : `2`;
509	const uint32_t ENDPOINT_BITS = (mode == `7`) ? `5` : ((mode == `1`) ? `6` : `7`);
510	const uint32_t PBITS = (mode == `1`) ? `2` : `4`;
511	const uint32_t SHARED_PBITS = (mode == `1`) ? true : false;
512	const uint32_t WEIGHT_VALS = `1` << WEIGHT_BITS;
513
514	uint32_t bit_offset = `0`;
515	const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
516
517	if (read_bits32(pBuf, bit_offset, mode + `1`) != (`1U` << mode)) return false;
518
519	const uint32_t part = read_bits32(pBuf, bit_offset, `6`);
520
521	color_rgba endpoints[ENDPOINTS];
522	for (uint32_t c = `0`; c < COMPS; c++)
523	for (uint32_t e = `0`; e < ENDPOINTS; e++)
524	endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS);
525
526	uint32_t pbits[`4`];
527	for (uint32_t p = `0`; p < PBITS; p++)
528	pbits[p] = read_bits32(pBuf, bit_offset, `1`);
529
530	uint32_t weights[`16`];
531	for (uint32_t i = `0`; i < `16`; i++)
532	weights[i] = read_bits32(pBuf, bit_offset, ((!i) \|\| (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - `1`) : WEIGHT_BITS);
533
534	assert(bit_offset == `128`);
535
536	for (uint32_t e = `0`; e < ENDPOINTS; e++)
537	for (uint32_t c = `0`; c < `4`; c++)
538	endpoints[e][c] = (uint8_t)((c == ((mode == `7U`) ? `4U` : `3U`)) ? `255` : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> `1`) : e], ENDPOINT_BITS));
539
540	color_rgba block_colors[`2`][`8`];
541	for (uint32_t s = `0`; s < `2`; s++)
542	for (uint32_t i = `0`; i < WEIGHT_VALS; i++)
543	{
544	for (uint32_t c = `0`; c < COMPS; c++)
545	block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * `2` + `0`][c], endpoints[s * `2` + `1`][c], i, WEIGHT_BITS);
546	block_colors[s][i][`3`] = (COMPS == `3`) ? `255` : block_colors[s][i][`3`];
547	}
548
549	for (uint32_t i = `0`; i < `16`; i++)
550	pPixels[i] = block_colors[basist::g_bc7_partition2[part * `16` + i]][weights[i]];
551
552	return true;
553	}
554
555	bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels)
556	{
557	const uint32_t ENDPOINTS = `2`;
558	const uint32_t COMPS = `4`;
559	const uint32_t WEIGHT_BITS = `2`;
560	const uint32_t A_WEIGHT_BITS = (mode == `4`) ? `3` : `2`;
561	const uint32_t ENDPOINT_BITS = (mode == `4`) ? `5` : `7`;
562	const uint32_t A_ENDPOINT_BITS = (mode == `4`) ? `6` : `8`;
563	//const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS;
564	//const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS;
565
566	uint32_t bit_offset = `0`;
567	const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits);
568
569	if (read_bits32(pBuf, bit_offset, mode + `1`) != (`1U` << mode)) return false;
570
571	const uint32_t comp_rot = read_bits32(pBuf, bit_offset, `2`);
572	const uint32_t index_mode = (mode == `4`) ? read_bits32(pBuf, bit_offset, `1`) : `0`;
573
574	color_rgba endpoints[ENDPOINTS];
575	for (uint32_t c = `0`; c < COMPS; c++)
576	for (uint32_t e = `0`; e < ENDPOINTS; e++)
577	endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == `3`) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
578
579	const uint32_t weight_bits[`2`] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS };
580
581	uint32_t weights[`16`], a_weights[`16`];
582
583	for (uint32_t i = `0`; i < `16`; i++)
584	(index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? `1` : `0`));
585
586	for (uint32_t i = `0`; i < `16`; i++)
587	(index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[`1` - index_mode] - ((!i) ? `1` : `0`));
588
589	assert(bit_offset == `128`);
590
591	for (uint32_t e = `0`; e < ENDPOINTS; e++)
592	for (uint32_t c = `0`; c < `4`; c++)
593	endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == `3`) ? A_ENDPOINT_BITS : ENDPOINT_BITS);
594
595	color_rgba block_colors[`8`];
596	for (uint32_t i = `0`; i < (`1U` << weight_bits[`0`]); i++)
597	for (uint32_t c = `0`; c < `3`; c++)
598	block_colors[i][c] = (uint8_t)bc7_interp(endpoints[`0`][c], endpoints[`1`][c], i, weight_bits[`0`]);
599
600	for (uint32_t i = `0`; i < (`1U` << weight_bits[`1`]); i++)
601	block_colors[i][`3`] = (uint8_t)bc7_interp(endpoints[`0`][`3`], endpoints[`1`][`3`], i, weight_bits[`1`]);
602
603	for (uint32_t i = `0`; i < `16`; i++)
604	{
605	pPixels[i] = block_colors[weights[i]];
606	pPixels[i].a = block_colors[a_weights[i]].a;
607	if (comp_rot >= `1`)
608	std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - `1`]);
609	}
610
611	return true;
612	}
613
// Bitfield overlay for a 16-byte BC7 mode 6 block.
// NOTE: bitfield allocation order is implementation-defined; this layout relies
// on the compiler packing fields from the least significant bit upward within
// each uint64_t.
struct bc7_mode_6
{
	// First 64 bits: 7 mode bits, 7-bit r/g/b/a endpoint pairs, endpoint 0's p-bit.
	struct
	{
		uint64_t m_mode : 7;
		uint64_t m_r0 : 7;
		uint64_t m_r1 : 7;
		uint64_t m_g0 : 7;
		uint64_t m_g1 : 7;
		uint64_t m_b0 : 7;
		uint64_t m_b1 : 7;
		uint64_t m_a0 : 7;
		uint64_t m_a1 : 7;
		uint64_t m_p0 : 1;
	} m_lo;

	// Second 64 bits: endpoint 1's p-bit followed by the 16 selectors, named
	// m_s<x><y>. The first selector (m_s00) is 3 bits wide, the rest 4 bits.
	union
	{
		struct
		{
			uint64_t m_p1 : 1;
			uint64_t m_s00 : 3;
			uint64_t m_s10 : 4;
			uint64_t m_s20 : 4;
			uint64_t m_s30 : 4;

			uint64_t m_s01 : 4;
			uint64_t m_s11 : 4;
			uint64_t m_s21 : 4;
			uint64_t m_s31 : 4;

			uint64_t m_s02 : 4;
			uint64_t m_s12 : 4;
			uint64_t m_s22 : 4;
			uint64_t m_s32 : 4;

			uint64_t m_s03 : 4;
			uint64_t m_s13 : 4;
			uint64_t m_s23 : 4;
			uint64_t m_s33 : 4;

		} m_hi;

		// Raw view of the same 64 bits.
		uint64_t m_hi_bits;
	};
};
660
661	bool unpack_bc7_mode6(const void pBlock_bits, color_rgba pPixels)
662	{
663	static_assert(sizeof(bc7_mode_6) == `16`, "sizeof(bc7_mode_6) == 16");
664
665	const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
666
667	if (block.m_lo.m_mode != (`1` << `6`))
668	return false;
669
670	const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << `1`) \| block.m_lo.m_p0);
671	const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << `1`) \| block.m_lo.m_p0);
672	const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << `1`) \| block.m_lo.m_p0);
673	const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << `1`) \| block.m_lo.m_p0);
674	const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << `1`) \| block.m_hi.m_p1);
675	const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << `1`) \| block.m_hi.m_p1);
676	const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << `1`) \| block.m_hi.m_p1);
677	const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << `1`) \| block.m_hi.m_p1);
678
679	color_rgba vals[`16`];
680	for (uint32_t i = `0`; i < `16`; i++)
681	{
682	const uint32_t w = basist::g_bc7_weights4[i];
683	const uint32_t iw = `64` - w;
684	vals[i].set_noclamp_rgba(
685	(r0 * iw + r1 * w + `32`) >> `6`,
686	(g0 * iw + g1 * w + `32`) >> `6`,
687	(b0 * iw + b1 * w + `32`) >> `6`,
688	(a0 * iw + a1 * w + `32`) >> `6`);
689	}
690
691	pPixels[`0`] = vals[block.m_hi.m_s00];
692	pPixels[`1`] = vals[block.m_hi.m_s10];
693	pPixels[`2`] = vals[block.m_hi.m_s20];
694	pPixels[`3`] = vals[block.m_hi.m_s30];
695
696	pPixels[`4`] = vals[block.m_hi.m_s01];
697	pPixels[`5`] = vals[block.m_hi.m_s11];
698	pPixels[`6`] = vals[block.m_hi.m_s21];
699	pPixels[`7`] = vals[block.m_hi.m_s31];
700
701	pPixels[`8`] = vals[block.m_hi.m_s02];
702	pPixels[`9`] = vals[block.m_hi.m_s12];
703	pPixels[`10`] = vals[block.m_hi.m_s22];
704	pPixels[`11`] = vals[block.m_hi.m_s32];
705
706	pPixels[`12`] = vals[block.m_hi.m_s03];
707	pPixels[`13`] = vals[block.m_hi.m_s13];
708	pPixels[`14`] = vals[block.m_hi.m_s23];
709	pPixels[`15`] = vals[block.m_hi.m_s33];
710
711	return true;
712	}
713
714	bool unpack_bc7(const void pBlock, color_rgba pPixels)
715	{
716	const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[`0`];
717
718	for (uint32_t mode = `0`; mode <= `7`; mode++)
719	{
720	if (first_byte & (`1U` << mode))
721	{
722	switch (mode)
723	{
724	case `0`:
725	case `2`:
726	return unpack_bc7_mode0_2(mode, pBlock, pPixels);
727	case `1`:
728	case `3`:
729	case `7`:
730	return unpack_bc7_mode1_3_7(mode, pBlock, pPixels);
731	case `4`:
732	case `5`:
733	return unpack_bc7_mode4_5(mode, pBlock, pPixels);
734	case `6`:
735	return unpack_bc7_mode6(pBlock, pPixels);
736	default:
737	break;
738	}
739	}
740	}
741
742	return false;
743	}
744
// Bitfield overlay for a 16-byte FXT1 block as used by unpack_fxt1().
// NOTE: bitfield allocation order is implementation-defined; this layout relies
// on the compiler packing fields from the least significant bit upward within
// each uint64_t.
struct fxt1_block
{
	// First 64 bits: 32 two-bit texel selectors, also reachable as raw bits or
	// as 8 bytes (4 selectors per byte).
	union
	{
		struct
		{
			uint64_t m_t00 : 2;
			uint64_t m_t01 : 2;
			uint64_t m_t02 : 2;
			uint64_t m_t03 : 2;
			uint64_t m_t04 : 2;
			uint64_t m_t05 : 2;
			uint64_t m_t06 : 2;
			uint64_t m_t07 : 2;
			uint64_t m_t08 : 2;
			uint64_t m_t09 : 2;
			uint64_t m_t10 : 2;
			uint64_t m_t11 : 2;
			uint64_t m_t12 : 2;
			uint64_t m_t13 : 2;
			uint64_t m_t14 : 2;
			uint64_t m_t15 : 2;
			uint64_t m_t16 : 2;
			uint64_t m_t17 : 2;
			uint64_t m_t18 : 2;
			uint64_t m_t19 : 2;
			uint64_t m_t20 : 2;
			uint64_t m_t21 : 2;
			uint64_t m_t22 : 2;
			uint64_t m_t23 : 2;
			uint64_t m_t24 : 2;
			uint64_t m_t25 : 2;
			uint64_t m_t26 : 2;
			uint64_t m_t27 : 2;
			uint64_t m_t28 : 2;
			uint64_t m_t29 : 2;
			uint64_t m_t30 : 2;
			uint64_t m_t31 : 2;
		} m_lo;
		uint64_t m_lo_bits;
		uint8_t m_sels[8];
	};

	// Second 64 bits: four 5:5:5 colors followed by the alpha, glsb and mode fields.
	union
	{
		struct
		{
#ifdef BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING
			// This is the format that 3DFX's DECOMP.EXE tool expects, which I'm assuming is what the actual 3DFX hardware wanted.
			// Unfortunately, color0/color1 and color2/color3 are flipped relative to the official OpenGL extension and Intel's documentation!
			uint64_t m_b1 : 5;
			uint64_t m_g1 : 5;
			uint64_t m_r1 : 5;
			uint64_t m_b0 : 5;
			uint64_t m_g0 : 5;
			uint64_t m_r0 : 5;
			uint64_t m_b3 : 5;
			uint64_t m_g3 : 5;
			uint64_t m_r3 : 5;
			uint64_t m_b2 : 5;
			uint64_t m_g2 : 5;
			uint64_t m_r2 : 5;
#else
			// Intel's encoding, and the encoding in the OpenGL FXT1 spec.
			uint64_t m_b0 : 5;
			uint64_t m_g0 : 5;
			uint64_t m_r0 : 5;
			uint64_t m_b1 : 5;
			uint64_t m_g1 : 5;
			uint64_t m_r1 : 5;
			uint64_t m_b2 : 5;
			uint64_t m_g2 : 5;
			uint64_t m_r2 : 5;
			uint64_t m_b3 : 5;
			uint64_t m_g3 : 5;
			uint64_t m_r3 : 5;
#endif
			uint64_t m_alpha : 1;
			uint64_t m_glsb : 2;
			uint64_t m_mode : 1;
		} m_hi;

		// Raw view of the same 64 bits.
		uint64_t m_hi_bits;
	};
};
830
831	static color_rgba expand_565(const color_rgba& c)
832	{
833	return color_rgba ((c.r << `3`) \| (c.r >> `2`), (c.g << `2`) \| (c.g >> `4`), (c.b << `3`) \| (c.b >> `2`), `255`);
834	}
835
836	// We only support CC_MIXED non-alpha blocks here because that's the only mode the transcoder uses at the moment.
837	bool unpack_fxt1(const void p, color_rgba pPixels)
838	{
839	const fxt1_block* pBlock = static_cast<const fxt1_block*>(p);
840
841	if (pBlock->m_hi.m_mode == `0`)
842	return false;
843	if (pBlock->m_hi.m_alpha == `1`)
844	return false;
845
846	color_rgba colors[`4`];
847
848	colors[`0`].r = pBlock->m_hi.m_r0;
849	colors[`0`].g = (uint8_t)((pBlock->m_hi.m_g0 << `1`) \| ((pBlock->m_lo.m_t00 >> `1`) ^ (pBlock->m_hi.m_glsb & `1`)));
850	colors[`0`].b = pBlock->m_hi.m_b0;
851	colors[`0`].a = `255`;
852
853	colors[`1`].r = pBlock->m_hi.m_r1;
854	colors[`1`].g = (uint8_t)((pBlock->m_hi.m_g1 << `1`) \| (pBlock->m_hi.m_glsb & `1`));
855	colors[`1`].b = pBlock->m_hi.m_b1;
856	colors[`1`].a = `255`;
857
858	colors[`2`].r = pBlock->m_hi.m_r2;
859	colors[`2`].g = (uint8_t)((pBlock->m_hi.m_g2 << `1`) \| ((pBlock->m_lo.m_t16 >> `1`) ^ (pBlock->m_hi.m_glsb >> `1`)));
860	colors[`2`].b = pBlock->m_hi.m_b2;
861	colors[`2`].a = `255`;
862
863	colors[`3`].r = pBlock->m_hi.m_r3;
864	colors[`3`].g = (uint8_t)((pBlock->m_hi.m_g3 << `1`) \| (pBlock->m_hi.m_glsb >> `1`));
865	colors[`3`].b = pBlock->m_hi.m_b3;
866	colors[`3`].a = `255`;
867
868	for (uint32_t i = `0`; i < `4`; i++)
869	colors[i] = expand_565(colors[i]);
870
871	color_rgba block0_colors[`4`];
872	block0_colors[`0`] = colors[`0`];
873	block0_colors[`1`] = color_rgba ((colors[`0`].r * `2` + colors[`1`].r + `1`) / `3`, (colors[`0`].g * `2` + colors[`1`].g + `1`) / `3`, (colors[`0`].b * `2` + colors[`1`].b + `1`) / `3`, `255`);
874	block0_colors[`2`] = color_rgba ((colors[`1`].r * `2` + colors[`0`].r + `1`) / `3`, (colors[`1`].g * `2` + colors[`0`].g + `1`) / `3`, (colors[`1`].b * `2` + colors[`0`].b + `1`) / `3`, `255`);
875	block0_colors[`3`] = colors[`1`];
876
877	for (uint32_t i = `0`; i < `16`; i++)
878	{
879	const uint32_t sel = (pBlock->m_sels[i >> `2`] >> ((i & `3`) * `2`)) & `3`;
880
881	const uint32_t x = i & `3`;
882	const uint32_t y = i >> `2`;
883	pPixels[x + y * `8`] = block0_colors[sel];
884	}
885
886	color_rgba block1_colors[`4`];
887	block1_colors[`0`] = colors[`2`];
888	block1_colors[`1`] = color_rgba ((colors[`2`].r * `2` + colors[`3`].r + `1`) / `3`, (colors[`2`].g * `2` + colors[`3`].g + `1`) / `3`, (colors[`2`].b * `2` + colors[`3`].b + `1`) / `3`, `255`);
889	block1_colors[`2`] = color_rgba ((colors[`3`].r * `2` + colors[`2`].r + `1`) / `3`, (colors[`3`].g * `2` + colors[`2`].g + `1`) / `3`, (colors[`3`].b * `2` + colors[`2`].b + `1`) / `3`, `255`);
890	block1_colors[`3`] = colors[`3`];
891
892	for (uint32_t i = `0`; i < `16`; i++)
893	{
894	const uint32_t sel = (pBlock->m_sels[`4` + (i >> `2`)] >> ((i & `3`) * `2`)) & `3`;
895
896	const uint32_t x = i & `3`;
897	const uint32_t y = i >> `2`;
898	pPixels[`4` + x + y * `8`] = block1_colors[sel];
899	}
900
901	return true;
902	}
903
// In-memory view of one 8-byte PVRTC2 block: four modulation bytes followed by 32 bits of color data.
// The color word can be read through either bitfield view below or as the raw m_color_data_bits.
struct pvrtc2_block
{
	uint8_t m_modulation[4];

	union
	{
		union
		{
			// Opaque mode: RGB colora=554 and colorb=555
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 4;
				uint32_t m_green_a : 5;
				uint32_t m_red_a : 5;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 5;
				uint32_t m_green_b : 5;
				uint32_t m_red_b : 5;
				uint32_t m_opaque_flag : 1;

			} m_opaque_color_data;

			// Transparent mode: RGBA colora=4433 and colorb=4443
			struct
			{
				uint32_t m_mod_flag : 1;
				uint32_t m_blue_a : 3;
				uint32_t m_green_a : 4;
				uint32_t m_red_a : 4;
				uint32_t m_alpha_a : 3;
				uint32_t m_hard_flag : 1;
				uint32_t m_blue_b : 4;
				uint32_t m_green_b : 4;
				uint32_t m_red_b : 4;
				uint32_t m_alpha_b : 3;
				uint32_t m_opaque_flag : 1;

			} m_trans_color_data;
		};

		uint32_t m_color_data_bits;
	};
};

// Blocks are reinterpreted straight from packed texture memory, so the layout must be exactly 8 bytes.
static_assert(sizeof(pvrtc2_block) == 8, "sizeof(pvrtc2_block) == 8");
948
949	static color_rgba convert_rgb_555_to_888(const color_rgba& col)
950	{
951	return color_rgba ((col [`0`] << `3`) \| (col [`0`] >> `2`), (col [`1`] << `3`) \| (col [`1`] >> `2`), (col [`2`] << `3`) \| (col [`2`] >> `2`), `255`);
952	}
953
954	static color_rgba convert_rgba_5554_to_8888(const color_rgba& col)
955	{
956	return color_rgba ((col [`0`] << `3`) \| (col [`0`] >> `2`), (col [`1`] << `3`) \| (col [`1`] >> `2`), (col [`2`] << `3`) \| (col [`2`] >> `2`), (col [`3`] << `4`) \| col [`3`]);
957	}
958
959	// PVRTC2 is currently limited to only what our transcoder outputs (non-interpolated, hard_flag=1 modulation=0). In this mode, PVRTC2 looks much like BC1/ATC.
960	bool unpack_pvrtc2(const void p, color_rgba pPixels)
961	{
962	const pvrtc2_block* pBlock = static_cast<const pvrtc2_block*>(p);
963
964	if ((!pBlock->m_opaque_color_data.m_hard_flag) \|\| (pBlock->m_opaque_color_data.m_mod_flag))
965	{
966	// This mode isn't supported by the transcoder, so we aren't bothering with it here.
967	return false;
968	}
969
970	color_rgba colors[`4`];
971
972	if (pBlock->m_opaque_color_data.m_opaque_flag)
973	{
974	// colora=554
975	color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << `1`) \| (pBlock->m_opaque_color_data.m_blue_a >> `3`), `255`);
976
977	// colora=555
978	color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, `255`);
979
980	colors[`0`] = convert_rgb_555_to_888(color_a);
981	colors[`3`] = convert_rgb_555_to_888(color_b);
982
983	colors[`1`].set((colors[`0`].r * `5` + colors[`3`].r * `3`) / `8`, (colors[`0`].g * `5` + colors[`3`].g * `3`) / `8`, (colors[`0`].b * `5` + colors[`3`].b * `3`) / `8`, `255`);
984	colors[`2`].set((colors[`0`].r * `3` + colors[`3`].r * `5`) / `8`, (colors[`0`].g * `3` + colors[`3`].g * `5`) / `8`, (colors[`0`].b * `3` + colors[`3`].b * `5`) / `8`, `255`);
985	}
986	else
987	{
988	// colora=4433
989	color_rgba color_a(
990	(pBlock->m_trans_color_data.m_red_a << `1`) \| (pBlock->m_trans_color_data.m_red_a >> `3`),
991	(pBlock->m_trans_color_data.m_green_a << `1`) \| (pBlock->m_trans_color_data.m_green_a >> `3`),
992	(pBlock->m_trans_color_data.m_blue_a << `2`) \| (pBlock->m_trans_color_data.m_blue_a >> `1`),
993	pBlock->m_trans_color_data.m_alpha_a << `1`);
994
995	//colorb=4443
996	color_rgba color_b(
997	(pBlock->m_trans_color_data.m_red_b << `1`) \| (pBlock->m_trans_color_data.m_red_b >> `3`),
998	(pBlock->m_trans_color_data.m_green_b << `1`) \| (pBlock->m_trans_color_data.m_green_b >> `3`),
999	(pBlock->m_trans_color_data.m_blue_b << `1`) \| (pBlock->m_trans_color_data.m_blue_b >> `3`),
1000	(pBlock->m_trans_color_data.m_alpha_b << `1`) \| `1`);
1001
1002	colors[`0`] = convert_rgba_5554_to_8888(color_a);
1003	colors[`3`] = convert_rgba_5554_to_8888(color_b);
1004	}
1005
1006	colors[`1`].set((colors[`0`].r * `5` + colors[`3`].r * `3`) / `8`, (colors[`0`].g * `5` + colors[`3`].g * `3`) / `8`, (colors[`0`].b * `5` + colors[`3`].b * `3`) / `8`, (colors[`0`].a * `5` + colors[`3`].a * `3`) / `8`);
1007	colors[`2`].set((colors[`0`].r * `3` + colors[`3`].r * `5`) / `8`, (colors[`0`].g * `3` + colors[`3`].g * `5`) / `8`, (colors[`0`].b * `3` + colors[`3`].b * `5`) / `8`, (colors[`0`].a * `3` + colors[`3`].a * `5`) / `8`);
1008
1009	for (uint32_t i = `0`; i < `16`; i++)
1010	{
1011	const uint32_t sel = (pBlock->m_modulation[i >> `2`] >> ((i & `3`) * `2`)) & `3`;
1012	pPixels[i] = colors[sel];
1013	}
1014
1015	return true;
1016	}
1017
// Bitfield view of one 8-byte ETC2 EAC R11 block: base value, table index, multiplier and
// 48 bits of selectors split across six byte-wide fields.
struct etc2_eac_r11
{
	uint64_t m_base : 8;
	uint64_t m_table : 4;
	uint64_t m_mul : 4;
	uint64_t m_sels_0 : 8;
	uint64_t m_sels_1 : 8;
	uint64_t m_sels_2 : 8;
	uint64_t m_sels_3 : 8;
	uint64_t m_sels_4 : 8;
	uint64_t m_sels_5 : 8;

	// Returns the 48 selector bits as one integer, with m_sels_0 as the most significant byte
	// and m_sels_5 as the least significant byte.
	uint64_t get_sels() const
	{
		return ((uint64_t)m_sels_0 << 40U) | ((uint64_t)m_sels_1 << 32U) | ((uint64_t)m_sels_2 << 24U) | ((uint64_t)m_sels_3 << 16U) | ((uint64_t)m_sels_4 << 8U) | m_sels_5;
	}

	// Stores the low 48 bits of v into the selector fields (inverse of get_sels()); higher bits of v are ignored.
	void set_sels(uint64_t v)
	{
		m_sels_0 = (v >> 40U) & 0xFF;
		m_sels_1 = (v >> 32U) & 0xFF;
		m_sels_2 = (v >> 24U) & 0xFF;
		m_sels_3 = (v >> 16U) & 0xFF;
		m_sels_4 = (v >> 8U) & 0xFF;
		m_sels_5 = v & 0xFF;
	}
};

// Blocks are reinterpreted straight from packed texture memory, so the layout must be exactly 8 bytes.
static_assert(sizeof(etc2_eac_r11) == 8, "sizeof(etc2_eac_r11) == 8");
1045
1046	struct etc2_eac_rg11
1047	{
1048	etc2_eac_r11 m_c[`2`];
1049	};
1050
1051	void unpack_etc2_eac_r(const void p, color_rgba pPixels, uint32_t c)
1052	{
1053	const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p);
1054	const uint64_t sels = pBlock->get_sels();
1055
1056	const int base = (int)pBlock->m_base * `8` + `4`;
1057	const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * `8`) : `1`;
1058	const int table = (int)pBlock->m_table;
1059
1060	for (uint32_t y = `0`; y < `4`; y++)
1061	{
1062	for (uint32_t x = `0`; x < `4`; x++)
1063	{
1064	const uint32_t shift = `45` - ((y + x * `4`) * `3`);
1065
1066	const uint32_t sel = (uint32_t)((sels >> shift) & `7`);
1067
1068	int val = base + g_etc2_eac_tables[table][sel] * mul;
1069	val = clamp<int>(val, `0`, `2047`);
1070
1071	// Convert to 8-bits with rounding
1072	//pPixels[x + y 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047);*
1073	pPixels[x + y * `4`].m_comps[c] = static_cast<uint8_t>((val * `255` + `1023`) / `2047`);
1074
1075	} // x
1076	} // y
1077	}
1078
1079	void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels)
1080	{
1081	for (uint32_t c = `0`; c < `2`; c++)
1082	{
1083	const etc2_eac_r11* pBlock = &static_cast<const etc2_eac_rg11*>(p)->m_c[c];
1084
1085	unpack_etc2_eac_r(pBlock, pPixels, c);
1086	}
1087	}
1088
1089	void unpack_uastc(const void* p, color_rgba* pPixels)
1090	{
1091	basist::unpack_uastc(*static_cast<const basist::uastc_block>(p), (basist::color32 )pPixels, false);
1092	}
1093
1094	// Unpacks to RGBA, R, RG, or A
1095	bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
1096	{
1097	switch (fmt)
1098	{
1099	case texture_format::cBC1:
1100	{
1101	unpack_bc1(pBlock, pPixels, true);
1102	break;
1103	}
1104	case texture_format::cBC1_NV:
1105	{
1106	unpack_bc1_nv(pBlock, pPixels, true);
1107	break;
1108	}
1109	case texture_format::cBC1_AMD:
1110	{
1111	unpack_bc1_amd(pBlock, pPixels, true);
1112	break;
1113	}
1114	case texture_format::cBC3:
1115	{
1116	return unpack_bc3(pBlock, pPixels);
1117	}
1118	case texture_format::cBC4:
1119	{
1120	// Unpack to R
1121	unpack_bc4(pBlock, &pPixels[`0`].r, sizeof(color_rgba));
1122	break;
1123	}
1124	case texture_format::cBC5:
1125	{
1126	unpack_bc5(pBlock, pPixels);
1127	break;
1128	}
1129	case texture_format::cBC7:
1130	{
1131	return unpack_bc7(pBlock, pPixels);
1132	}
1133	// Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
1134	case texture_format::cETC2_RGB:
1135	case texture_format::cETC1:
1136	case texture_format::cETC1S:
1137	{
1138	return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
1139	}
1140	case texture_format::cETC2_RGBA:
1141	{
1142	if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[`1`], pPixels))
1143	return false;
1144	unpack_etc2_eac(pBlock, pPixels);
1145	break;
1146	}
1147	case texture_format::cETC2_ALPHA:
1148	{
1149	// Unpack to A
1150	unpack_etc2_eac(pBlock, pPixels);
1151	break;
1152	}
1153	case texture_format::cASTC4x4:
1154	{
1155	#if BASISU_USE_ASTC_DECOMPRESS
1156	const bool astc_srgb = false;
1157	basisu_astc::astc::decompress(reinterpret_cast<uint8_t>(pPixels), static_cast<const* uint8_t*>(pBlock), astc_srgb, `4`, `4`);
1158	#else
1159	memset(pPixels, `255`, `16` * sizeof(color_rgba));
1160	#endif
1161	break;
1162	}
1163	case texture_format::cATC_RGB:
1164	{
1165	unpack_atc(pBlock, pPixels);
1166	break;
1167	}
1168	case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1169	{
1170	unpack_atc(static_cast<const uint8_t*>(pBlock) + `8`, pPixels);
1171	unpack_bc4(pBlock, &pPixels[`0`].a, sizeof(color_rgba));
1172	break;
1173	}
1174	case texture_format::cFXT1_RGB:
1175	{
1176	unpack_fxt1(pBlock, pPixels);
1177	break;
1178	}
1179	case texture_format::cPVRTC2_4_RGBA:
1180	{
1181	unpack_pvrtc2(pBlock, pPixels);
1182	break;
1183	}
1184	case texture_format::cETC2_R11_EAC:
1185	{
1186	unpack_etc2_eac_r(static_cast<const etc2_eac_r11 *>(pBlock), pPixels, `0`);
1187	break;
1188	}
1189	case texture_format::cETC2_RG11_EAC:
1190	{
1191	unpack_etc2_eac_rg(pBlock, pPixels);
1192	break;
1193	}
1194	case texture_format::cUASTC4x4:
1195	{
1196	unpack_uastc(pBlock, pPixels);
1197	break;
1198	}
1199	default:
1200	{
1201	assert(`0`);
1202	// TODO
1203	return false;
1204	}
1205	}
1206	return true;
1207	}
1208
1209	bool gpu_image::unpack(image& img) const
1210	{
1211	img.resize(get_pixel_width(), get_pixel_height());
1212	img.set_all(g_black_color);
1213
1214	if (!img.get_width() \|\| !img.get_height())
1215	return true;
1216
1217	if ((m_fmt == texture_format::cPVRTC1_4_RGB) \|\| (m_fmt == texture_format::cPVRTC1_4_RGBA))
1218	{
1219	pvrtc4_image pi(m_width, m_height);
1220
1221	if (get_total_blocks() != pi.get_total_blocks())
1222	return false;
1223
1224	memcpy(&pi.get_blocks()[`0`], get_ptr(), get_size_in_bytes());
1225
1226	pi.deswizzle();
1227
1228	pi.unpack_all_pixels(img);
1229
1230	return true;
1231	}
1232
1233	assert((m_block_width <= cMaxBlockSize) && (m_block_height <= cMaxBlockSize));
1234	color_rgba pixels[cMaxBlockSize * cMaxBlockSize];
1235	for (uint32_t i = `0`; i < cMaxBlockSize * cMaxBlockSize; i++)
1236	pixels[i] = g_black_color;
1237
1238	bool success = true;
1239
1240	for (uint32_t by = `0`; by < m_blocks_y; by++)
1241	{
1242	for (uint32_t bx = `0`; bx < m_blocks_x; bx++)
1243	{
1244	const void* pBlock = get_block_ptr(bx, by);
1245
1246	if (!unpack_block(m_fmt, pBlock, pixels))
1247	success = false;
1248
1249	img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
1250	} // bx
1251	} // by
1252
1253	return success;
1254	}
1255
// 12-byte identifier copied to the start of every KTX file written below: 0xAB 'K' 'T' 'X' ' ' '1' '1' 0xBB '\r' '\n' 0x1A '\n'.
static const uint8_t g_ktx_file_id[12] =
{
	0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A
};

// KTX/GL enums
enum
{
	// Values for the header's endianness field.
	KTX_ENDIAN = 0x04030201,
	KTX_OPPOSITE_ENDIAN = 0x01020304,

	KTX_ETC1_RGB8_OES = 0x8D64,

	// Base internal formats.
	KTX_RED = 0x1903,
	KTX_RG = 0x8227,
	KTX_RGB = 0x1907,
	KTX_RGBA = 0x1908,

	// Compressed internal formats.
	KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
	KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
	KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
	KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD,
	KTX_COMPRESSED_RGB8_ETC2 = 0x9274,
	KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278,
	KTX_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C,
	KTX_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D,
	KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
	KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
	KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
	KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!
	KTX_ATC_RGB_AMD = 0x8C92,
	KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,
	KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
	KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
	KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
	KTX_COMPRESSED_R11_EAC = 0x9270,
	KTX_COMPRESSED_RG11_EAC = 0x9272
};
1289
1290	struct ktx_header
1291	{
1292	uint8_t m_identifier[`12`];
1293	packed_uint<`4`> m_endianness;
1294	packed_uint<`4`> m_glType;
1295	packed_uint<`4`> m_glTypeSize;
1296	packed_uint<`4`> m_glFormat;
1297	packed_uint<`4`> m_glInternalFormat;
1298	packed_uint<`4`> m_glBaseInternalFormat;
1299	packed_uint<`4`> m_pixelWidth;
1300	packed_uint<`4`> m_pixelHeight;
1301	packed_uint<`4`> m_pixelDepth;
1302	packed_uint<`4`> m_numberOfArrayElements;
1303	packed_uint<`4`> m_numberOfFaces;
1304	packed_uint<`4`> m_numberOfMipmapLevels;
1305	packed_uint<`4`> m_bytesOfKeyValueData;
1306
1307	void clear() { clear_obj(*this); }
1308	};
1309
1310	// Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index]
1311	bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag)
1312	{
1313	if (!gpu_images.size())
1314	{
1315	assert(`0`);
1316	return false;
1317	}
1318
1319	uint32_t width = `0`, height = `0`, total_levels = `0`;
1320	basisu::texture_format fmt = texture_format::cInvalidTextureFormat;
1321
1322	if (cubemap_flag)
1323	{
1324	if ((gpu_images.size() % `6`) != `0`)
1325	{
1326	assert(`0`);
1327	return false;
1328	}
1329	}
1330
1331	for (uint32_t array_index = `0`; array_index < gpu_images.size(); array_index++)
1332	{
1333	const gpu_image_vec &levels = gpu_images [array_index];
1334
1335	if (!levels.size())
1336	{
1337	// Empty mip chain
1338	assert(`0`);
1339	return false;
1340	}
1341
1342	if (!array_index)
1343	{
1344	width = levels [`0`].get_pixel_width();
1345	height = levels [`0`].get_pixel_height();
1346	total_levels = (uint32_t)levels.size();
1347	fmt = levels [`0`].get_format();
1348	}
1349	else
1350	{
1351	if ((width != levels [`0`].get_pixel_width()) \|\|
1352	(height != levels [`0`].get_pixel_height()) \|\|
1353	(total_levels != levels.size()))
1354	{
1355	// All cubemap/texture array faces must be the same dimension
1356	assert(`0`);
1357	return false;
1358	}
1359	}
1360
1361	for (uint32_t level_index = `0`; level_index < levels.size(); level_index++)
1362	{
1363	if (level_index)
1364	{
1365	if ( (levels [level_index].get_pixel_width() != maximum<uint32_t>(`1`, levels [`0`].get_pixel_width() >> level_index)) \|\|
1366	(levels [level_index].get_pixel_height() != maximum<uint32_t>(`1`, levels [`0`].get_pixel_height() >> level_index)) )
1367	{
1368	// Malformed mipmap chain
1369	assert(`0`);
1370	return false;
1371	}
1372	}
1373
1374	if (fmt != levels [level_index].get_format())
1375	{
1376	// All input textures must use the same GPU format
1377	assert(`0`);
1378	return false;
1379	}
1380	}
1381	}
1382
1383	uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB;
1384
1385	switch (fmt)
1386	{
1387	case texture_format::cBC1:
1388	case texture_format::cBC1_NV:
1389	case texture_format::cBC1_AMD:
1390	{
1391	internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
1392	break;
1393	}
1394	case texture_format::cBC3:
1395	{
1396	internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT;
1397	base_internal_fmt = KTX_RGBA;
1398	break;
1399	}
1400	case texture_format::cBC4:
1401	{
1402	internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT;
1403	base_internal_fmt = KTX_RED;
1404	break;
1405	}
1406	case texture_format::cBC5:
1407	{
1408	internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT;
1409	base_internal_fmt = KTX_RG;
1410	break;
1411	}
1412	case texture_format::cETC1:
1413	case texture_format::cETC1S:
1414	{
1415	internal_fmt = KTX_ETC1_RGB8_OES;
1416	break;
1417	}
1418	case texture_format::cETC2_RGB:
1419	{
1420	internal_fmt = KTX_COMPRESSED_RGB8_ETC2;
1421	break;
1422	}
1423	case texture_format::cETC2_RGBA:
1424	{
1425	internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC;
1426	base_internal_fmt = KTX_RGBA;
1427	break;
1428	}
1429	case texture_format::cBC7:
1430	{
1431	internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM;
1432	base_internal_fmt = KTX_RGBA;
1433	break;
1434	}
1435	case texture_format::cPVRTC1_4_RGB:
1436	{
1437	internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG;
1438	break;
1439	}
1440	case texture_format::cPVRTC1_4_RGBA:
1441	{
1442	internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;
1443	base_internal_fmt = KTX_RGBA;
1444	break;
1445	}
1446	case texture_format::cASTC4x4:
1447	{
1448	internal_fmt = KTX_COMPRESSED_RGBA_ASTC_4x4_KHR;
1449	base_internal_fmt = KTX_RGBA;
1450	break;
1451	}
1452	case texture_format::cATC_RGB:
1453	{
1454	internal_fmt = KTX_ATC_RGB_AMD;
1455	break;
1456	}
1457	case texture_format::cATC_RGBA_INTERPOLATED_ALPHA:
1458	{
1459	internal_fmt = KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD;
1460	base_internal_fmt = KTX_RGBA;
1461	break;
1462	}
1463	case texture_format::cETC2_R11_EAC:
1464	{
1465	internal_fmt = KTX_COMPRESSED_R11_EAC;
1466	base_internal_fmt = KTX_RED;
1467	break;
1468	}
1469	case texture_format::cETC2_RG11_EAC:
1470	{
1471	internal_fmt = KTX_COMPRESSED_RG11_EAC;
1472	base_internal_fmt = KTX_RG;
1473	break;
1474	}
1475	case texture_format::cUASTC4x4:
1476	{
1477	internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR;
1478	base_internal_fmt = KTX_RGBA;
1479	break;
1480	}
1481	case texture_format::cFXT1_RGB:
1482	{
1483	internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX;
1484	break;
1485	}
1486	case texture_format::cPVRTC2_4_RGBA:
1487	{
1488	internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;
1489	base_internal_fmt = KTX_RGBA;
1490	break;
1491	}
1492	default:
1493	{
1494	// TODO
1495	assert(`0`);
1496	return false;
1497	}
1498	}
1499
1500	ktx_header header;
1501	header.clear();
1502	memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id));
1503	header.m_endianness = KTX_ENDIAN;
1504
1505	header.m_pixelWidth = width;
1506	header.m_pixelHeight = height;
1507
1508	header.m_glTypeSize = `1`;
1509
1510	header.m_glInternalFormat = internal_fmt;
1511	header.m_glBaseInternalFormat = base_internal_fmt;
1512
1513	header.m_numberOfArrayElements = (uint32_t)(cubemap_flag ? (gpu_images.size() / `6`) : gpu_images.size());
1514	if (header.m_numberOfArrayElements == `1`)
1515	header.m_numberOfArrayElements = `0`;
1516
1517	header.m_numberOfMipmapLevels = total_levels;
1518	header.m_numberOfFaces = cubemap_flag ? `6` : `1`;
1519
1520	append_vector(ktx_data, (uint8_t )&header, sizeof*(header));
1521
1522	for (uint32_t level_index = `0`; level_index < total_levels; level_index++)
1523	{
1524	uint32_t img_size = gpu_images [`0`][level_index].get_size_in_bytes();
1525
1526	if ((header.m_numberOfFaces == `1`) \|\| (header.m_numberOfArrayElements > `1`))
1527	{
1528	img_size = img_size * header.m_numberOfFaces * maximum<uint32_t>(`1`, header.m_numberOfArrayElements);
1529	}
1530
1531	assert(img_size && ((img_size & `3`) == `0`));
1532
1533	packed_uint<`4`> packed_img_size(img_size);
1534	append_vector(ktx_data, (uint8_t )&packed_img_size, sizeof*(packed_img_size));
1535
1536	uint32_t bytes_written = `0`;
1537
1538	for (uint32_t array_index = `0`; array_index < maximum<uint32_t>(`1`, header.m_numberOfArrayElements); array_index++)
1539	{
1540	for (uint32_t face_index = `0`; face_index < header.m_numberOfFaces; face_index++)
1541	{
1542	const gpu_image& img = gpu_images [cubemap_flag ? (array_index * `6` + face_index) : array_index][level_index];
1543
1544	append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes());
1545
1546	bytes_written += img.get_size_in_bytes();
1547	}
1548
1549	} // array_index
1550
1551	} // level_index
1552
1553	return true;
1554	}
1555
1556	bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag)
1557	{
1558	std::string extension(string_tolower(string_get_extension(pFilename)));
1559
1560	uint8_vec filedata;
1561	if (extension == "ktx")
1562	{
1563	if (!create_ktx_texture_file(filedata, g, cubemap_flag))
1564	return false;
1565	}
1566	else if (extension == "pvr")
1567	{
1568	// TODO
1569	return false;
1570	}
1571	else if (extension == "dds")
1572	{
1573	// TODO
1574	return false;
1575	}
1576	else
1577	{
1578	// unsupported texture format
1579	assert(`0`);
1580	return false;
1581	}
1582
1583	return basisu::write_vec_to_file(pFilename, filedata);
1584	}
1585
1586	bool write_compressed_texture_file(const char* pFilename, const gpu_image& g)
1587	{
1588	basisu::vector<gpu_image_vec> v;
1589	enlarge_vector(v, `1`)->push_back(g);
1590	return write_compressed_texture_file(pFilename, v, false);
1591	}
1592
1593	//const uint32_t OUT_FILE_MAGIC = 'TEXC';
1594	struct out_file_header
1595	{
1596	packed_uint<`4`> m_magic;
1597	packed_uint<`4`> m_pad;
1598	packed_uint<`4`> m_width;
1599	packed_uint<`4`> m_height;
1600	};
1601
1602	// As no modern tool supports FXT1 format .KTX files, let's write .OUT files and make sure 3DFX's original tools shipped in 1999 can decode our encoded output.
1603	bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi)
1604	{
1605	out_file_header hdr;
1606	//hdr.m_magic = OUT_FILE_MAGIC;
1607	hdr.m_magic.m_bytes[`0`] = `67`;
1608	hdr.m_magic.m_bytes[`1`] = `88`;
1609	hdr.m_magic.m_bytes[`2`] = `69`;
1610	hdr.m_magic.m_bytes[`3`] = `84`;
1611	hdr.m_pad = `0`;
1612	hdr.m_width = gi.get_blocks_x() * `8`;
1613	hdr.m_height = gi.get_blocks_y() * `4`;
1614
1615	FILE* pFile = nullptr;
1616	#ifdef _WIN32
1617	fopen_s(&pFile, pFilename, "wb");
1618	#else
1619	pFile = fopen(pFilename, "wb");
1620	#endif
1621	if (!pFile)
1622	return false;
1623
1624	fwrite(&hdr, sizeof(hdr), `1`, pFile);
1625	fwrite(gi.get_ptr(), gi.get_size_in_bytes(), `1`, pFile);
1626
1627	return fclose(pFile) != EOF;
1628	}
1629	} // basisu
1630
1631

Browse the source code of Godot/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp