astcenc_decompress_symbolic.cpp source code [Godot/thirdparty/astcenc/astcenc_decompress_symbolic.cpp]

1	// SPDX-License-Identifier: Apache-2.0
2	// ----------------------------------------------------------------------------
3	// Copyright 2011-2023 Arm Limited
4	//
5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6	// use this file except in compliance with the License. You may obtain a copy
7	// of the License at:
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing, software
12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14	// License for the specific language governing permissions and limitations
15	// under the License.
16	// ----------------------------------------------------------------------------
17
18	/**
19	* @brief Functions to decompress a symbolic block.
20	*/
21
22	#include "astcenc_internal.h"
23
24	#include <stdio.h>
25	#include <assert.h>
26
27	/**
28	* @brief Compute the integer linear interpolation of two color endpoints.
29	*
30	* @param decode_mode The ASTC profile (linear or sRGB)
31	* @param color0 The endpoint0 color.
32	* @param color1 The endpoint1 color.
33	* @param weights The interpolation weight (between 0 and 64).
34	*
35	* @return The interpolated color.
36	*/
37	static vint4 lerp_color_int(
38	astcenc_profile decode_mode,
39	vint4 color0,
40	vint4 color1,
41	vint4 weights
42	) {
43	vint4 weight1 = weights;
44	vint4 weight0 = vint4 (`64`) - weight1;
45
46	if (decode_mode == ASTCENC_PRF_LDR_SRGB)
47	{
48	color0 = asr<`8`>(color0);
49	color1 = asr<`8`>(color1);
50	}
51
52	vint4 color = (color0 * weight0) + (color1 * weight1) + vint4 (`32`);
53	color = asr<`6`>(color);
54
55	if (decode_mode == ASTCENC_PRF_LDR_SRGB)
56	{
57	color = color * vint4 (`257`);
58	}
59
60	return color;
61	}
62
63
64	/**
65	* @brief Convert integer color value into a float value for the decoder.
66	*
67	* @param data The integer color value post-interpolation.
68	* @param lns_mask If set treat lane as HDR (LNS) else LDR (unorm16).
69	*
70	* @return The float color value.
71	*/
72	static inline vfloat4 decode_texel(
73	vint4 data,
74	vmask4 lns_mask
75	) {
76	vint4 color_lns = vint4::zero();
77	vint4 color_unorm = vint4::zero();
78
79	if (any(lns_mask))
80	{
81	color_lns = lns_to_sf16(data);
82	}
83
84	if (!all(lns_mask))
85	{
86	color_unorm = unorm16_to_sf16(data);
87	}
88
89	// Pick components and then convert to FP16
90	vint4 datai = select(color_unorm, color_lns, lns_mask);
91	return float16_to_float(datai);
92	}
93
94	/ See header for documentation. /
95	void unpack_weights(
96	const block_size_descriptor& bsd,
97	const symbolic_compressed_block& scb,
98	const decimation_info& di,
99	bool is_dual_plane,
100	int weights_plane1[BLOCK_MAX_TEXELS],
101	int weights_plane2[BLOCK_MAX_TEXELS]
102	) {
103	// Safe to overshoot as all arrays are allocated to full size
104	if (!is_dual_plane)
105	{
106	// Build full 64-entry weight lookup table
107	vint4 tab0(reinterpret_cast<const int*>(scb.weights + `0`));
108	vint4 tab1(reinterpret_cast<const int*>(scb.weights + `16`));
109	vint4 tab2(reinterpret_cast<const int*>(scb.weights + `32`));
110	vint4 tab3(reinterpret_cast<const int*>(scb.weights + `48`));
111
112	vint tab0p, tab1p, tab2p, tab3p;
113	vtable_prepare(tab0, tab1, tab2, tab3, tab0p, tab1p, tab2p, tab3p);
114
115	for (unsigned int i = `0`; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
116	{
117	vint summed_value(`8`);
118	vint weight_count(di.texel_weight_count + i);
119	int max_weight_count = hmax(weight_count).lane<`0`>();
120
121	promise(max_weight_count > `0`);
122	for (int j = `0`; j < max_weight_count; j++)
123	{
124	vint texel_weights(di.texel_weights_tr[j] + i);
125	vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
126
127	summed_value += vtable_8bt_32bi(tab0p, tab1p, tab2p, tab3p, texel_weights) * texel_weights_int;
128	}
129
130	store(lsr<`4`>(summed_value), weights_plane1 + i);
131	}
132	}
133	else
134	{
135	// Build a 32-entry weight lookup table per plane
136	// Plane 1
137	vint4 tab0_plane1(reinterpret_cast<const int*>(scb.weights + `0`));
138	vint4 tab1_plane1(reinterpret_cast<const int*>(scb.weights + `16`));
139	vint tab0_plane1p, tab1_plane1p;
140	vtable_prepare(tab0_plane1, tab1_plane1, tab0_plane1p, tab1_plane1p);
141
142	// Plane 2
143	vint4 tab0_plane2(reinterpret_cast<const int*>(scb.weights + `32`));
144	vint4 tab1_plane2(reinterpret_cast<const int*>(scb.weights + `48`));
145	vint tab0_plane2p, tab1_plane2p;
146	vtable_prepare(tab0_plane2, tab1_plane2, tab0_plane2p, tab1_plane2p);
147
148	for (unsigned int i = `0`; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
149	{
150	vint sum_plane1(`8`);
151	vint sum_plane2(`8`);
152
153	vint weight_count(di.texel_weight_count + i);
154	int max_weight_count = hmax(weight_count).lane<`0`>();
155
156	promise(max_weight_count > `0`);
157	for (int j = `0`; j < max_weight_count; j++)
158	{
159	vint texel_weights(di.texel_weights_tr[j] + i);
160	vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
161
162	sum_plane1 += vtable_8bt_32bi(tab0_plane1p, tab1_plane1p, texel_weights) * texel_weights_int;
163	sum_plane2 += vtable_8bt_32bi(tab0_plane2p, tab1_plane2p, texel_weights) * texel_weights_int;
164	}
165
166	store(lsr<`4`>(sum_plane1), weights_plane1 + i);
167	store(lsr<`4`>(sum_plane2), weights_plane2 + i);
168	}
169	}
170	}
171
172	/**
173	* @brief Return an FP32 NaN value for use in error colors.
174	*
175	* This NaN encoding will turn into 0xFFFF when converted to an FP16 NaN.
176	*
177	* @return The float color value.
178	*/
179	static float error_color_nan()
180	{
181	if32 v;
182	v.u = `0xFFFFE000U`;
183	return v.f;
184	}
185
186	/ See header for documentation. /
187	void decompress_symbolic_block(
188	astcenc_profile decode_mode,
189	const block_size_descriptor& bsd,
190	int xpos,
191	int ypos,
192	int zpos,
193	const symbolic_compressed_block& scb,
194	image_block& blk
195	) {
196	blk.xpos = xpos;
197	blk.ypos = ypos;
198	blk.zpos = zpos;
199
200	blk.data_min = vfloat4::zero();
201	blk.data_mean = vfloat4::zero();
202	blk.data_max = vfloat4::zero();
203	blk.grayscale = false;
204
205	// If we detected an error-block, blow up immediately.
206	if (scb.block_type == SYM_BTYPE_ERROR)
207	{
208	for (unsigned int i = `0`; i < bsd.texel_count; i++)
209	{
210	blk.data_r[i] = error_color_nan();
211	blk.data_g[i] = error_color_nan();
212	blk.data_b[i] = error_color_nan();
213	blk.data_a[i] = error_color_nan();
214	blk.rgb_lns[i] = `0`;
215	blk.alpha_lns[i] = `0`;
216	}
217
218	return;
219	}
220
221	if ((scb.block_type == SYM_BTYPE_CONST_F16) \|\|
222	(scb.block_type == SYM_BTYPE_CONST_U16))
223	{
224	vfloat4 color;
225	uint8_t use_lns = `0`;
226
227	// UNORM16 constant color block
228	if (scb.block_type == SYM_BTYPE_CONST_U16)
229	{
230	vint4 colori(scb.constant_color);
231
232	// For sRGB decoding a real decoder would just use the top 8 bits for color conversion.
233	// We don't color convert, so rescale the top 8 bits into the full 16 bit dynamic range.
234	if (decode_mode == ASTCENC_PRF_LDR_SRGB)
235	{
236	colori = asr<`8`>(colori) * `257`;
237	}
238
239	vint4 colorf16 = unorm16_to_sf16(colori);
240	color = float16_to_float(colorf16);
241	}
242	// FLOAT16 constant color block
243	else
244	{
245	switch (decode_mode)
246	{
247	case ASTCENC_PRF_LDR_SRGB:
248	case ASTCENC_PRF_LDR:
249	color = vfloat4 (error_color_nan());
250	break;
251	case ASTCENC_PRF_HDR_RGB_LDR_A:
252	case ASTCENC_PRF_HDR:
253	// Constant-color block; unpack from FP16 to FP32.
254	color = float16_to_float(vint4 (scb.constant_color));
255	use_lns = `1`;
256	break;
257	}
258	}
259
260	for (unsigned int i = `0`; i < bsd.texel_count; i++)
261	{
262	blk.data_r[i] = color.lane<`0`>();
263	blk.data_g[i] = color.lane<`1`>();
264	blk.data_b[i] = color.lane<`2`>();
265	blk.data_a[i] = color.lane<`3`>();
266	blk.rgb_lns[i] = use_lns;
267	blk.alpha_lns[i] = use_lns;
268	}
269
270	return;
271	}
272
273	// Get the appropriate partition-table entry
274	int partition_count = scb.partition_count;
275	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
276
277	// Get the appropriate block descriptors
278	const auto& bm = bsd.get_block_mode(scb.block_mode);
279	const auto& di = bsd.get_decimation_info(bm.decimation_mode);
280
281	bool is_dual_plane = static_cast<bool>(bm.is_dual_plane);
282
283	// Unquantize and undecimate the weights
284	int plane1_weights[BLOCK_MAX_TEXELS];
285	int plane2_weights[BLOCK_MAX_TEXELS];
286	unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);
287
288	// Now that we have endpoint colors and weights, we can unpack texel colors
289	int plane2_component = scb.plane2_component;
290	vmask4 plane2_mask = vint4::lane_id() == vint4 (plane2_component);
291
292	for (int i = `0`; i < partition_count; i++)
293	{
294	// Decode the color endpoints for this partition
295	vint4 ep0;
296	vint4 ep1;
297	bool rgb_lns;
298	bool a_lns;
299
300	unpack_color_endpoints(decode_mode,
301	scb.color_formats[i],
302	scb.color_values[i],
303	rgb_lns, a_lns,
304	ep0, ep1);
305
306	vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns);
307
308	int texel_count = pi.partition_texel_count[i];
309	for (int j = `0`; j < texel_count; j++)
310	{
311	int tix = pi.texels_of_partition[i][j];
312	vint4 weight = select(vint4 (plane1_weights[tix]), vint4 (plane2_weights[tix]), plane2_mask);
313	vint4 color = lerp_color_int(decode_mode, ep0, ep1, weight);
314	vfloat4 colorf = decode_texel(color, lns_mask);
315
316	blk.data_r[tix] = colorf.lane<`0`>();
317	blk.data_g[tix] = colorf.lane<`1`>();
318	blk.data_b[tix] = colorf.lane<`2`>();
319	blk.data_a[tix] = colorf.lane<`3`>();
320	}
321	}
322	}
323
324	#if !defined(ASTCENC_DECOMPRESS_ONLY)
325
326	/ See header for documentation. /
327	float compute_symbolic_block_difference_2plane(
328	const astcenc_config& config,
329	const block_size_descriptor& bsd,
330	const symbolic_compressed_block& scb,
331	const image_block& blk
332	) {
333	// If we detected an error-block, blow up immediately.
334	if (scb.block_type == SYM_BTYPE_ERROR)
335	{
336	return ERROR_CALC_DEFAULT;
337	}
338
339	assert(scb.block_mode >= `0`);
340	assert(scb.partition_count == `1`);
341	assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == `1`);
342
343	// Get the appropriate block descriptor
344	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
345	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
346
347	// Unquantize and undecimate the weights
348	int plane1_weights[BLOCK_MAX_TEXELS];
349	int plane2_weights[BLOCK_MAX_TEXELS];
350	unpack_weights(bsd, scb, di, true, plane1_weights, plane2_weights);
351
352	vmask4 plane2_mask = vint4::lane_id() == vint4 (scb.plane2_component);
353
354	vfloat4 summa = vfloat4::zero();
355
356	// Decode the color endpoints for this partition
357	vint4 ep0;
358	vint4 ep1;
359	bool rgb_lns;
360	bool a_lns;
361
362	unpack_color_endpoints(config.profile,
363	scb.color_formats[`0`],
364	scb.color_values[`0`],
365	rgb_lns, a_lns,
366	ep0, ep1);
367
368	// Unpack and compute error for each texel in the partition
369	unsigned int texel_count = bsd.texel_count;
370	for (unsigned int i = `0`; i < texel_count; i++)
371	{
372	vint4 weight = select(vint4 (plane1_weights[i]), vint4 (plane2_weights[i]), plane2_mask);
373	vint4 colori = lerp_color_int(config.profile, ep0, ep1, weight);
374
375	vfloat4 color = int_to_float(colori);
376	vfloat4 oldColor = blk.texel(i);
377
378	// Compare error using a perceptual decode metric for RGBM textures
379	if (config.flags & ASTCENC_FLG_MAP_RGBM)
380	{
381	// Fail encodings that result in zero weight M pixels. Note that this can cause
382	// "interesting" artifacts if we reject all useful encodings - we typically get max
383	// brightness encodings instead which look just as bad. We recommend users apply a
384	// bias to their stored M value, limiting the lower value to 16 or 32 to avoid
385	// getting small M values post-quantization, but we can't prove it would never
386	// happen, especially at low bit rates ...
387	if (color.lane<`3`>() == `0.0f`)
388	{
389	return -ERROR_CALC_DEFAULT;
390	}
391
392	// Compute error based on decoded RGBM color
393	color = vfloat4 (
394	color.lane<`0`>() * color.lane<`3`>() * config.rgbm_m_scale,
395	color.lane<`1`>() * color.lane<`3`>() * config.rgbm_m_scale,
396	color.lane<`2`>() * color.lane<`3`>() * config.rgbm_m_scale,
397	`1.0f`
398	);
399
400	oldColor = vfloat4 (
401	oldColor.lane<`0`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
402	oldColor.lane<`1`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
403	oldColor.lane<`2`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
404	`1.0f`
405	);
406	}
407
408	vfloat4 error = oldColor - color;
409	error = min(abs(error), `1e15f`);
410	error = error * error;
411
412	summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
413	}
414
415	return summa.lane<`0`>();
416	}
417
418	/ See header for documentation. /
419	float compute_symbolic_block_difference_1plane(
420	const astcenc_config& config,
421	const block_size_descriptor& bsd,
422	const symbolic_compressed_block& scb,
423	const image_block& blk
424	) {
425	assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == `0`);
426
427	// If we detected an error-block, blow up immediately.
428	if (scb.block_type == SYM_BTYPE_ERROR)
429	{
430	return ERROR_CALC_DEFAULT;
431	}
432
433	assert(scb.block_mode >= `0`);
434
435	// Get the appropriate partition-table entry
436	unsigned int partition_count = scb.partition_count;
437	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
438
439	// Get the appropriate block descriptor
440	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
441	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
442
443	// Unquantize and undecimate the weights
444	int plane1_weights[BLOCK_MAX_TEXELS];
445	unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
446
447	vfloat4 summa = vfloat4::zero();
448	for (unsigned int i = `0`; i < partition_count; i++)
449	{
450	// Decode the color endpoints for this partition
451	vint4 ep0;
452	vint4 ep1;
453	bool rgb_lns;
454	bool a_lns;
455
456	unpack_color_endpoints(config.profile,
457	scb.color_formats[i],
458	scb.color_values[i],
459	rgb_lns, a_lns,
460	ep0, ep1);
461
462	// Unpack and compute error for each texel in the partition
463	unsigned int texel_count = pi.partition_texel_count[i];
464	for (unsigned int j = `0`; j < texel_count; j++)
465	{
466	unsigned int tix = pi.texels_of_partition[i][j];
467	vint4 colori = lerp_color_int(config.profile, ep0, ep1,
468	vint4 (plane1_weights[tix]));
469
470	vfloat4 color = int_to_float(colori);
471	vfloat4 oldColor = blk.texel(tix);
472
473	// Compare error using a perceptual decode metric for RGBM textures
474	if (config.flags & ASTCENC_FLG_MAP_RGBM)
475	{
476	// Fail encodings that result in zero weight M pixels. Note that this can cause
477	// "interesting" artifacts if we reject all useful encodings - we typically get max
478	// brightness encodings instead which look just as bad. We recommend users apply a
479	// bias to their stored M value, limiting the lower value to 16 or 32 to avoid
480	// getting small M values post-quantization, but we can't prove it would never
481	// happen, especially at low bit rates ...
482	if (color.lane<`3`>() == `0.0f`)
483	{
484	return -ERROR_CALC_DEFAULT;
485	}
486
487	// Compute error based on decoded RGBM color
488	color = vfloat4 (
489	color.lane<`0`>() * color.lane<`3`>() * config.rgbm_m_scale,
490	color.lane<`1`>() * color.lane<`3`>() * config.rgbm_m_scale,
491	color.lane<`2`>() * color.lane<`3`>() * config.rgbm_m_scale,
492	`1.0f`
493	);
494
495	oldColor = vfloat4 (
496	oldColor.lane<`0`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
497	oldColor.lane<`1`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
498	oldColor.lane<`2`>() * oldColor.lane<`3`>() * config.rgbm_m_scale,
499	`1.0f`
500	);
501	}
502
503	vfloat4 error = oldColor - color;
504	error = min(abs(error), `1e15f`);
505	error = error * error;
506
507	summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
508	}
509	}
510
511	return summa.lane<`0`>();
512	}
513
514	/ See header for documentation. /
515	float compute_symbolic_block_difference_1plane_1partition(
516	const astcenc_config& config,
517	const block_size_descriptor& bsd,
518	const symbolic_compressed_block& scb,
519	const image_block& blk
520	) {
521	// If we detected an error-block, blow up immediately.
522	if (scb.block_type == SYM_BTYPE_ERROR)
523	{
524	return ERROR_CALC_DEFAULT;
525	}
526
527	assert(scb.block_mode >= `0`);
528	assert(bsd.get_partition_info(scb.partition_count, scb.partition_index).partition_count == `1`);
529
530	// Get the appropriate block descriptor
531	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
532	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
533
534	// Unquantize and undecimate the weights
535	alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
536	unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
537
538	// Decode the color endpoints for this partition
539	vint4 ep0;
540	vint4 ep1;
541	bool rgb_lns;
542	bool a_lns;
543
544	unpack_color_endpoints(config.profile,
545	scb.color_formats[`0`],
546	scb.color_values[`0`],
547	rgb_lns, a_lns,
548	ep0, ep1);
549
550
551	// Pre-shift sRGB so things round correctly
552	if (config.profile == ASTCENC_PRF_LDR_SRGB)
553	{
554	ep0 = asr<`8`>(ep0);
555	ep1 = asr<`8`>(ep1);
556	}
557
558	// Unpack and compute error for each texel in the partition
559	vfloatacc summav = vfloatacc::zero();
560
561	vint lane_id = vint::lane_id();
562	vint srgb_scale(config.profile == ASTCENC_PRF_LDR_SRGB ? `257` : `1`);
563
564	unsigned int texel_count = bsd.texel_count;
565	for (unsigned int i = `0`; i < texel_count; i += ASTCENC_SIMD_WIDTH)
566	{
567	// Compute EP1 contribution
568	vint weight1 = vint::loada(plane1_weights + i);
569	vint ep1_r = vint (ep1.lane<`0`>()) * weight1;
570	vint ep1_g = vint (ep1.lane<`1`>()) * weight1;
571	vint ep1_b = vint (ep1.lane<`2`>()) * weight1;
572	vint ep1_a = vint (ep1.lane<`3`>()) * weight1;
573
574	// Compute EP0 contribution
575	vint weight0 = vint (`64`) - weight1;
576	vint ep0_r = vint (ep0.lane<`0`>()) * weight0;
577	vint ep0_g = vint (ep0.lane<`1`>()) * weight0;
578	vint ep0_b = vint (ep0.lane<`2`>()) * weight0;
579	vint ep0_a = vint (ep0.lane<`3`>()) * weight0;
580
581	// Shift so things round correctly
582	vint colori_r = asr<`6`>(ep0_r + ep1_r + vint (`32`)) * srgb_scale;
583	vint colori_g = asr<`6`>(ep0_g + ep1_g + vint (`32`)) * srgb_scale;
584	vint colori_b = asr<`6`>(ep0_b + ep1_b + vint (`32`)) * srgb_scale;
585	vint colori_a = asr<`6`>(ep0_a + ep1_a + vint (`32`)) * srgb_scale;
586
587	// Compute color diff
588	vfloat color_r = int_to_float(colori_r);
589	vfloat color_g = int_to_float(colori_g);
590	vfloat color_b = int_to_float(colori_b);
591	vfloat color_a = int_to_float(colori_a);
592
593	vfloat color_orig_r = loada(blk.data_r + i);
594	vfloat color_orig_g = loada(blk.data_g + i);
595	vfloat color_orig_b = loada(blk.data_b + i);
596	vfloat color_orig_a = loada(blk.data_a + i);
597
598	vfloat color_error_r = min(abs(color_orig_r - color_r), vfloat (`1e15f`));
599	vfloat color_error_g = min(abs(color_orig_g - color_g), vfloat (`1e15f`));
600	vfloat color_error_b = min(abs(color_orig_b - color_b), vfloat (`1e15f`));
601	vfloat color_error_a = min(abs(color_orig_a - color_a), vfloat (`1e15f`));
602
603	// Compute squared error metric
604	color_error_r = color_error_r * color_error_r;
605	color_error_g = color_error_g * color_error_g;
606	color_error_b = color_error_b * color_error_b;
607	color_error_a = color_error_a * color_error_a;
608
609	vfloat metric = color_error_r * blk.channel_weight.lane<`0`>()
610	+ color_error_g * blk.channel_weight.lane<`1`>()
611	+ color_error_b * blk.channel_weight.lane<`2`>()
612	+ color_error_a * blk.channel_weight.lane<`3`>();
613
614	// Mask off bad lanes
615	vmask mask = lane_id < vint (texel_count);
616	lane_id += vint (ASTCENC_SIMD_WIDTH);
617	haccumulate(summav, metric, mask);
618	}
619
620	return hadd_s(summav);
621	}
622
623	#endif
624

Browse the source code of Godot/thirdparty/astcenc/astcenc_decompress_symbolic.cpp