1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2023 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Functions to decompress a symbolic block.
20 */
21
22#include "astcenc_internal.h"
23
24#include <stdio.h>
25#include <assert.h>
26
27/**
28 * @brief Compute the integer linear interpolation of two color endpoints.
29 *
30 * @param decode_mode The ASTC profile (linear or sRGB)
31 * @param color0 The endpoint0 color.
32 * @param color1 The endpoint1 color.
33 * @param weights The interpolation weight (between 0 and 64).
34 *
35 * @return The interpolated color.
36 */
37static vint4 lerp_color_int(
38 astcenc_profile decode_mode,
39 vint4 color0,
40 vint4 color1,
41 vint4 weights
42) {
43 vint4 weight1 = weights;
44 vint4 weight0 = vint4(64) - weight1;
45
46 if (decode_mode == ASTCENC_PRF_LDR_SRGB)
47 {
48 color0 = asr<8>(color0);
49 color1 = asr<8>(color1);
50 }
51
52 vint4 color = (color0 * weight0) + (color1 * weight1) + vint4(32);
53 color = asr<6>(color);
54
55 if (decode_mode == ASTCENC_PRF_LDR_SRGB)
56 {
57 color = color * vint4(257);
58 }
59
60 return color;
61}
62
63
64/**
65 * @brief Convert integer color value into a float value for the decoder.
66 *
67 * @param data The integer color value post-interpolation.
68 * @param lns_mask If set treat lane as HDR (LNS) else LDR (unorm16).
69 *
70 * @return The float color value.
71 */
72static inline vfloat4 decode_texel(
73 vint4 data,
74 vmask4 lns_mask
75) {
76 vint4 color_lns = vint4::zero();
77 vint4 color_unorm = vint4::zero();
78
79 if (any(lns_mask))
80 {
81 color_lns = lns_to_sf16(data);
82 }
83
84 if (!all(lns_mask))
85 {
86 color_unorm = unorm16_to_sf16(data);
87 }
88
89 // Pick components and then convert to FP16
90 vint4 datai = select(color_unorm, color_lns, lns_mask);
91 return float16_to_float(datai);
92}
93
94/* See header for documentation. */
95void unpack_weights(
96 const block_size_descriptor& bsd,
97 const symbolic_compressed_block& scb,
98 const decimation_info& di,
99 bool is_dual_plane,
100 int weights_plane1[BLOCK_MAX_TEXELS],
101 int weights_plane2[BLOCK_MAX_TEXELS]
102) {
103 // Safe to overshoot as all arrays are allocated to full size
104 if (!is_dual_plane)
105 {
106 // Build full 64-entry weight lookup table
107 vint4 tab0(reinterpret_cast<const int*>(scb.weights + 0));
108 vint4 tab1(reinterpret_cast<const int*>(scb.weights + 16));
109 vint4 tab2(reinterpret_cast<const int*>(scb.weights + 32));
110 vint4 tab3(reinterpret_cast<const int*>(scb.weights + 48));
111
112 vint tab0p, tab1p, tab2p, tab3p;
113 vtable_prepare(tab0, tab1, tab2, tab3, tab0p, tab1p, tab2p, tab3p);
114
115 for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
116 {
117 vint summed_value(8);
118 vint weight_count(di.texel_weight_count + i);
119 int max_weight_count = hmax(weight_count).lane<0>();
120
121 promise(max_weight_count > 0);
122 for (int j = 0; j < max_weight_count; j++)
123 {
124 vint texel_weights(di.texel_weights_tr[j] + i);
125 vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
126
127 summed_value += vtable_8bt_32bi(tab0p, tab1p, tab2p, tab3p, texel_weights) * texel_weights_int;
128 }
129
130 store(lsr<4>(summed_value), weights_plane1 + i);
131 }
132 }
133 else
134 {
135 // Build a 32-entry weight lookup table per plane
136 // Plane 1
137 vint4 tab0_plane1(reinterpret_cast<const int*>(scb.weights + 0));
138 vint4 tab1_plane1(reinterpret_cast<const int*>(scb.weights + 16));
139 vint tab0_plane1p, tab1_plane1p;
140 vtable_prepare(tab0_plane1, tab1_plane1, tab0_plane1p, tab1_plane1p);
141
142 // Plane 2
143 vint4 tab0_plane2(reinterpret_cast<const int*>(scb.weights + 32));
144 vint4 tab1_plane2(reinterpret_cast<const int*>(scb.weights + 48));
145 vint tab0_plane2p, tab1_plane2p;
146 vtable_prepare(tab0_plane2, tab1_plane2, tab0_plane2p, tab1_plane2p);
147
148 for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
149 {
150 vint sum_plane1(8);
151 vint sum_plane2(8);
152
153 vint weight_count(di.texel_weight_count + i);
154 int max_weight_count = hmax(weight_count).lane<0>();
155
156 promise(max_weight_count > 0);
157 for (int j = 0; j < max_weight_count; j++)
158 {
159 vint texel_weights(di.texel_weights_tr[j] + i);
160 vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
161
162 sum_plane1 += vtable_8bt_32bi(tab0_plane1p, tab1_plane1p, texel_weights) * texel_weights_int;
163 sum_plane2 += vtable_8bt_32bi(tab0_plane2p, tab1_plane2p, texel_weights) * texel_weights_int;
164 }
165
166 store(lsr<4>(sum_plane1), weights_plane1 + i);
167 store(lsr<4>(sum_plane2), weights_plane2 + i);
168 }
169 }
170}
171
172/**
173 * @brief Return an FP32 NaN value for use in error colors.
174 *
175 * This NaN encoding will turn into 0xFFFF when converted to an FP16 NaN.
176 *
177 * @return The float color value.
178 */
179static float error_color_nan()
180{
181 if32 v;
182 v.u = 0xFFFFE000U;
183 return v.f;
184}
185
186/* See header for documentation. */
187void decompress_symbolic_block(
188 astcenc_profile decode_mode,
189 const block_size_descriptor& bsd,
190 int xpos,
191 int ypos,
192 int zpos,
193 const symbolic_compressed_block& scb,
194 image_block& blk
195) {
196 blk.xpos = xpos;
197 blk.ypos = ypos;
198 blk.zpos = zpos;
199
200 blk.data_min = vfloat4::zero();
201 blk.data_mean = vfloat4::zero();
202 blk.data_max = vfloat4::zero();
203 blk.grayscale = false;
204
205 // If we detected an error-block, blow up immediately.
206 if (scb.block_type == SYM_BTYPE_ERROR)
207 {
208 for (unsigned int i = 0; i < bsd.texel_count; i++)
209 {
210 blk.data_r[i] = error_color_nan();
211 blk.data_g[i] = error_color_nan();
212 blk.data_b[i] = error_color_nan();
213 blk.data_a[i] = error_color_nan();
214 blk.rgb_lns[i] = 0;
215 blk.alpha_lns[i] = 0;
216 }
217
218 return;
219 }
220
221 if ((scb.block_type == SYM_BTYPE_CONST_F16) ||
222 (scb.block_type == SYM_BTYPE_CONST_U16))
223 {
224 vfloat4 color;
225 uint8_t use_lns = 0;
226
227 // UNORM16 constant color block
228 if (scb.block_type == SYM_BTYPE_CONST_U16)
229 {
230 vint4 colori(scb.constant_color);
231
232 // For sRGB decoding a real decoder would just use the top 8 bits for color conversion.
233 // We don't color convert, so rescale the top 8 bits into the full 16 bit dynamic range.
234 if (decode_mode == ASTCENC_PRF_LDR_SRGB)
235 {
236 colori = asr<8>(colori) * 257;
237 }
238
239 vint4 colorf16 = unorm16_to_sf16(colori);
240 color = float16_to_float(colorf16);
241 }
242 // FLOAT16 constant color block
243 else
244 {
245 switch (decode_mode)
246 {
247 case ASTCENC_PRF_LDR_SRGB:
248 case ASTCENC_PRF_LDR:
249 color = vfloat4(error_color_nan());
250 break;
251 case ASTCENC_PRF_HDR_RGB_LDR_A:
252 case ASTCENC_PRF_HDR:
253 // Constant-color block; unpack from FP16 to FP32.
254 color = float16_to_float(vint4(scb.constant_color));
255 use_lns = 1;
256 break;
257 }
258 }
259
260 for (unsigned int i = 0; i < bsd.texel_count; i++)
261 {
262 blk.data_r[i] = color.lane<0>();
263 blk.data_g[i] = color.lane<1>();
264 blk.data_b[i] = color.lane<2>();
265 blk.data_a[i] = color.lane<3>();
266 blk.rgb_lns[i] = use_lns;
267 blk.alpha_lns[i] = use_lns;
268 }
269
270 return;
271 }
272
273 // Get the appropriate partition-table entry
274 int partition_count = scb.partition_count;
275 const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
276
277 // Get the appropriate block descriptors
278 const auto& bm = bsd.get_block_mode(scb.block_mode);
279 const auto& di = bsd.get_decimation_info(bm.decimation_mode);
280
281 bool is_dual_plane = static_cast<bool>(bm.is_dual_plane);
282
283 // Unquantize and undecimate the weights
284 int plane1_weights[BLOCK_MAX_TEXELS];
285 int plane2_weights[BLOCK_MAX_TEXELS];
286 unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);
287
288 // Now that we have endpoint colors and weights, we can unpack texel colors
289 int plane2_component = scb.plane2_component;
290 vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);
291
292 for (int i = 0; i < partition_count; i++)
293 {
294 // Decode the color endpoints for this partition
295 vint4 ep0;
296 vint4 ep1;
297 bool rgb_lns;
298 bool a_lns;
299
300 unpack_color_endpoints(decode_mode,
301 scb.color_formats[i],
302 scb.color_values[i],
303 rgb_lns, a_lns,
304 ep0, ep1);
305
306 vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns);
307
308 int texel_count = pi.partition_texel_count[i];
309 for (int j = 0; j < texel_count; j++)
310 {
311 int tix = pi.texels_of_partition[i][j];
312 vint4 weight = select(vint4(plane1_weights[tix]), vint4(plane2_weights[tix]), plane2_mask);
313 vint4 color = lerp_color_int(decode_mode, ep0, ep1, weight);
314 vfloat4 colorf = decode_texel(color, lns_mask);
315
316 blk.data_r[tix] = colorf.lane<0>();
317 blk.data_g[tix] = colorf.lane<1>();
318 blk.data_b[tix] = colorf.lane<2>();
319 blk.data_a[tix] = colorf.lane<3>();
320 }
321 }
322}
323
324#if !defined(ASTCENC_DECOMPRESS_ONLY)
325
326/* See header for documentation. */
327float compute_symbolic_block_difference_2plane(
328 const astcenc_config& config,
329 const block_size_descriptor& bsd,
330 const symbolic_compressed_block& scb,
331 const image_block& blk
332) {
333 // If we detected an error-block, blow up immediately.
334 if (scb.block_type == SYM_BTYPE_ERROR)
335 {
336 return ERROR_CALC_DEFAULT;
337 }
338
339 assert(scb.block_mode >= 0);
340 assert(scb.partition_count == 1);
341 assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 1);
342
343 // Get the appropriate block descriptor
344 const block_mode& bm = bsd.get_block_mode(scb.block_mode);
345 const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
346
347 // Unquantize and undecimate the weights
348 int plane1_weights[BLOCK_MAX_TEXELS];
349 int plane2_weights[BLOCK_MAX_TEXELS];
350 unpack_weights(bsd, scb, di, true, plane1_weights, plane2_weights);
351
352 vmask4 plane2_mask = vint4::lane_id() == vint4(scb.plane2_component);
353
354 vfloat4 summa = vfloat4::zero();
355
356 // Decode the color endpoints for this partition
357 vint4 ep0;
358 vint4 ep1;
359 bool rgb_lns;
360 bool a_lns;
361
362 unpack_color_endpoints(config.profile,
363 scb.color_formats[0],
364 scb.color_values[0],
365 rgb_lns, a_lns,
366 ep0, ep1);
367
368 // Unpack and compute error for each texel in the partition
369 unsigned int texel_count = bsd.texel_count;
370 for (unsigned int i = 0; i < texel_count; i++)
371 {
372 vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask);
373 vint4 colori = lerp_color_int(config.profile, ep0, ep1, weight);
374
375 vfloat4 color = int_to_float(colori);
376 vfloat4 oldColor = blk.texel(i);
377
378 // Compare error using a perceptual decode metric for RGBM textures
379 if (config.flags & ASTCENC_FLG_MAP_RGBM)
380 {
381 // Fail encodings that result in zero weight M pixels. Note that this can cause
382 // "interesting" artifacts if we reject all useful encodings - we typically get max
383 // brightness encodings instead which look just as bad. We recommend users apply a
384 // bias to their stored M value, limiting the lower value to 16 or 32 to avoid
385 // getting small M values post-quantization, but we can't prove it would never
386 // happen, especially at low bit rates ...
387 if (color.lane<3>() == 0.0f)
388 {
389 return -ERROR_CALC_DEFAULT;
390 }
391
392 // Compute error based on decoded RGBM color
393 color = vfloat4(
394 color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
395 color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
396 color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
397 1.0f
398 );
399
400 oldColor = vfloat4(
401 oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
402 oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
403 oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
404 1.0f
405 );
406 }
407
408 vfloat4 error = oldColor - color;
409 error = min(abs(error), 1e15f);
410 error = error * error;
411
412 summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
413 }
414
415 return summa.lane<0>();
416}
417
418/* See header for documentation. */
419float compute_symbolic_block_difference_1plane(
420 const astcenc_config& config,
421 const block_size_descriptor& bsd,
422 const symbolic_compressed_block& scb,
423 const image_block& blk
424) {
425 assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 0);
426
427 // If we detected an error-block, blow up immediately.
428 if (scb.block_type == SYM_BTYPE_ERROR)
429 {
430 return ERROR_CALC_DEFAULT;
431 }
432
433 assert(scb.block_mode >= 0);
434
435 // Get the appropriate partition-table entry
436 unsigned int partition_count = scb.partition_count;
437 const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
438
439 // Get the appropriate block descriptor
440 const block_mode& bm = bsd.get_block_mode(scb.block_mode);
441 const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
442
443 // Unquantize and undecimate the weights
444 int plane1_weights[BLOCK_MAX_TEXELS];
445 unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
446
447 vfloat4 summa = vfloat4::zero();
448 for (unsigned int i = 0; i < partition_count; i++)
449 {
450 // Decode the color endpoints for this partition
451 vint4 ep0;
452 vint4 ep1;
453 bool rgb_lns;
454 bool a_lns;
455
456 unpack_color_endpoints(config.profile,
457 scb.color_formats[i],
458 scb.color_values[i],
459 rgb_lns, a_lns,
460 ep0, ep1);
461
462 // Unpack and compute error for each texel in the partition
463 unsigned int texel_count = pi.partition_texel_count[i];
464 for (unsigned int j = 0; j < texel_count; j++)
465 {
466 unsigned int tix = pi.texels_of_partition[i][j];
467 vint4 colori = lerp_color_int(config.profile, ep0, ep1,
468 vint4(plane1_weights[tix]));
469
470 vfloat4 color = int_to_float(colori);
471 vfloat4 oldColor = blk.texel(tix);
472
473 // Compare error using a perceptual decode metric for RGBM textures
474 if (config.flags & ASTCENC_FLG_MAP_RGBM)
475 {
476 // Fail encodings that result in zero weight M pixels. Note that this can cause
477 // "interesting" artifacts if we reject all useful encodings - we typically get max
478 // brightness encodings instead which look just as bad. We recommend users apply a
479 // bias to their stored M value, limiting the lower value to 16 or 32 to avoid
480 // getting small M values post-quantization, but we can't prove it would never
481 // happen, especially at low bit rates ...
482 if (color.lane<3>() == 0.0f)
483 {
484 return -ERROR_CALC_DEFAULT;
485 }
486
487 // Compute error based on decoded RGBM color
488 color = vfloat4(
489 color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
490 color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
491 color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
492 1.0f
493 );
494
495 oldColor = vfloat4(
496 oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
497 oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
498 oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
499 1.0f
500 );
501 }
502
503 vfloat4 error = oldColor - color;
504 error = min(abs(error), 1e15f);
505 error = error * error;
506
507 summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
508 }
509 }
510
511 return summa.lane<0>();
512}
513
514/* See header for documentation. */
515float compute_symbolic_block_difference_1plane_1partition(
516 const astcenc_config& config,
517 const block_size_descriptor& bsd,
518 const symbolic_compressed_block& scb,
519 const image_block& blk
520) {
521 // If we detected an error-block, blow up immediately.
522 if (scb.block_type == SYM_BTYPE_ERROR)
523 {
524 return ERROR_CALC_DEFAULT;
525 }
526
527 assert(scb.block_mode >= 0);
528 assert(bsd.get_partition_info(scb.partition_count, scb.partition_index).partition_count == 1);
529
530 // Get the appropriate block descriptor
531 const block_mode& bm = bsd.get_block_mode(scb.block_mode);
532 const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
533
534 // Unquantize and undecimate the weights
535 alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
536 unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
537
538 // Decode the color endpoints for this partition
539 vint4 ep0;
540 vint4 ep1;
541 bool rgb_lns;
542 bool a_lns;
543
544 unpack_color_endpoints(config.profile,
545 scb.color_formats[0],
546 scb.color_values[0],
547 rgb_lns, a_lns,
548 ep0, ep1);
549
550
551 // Pre-shift sRGB so things round correctly
552 if (config.profile == ASTCENC_PRF_LDR_SRGB)
553 {
554 ep0 = asr<8>(ep0);
555 ep1 = asr<8>(ep1);
556 }
557
558 // Unpack and compute error for each texel in the partition
559 vfloatacc summav = vfloatacc::zero();
560
561 vint lane_id = vint::lane_id();
562 vint srgb_scale(config.profile == ASTCENC_PRF_LDR_SRGB ? 257 : 1);
563
564 unsigned int texel_count = bsd.texel_count;
565 for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
566 {
567 // Compute EP1 contribution
568 vint weight1 = vint::loada(plane1_weights + i);
569 vint ep1_r = vint(ep1.lane<0>()) * weight1;
570 vint ep1_g = vint(ep1.lane<1>()) * weight1;
571 vint ep1_b = vint(ep1.lane<2>()) * weight1;
572 vint ep1_a = vint(ep1.lane<3>()) * weight1;
573
574 // Compute EP0 contribution
575 vint weight0 = vint(64) - weight1;
576 vint ep0_r = vint(ep0.lane<0>()) * weight0;
577 vint ep0_g = vint(ep0.lane<1>()) * weight0;
578 vint ep0_b = vint(ep0.lane<2>()) * weight0;
579 vint ep0_a = vint(ep0.lane<3>()) * weight0;
580
581 // Shift so things round correctly
582 vint colori_r = asr<6>(ep0_r + ep1_r + vint(32)) * srgb_scale;
583 vint colori_g = asr<6>(ep0_g + ep1_g + vint(32)) * srgb_scale;
584 vint colori_b = asr<6>(ep0_b + ep1_b + vint(32)) * srgb_scale;
585 vint colori_a = asr<6>(ep0_a + ep1_a + vint(32)) * srgb_scale;
586
587 // Compute color diff
588 vfloat color_r = int_to_float(colori_r);
589 vfloat color_g = int_to_float(colori_g);
590 vfloat color_b = int_to_float(colori_b);
591 vfloat color_a = int_to_float(colori_a);
592
593 vfloat color_orig_r = loada(blk.data_r + i);
594 vfloat color_orig_g = loada(blk.data_g + i);
595 vfloat color_orig_b = loada(blk.data_b + i);
596 vfloat color_orig_a = loada(blk.data_a + i);
597
598 vfloat color_error_r = min(abs(color_orig_r - color_r), vfloat(1e15f));
599 vfloat color_error_g = min(abs(color_orig_g - color_g), vfloat(1e15f));
600 vfloat color_error_b = min(abs(color_orig_b - color_b), vfloat(1e15f));
601 vfloat color_error_a = min(abs(color_orig_a - color_a), vfloat(1e15f));
602
603 // Compute squared error metric
604 color_error_r = color_error_r * color_error_r;
605 color_error_g = color_error_g * color_error_g;
606 color_error_b = color_error_b * color_error_b;
607 color_error_a = color_error_a * color_error_a;
608
609 vfloat metric = color_error_r * blk.channel_weight.lane<0>()
610 + color_error_g * blk.channel_weight.lane<1>()
611 + color_error_b * blk.channel_weight.lane<2>()
612 + color_error_a * blk.channel_weight.lane<3>();
613
614 // Mask off bad lanes
615 vmask mask = lane_id < vint(texel_count);
616 lane_id += vint(ASTCENC_SIMD_WIDTH);
617 haccumulate(summav, metric, mask);
618 }
619
620 return hadd_s(summav);
621}
622
623#endif
624