1// basisu_uastc_enc.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15#include "basisu_uastc_enc.h"
16
17#if BASISU_USE_ASTC_DECOMPRESS
18#include "basisu_astc_decomp.h"
19#endif
20
21#include "basisu_gpu_texture.h"
22#include "basisu_bc7enc.h"
23
24#ifdef _DEBUG
25// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower.
26#define BASISU_VALIDATE_UASTC_ENC 1
27#endif
28
29#define BASISU_SUPPORT_FORCE_MODE 0
30
31using namespace basist;
32
33namespace basisu
34{
 // Upper bound on the number of entries the astc_mode*() helpers in this file will append to a caller-supplied
 // uastc_encode_results array; each helper checks total_results against this limit before writing.
 const uint32_t MAX_ENCODE_RESULTS = 512;
36
37#if BASISU_VALIDATE_UASTC_ENC
38 static void validate_func(bool condition, int line)
39 {
40 if (!condition)
41 {
42 fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %u!\n", line);
43 }
44 }
45
46 #define VALIDATE(c) validate_func(c, __LINE__);
47#else
48 #define VALIDATE(c)
49#endif
50
51 enum dxt_constants
52 {
53 cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U,
54 cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U,
55 };
56
57 struct dxt1_block
58 {
59 enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
60
61 uint8_t m_low_color[cTotalEndpointBytes];
62 uint8_t m_high_color[cTotalEndpointBytes];
63 uint8_t m_selectors[cTotalSelectorBytes];
64
65 inline void clear() { basisu::clear_obj(*this); }
66
67 inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
68 inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
69 inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
70 inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); }
71 inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; }
72 inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); }
73
74 static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U)
75 {
76 uint32_t r = color.r, g = color.g, b = color.b;
77 if (scaled)
78 {
79 r = (r * 31U + bias) / 255U;
80 g = (g * 63U + bias) / 255U;
81 b = (b * 31U + bias) / 255U;
82 }
83 return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U));
84 }
85
86 static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
87 };
88
// Debug aid: set to 1 to make uastc_write_bits()/pack_uastc() printf each written field's description,
// starting bit offset and size, plus per-block totals.
#define UASTC_WRITE_MODE_DESCS 0
90
91 static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc)
92 {
93 (void)pDesc;
94
95#if UASTC_WRITE_MODE_DESCS
96 if (pDesc)
97 printf("%s: %u %u\n", pDesc, bit_offset, codesize);
98#endif
99
100 assert((codesize == 64) || (code < (1ULL << codesize)));
101
102 while (codesize)
103 {
104 uint32_t byte_bit_offset = bit_offset & 7;
105 uint32_t bits_to_write = basisu::minimum<int>(codesize, 8 - byte_bit_offset);
106
107 pBuf[bit_offset >> 3] |= (code << byte_bit_offset);
108
109 code >>= bits_to_write;
110 codesize -= bits_to_write;
111 bit_offset += bits_to_write;
112 }
113 }
114
 // Serializes one encoded block into the UASTC bit layout.
 //
 // Fields are written in order into a zeroed scratch buffer with uastc_write_bits(): the mode's variable-length
 // code, then either the solid-color payload, or (for all other modes) the BC1/ETC1/ETC2 hint fields, the
 // partition pattern, the dual-plane component selector, the endpoints and finally the weights.
 // sizeof(blk) bytes of the scratch buffer are then copied into blk.
 //
 //   blk            - output block.
 //   result         - encoder result to serialize (mode, common pattern, ASTC endpoints/weights/CCS, solid color).
 //   etc1_blk       - supplies the ETC1 hint fields (flip/diff bits, intensity tables, and for solid blocks a selector and base color).
 //   etc1_bias      - 5-bit value, written only for modes flagged in g_uastc_mode_has_etc1_bias; asserted to be 0 otherwise.
 //   etc_eac_a8_blk - supplies m_table/m_multiplier, written as an 8-bit hint for modes flagged in g_uastc_mode_has_alpha.
 //   bc1_hint0/1    - 1-bit hints, each written only if the mode's g_uastc_mode_has_bc1_hint* flag is set; asserted false otherwise.
 void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1)
 {
 // Non-solid alpha modes require a non-zero EAC multiplier (debug-only check).
 if ((g_uastc_mode_has_alpha[result.m_uastc_mode]) && (result.m_uastc_mode != UASTC_MODE_INDEX_SOLID_COLOR))
 {
 assert(etc_eac_a8_blk.m_multiplier >= 1);
 }

 // Scratch bit buffer. It must start zeroed because uastc_write_bits() ORs bits in.
 uint8_t buf[32];
 memset(buf, 0, sizeof(buf));

 uint32_t block_bit_offset = 0;

#if UASTC_WRITE_MODE_DESCS
 printf("**** Mode: %u\n", result.m_uastc_mode);
#endif

 // Mode code: entry [0] is the code value, entry [1] its length in bits.
 uastc_write_bits(buf, block_bit_offset, g_uastc_mode_huff_codes[result.m_uastc_mode][0], g_uastc_mode_huff_codes[result.m_uastc_mode][1], "mode");

 // Solid color blocks have their own short layout: RGBA (8 bits each) followed by ETC1 hint fields.
 if (result.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
 {
 uastc_write_bits(buf, block_bit_offset, result.m_solid_color.r, 8, "R");
 uastc_write_bits(buf, block_bit_offset, result.m_solid_color.g, 8, "G");
 uastc_write_bits(buf, block_bit_offset, result.m_solid_color.b, 8, "B");
 uastc_write_bits(buf, block_bit_offset, result.m_solid_color.a, 8, "A");

 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D");
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I");
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_selector(0, 0), 2, "ETC1S");

 // The ETC1 base color comes from the 5-bit or 4-bit representation depending on the diff bit, and is
 // written as 5 bits per channel (the trailing 'false' argument presumably requests unscaled values - confirm).
 uint32_t r, g, b;
 if (etc1_blk.get_diff_bit())
 etc_block::unpack_color5(r, g, b, etc1_blk.get_base5_color(), false);
 else
 etc_block::unpack_color4(r, g, b, etc1_blk.get_base4_color(0), false);

 uastc_write_bits(buf, block_bit_offset, r, 5, "ETC1R");
 uastc_write_bits(buf, block_bit_offset, g, 5, "ETC1G");
 uastc_write_bits(buf, block_bit_offset, b, 5, "ETC1B");

 memcpy(&blk, buf, sizeof(blk));
 return;
 }

 // Optional BC1 hint bits; only present in modes that declare them.
 if (g_uastc_mode_has_bc1_hint0[result.m_uastc_mode])
 uastc_write_bits(buf, block_bit_offset, bc1_hint0, 1, "BC1H0");
 else
 {
 assert(bc1_hint0 == false);
 }

 if (g_uastc_mode_has_bc1_hint1[result.m_uastc_mode])
 uastc_write_bits(buf, block_bit_offset, bc1_hint1, 1, "BC1H1");
 else
 {
 assert(bc1_hint1 == false);
 }

 // ETC1 hints: flip bit, diff bit and the two 3-bit intensity table indices.
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_flip_bit(), 1, "ETC1F");
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D");
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I0");
 uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(1), 3, "ETC1I1");

 if (g_uastc_mode_has_etc1_bias[result.m_uastc_mode])
 uastc_write_bits(buf, block_bit_offset, etc1_bias, 5, "ETC1BIAS");
 else
 {
 assert(etc1_bias == 0);
 }

 // Alpha modes carry an 8-bit ETC2 hint: table index in the low 4 bits, multiplier in the high 4 bits.
 if (g_uastc_mode_has_alpha[result.m_uastc_mode])
 {
 const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4);

 assert(etc2_hints > 0 && etc2_hints <= 0xFF);
 uastc_write_bits(buf, block_bit_offset, etc2_hints, 8, "ETC2TM");
 }

 // Partition pattern index: 5 bits for the 2-subset modes, 4 bits for the 3-subset mode; 1-subset modes write nothing.
 uint32_t subsets = 1;
 switch (result.m_uastc_mode)
 {
 case 2:
 case 4:
 case 7:
 case 9:
 case 16:
 uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 5, "PAT");
 subsets = 2;
 break;
 case 3:
 uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 4, "PAT");
 subsets = 3;
 break;
 default:
 break;
 }

#ifdef _DEBUG
 // Debug builds look up the ASTC partition seed so the anchor/partition tables can be cross-checked below.
 // Mode 7 takes its seed from a different table than the other 2-subset modes.
 uint32_t part_seed = 0;
 switch (result.m_uastc_mode)
 {
 case 2:
 case 4:
 case 9:
 case 16:
 part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc;
 break;
 case 3:
 part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc;
 break;
 case 7:
 part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2;
 break;
 default:
 break;
 }
#endif

 // Dual-plane modes: modes 6/11/13 store a 2-bit color component selector; mode 17 implies CCS 3 and stores nothing.
 uint32_t total_planes = 1;
 switch (result.m_uastc_mode)
 {
 case 6:
 case 11:
 case 13:
 uastc_write_bits(buf, block_bit_offset, result.m_astc.m_ccs, 2, "COMPSEL");
 total_planes = 2;
 break;
 case 17:
 // CCS field is always 3 for dual plane LA.
 assert(result.m_astc.m_ccs == 3);
 total_planes = 2;
 break;
 default:
 break;
 }

 // Work on local copies, since weights may be inverted and endpoints swapped below.
 // With two planes the weights are interleaved: index = texel * total_planes + plane.
 uint8_t weights[32];
 memcpy(weights, result.m_astc.m_weights, 16 * total_planes);

 uint8_t endpoints[18];
 memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints));

 const uint32_t total_comps = g_uastc_mode_comps[result.m_uastc_mode];

 // LLAA
 // LLAA LLAA
 // LLAA LLAA LLAA
 // RRGGBB
 // RRGGBB RRGGBB
 // RRGGBB RRGGBB RRGGBB
 // RRGGBBAA
 // RRGGBBAA RRGGBBAA

 const uint32_t weight_bits = g_uastc_mode_weight_bits[result.m_uastc_mode];

 // Anchor texel index of each subset, plus the per-texel subset assignment (returned through pPartition_pattern).
 const uint8_t* pPartition_pattern;
 const uint8_t* pSubset_anchor_indices = basist::get_anchor_indices(subsets, result.m_uastc_mode, result.m_common_pattern, pPartition_pattern);

 // Normalize every (plane, subset) pair so that its anchor weight has a clear MSB.
 // This lets the anchor weights be stored with one bit less further down.
 for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++)
 {
 for (uint32_t subset_index = 0; subset_index < subsets; subset_index++)
 {
 const uint32_t anchor_index = pSubset_anchor_indices[subset_index];

#ifdef _DEBUG
 // Sanity check: the anchor must be the first texel (in raster order) assigned to this subset.
 if (subsets >= 2)
 {
 for (uint32_t i = 0; i < 16; i++)
 {
 const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true);
 if (part_index == subset_index)
 {
 assert(anchor_index == i);
 break;
 }
 }
 }
 else
 {
 assert(!anchor_index);
 }
#endif

 // Check anchor weight's MSB - if it's set then invert this subset's weights and swap the endpoints
 if (weights[anchor_index * total_planes + plane_index] & (1 << (weight_bits - 1)))
 {
 for (uint32_t i = 0; i < 16; i++)
 {
 const uint32_t part_index = pPartition_pattern[i];

#ifdef _DEBUG
 // Sanity check: the partition table must agree with the ASTC partition function.
 if (subsets >= 2)
 {
 assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true));
 }
 else
 {
 assert(!part_index);
 }
#endif

 // Invert (max - w) every weight of this subset in this plane.
 if (part_index == subset_index)
 weights[i * total_planes + plane_index] = ((1 << weight_bits) - 1) - weights[i * total_planes + plane_index];
 }

 if (total_planes == 2)
 {
 // Dual plane: only swap the endpoint pairs of components that belong to this plane.
 // With 2 components, component c uses plane c; otherwise the component equal to CCS uses plane 1 and all others plane 0.
 for (int c = 0; c < (int)total_comps; c++)
 {
 const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0);

 if (comp_plane == plane_index)
 std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]);
 }
 }
 else
 {
 // Single plane: swap low/high for every component of this subset.
 for (uint32_t c = 0; c < total_comps; c++)
 std::swap(endpoints[subset_index * total_comps * 2 + c * 2 + 0], endpoints[subset_index * total_comps * 2 + c * 2 + 1]);
 }
 }
 } // subset_index
 } // plane_index

 // Endpoint packing. Each endpoint value is split into its low ep_bits plain bits and a trit or quint digit
 // (the remaining high part), according to the mode's endpoint range.
 const uint32_t total_values = total_comps * 2 * subsets;
 const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[result.m_uastc_mode];

 uint32_t bit_values[18];
 uint32_t tq_values[8];
 uint32_t total_tq_values = 0;
 uint32_t tq_accum = 0;
 uint32_t tq_mul = 1;

 const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0];
 const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1];
 const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2];

 for (uint32_t i = 0; i < total_values; i++)
 {
 uint32_t val = endpoints[i];

 uint32_t bits = val & ((1 << ep_bits) - 1);
 uint32_t tq = val >> ep_bits;

 bit_values[i] = bits;

 if (ep_trits)
 {
 // Accumulate trits as base-3 digits; a full group is 5 trits (3^5 == 243).
 assert(tq < 3);
 tq_accum += tq * tq_mul;
 tq_mul *= 3;
 if (tq_mul == 243)
 {
 tq_values[total_tq_values++] = tq_accum;
 tq_accum = 0;
 tq_mul = 1;
 }
 }
 else if (ep_quints)
 {
 // Accumulate quints as base-5 digits; a full group is 3 quints (5^3 == 125).
 assert(tq < 5);
 tq_accum += tq * tq_mul;
 tq_mul *= 5;
 if (tq_mul == 125)
 {
 tq_values[total_tq_values++] = tq_accum;
 tq_accum = 0;
 tq_mul = 1;
 }
 }
 }

 uint32_t total_endpoint_bits = 0;

 // Full groups first: 8 bits per trit group, 7 bits per quint group.
 for (uint32_t i = 0; i < total_tq_values; i++)
 {
 const uint32_t num_bits = ep_trits ? 8 : 7;
 uastc_write_bits(buf, block_bit_offset, tq_values[i], num_bits, "ETQ");
 total_endpoint_bits += num_bits;
 }

 // Then the trailing partial group, if any, using just enough bits for the digits it holds
 // (tq_mul is the number of possible values accumulated so far).
 if (tq_mul > 1)
 {
 uint32_t num_bits;
 if (ep_trits)
 {
 if (tq_mul == 3)
 num_bits = 2;
 else if (tq_mul == 9)
 num_bits = 4;
 else if (tq_mul == 27)
 num_bits = 5;
 else //if (tq_mul == 81)
 num_bits = 7;
 }
 else
 {
 if (tq_mul == 5)
 num_bits = 3;
 else //if (tq_mul == 25)
 num_bits = 5;
 }
 uastc_write_bits(buf, block_bit_offset, tq_accum, num_bits, "ETQ");
 total_endpoint_bits += num_bits;
 }

 // Finally the plain low bits of every endpoint value.
 for (uint32_t i = 0; i < total_values; i++)
 {
 uastc_write_bits(buf, block_bit_offset, bit_values[i], ep_bits, "EBITS");
 total_endpoint_bits += ep_bits;
 }

#if UASTC_WRITE_MODE_DESCS
 uint32_t weight_start = block_bit_offset;
#endif

 // Weights, in stored (plane-interleaved) order. Anchor texels use one bit less because their MSB was
 // forced to zero by the normalization above; plane_shift maps an interleaved index back to its texel index.
 uint32_t total_weight_bits = 0;
 const uint32_t plane_shift = (total_planes == 2) ? 1 : 0;
 for (uint32_t i = 0; i < 16 * total_planes; i++)
 {
 uint32_t numbits = weight_bits;
 for (uint32_t s = 0; s < subsets; s++)
 {
 if (pSubset_anchor_indices[s] == (i >> plane_shift))
 {
 numbits--;
 break;
 }
 }

 uastc_write_bits(buf, block_bit_offset, weights[i], numbits, nullptr);

 total_weight_bits += numbits;
 }

#if UASTC_WRITE_MODE_DESCS
 printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits);
#endif

 // Everything written must fit in 128 bits.
 assert(block_bit_offset <= 128);
 memcpy(&blk, buf, sizeof(blk));

#if UASTC_WRITE_MODE_DESCS
 printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits);
#endif
 }
460
461 // MODE 0
462 // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB
463 // 18. DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB
464 static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr)
465 {
466 const uint32_t endpoint_range = (mode == 18) ? 11 : 19;
467 const uint32_t weight_range = (mode == 18) ? 11 : 8;
468
469 color_cell_compressor_params ccell_params;
470 memset(&ccell_params, 0, sizeof(ccell_params));
471
472 ccell_params.m_num_pixels = 16;
473 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
474 ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16;
475 ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4;
476 ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x;
477 ccell_params.m_astc_endpoint_range = endpoint_range;
478 ccell_params.m_weights[0] = 1;
479 ccell_params.m_weights[1] = 1;
480 ccell_params.m_weights[2] = 1;
481 ccell_params.m_weights[3] = 1;
482 ccell_params.m_pForce_selectors = pForce_selectors;
483
484 color_cell_compressor_results ccell_results;
485 uint8_t ccell_result_selectors[16];
486 uint8_t ccell_result_selectors_temp[16];
487 memset(&ccell_results, 0, sizeof(ccell_results));
488 ccell_results.m_pSelectors = &ccell_result_selectors[0];
489 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
490
491 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
492
493 // ASTC
494 astc_block_desc astc_results;
495 memset(&astc_results, 0, sizeof(astc_results));
496
497 astc_results.m_dual_plane = false;
498 astc_results.m_weight_range = weight_range;// (mode == 18) ? 11 : 8;
499
500 astc_results.m_ccs = 0;
501 astc_results.m_subsets = 1;
502 astc_results.m_partition_seed = 0;
503 astc_results.m_cem = 8;
504
505 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
506 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
507 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
508 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
509 astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
510 astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
511
512 bool invert = false;
513
514 if (pForce_selectors == nullptr)
515 {
516 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
517 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
518 if (s1 < s0)
519 {
520 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
521 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
522 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
523 invert = true;
524 }
525 }
526
527 for (uint32_t y = 0; y < 4; y++)
528 {
529 for (uint32_t x = 0; x < 4; x++)
530 {
531 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
532
533 if (invert)
534 astc_results.m_weights[x + y * 4] = ((mode == 18) ? 31 : 15) - astc_results.m_weights[x + y * 4];
535 }
536 }
537
538 assert(total_results < MAX_ENCODE_RESULTS);
539 if (total_results < MAX_ENCODE_RESULTS)
540 {
541 pResults[total_results].m_uastc_mode = mode;
542 pResults[total_results].m_common_pattern = 0;
543 pResults[total_results].m_astc = astc_results;
544 pResults[total_results].m_astc_err = part_err;
545 total_results++;
546 }
547 }
548
549 // MODE 1
550 // 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3
551 // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB
552 static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
553 {
554 color_cell_compressor_params ccell_params;
555 memset(&ccell_params, 0, sizeof(ccell_params));
556
557 ccell_params.m_num_pixels = 16;
558 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
559 ccell_params.m_num_selector_weights = 4;
560 ccell_params.m_pSelector_weights = g_bc7_weights2;
561 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
562 ccell_params.m_astc_endpoint_range = 20;
563 ccell_params.m_weights[0] = 1;
564 ccell_params.m_weights[1] = 1;
565 ccell_params.m_weights[2] = 1;
566 ccell_params.m_weights[3] = 1;
567
568 color_cell_compressor_results ccell_results;
569 uint8_t ccell_result_selectors[16];
570 uint8_t ccell_result_selectors_temp[16];
571 memset(&ccell_results, 0, sizeof(ccell_results));
572 ccell_results.m_pSelectors = &ccell_result_selectors[0];
573 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
574
575 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
576
577 // ASTC
578 astc_block_desc astc_results;
579 memset(&astc_results, 0, sizeof(astc_results));
580
581 astc_results.m_dual_plane = false;
582 astc_results.m_weight_range = 2;
583
584 astc_results.m_ccs = 0;
585 astc_results.m_subsets = 1;
586 astc_results.m_partition_seed = 0;
587 astc_results.m_cem = 8;
588
589 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
590 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
591 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
592 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
593 astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
594 astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
595
596 const uint32_t range = 20;
597
598 bool invert = false;
599
600 int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
601 int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
602 if (s1 < s0)
603 {
604 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
605 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
606 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
607 invert = true;
608 }
609
610 for (uint32_t y = 0; y < 4; y++)
611 {
612 for (uint32_t x = 0; x < 4; x++)
613 {
614 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
615
616 if (invert)
617 astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
618 }
619 }
620
621 assert(total_results < MAX_ENCODE_RESULTS);
622 if (total_results < MAX_ENCODE_RESULTS)
623 {
624 pResults[total_results].m_uastc_mode = 1;
625 pResults[total_results].m_common_pattern = 0;
626 pResults[total_results].m_astc = astc_results;
627 pResults[total_results].m_astc_err = part_err;
628 total_results++;
629 }
630 }
631
632 static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4])
633 {
634 assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);
635
636 uint64_t best_err = UINT64_MAX;
637 uint32_t best_common_pattern = 0;
638
639 for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
640 {
641 const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
642
643 const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];
644
645 color_quad_u8 subset_colors[2][16];
646 uint32_t subset_total_colors[2] = { 0, 0 };
647 for (uint32_t index = 0; index < 16; index++)
648 subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
649
650 uint64_t total_subset_err = 0;
651 for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++)
652 total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
653
654 if (total_subset_err < best_err)
655 {
656 best_err = total_subset_err;
657 best_common_pattern = common_pattern;
658 }
659 }
660
661 return best_common_pattern;
662 }
663
664 // MODE 2
665 // 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1
666 // DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1
667 static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
668 {
669 uint32_t first_common_pattern = 0;
670 uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
671
672 if (estimate_partition)
673 {
674 const uint32_t weights[4] = { 1, 1, 1, 1 };
675 first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights);
676 last_common_pattern = first_common_pattern + 1;
677 }
678
679 for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
680 {
681 const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
682
683 color_rgba part_pixels[2][16];
684 uint32_t part_pixel_index[4][4];
685 uint32_t num_part_pixels[2] = { 0, 0 };
686
687 for (uint32_t y = 0; y < 4; y++)
688 {
689 for (uint32_t x = 0; x < 4; x++)
690 {
691 const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
692 part_pixel_index[y][x] = num_part_pixels[part];
693 part_pixels[part][num_part_pixels[part]++] = block[y][x];
694 }
695 }
696
697 color_cell_compressor_params ccell_params[2];
698 color_cell_compressor_results ccell_results[2];
699 uint8_t ccell_result_selectors[2][16];
700 uint8_t ccell_result_selectors_temp[2][16];
701
702 uint64_t total_part_err = 0;
703 for (uint32_t part = 0; part < 2; part++)
704 {
705 memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
706
707 ccell_params[part].m_num_pixels = num_part_pixels[part];
708 ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
709 ccell_params[part].m_num_selector_weights = 8;
710 ccell_params[part].m_pSelector_weights = g_bc7_weights3;
711 ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
712 ccell_params[part].m_astc_endpoint_range = 8;
713 ccell_params[part].m_weights[0] = 1;
714 ccell_params[part].m_weights[1] = 1;
715 ccell_params[part].m_weights[2] = 1;
716 ccell_params[part].m_weights[3] = 1;
717
718 memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
719 ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
720 ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
721
722 uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
723 total_part_err += part_err;
724 } // part
725
726 {
727 // ASTC
728 astc_block_desc astc_results;
729 memset(&astc_results, 0, sizeof(astc_results));
730
731 astc_results.m_dual_plane = false;
732 astc_results.m_weight_range = 5;
733
734 astc_results.m_ccs = 0;
735 astc_results.m_subsets = 2;
736 astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
737 astc_results.m_cem = 8;
738
739 uint32_t p0 = 0;
740 uint32_t p1 = 1;
741 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
742 std::swap(p0, p1);
743
744 astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
745 astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
746 astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
747 astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
748 astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
749 astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
750
751 const uint32_t range = 8;
752
753 bool invert[2] = { false, false };
754
755 int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant;
756 int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant;
757 if (s1 < s0)
758 {
759 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
760 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
761 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
762 invert[0] = true;
763 }
764
765 astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
766 astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
767 astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
768 astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
769 astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
770 astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
771
772 s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant;
773 s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 6]].m_unquant;
774
775 if (s1 < s0)
776 {
777 std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
778 std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
779 std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
780 invert[1] = true;
781 }
782
783 for (uint32_t y = 0; y < 4; y++)
784 {
785 for (uint32_t x = 0; x < 4; x++)
786 {
787 const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
788
789 astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
790
791 uint32_t astc_part = bc7_part;
792 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
793 astc_part = 1 - astc_part;
794
795 if (invert[astc_part])
796 astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
797 }
798 }
799
800 assert(total_results < MAX_ENCODE_RESULTS);
801 if (total_results < MAX_ENCODE_RESULTS)
802 {
803 pResults[total_results].m_uastc_mode = 2;
804 pResults[total_results].m_common_pattern = common_pattern;
805 pResults[total_results].m_astc = astc_results;
806 pResults[total_results].m_astc_err = total_part_err;
807 total_results++;
808 }
809 }
810
811 } // common_pattern
812 }
813
814 // MODE 3
815 // 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2
816 // DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2
817 static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
818 {
819 uint32_t first_common_pattern = 0;
820 uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3;
821
822 if (estimate_partition)
823 {
824 uint64_t best_err = UINT64_MAX;
825 uint32_t best_common_pattern = 0;
826 const uint32_t weights[4] = { 1, 1, 1, 1 };
827
828 for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++)
829 {
830 const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
831
832 const uint8_t* pPartition = &g_bc7_partition3[bc7_pattern * 16];
833
834 color_quad_u8 subset_colors[3][16];
835 uint32_t subset_total_colors[3] = { 0, 0 };
836 for (uint32_t index = 0; index < 16; index++)
837 subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
838
839 uint64_t total_subset_err = 0;
840 for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++)
841 total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
842
843 if (total_subset_err < best_err)
844 {
845 best_err = total_subset_err;
846 best_common_pattern = common_pattern;
847 }
848 }
849
850 first_common_pattern = best_common_pattern;
851 last_common_pattern = best_common_pattern + 1;
852 }
853
854 for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
855 {
856 const uint32_t endpoint_range = 7;
857
858 const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7;
859
860 color_rgba part_pixels[3][16];
861 uint32_t part_pixel_index[4][4];
862 uint32_t num_part_pixels[3] = { 0, 0, 0 };
863
864 for (uint32_t y = 0; y < 4; y++)
865 {
866 for (uint32_t x = 0; x < 4; x++)
867 {
868 const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
869 part_pixel_index[y][x] = num_part_pixels[bc7_part];
870 part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x];
871 }
872 }
873
874 color_cell_compressor_params ccell_params[3];
875 color_cell_compressor_results ccell_results[3];
876 uint8_t ccell_result_selectors[3][16];
877 uint8_t ccell_result_selectors_temp[3][16];
878
879 uint64_t total_part_err = 0;
880 for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++)
881 {
882 memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part]));
883
884 ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part];
885 ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0];
886 ccell_params[bc7_part].m_num_selector_weights = 4;
887 ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2;
888 ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
889 ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range;
890 ccell_params[bc7_part].m_weights[0] = 1;
891 ccell_params[bc7_part].m_weights[1] = 1;
892 ccell_params[bc7_part].m_weights[2] = 1;
893 ccell_params[bc7_part].m_weights[3] = 1;
894
895 memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part]));
896 ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0];
897 ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0];
898
899 uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params);
900 total_part_err += part_err;
901 } // part
902
903 {
904 // ASTC
905 astc_block_desc astc_results;
906 memset(&astc_results, 0, sizeof(astc_results));
907
908 astc_results.m_dual_plane = false;
909 astc_results.m_weight_range = 2;
910
911 astc_results.m_ccs = 0;
912 astc_results.m_subsets = 3;
913 astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc;
914 astc_results.m_cem = 8;
915
916 uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index
917 const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm;
918 astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0];
919 astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1];
920 astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2];
921
922 bool invert_astc_part[3] = { false, false, false };
923
924 for (uint32_t astc_part = 0; astc_part < 3; astc_part++)
925 {
926 uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part];
927
928 pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0];
929 pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0];
930 pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1];
931 pEndpoints[3] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1];
932 pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2];
933 pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2];
934
935 int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant;
936 int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant;
937 if (s1 < s0)
938 {
939 std::swap(pEndpoints[0], pEndpoints[1]);
940 std::swap(pEndpoints[2], pEndpoints[3]);
941 std::swap(pEndpoints[4], pEndpoints[5]);
942 invert_astc_part[astc_part] = true;
943 }
944 }
945
946 for (uint32_t y = 0; y < 4; y++)
947 {
948 for (uint32_t x = 0; x < 4; x++)
949 {
950 const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4];
951
952 astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
953
954 uint32_t astc_part = 0;
955 for (uint32_t i = 0; i < 3; i++)
956 {
957 if (astc_to_bc7_part[i] == bc7_part)
958 {
959 astc_part = i;
960 break;
961 }
962 }
963
964 if (invert_astc_part[astc_part])
965 astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
966 }
967 }
968
969 assert(total_results < MAX_ENCODE_RESULTS);
970 if (total_results < MAX_ENCODE_RESULTS)
971 {
972 pResults[total_results].m_uastc_mode = 3;
973 pResults[total_results].m_common_pattern = common_pattern;
974 pResults[total_results].m_astc = astc_results;
975 pResults[total_results].m_astc_err = total_part_err;
976 total_results++;
977 }
978
979 }
980
981 } // common_pattern
982 }
983
984 // MODE 4
985 // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE3
986 static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
987 {
988 //const uint32_t weight_range = 2;
989 const uint32_t endpoint_range = 12;
990
991 uint32_t first_common_pattern = 0;
992 uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
993
994 if (estimate_partition)
995 {
996 const uint32_t weights[4] = { 1, 1, 1, 1 };
997 first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights);
998 last_common_pattern = first_common_pattern + 1;
999 }
1000
1001 for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
1002 {
1003 const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1004
1005 color_rgba part_pixels[2][16];
1006 uint32_t part_pixel_index[4][4];
1007 uint32_t num_part_pixels[2] = { 0, 0 };
1008
1009 for (uint32_t y = 0; y < 4; y++)
1010 {
1011 for (uint32_t x = 0; x < 4; x++)
1012 {
1013 const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1014 part_pixel_index[y][x] = num_part_pixels[part];
1015 part_pixels[part][num_part_pixels[part]++] = block[y][x];
1016 }
1017 }
1018
1019 color_cell_compressor_params ccell_params[2];
1020 color_cell_compressor_results ccell_results[2];
1021 uint8_t ccell_result_selectors[2][16];
1022 uint8_t ccell_result_selectors_temp[2][16];
1023
1024 uint64_t total_part_err = 0;
1025 for (uint32_t part = 0; part < 2; part++)
1026 {
1027 memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
1028
1029 ccell_params[part].m_num_pixels = num_part_pixels[part];
1030 ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
1031 ccell_params[part].m_num_selector_weights = 4;
1032 ccell_params[part].m_pSelector_weights = g_bc7_weights2;
1033 ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1034 ccell_params[part].m_astc_endpoint_range = endpoint_range;
1035 ccell_params[part].m_weights[0] = 1;
1036 ccell_params[part].m_weights[1] = 1;
1037 ccell_params[part].m_weights[2] = 1;
1038 ccell_params[part].m_weights[3] = 1;
1039
1040 memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
1041 ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
1042 ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
1043
1044 uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
1045 total_part_err += part_err;
1046 } // part
1047
1048 // ASTC
1049 astc_block_desc astc_results;
1050 memset(&astc_results, 0, sizeof(astc_results));
1051
1052 astc_results.m_dual_plane = false;
1053 astc_results.m_weight_range = 2;
1054
1055 astc_results.m_ccs = 0;
1056 astc_results.m_subsets = 2;
1057 astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
1058 astc_results.m_cem = 8;
1059
1060 uint32_t p0 = 0;
1061 uint32_t p1 = 1;
1062 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1063 std::swap(p0, p1);
1064
1065 astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
1066 astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
1067 astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
1068 astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
1069 astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
1070 astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
1071
1072 bool invert[2] = { false, false };
1073
1074 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
1075 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
1076 if (s1 < s0)
1077 {
1078 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
1079 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
1080 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
1081 invert[0] = true;
1082 }
1083
1084 astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
1085 astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
1086 astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
1087 astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
1088 astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
1089 astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
1090
1091 s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant;
1092 s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant;
1093
1094 if (s1 < s0)
1095 {
1096 std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]);
1097 std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]);
1098 std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]);
1099 invert[1] = true;
1100 }
1101
1102 for (uint32_t y = 0; y < 4; y++)
1103 {
1104 for (uint32_t x = 0; x < 4; x++)
1105 {
1106 const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1107
1108 astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
1109
1110 uint32_t astc_part = bc7_part;
1111 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1112 astc_part = 1 - astc_part;
1113
1114 if (invert[astc_part])
1115 astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
1116 }
1117 }
1118
1119 assert(total_results < MAX_ENCODE_RESULTS);
1120 if (total_results < MAX_ENCODE_RESULTS)
1121 {
1122 pResults[total_results].m_uastc_mode = 4;
1123 pResults[total_results].m_common_pattern = common_pattern;
1124 pResults[total_results].m_astc = astc_results;
1125 pResults[total_results].m_astc_err = total_part_err;
1126 total_results++;
1127 }
1128
1129 } // common_pattern
1130 }
1131
1132 // MODE 5
1133 // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset)
1134 static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1135 {
1136 const uint32_t weight_range = 5;
1137 const uint32_t endpoint_range = 20;
1138
1139 color_cell_compressor_params ccell_params;
1140 memset(&ccell_params, 0, sizeof(ccell_params));
1141
1142 ccell_params.m_num_pixels = 16;
1143 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
1144 ccell_params.m_num_selector_weights = 8;
1145 ccell_params.m_pSelector_weights = g_bc7_weights3;
1146 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
1147 ccell_params.m_astc_endpoint_range = endpoint_range;
1148 ccell_params.m_weights[0] = 1;
1149 ccell_params.m_weights[1] = 1;
1150 ccell_params.m_weights[2] = 1;
1151 ccell_params.m_weights[3] = 1;
1152
1153 color_cell_compressor_results ccell_results;
1154 uint8_t ccell_result_selectors[16];
1155 uint8_t ccell_result_selectors_temp[16];
1156 memset(&ccell_results, 0, sizeof(ccell_results));
1157 ccell_results.m_pSelectors = &ccell_result_selectors[0];
1158 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1159
1160 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
1161
1162 // ASTC
1163 astc_block_desc blk;
1164 memset(&blk, 0, sizeof(blk));
1165
1166 blk.m_dual_plane = false;
1167 blk.m_weight_range = weight_range;
1168
1169 blk.m_ccs = 0;
1170 blk.m_subsets = 1;
1171 blk.m_partition_seed = 0;
1172 blk.m_cem = 8;
1173
1174 blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
1175 blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
1176 blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
1177 blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
1178 blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
1179 blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
1180
1181 bool invert = false;
1182
1183 int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1184 int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1185 if (s1 < s0)
1186 {
1187 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1188 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1189 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1190 invert = true;
1191 }
1192
1193 for (uint32_t y = 0; y < 4; y++)
1194 {
1195 for (uint32_t x = 0; x < 4; x++)
1196 {
1197 blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
1198
1199 if (invert)
1200 blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4];
1201 }
1202 }
1203
1204 assert(total_results < MAX_ENCODE_RESULTS);
1205 if (total_results < MAX_ENCODE_RESULTS)
1206 {
1207 pResults[total_results].m_uastc_mode = 5;
1208 pResults[total_results].m_common_pattern = 0;
1209 pResults[total_results].m_astc = blk;
1210 pResults[total_results].m_astc_err = part_err;
1211 total_results++;
1212 }
1213 }
1214
1215 // MODE 6
1216 // DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5
1217 static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1218 {
1219 for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++)
1220 {
1221 const uint32_t weight_range = 2;
1222 const uint32_t endpoint_range = 18;
1223
1224 color_quad_u8 block_rgb[16];
1225 color_quad_u8 block_a[16];
1226 for (uint32_t i = 0; i < 16; i++)
1227 {
1228 block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
1229 block_a[i] = block_rgb[i];
1230
1231 uint8_t c = block_a[i].m_c[rot_comp];
1232 block_a[i].m_c[0] = c;
1233 block_a[i].m_c[1] = c;
1234 block_a[i].m_c[2] = c;
1235 block_a[i].m_c[3] = 255;
1236
1237 block_rgb[i].m_c[rot_comp] = 255;
1238 }
1239
1240 uint8_t ccell_result_selectors_temp[16];
1241
1242 color_cell_compressor_params ccell_params_rgb;
1243 memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
1244
1245 ccell_params_rgb.m_num_pixels = 16;
1246 ccell_params_rgb.m_pPixels = block_rgb;
1247 ccell_params_rgb.m_num_selector_weights = 4;
1248 ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
1249 ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1250 ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
1251 ccell_params_rgb.m_weights[0] = 1;
1252 ccell_params_rgb.m_weights[1] = 1;
1253 ccell_params_rgb.m_weights[2] = 1;
1254 ccell_params_rgb.m_weights[3] = 1;
1255
1256 color_cell_compressor_results ccell_results_rgb;
1257 uint8_t ccell_result_selectors_rgb[16];
1258 memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
1259 ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
1260 ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1261
1262 uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params);
1263
1264 color_cell_compressor_params ccell_params_a;
1265 memset(&ccell_params_a, 0, sizeof(ccell_params_a));
1266
1267 ccell_params_a.m_num_pixels = 16;
1268 ccell_params_a.m_pPixels = block_a;
1269 ccell_params_a.m_num_selector_weights = 4;
1270 ccell_params_a.m_pSelector_weights = g_bc7_weights2;
1271 ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1272 ccell_params_a.m_astc_endpoint_range = endpoint_range;
1273 ccell_params_a.m_weights[0] = 1;
1274 ccell_params_a.m_weights[1] = 1;
1275 ccell_params_a.m_weights[2] = 1;
1276 ccell_params_a.m_weights[3] = 1;
1277
1278 color_cell_compressor_results ccell_results_a;
1279 uint8_t ccell_result_selectors_a[16];
1280 memset(&ccell_results_a, 0, sizeof(ccell_results_a));
1281 ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
1282 ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1283
1284 uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3;
1285
1286 uint64_t total_err = part_err_rgb + part_err_a;
1287
1288 // ASTC
1289 astc_block_desc blk;
1290 memset(&blk, 0, sizeof(blk));
1291
1292 blk.m_dual_plane = true;
1293 blk.m_weight_range = weight_range;
1294
1295 blk.m_ccs = rot_comp;
1296 blk.m_subsets = 1;
1297 blk.m_partition_seed = 0;
1298 blk.m_cem = 8;
1299
1300 blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
1301 blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
1302 blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
1303 blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
1304 blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
1305 blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
1306
1307 bool invert = false;
1308
1309 int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1310 int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1311 if (s1 < s0)
1312 {
1313 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1314 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1315 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1316 invert = true;
1317 }
1318
1319 for (uint32_t y = 0; y < 4; y++)
1320 {
1321 for (uint32_t x = 0; x < 4; x++)
1322 {
1323 uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
1324 uint32_t a_index = ccell_result_selectors_a[x + y * 4];
1325
1326 if (invert)
1327 {
1328 rgb_index = 3 - rgb_index;
1329 a_index = 3 - a_index;
1330 }
1331
1332 blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
1333 blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
1334 }
1335 }
1336
1337 assert(total_results < MAX_ENCODE_RESULTS);
1338 if (total_results < MAX_ENCODE_RESULTS)
1339 {
1340 pResults[total_results].m_uastc_mode = 6;
1341 pResults[total_results].m_common_pattern = 0;
1342 pResults[total_results].m_astc = blk;
1343 pResults[total_results].m_astc_err = total_err;
1344 total_results++;
1345 }
1346 } // rot_comp
1347 }
1348
1349 // MODE 7 - 2 subset ASTC, 3 subset BC7
1350 // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE2
1351 static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition)
1352 {
1353 uint32_t first_common_pattern = 0;
1354 uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS;
1355
1356 if (estimate_partition)
1357 {
1358 uint64_t best_err = UINT64_MAX;
1359 uint32_t best_common_pattern = 0;
1360 const uint32_t weights[4] = { 1, 1, 1, 1 };
1361
1362 for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++)
1363 {
1364 const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0];
1365
1366#ifdef _DEBUG
1367 const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
1368 const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
1369 const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
1370
1371 for (uint32_t y = 0; y < 4; y++)
1372 {
1373 for (uint32_t x = 0; x < 4; x++)
1374 {
1375 const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1376 assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
1377 assert(astc_part == pPartition[x + y * 4]);
1378 }
1379 }
1380#endif
1381
1382 color_quad_u8 subset_colors[2][16];
1383 uint32_t subset_total_colors[2] = { 0, 0 };
1384 for (uint32_t index = 0; index < 16; index++)
1385 subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
1386
1387 uint64_t total_subset_err = 0;
1388 for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++)
1389 total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights);
1390
1391 if (total_subset_err < best_err)
1392 {
1393 best_err = total_subset_err;
1394 best_common_pattern = common_pattern;
1395 }
1396 }
1397
1398 first_common_pattern = best_common_pattern;
1399 last_common_pattern = best_common_pattern + 1;
1400 }
1401
1402 //const uint32_t weight_range = 2;
1403 const uint32_t endpoint_range = 12;
1404
1405 for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++)
1406 {
1407 const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2;
1408 const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73;
1409 const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k;
1410
1411 color_rgba part_pixels[2][16];
1412 uint32_t part_pixel_index[4][4];
1413 uint32_t num_part_pixels[2] = { 0, 0 };
1414
1415 for (uint32_t y = 0; y < 4; y++)
1416 {
1417 for (uint32_t x = 0; x < 4; x++)
1418 {
1419 const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1420#ifdef _DEBUG
1421 assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true));
1422#endif
1423
1424 part_pixel_index[y][x] = num_part_pixels[astc_part];
1425 part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x];
1426 }
1427 }
1428
1429 color_cell_compressor_params ccell_params[2];
1430 color_cell_compressor_results ccell_results[2];
1431 uint8_t ccell_result_selectors[2][16];
1432 uint8_t ccell_result_selectors_temp[2][16];
1433
1434 uint64_t total_part_err = 0;
1435 for (uint32_t part = 0; part < 2; part++)
1436 {
1437 memset(&ccell_params[part], 0, sizeof(ccell_params[part]));
1438
1439 ccell_params[part].m_num_pixels = num_part_pixels[part];
1440 ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0];
1441 ccell_params[part].m_num_selector_weights = 4;
1442 ccell_params[part].m_pSelector_weights = g_bc7_weights2;
1443 ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1444 ccell_params[part].m_astc_endpoint_range = endpoint_range;
1445 ccell_params[part].m_weights[0] = 1;
1446 ccell_params[part].m_weights[1] = 1;
1447 ccell_params[part].m_weights[2] = 1;
1448 ccell_params[part].m_weights[3] = 1;
1449
1450 memset(&ccell_results[part], 0, sizeof(ccell_results[part]));
1451 ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0];
1452 ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0];
1453
1454 uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params);
1455 total_part_err += part_err;
1456 } // part
1457
1458 // ASTC
1459 astc_block_desc blk;
1460 memset(&blk, 0, sizeof(blk));
1461
1462 blk.m_dual_plane = false;
1463 blk.m_weight_range = 2;
1464
1465 blk.m_ccs = 0;
1466 blk.m_subsets = 2;
1467 blk.m_partition_seed = astc_pattern;
1468 blk.m_cem = 8;
1469
1470 const uint32_t p0 = 0;
1471 const uint32_t p1 = 1;
1472
1473 blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0];
1474 blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0];
1475 blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1];
1476 blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1];
1477 blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2];
1478 blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2];
1479
1480 bool invert[2] = { false, false };
1481
1482 int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
1483 int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
1484 if (s1 < s0)
1485 {
1486 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
1487 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
1488 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
1489 invert[0] = true;
1490 }
1491
1492 blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0];
1493 blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0];
1494 blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1];
1495 blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1];
1496 blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2];
1497 blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2];
1498
1499 s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant;
1500 s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant;
1501
1502 if (s1 < s0)
1503 {
1504 std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]);
1505 std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]);
1506 std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]);
1507 invert[1] = true;
1508 }
1509
1510 for (uint32_t y = 0; y < 4; y++)
1511 {
1512 for (uint32_t x = 0; x < 4; x++)
1513 {
1514 const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k);
1515
1516 blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]];
1517
1518 if (invert[astc_part])
1519 blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4];
1520 }
1521 }
1522
1523 assert(total_results < MAX_ENCODE_RESULTS);
1524 if (total_results < MAX_ENCODE_RESULTS)
1525 {
1526 pResults[total_results].m_uastc_mode = 7;
1527 pResults[total_results].m_common_pattern = common_pattern;
1528 pResults[total_results].m_astc = blk;
1529 pResults[total_results].m_astc_err = total_part_err;
1530 total_results++;
1531 }
1532
1533 } // common_pattern
1534 }
1535
1536 static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4])
1537 {
1538 assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64);
1539
1540 const uint32_t MAX_PARTS = 8;
1541 assert(max_parts <= MAX_PARTS);
1542
1543 uint64_t part_error[MAX_PARTS];
1544 memset(part_error, 0xFF, sizeof(part_error));
1545 memset(pParts, 0, sizeof(pParts[0]) * max_parts);
1546
1547 for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++)
1548 {
1549 const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1550
1551 const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16];
1552
1553 color_quad_u8 subset_colors[2][16];
1554 uint32_t subset_total_colors[2] = { 0, 0 };
1555 for (uint32_t index = 0; index < 16; index++)
1556 subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index];
1557
1558 uint64_t total_subset_err = 0;
1559 for (uint32_t subset = 0; subset < 2; subset++)
1560 total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights);
1561
1562 for (int i = 0; i < (int)max_parts; i++)
1563 {
1564 if (total_subset_err < part_error[i])
1565 {
1566 for (int j = max_parts - 1; j > i; --j)
1567 {
1568 pParts[j] = pParts[j - 1];
1569 part_error[j] = part_error[j - 1];
1570 }
1571
1572 pParts[i] = common_pattern;
1573 part_error[i] = total_subset_err;
1574
1575 break;
1576 }
1577 }
1578 }
1579
1580#ifdef _DEBUG
1581 for (uint32_t i = 0; i < max_parts - 1; i++)
1582 {
1583 assert(part_error[i] <= part_error[i + 1]);
1584 }
1585#endif
1586 }
1587
1588 // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7
1589 // 16. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7
1590 static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size)
1591 {
1592 assert(mode == 9 || mode == 16);
1593
1594 const color_rgba* pBlock = &source_block[0][0];
1595
1596 color_rgba temp_block[16];
1597 if (mode == 16)
1598 {
1599 for (uint32_t i = 0; i < 16; i++)
1600 {
1601 if (mode == 16)
1602 {
1603 assert(pBlock[i].r == pBlock[i].g);
1604 assert(pBlock[i].r == pBlock[i].b);
1605 }
1606
1607 const uint32_t l = pBlock[i].r;
1608 const uint32_t a = pBlock[i].a;
1609
1610 // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
1611 temp_block[i].set_noclamp_rgba(l, 0, 0, a);
1612 }
1613
1614 pBlock = temp_block;
1615 }
1616
1617 const uint32_t weights[4] = { 1, 1, 1, 1 };
1618
1619 //const uint32_t weight_range = 2;
1620 const uint32_t endpoint_range = (mode == 16) ? 20 : 8;
1621
1622 uint32_t first_common_pattern = 0;
1623 uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2;
1624 bool use_part_list = false;
1625
1626 const uint32_t MAX_PARTS = 8;
1627 uint32_t parts[MAX_PARTS];
1628
1629 if (estimate_partition_list_size == 1)
1630 {
1631 first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights);
1632 last_common_pattern = first_common_pattern + 1;
1633 }
1634 else if (estimate_partition_list_size > 0)
1635 {
1636 assert(estimate_partition_list_size <= MAX_PARTS);
1637 estimate_partition_list_size = basisu::minimum(estimate_partition_list_size, MAX_PARTS);
1638
1639 estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights);
1640
1641 first_common_pattern = 0;
1642 last_common_pattern = estimate_partition_list_size;
1643 use_part_list = true;
1644
1645#ifdef _DEBUG
1646 assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights));
1647#endif
1648 }
1649
1650 for (uint32_t common_pattern_iter = first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++)
1651 {
1652 const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter;
1653
1654 const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7;
1655
1656 color_rgba part_pixels[2][16];
1657 uint32_t part_pixel_index[4][4];
1658 uint32_t num_part_pixels[2] = { 0, 0 };
1659
1660 for (uint32_t y = 0; y < 4; y++)
1661 {
1662 for (uint32_t x = 0; x < 4; x++)
1663 {
1664 const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1665 part_pixel_index[y][x] = num_part_pixels[part];
1666 part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x];
1667 }
1668 }
1669
1670 color_cell_compressor_params ccell_params[2];
1671 color_cell_compressor_results ccell_results[2];
1672 uint8_t ccell_result_selectors[2][16];
1673 uint8_t ccell_result_selectors_temp[2][16];
1674
1675 uint64_t total_err = 0;
1676 for (uint32_t subset = 0; subset < 2; subset++)
1677 {
1678 memset(&ccell_params[subset], 0, sizeof(ccell_params[subset]));
1679
1680 ccell_params[subset].m_num_pixels = num_part_pixels[subset];
1681 ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0];
1682 ccell_params[subset].m_num_selector_weights = 4;
1683 ccell_params[subset].m_pSelector_weights = g_bc7_weights2;
1684 ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1685 ccell_params[subset].m_astc_endpoint_range = endpoint_range;
1686 ccell_params[subset].m_weights[0] = weights[0];
1687 ccell_params[subset].m_weights[1] = weights[1];
1688 ccell_params[subset].m_weights[2] = weights[2];
1689 ccell_params[subset].m_weights[3] = weights[3];
1690 ccell_params[subset].m_has_alpha = true;
1691
1692 memset(&ccell_results[subset], 0, sizeof(ccell_results[subset]));
1693 ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0];
1694 ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0];
1695
1696 uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params);
1697
1698 if (mode == 16)
1699 {
1700 color_rgba colors[4];
1701 for (uint32_t c = 0; c < 4; c++)
1702 {
1703 colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
1704 colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
1705 }
1706
1707 for (uint32_t i = 1; i < 4 - 1; i++)
1708 for (uint32_t c = 0; c < 4; c++)
1709 colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false);
1710
1711 for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++)
1712 {
1713 color_rgba orig_pix(part_pixels[subset][p]);
1714 orig_pix.g = orig_pix.r;
1715 orig_pix.b = orig_pix.r;
1716 total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]);
1717 }
1718 }
1719 else
1720 {
1721 total_err += subset_err;
1722 }
1723 } // subset
1724
1725 // ASTC
1726 astc_block_desc astc_results;
1727 memset(&astc_results, 0, sizeof(astc_results));
1728
1729 astc_results.m_dual_plane = false;
1730 astc_results.m_weight_range = 2;
1731
1732 astc_results.m_ccs = 0;
1733 astc_results.m_subsets = 2;
1734 astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc;
1735 astc_results.m_cem = (mode == 16) ? 4 : 12;
1736
1737 uint32_t part[2] = { 0, 1 };
1738 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1739 std::swap(part[0], part[1]);
1740
1741 bool invert[2] = { false, false };
1742
1743 for (uint32_t p = 0; p < 2; p++)
1744 {
1745 if (mode == 16)
1746 {
1747 astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0];
1748 astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0];
1749
1750 astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3];
1751 astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3];
1752 }
1753 else
1754 {
1755 for (uint32_t c = 0; c < 4; c++)
1756 {
1757 astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c];
1758 astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c];
1759 }
1760
1761 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant +
1762 g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant +
1763 g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant;
1764
1765 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant +
1766 g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant +
1767 g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant;
1768
1769 if (s1 < s0)
1770 {
1771 std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]);
1772 std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]);
1773 std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]);
1774 std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]);
1775 invert[p] = true;
1776 }
1777 }
1778 }
1779
1780 for (uint32_t y = 0; y < 4; y++)
1781 {
1782 for (uint32_t x = 0; x < 4; x++)
1783 {
1784 const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4];
1785
1786 astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]];
1787
1788 uint32_t astc_part = bc7_part;
1789 if (g_astc_bc7_common_partitions2[common_pattern].m_invert)
1790 astc_part = 1 - astc_part;
1791
1792 if (invert[astc_part])
1793 astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
1794 }
1795 }
1796
1797 assert(total_results < MAX_ENCODE_RESULTS);
1798 if (total_results < MAX_ENCODE_RESULTS)
1799 {
1800 pResults[total_results].m_uastc_mode = mode;
1801 pResults[total_results].m_common_pattern = common_pattern;
1802 pResults[total_results].m_astc = astc_results;
1803 pResults[total_results].m_astc_err = total_err;
1804 total_results++;
1805 }
1806
1807 } // common_pattern
1808 }
1809
1810 // MODE 10
1811 // DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6
1812 static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1813 {
1814 const uint32_t weight_range = 8;
1815 const uint32_t endpoint_range = 13;
1816
1817 color_cell_compressor_params ccell_params;
1818 memset(&ccell_params, 0, sizeof(ccell_params));
1819
1820 ccell_params.m_num_pixels = 16;
1821 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
1822 ccell_params.m_num_selector_weights = 16;
1823 ccell_params.m_pSelector_weights = g_astc_weights4;
1824 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
1825 ccell_params.m_astc_endpoint_range = endpoint_range;
1826 ccell_params.m_weights[0] = 1;
1827 ccell_params.m_weights[1] = 1;
1828 ccell_params.m_weights[2] = 1;
1829 ccell_params.m_weights[3] = 1;
1830 ccell_params.m_has_alpha = true;
1831
1832 color_cell_compressor_results ccell_results;
1833 uint8_t ccell_result_selectors[16];
1834 uint8_t ccell_result_selectors_temp[16];
1835 memset(&ccell_results, 0, sizeof(ccell_results));
1836 ccell_results.m_pSelectors = &ccell_result_selectors[0];
1837 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1838
1839 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
1840
1841 // ASTC
1842 astc_block_desc astc_results;
1843 memset(&astc_results, 0, sizeof(astc_results));
1844
1845 astc_results.m_dual_plane = false;
1846 astc_results.m_weight_range = weight_range;
1847
1848 astc_results.m_ccs = 0;
1849 astc_results.m_subsets = 1;
1850 astc_results.m_partition_seed = 0;
1851 astc_results.m_cem = 12;
1852
1853 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
1854 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
1855 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
1856 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
1857 astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
1858 astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
1859 astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
1860 astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
1861
1862 bool invert = false;
1863
1864 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
1865 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
1866 if (s1 < s0)
1867 {
1868 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
1869 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
1870 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
1871 std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
1872 invert = true;
1873 }
1874
1875 for (uint32_t y = 0; y < 4; y++)
1876 {
1877 for (uint32_t x = 0; x < 4; x++)
1878 {
1879 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
1880
1881 if (invert)
1882 astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4];
1883 }
1884 }
1885
1886 assert(total_results < MAX_ENCODE_RESULTS);
1887 if (total_results < MAX_ENCODE_RESULTS)
1888 {
1889 pResults[total_results].m_uastc_mode = 10;
1890 pResults[total_results].m_common_pattern = 0;
1891 pResults[total_results].m_astc = astc_results;
1892 pResults[total_results].m_astc_err = part_err;
1893 total_results++;
1894 }
1895 }
1896
1897 // 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5
1898 // 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5
1899 static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
1900 {
1901 assert((mode == 11) || (mode == 17));
1902
1903 const uint32_t weight_range = 2;
1904 const uint32_t endpoint_range = (mode == 17) ? 20 : 13;
1905
1906 bc7enc_compress_block_params local_comp_params(comp_params);
1907 local_comp_params.m_perceptual = false;
1908 local_comp_params.m_weights[0] = 1;
1909 local_comp_params.m_weights[1] = 1;
1910 local_comp_params.m_weights[2] = 1;
1911 local_comp_params.m_weights[3] = 1;
1912
1913 const uint32_t last_rot_comp = (mode == 17) ? 1 : 4;
1914
1915 for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++)
1916 {
1917 color_quad_u8 block_rgb[16];
1918 color_quad_u8 block_a[16];
1919 for (uint32_t i = 0; i < 16; i++)
1920 {
1921 block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
1922 block_a[i] = block_rgb[i];
1923
1924 if (mode == 17)
1925 {
1926 assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]);
1927 assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]);
1928
1929 block_a[i].m_c[0] = block_rgb[i].m_c[3];
1930 block_a[i].m_c[1] = block_rgb[i].m_c[3];
1931 block_a[i].m_c[2] = block_rgb[i].m_c[3];
1932 block_a[i].m_c[3] = 255;
1933
1934 block_rgb[i].m_c[1] = block_rgb[i].m_c[0];
1935 block_rgb[i].m_c[2] = block_rgb[i].m_c[0];
1936 block_rgb[i].m_c[3] = 255;
1937 }
1938 else
1939 {
1940 uint8_t c = block_a[i].m_c[rot_comp];
1941 block_a[i].m_c[0] = c;
1942 block_a[i].m_c[1] = c;
1943 block_a[i].m_c[2] = c;
1944 block_a[i].m_c[3] = 255;
1945
1946 block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
1947 block_rgb[i].m_c[3] = 255;
1948 }
1949 }
1950
1951 uint8_t ccell_result_selectors_temp[16];
1952
1953 color_cell_compressor_params ccell_params_rgb;
1954 memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
1955
1956 ccell_params_rgb.m_num_pixels = 16;
1957 ccell_params_rgb.m_pPixels = block_rgb;
1958 ccell_params_rgb.m_num_selector_weights = 4;
1959 ccell_params_rgb.m_pSelector_weights = g_bc7_weights2;
1960 ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1961 ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
1962 ccell_params_rgb.m_weights[0] = 1;
1963 ccell_params_rgb.m_weights[1] = 1;
1964 ccell_params_rgb.m_weights[2] = 1;
1965 ccell_params_rgb.m_weights[3] = 1;
1966
1967 color_cell_compressor_results ccell_results_rgb;
1968 uint8_t ccell_result_selectors_rgb[16];
1969 memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
1970 ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
1971 ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1972
1973 uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
1974
1975 color_cell_compressor_params ccell_params_a;
1976 memset(&ccell_params_a, 0, sizeof(ccell_params_a));
1977
1978 ccell_params_a.m_num_pixels = 16;
1979 ccell_params_a.m_pPixels = block_a;
1980 ccell_params_a.m_num_selector_weights = 4;
1981 ccell_params_a.m_pSelector_weights = g_bc7_weights2;
1982 ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
1983 ccell_params_a.m_astc_endpoint_range = endpoint_range;
1984 ccell_params_a.m_weights[0] = 1;
1985 ccell_params_a.m_weights[1] = 1;
1986 ccell_params_a.m_weights[2] = 1;
1987 ccell_params_a.m_weights[3] = 1;
1988
1989 color_cell_compressor_results ccell_results_a;
1990 uint8_t ccell_result_selectors_a[16];
1991 memset(&ccell_results_a, 0, sizeof(ccell_results_a));
1992 ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
1993 ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
1994
1995 uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
1996
1997 uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a);
1998
1999 // ASTC
2000 astc_block_desc blk;
2001 memset(&blk, 0, sizeof(blk));
2002
2003 blk.m_dual_plane = true;
2004 blk.m_weight_range = weight_range;
2005
2006 blk.m_ccs = (mode == 17) ? 3 : rot_comp;
2007 blk.m_subsets = 1;
2008 blk.m_partition_seed = 0;
2009 blk.m_cem = (mode == 17) ? 4 : 12;
2010
2011 bool invert = false;
2012
2013 if (mode == 17)
2014 {
2015 assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]);
2016 assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]);
2017
2018 assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]);
2019 assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]);
2020
2021 blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0];
2022 blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0];
2023
2024 blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2025 blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2026 }
2027 else
2028 {
2029 blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
2030 blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
2031 blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
2032 blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
2033 blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
2034 blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
2035 if (rot_comp == 3)
2036 {
2037 blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2038 blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2039 }
2040 else
2041 {
2042 blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
2043 blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
2044 }
2045
2046 int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
2047 int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
2048 if (s1 < s0)
2049 {
2050 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
2051 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
2052 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
2053 std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
2054 invert = true;
2055 }
2056 }
2057
2058 for (uint32_t y = 0; y < 4; y++)
2059 {
2060 for (uint32_t x = 0; x < 4; x++)
2061 {
2062 uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
2063 uint32_t a_index = ccell_result_selectors_a[x + y * 4];
2064
2065 if (invert)
2066 {
2067 rgb_index = 3 - rgb_index;
2068 a_index = 3 - a_index;
2069 }
2070
2071 blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
2072 blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
2073 }
2074 }
2075
2076 assert(total_results < MAX_ENCODE_RESULTS);
2077 if (total_results < MAX_ENCODE_RESULTS)
2078 {
2079 pResults[total_results].m_uastc_mode = mode;
2080 pResults[total_results].m_common_pattern = 0;
2081 pResults[total_results].m_astc = blk;
2082 pResults[total_results].m_astc_err = total_err;
2083 total_results++;
2084 }
2085 } // rot_comp
2086 }
2087
2088 // MODE 12
2089 // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6
2090 static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2091 {
2092 const uint32_t weight_range = 5;
2093 const uint32_t endpoint_range = 19;
2094
2095 color_cell_compressor_params ccell_params;
2096 memset(&ccell_params, 0, sizeof(ccell_params));
2097
2098 ccell_params.m_num_pixels = 16;
2099 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2100 ccell_params.m_num_selector_weights = 8;
2101 ccell_params.m_pSelector_weights = g_bc7_weights3;
2102 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x;
2103 ccell_params.m_astc_endpoint_range = endpoint_range;
2104 ccell_params.m_weights[0] = 1;
2105 ccell_params.m_weights[1] = 1;
2106 ccell_params.m_weights[2] = 1;
2107 ccell_params.m_weights[3] = 1;
2108 ccell_params.m_has_alpha = true;
2109
2110 color_cell_compressor_results ccell_results;
2111 uint8_t ccell_result_selectors[16];
2112 uint8_t ccell_result_selectors_temp[16];
2113 memset(&ccell_results, 0, sizeof(ccell_results));
2114 ccell_results.m_pSelectors = &ccell_result_selectors[0];
2115 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2116
2117 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2118
2119 // ASTC
2120 astc_block_desc astc_results;
2121 memset(&astc_results, 0, sizeof(astc_results));
2122
2123 astc_results.m_dual_plane = false;
2124 astc_results.m_weight_range = weight_range;
2125
2126 astc_results.m_ccs = 0;
2127 astc_results.m_subsets = 1;
2128 astc_results.m_partition_seed = 0;
2129 astc_results.m_cem = 12;
2130
2131 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2132 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2133 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
2134 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
2135 astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
2136 astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
2137 astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
2138 astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
2139
2140 bool invert = false;
2141
2142 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
2143 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
2144 if (s1 < s0)
2145 {
2146 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
2147 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
2148 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
2149 std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
2150 invert = true;
2151 }
2152
2153 for (uint32_t y = 0; y < 4; y++)
2154 {
2155 for (uint32_t x = 0; x < 4; x++)
2156 {
2157 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2158
2159 if (invert)
2160 astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4];
2161 }
2162 }
2163
2164 assert(total_results < MAX_ENCODE_RESULTS);
2165 if (total_results < MAX_ENCODE_RESULTS)
2166 {
2167 pResults[total_results].m_uastc_mode = 12;
2168 pResults[total_results].m_common_pattern = 0;
2169 pResults[total_results].m_astc = astc_results;
2170 pResults[total_results].m_astc_err = part_err;
2171 total_results++;
2172 }
2173 }
2174
2175 // 13. DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5
2176 static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2177 {
2178 bc7enc_compress_block_params local_comp_params(comp_params);
2179 local_comp_params.m_perceptual = false;
2180 local_comp_params.m_weights[0] = 1;
2181 local_comp_params.m_weights[1] = 1;
2182 local_comp_params.m_weights[2] = 1;
2183 local_comp_params.m_weights[3] = 1;
2184
2185 for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++)
2186 {
2187 const uint32_t weight_range = 0;
2188 const uint32_t endpoint_range = 20;
2189
2190 color_quad_u8 block_rgb[16];
2191 color_quad_u8 block_a[16];
2192 for (uint32_t i = 0; i < 16; i++)
2193 {
2194 block_rgb[i] = ((color_quad_u8*)&block[0][0])[i];
2195 block_a[i] = block_rgb[i];
2196
2197 uint8_t c = block_a[i].m_c[rot_comp];
2198 block_a[i].m_c[0] = c;
2199 block_a[i].m_c[1] = c;
2200 block_a[i].m_c[2] = c;
2201 block_a[i].m_c[3] = 255;
2202
2203 block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3];
2204 block_rgb[i].m_c[3] = 255;
2205 }
2206
2207 uint8_t ccell_result_selectors_temp[16];
2208
2209 color_cell_compressor_params ccell_params_rgb;
2210 memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb));
2211
2212 ccell_params_rgb.m_num_pixels = 16;
2213 ccell_params_rgb.m_pPixels = block_rgb;
2214 ccell_params_rgb.m_num_selector_weights = 2;
2215 ccell_params_rgb.m_pSelector_weights = g_bc7_weights1;
2216 ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
2217 ccell_params_rgb.m_astc_endpoint_range = endpoint_range;
2218 ccell_params_rgb.m_weights[0] = 1;
2219 ccell_params_rgb.m_weights[1] = 1;
2220 ccell_params_rgb.m_weights[2] = 1;
2221 ccell_params_rgb.m_weights[3] = 1;
2222
2223 color_cell_compressor_results ccell_results_rgb;
2224 uint8_t ccell_result_selectors_rgb[16];
2225 memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb));
2226 ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0];
2227 ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2228
2229 uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params);
2230
2231 color_cell_compressor_params ccell_params_a;
2232 memset(&ccell_params_a, 0, sizeof(ccell_params_a));
2233
2234 ccell_params_a.m_num_pixels = 16;
2235 ccell_params_a.m_pPixels = block_a;
2236 ccell_params_a.m_num_selector_weights = 2;
2237 ccell_params_a.m_pSelector_weights = g_bc7_weights1;
2238 ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x;
2239 ccell_params_a.m_astc_endpoint_range = endpoint_range;
2240 ccell_params_a.m_weights[0] = 1;
2241 ccell_params_a.m_weights[1] = 1;
2242 ccell_params_a.m_weights[2] = 1;
2243 ccell_params_a.m_weights[3] = 1;
2244
2245 color_cell_compressor_results ccell_results_a;
2246 uint8_t ccell_result_selectors_a[16];
2247 memset(&ccell_results_a, 0, sizeof(ccell_results_a));
2248 ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0];
2249 ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2250
2251 uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3;
2252
2253 uint64_t total_err = part_err_rgb + part_err_a;
2254
2255 // ASTC
2256 astc_block_desc blk;
2257 memset(&blk, 0, sizeof(blk));
2258
2259 blk.m_dual_plane = true;
2260 blk.m_weight_range = weight_range;
2261
2262 blk.m_ccs = rot_comp;
2263 blk.m_subsets = 1;
2264 blk.m_partition_seed = 0;
2265 blk.m_cem = 12;
2266
2267 blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0];
2268 blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0];
2269 blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1];
2270 blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1];
2271 blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2];
2272 blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2];
2273 if (rot_comp == 3)
2274 {
2275 blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0];
2276 blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0];
2277 }
2278 else
2279 {
2280 blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp];
2281 blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp];
2282 }
2283
2284 bool invert = false;
2285
2286 int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant;
2287 int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant;
2288 if (s1 < s0)
2289 {
2290 std::swap(blk.m_endpoints[0], blk.m_endpoints[1]);
2291 std::swap(blk.m_endpoints[2], blk.m_endpoints[3]);
2292 std::swap(blk.m_endpoints[4], blk.m_endpoints[5]);
2293 std::swap(blk.m_endpoints[6], blk.m_endpoints[7]);
2294 invert = true;
2295 }
2296
2297 for (uint32_t y = 0; y < 4; y++)
2298 {
2299 for (uint32_t x = 0; x < 4; x++)
2300 {
2301 uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4];
2302 uint32_t a_index = ccell_result_selectors_a[x + y * 4];
2303
2304 if (invert)
2305 {
2306 rgb_index = 1 - rgb_index;
2307 a_index = 1 - a_index;
2308 }
2309
2310 blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index;
2311 blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index;
2312 }
2313 }
2314
2315 assert(total_results < MAX_ENCODE_RESULTS);
2316 if (total_results < MAX_ENCODE_RESULTS)
2317 {
2318 pResults[total_results].m_uastc_mode = 13;
2319 pResults[total_results].m_common_pattern = 0;
2320 pResults[total_results].m_astc = blk;
2321 pResults[total_results].m_astc_err = total_err;
2322 total_results++;
2323 }
2324 } // rot_comp
2325 }
2326
2327 // MODE14
2328 // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6
2329 static void astc_mode14(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2330 {
2331 const uint32_t weight_range = 2;
2332 const uint32_t endpoint_range = 20;
2333
2334 color_cell_compressor_params ccell_params;
2335 memset(&ccell_params, 0, sizeof(ccell_params));
2336
2337 ccell_params.m_num_pixels = 16;
2338 ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2339 ccell_params.m_num_selector_weights = 4;
2340 ccell_params.m_pSelector_weights = g_bc7_weights2;
2341 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x;
2342 ccell_params.m_astc_endpoint_range = endpoint_range;
2343 ccell_params.m_weights[0] = 1;
2344 ccell_params.m_weights[1] = 1;
2345 ccell_params.m_weights[2] = 1;
2346 ccell_params.m_weights[3] = 1;
2347 ccell_params.m_has_alpha = true;
2348
2349 color_cell_compressor_results ccell_results;
2350 uint8_t ccell_result_selectors[16];
2351 uint8_t ccell_result_selectors_temp[16];
2352 memset(&ccell_results, 0, sizeof(ccell_results));
2353 ccell_results.m_pSelectors = &ccell_result_selectors[0];
2354 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2355
2356 uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2357
2358 // ASTC
2359 astc_block_desc astc_results;
2360 memset(&astc_results, 0, sizeof(astc_results));
2361
2362 astc_results.m_dual_plane = false;
2363 astc_results.m_weight_range = weight_range;
2364
2365 astc_results.m_ccs = 0;
2366 astc_results.m_subsets = 1;
2367 astc_results.m_partition_seed = 0;
2368 astc_results.m_cem = 12;
2369
2370 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2371 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2372 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1];
2373 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1];
2374 astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2];
2375 astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2];
2376 astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3];
2377 astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3];
2378
2379 bool invert = false;
2380
2381 int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant;
2382 int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant;
2383 if (s1 < s0)
2384 {
2385 std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]);
2386 std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]);
2387 std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]);
2388 std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]);
2389 invert = true;
2390 }
2391
2392 for (uint32_t y = 0; y < 4; y++)
2393 {
2394 for (uint32_t x = 0; x < 4; x++)
2395 {
2396 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2397
2398 if (invert)
2399 astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4];
2400 }
2401 }
2402
2403 assert(total_results < MAX_ENCODE_RESULTS);
2404 if (total_results < MAX_ENCODE_RESULTS)
2405 {
2406 pResults[total_results].m_uastc_mode = 14;
2407 pResults[total_results].m_common_pattern = 0;
2408 pResults[total_results].m_astc = astc_results;
2409 pResults[total_results].m_astc_err = part_err;
2410 total_results++;
2411 }
2412 }
2413
2414 // MODE 15
2415 // DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE6
2416 static void astc_mode15(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params)
2417 {
2418 const uint32_t weight_range = 8;
2419 const uint32_t endpoint_range = 20;
2420
2421 color_cell_compressor_params ccell_params;
2422 memset(&ccell_params, 0, sizeof(ccell_params));
2423
2424 color_rgba temp_block[16];
2425 for (uint32_t i = 0; i < 16; i++)
2426 {
2427 const uint32_t l = ((const color_rgba*)block)[i].r;
2428 const uint32_t a = ((const color_rgba*)block)[i].a;
2429
2430 // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally.
2431 temp_block[i].set_noclamp_rgba(l, 0, 0, a);
2432 }
2433
2434 ccell_params.m_num_pixels = 16;
2435 //ccell_params.m_pPixels = (color_quad_u8*)&block[0][0];
2436 ccell_params.m_pPixels = (color_quad_u8*)temp_block;
2437 ccell_params.m_num_selector_weights = 16;
2438 ccell_params.m_pSelector_weights = g_astc_weights4;
2439 ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x;
2440 ccell_params.m_astc_endpoint_range = endpoint_range;
2441 ccell_params.m_weights[0] = 1;
2442 ccell_params.m_weights[1] = 1;
2443 ccell_params.m_weights[2] = 1;
2444 ccell_params.m_weights[3] = 1;
2445 ccell_params.m_has_alpha = true;
2446
2447 color_cell_compressor_results ccell_results;
2448 uint8_t ccell_result_selectors[16];
2449 uint8_t ccell_result_selectors_temp[16];
2450 memset(&ccell_results, 0, sizeof(ccell_results));
2451 ccell_results.m_pSelectors = &ccell_result_selectors[0];
2452 ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0];
2453
2454 color_cell_compression(255, &ccell_params, &ccell_results, &comp_params);
2455
2456 // ASTC
2457 astc_block_desc astc_results;
2458 memset(&astc_results, 0, sizeof(astc_results));
2459
2460 astc_results.m_dual_plane = false;
2461 astc_results.m_weight_range = weight_range;
2462
2463 astc_results.m_ccs = 0;
2464 astc_results.m_subsets = 1;
2465 astc_results.m_partition_seed = 0;
2466 astc_results.m_cem = 4;
2467
2468 astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0];
2469 astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0];
2470
2471 astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[3];
2472 astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[3];
2473
2474 for (uint32_t y = 0; y < 4; y++)
2475 for (uint32_t x = 0; x < 4; x++)
2476 astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4];
2477
2478 color_rgba colors[16];
2479 for (uint32_t c = 0; c < 4; c++)
2480 {
2481 colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
2482 colors[15].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant;
2483 }
2484
2485 for (uint32_t i = 1; i < 16 - 1; i++)
2486 for (uint32_t c = 0; c < 4; c++)
2487 colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[15].m_comps[c], g_astc_weights4[i], false);
2488
2489 uint64_t total_err = 0;
2490 for (uint32_t p = 0; p < 16; p++)
2491 total_err += color_distance_la(((const color_rgba*)block)[p], colors[ccell_result_selectors[p]]);
2492
2493 assert(total_results < MAX_ENCODE_RESULTS);
2494 if (total_results < MAX_ENCODE_RESULTS)
2495 {
2496 pResults[total_results].m_uastc_mode = 15;
2497 pResults[total_results].m_common_pattern = 0;
2498 pResults[total_results].m_astc = astc_results;
2499 pResults[total_results].m_astc_err = total_err;
2500 total_results++;
2501 }
2502 }
2503
2504 static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err)
2505 {
2506 uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0;
2507
2508 for (uint32_t y = 0; y < 4; y++)
2509 {
2510 for (uint32_t x = 0; x < 4; x++)
2511 {
2512 const int dr = (int)block[y][x].m_comps[0] - (int)decoded_block[y][x].m_comps[0];
2513 const int dg = (int)block[y][x].m_comps[1] - (int)decoded_block[y][x].m_comps[1];
2514 const int db = (int)block[y][x].m_comps[2] - (int)decoded_block[y][x].m_comps[2];
2515 const int da = (int)block[y][x].m_comps[3] - (int)decoded_block[y][x].m_comps[3];
2516
2517 total_err_r += dr * dr;
2518 total_err_g += dg * dg;
2519 total_err_b += db * db;
2520 total_err_a += da * da;
2521 }
2522 }
2523
2524 total_la_err = total_err_r + total_err_a;
2525 total_rgb_err = total_err_r + total_err_g + total_err_b;
2526 total_rgba_err = total_rgb_err + total_err_a;
2527 }
2528
2529 static void compute_bc1_hints(bool &bc1_hint0, bool &bc1_hint1, const uastc_encode_results &best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4])
2530 {
2531 const uint32_t best_mode = best_results.m_uastc_mode;
2532 const bool perceptual = false;
2533
2534 bc1_hint0 = false;
2535 bc1_hint1 = false;
2536
2537 if (best_mode == UASTC_MODE_INDEX_SOLID_COLOR)
2538 return;
2539
2540 if (!g_uastc_mode_has_bc1_hint0[best_mode] && !g_uastc_mode_has_bc1_hint1[best_mode])
2541 return;
2542
2543 color_rgba tblock_bc1[4][4];
2544 dxt1_block tbc1_block[8];
2545 basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0);
2546 unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]);
2547
2548 color_rgba tblock_hint0_bc1[4][4];
2549 color_rgba tblock_hint1_bc1[4][4];
2550
2551 etc_block etc1_blk;
2552 memset(&etc1_blk, 0, sizeof(etc1_blk));
2553
2554 eac_a8_block etc2_blk;
2555 memset(&etc2_blk, 0, sizeof(etc2_blk));
2556 etc2_blk.m_multiplier = 1;
2557
2558 // Pack to UASTC, then unpack, because the endpoints may be swapped.
2559
2560 uastc_block temp_ublock;
2561 pack_uastc(temp_ublock, best_results, etc1_blk, 0, etc2_blk, false, false);
2562
2563 unpacked_uastc_block temp_ublock_unpacked;
2564 unpack_uastc(temp_ublock, temp_ublock_unpacked, false);
2565
2566 unpacked_uastc_block ublock;
2567 memset(&ublock, 0, sizeof(ublock));
2568 ublock.m_mode = best_results.m_uastc_mode;
2569 ublock.m_common_pattern = best_results.m_common_pattern;
2570 ublock.m_astc = temp_ublock_unpacked.m_astc;
2571
2572 dxt1_block b;
2573
2574 // HINT1
2575 if (!g_uastc_mode_has_bc1_hint1[best_mode])
2576 {
2577 memset(tblock_hint1_bc1, 0, sizeof(tblock_hint1_bc1));
2578 }
2579 else
2580 {
2581 transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false);
2582
2583 unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]);
2584 }
2585
2586 // HINT0
2587 if (!g_uastc_mode_has_bc1_hint0[best_mode])
2588 {
2589 memset(tblock_hint0_bc1, 0, sizeof(tblock_hint0_bc1));
2590 }
2591 else
2592 {
2593 transcode_uastc_to_bc1_hint0(ublock, &b);
2594
2595 unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]);
2596 }
2597
2598 // Compute block errors
2599 uint64_t total_t_err = 0, total_hint0_err = 0, total_hint1_err = 0;
2600 for (uint32_t y = 0; y < 4; y++)
2601 {
2602 for (uint32_t x = 0; x < 4; x++)
2603 {
2604 total_t_err += color_distance(perceptual, block[y][x], tblock_bc1[y][x], false);
2605 total_hint0_err += color_distance(perceptual, block[y][x], tblock_hint0_bc1[y][x], false);
2606 total_hint1_err += color_distance(perceptual, block[y][x], tblock_hint1_bc1[y][x], false);
2607 }
2608 }
2609
2610 const float t_err = sqrtf((float)total_t_err);
2611 const float t_err_hint0 = sqrtf((float)total_hint0_err);
2612 const float t_err_hint1 = sqrtf((float)total_hint1_err);
2613
2614 const float err_thresh0 = 1.075f;
2615 const float err_thresh1 = 1.075f;
2616
2617 if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0))
2618 bc1_hint0 = true;
2619
2620 if ((g_uastc_mode_has_bc1_hint1[best_mode]) && (t_err_hint1 <= t_err * err_thresh1))
2621 bc1_hint1 = true;
2622 }
2623
2624 struct ycbcr
2625 {
2626 int32_t m_y;
2627 int32_t m_cb;
2628 int32_t m_cr;
2629 };
2630
2631 static inline void rgb_to_y_cb_cr(const color_rgba& c, ycbcr& dst)
2632 {
2633 const int y = c.r * 54 + c.g * 183 + c.b * 19;
2634 dst.m_y = y;
2635 dst.m_cb = (c.b << 8) - y;
2636 dst.m_cr = (c.r << 8) - y;
2637 }
2638
2639 static inline uint64_t color_diff(const ycbcr& a, const ycbcr& b)
2640 {
2641 const int y_delta = a.m_y - b.m_y;
2642 const int cb_delta = a.m_cb - b.m_cb;
2643 const int cr_delta = a.m_cr - b.m_cr;
2644 return ((int64_t)y_delta * y_delta * 4) + ((int64_t)cr_delta * cr_delta) + ((int64_t)cb_delta * cb_delta);
2645 }
2646
2647 static inline int gray_distance2(const color_rgba& c, int r, int g, int b)
2648 {
2649 int gray_dist = (((int)c[0] - r) + ((int)c[1] - g) + ((int)c[2] - b) + 1) / 3;
2650
2651 int gray_point_r = clamp255(r + gray_dist);
2652 int gray_point_g = clamp255(g + gray_dist);
2653 int gray_point_b = clamp255(b + gray_dist);
2654
2655 int dist_to_gray_point_r = c[0] - gray_point_r;
2656 int dist_to_gray_point_g = c[1] - gray_point_g;
2657 int dist_to_gray_point_b = c[2] - gray_point_b;
2658
2659 return (dist_to_gray_point_r * dist_to_gray_point_r) + (dist_to_gray_point_g * dist_to_gray_point_g) + (dist_to_gray_point_b * dist_to_gray_point_b);
2660 }
2661
2662 static bool pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels)
2663 {
2664 int sums[3][2][2];
2665
2666#define GET_XY(x, y, c) pSrc_pixels[(x) + ((y) * 4)][c]
2667
2668 for (uint32_t c = 0; c < 3; c++)
2669 {
2670 sums[c][0][0] = GET_XY(0, 0, c) + GET_XY(0, 1, c) + GET_XY(1, 0, c) + GET_XY(1, 1, c);
2671 sums[c][1][0] = GET_XY(2, 0, c) + GET_XY(2, 1, c) + GET_XY(3, 0, c) + GET_XY(3, 1, c);
2672 sums[c][0][1] = GET_XY(0, 2, c) + GET_XY(0, 3, c) + GET_XY(1, 2, c) + GET_XY(1, 3, c);
2673 sums[c][1][1] = GET_XY(2, 2, c) + GET_XY(2, 3, c) + GET_XY(3, 2, c) + GET_XY(3, 3, c);
2674 }
2675
2676 int upper_avg[3], lower_avg[3], left_avg[3], right_avg[3];
2677 for (uint32_t c = 0; c < 3; c++)
2678 {
2679 upper_avg[c] = (sums[c][0][0] + sums[c][1][0] + 4) / 8;
2680 lower_avg[c] = (sums[c][0][1] + sums[c][1][1] + 4) / 8;
2681 left_avg[c] = (sums[c][0][0] + sums[c][0][1] + 4) / 8;
2682 right_avg[c] = (sums[c][1][0] + sums[c][1][1] + 4) / 8;
2683 }
2684
2685#undef GET_XY
2686#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a[0], a[1], a[2])
2687
2688 int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0;
2689 for (uint32_t i = 0; i < 4; i++)
2690 {
2691 for (uint32_t j = 0; j < 2; j++)
2692 {
2693 upper_gray_dist += GET_XY(i, j, upper_avg);
2694 lower_gray_dist += GET_XY(i, 2 + j, lower_avg);
2695 left_gray_dist += GET_XY(j, i, left_avg);
2696 right_gray_dist += GET_XY(2 + j, i, right_avg);
2697 }
2698 }
2699
2700#undef GET_XY
2701
2702 int upper_lower_sum = upper_gray_dist + lower_gray_dist;
2703 int left_right_sum = left_gray_dist + right_gray_dist;
2704
2705 return upper_lower_sum < left_right_sum;
2706 }
2707
	// Chooses the ETC1 block (and ETC1 bias index) to store alongside the chosen UASTC result.
	//
	// best_etc1_blk       - receives the best ETC1 trial block found (flip/diff bits, base colors, intensity tables).
	// best_etc1_bias      - receives the bias index used by that block (0 for solid color blocks).
	// best_results        - the selected UASTC encoding; only its mode (and solid color) is read here.
	// block               - original source pixels; the final candidate ranking uses error against these.
	// decoded_uastc_block - pixels decoded from the UASTC encoding; base colors, intensity tables and
	//                       per-pixel indices are derived from these.
	// level               - pack level (cPackUASTCLevel*), controls how many bias values are tried.
	// flags               - cPackUASTCETC1* flags that further restrict the search.
	//
	// The search is: flip orientation x individual/differential mode x bias x per-subset intensity table.
	// All errors are measured with color_diff() on rgb_to_y_cb_cr() values.
	static void compute_etc1_hints(etc_block& best_etc1_blk, uint32_t& best_etc1_bias, const uastc_encode_results& best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4], int level, uint32_t flags)
	{
		best_etc1_bias = 0;

		// Solid color blocks get a dedicated solid-color ETC1 block and no search.
		if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
		{
			pack_etc1_block_solid_color(best_etc1_blk, &best_results.m_solid_color.m_comps[0]);
			return;
		}

		const bool faster_etc1 = (flags & cPackUASTCETC1FasterHints) != 0;
		const bool fastest_etc1 = (flags & cPackUASTCETC1FastestHints) != 0;

		const bool has_bias = g_uastc_mode_has_etc1_bias[best_results.m_uastc_mode];

		// Bias indices in the order they are tried when only a prefix of the 32 biases is searched.
		// 0 should be at the top, but we need 13 first because it represents bias (0,0,0).
		const uint8_t s_sorted_bias_modes[32] = { 13, 0, 22, 29, 27, 12, 26, 9, 30, 31, 8, 10, 25, 2, 23, 5, 15, 7, 3, 11, 6, 17, 28, 18, 1, 19, 20, 21, 24, 4, 14, 16 };

		// Number of bias iterations. Modes without an ETC1 bias run exactly one iteration with unbiased colors.
		uint32_t last_bias = 1;
		bool use_faster_bias_mode_table = false;
		// When set, only one flip orientation is tried, chosen up front by pack_etc1_estimate_flipped().
		const bool flip_estimate = (level <= cPackUASTCLevelFaster) || (faster_etc1) || (fastest_etc1);
		if (has_bias)
		{
			// Bias budget per level: fastest flag -> 1, faster flag -> reduced count, otherwise the level's full count.
			// Only the default case (all other levels) walks the biases in natural order 0..31.
			switch (level)
			{
			case cPackUASTCLevelFastest:
			{
				last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 1 : 2);
				use_faster_bias_mode_table = true;
				break;
			}
			case cPackUASTCLevelFaster:
			{
				last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 3 : 5);
				use_faster_bias_mode_table = true;
				break;
			}
			case cPackUASTCLevelDefault:
			{
				last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 10 : 20);
				use_faster_bias_mode_table = true;
				break;
			}
			case cPackUASTCLevelSlower:
			{
				last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 16 : 32);
				use_faster_bias_mode_table = true;
				break;
			}
			default:
			{
				last_bias = 32;
				break;
			}
			}
		}

		memset(&best_etc1_blk, 0, sizeof(best_etc1_blk));
		uint64_t best_err = UINT64_MAX;

		etc_block trial_block;
		memset(&trial_block, 0, sizeof(trial_block));

		// Convert both blocks once up front so the inner loops only compare ycbcr values.
		ycbcr block_ycbcr[4][4], decoded_uastc_block_ycbcr[4][4];
		for (uint32_t y = 0; y < 4; y++)
		{
			for (uint32_t x = 0; x < 4; x++)
			{
				rgb_to_y_cb_cr(block[y][x], block_ycbcr[y][x]);
				rgb_to_y_cb_cr(decoded_uastc_block[y][x], decoded_uastc_block_ycbcr[y][x]);
			}
		}

		// Half-open search ranges; by default both flip settings and both color modes are tried.
		uint32_t first_flip = 0, last_flip = 2;
		uint32_t first_individ = 0, last_individ = 2;

		if (flags & cPackUASTCETC1DisableFlipAndIndividual)
		{
			// Only flip == 0 with the differential color mode (individ == 0).
			last_flip = 1;
			last_individ = 1;
		}
		else if (flip_estimate)
		{
			// Try just the orientation the estimator prefers.
			if (pack_etc1_estimate_flipped(&decoded_uastc_block[0][0]))
				first_flip = 1;
			last_flip = first_flip + 1;
		}

		for (uint32_t flip = first_flip; flip < last_flip; flip++)
		{
			trial_block.set_flip_bit(flip != 0);

			for (uint32_t individ = first_individ; individ < last_individ; individ++)
			{
				// Maximum base color component value: 15 in individual mode, 31 in differential mode.
				const uint32_t mul = individ ? 15 : 31;

				trial_block.set_diff_bit(individ == 0);

				color_rgba unbiased_block_colors[2];

				// Per-subset channel bounds of the decoded pixels, used below to pick how many intensity tables to try.
				int min_r[2] = { 255, 255 }, min_g[2] = { 255, 255 }, min_b[2] = { 255, 255 }, max_r[2] = { 0, 0 }, max_g[2] = { 0, 0 }, max_b[2] = { 0, 0 };

				for (uint32_t subset = 0; subset < 2; subset++)
				{
					uint32_t avg_color[3];
					memset(avg_color, 0, sizeof(avg_color));

					for (uint32_t j = 0; j < 8; j++)
					{
						const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j];
						const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x];

						avg_color[0] += p.r;
						avg_color[1] += p.g;
						avg_color[2] += p.b;

						min_r[subset] = basisu::minimum<uint32_t>(min_r[subset], p.r);
						min_g[subset] = basisu::minimum<uint32_t>(min_g[subset], p.g);
						min_b[subset] = basisu::minimum<uint32_t>(min_b[subset], p.b);

						max_r[subset] = basisu::maximum<uint32_t>(max_r[subset], p.r);
						max_g[subset] = basisu::maximum<uint32_t>(max_g[subset], p.g);
						max_b[subset] = basisu::maximum<uint32_t>(max_b[subset], p.b);
					} // j

					// Rounded (sum of 8 pixels) * mul / (8 * 255); 1020 is half of the divisor.
					unbiased_block_colors[subset][0] = (uint8_t)((avg_color[0] * mul + 1020) / (8 * 255));
					unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255));
					unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255));
					unbiased_block_colors[subset][3] = 0;

				} // subset

				for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++)
				{
					const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter;

					color_rgba block_colors[2];
					for (uint32_t subset = 0; subset < 2; subset++)
						block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset];

					if (individ)
						trial_block.set_block_color4(block_colors[0], block_colors[1]);
					else
						trial_block.set_block_color5_clamp(block_colors[0], block_colors[1]);

					// Largest distance, over all channels, from the stored base color to the subset's min/max.
					uint32_t range[2];
					for (uint32_t subset = 0; subset < 2; subset++)
					{
						const color_rgba base_c(trial_block.get_block_color(subset, true));

						const int pos_r = iabs(max_r[subset] - base_c.r);
						const int neg_r = iabs(base_c.r - min_r[subset]);

						const int pos_g = iabs(max_g[subset] - base_c.g);
						const int neg_g = iabs(base_c.g - min_g[subset]);

						const int pos_b = iabs(max_b[subset] - base_c.b);
						const int neg_b = iabs(base_c.b - min_b[subset]);

						range[subset] = maximum(maximum(pos_r, neg_r, pos_g, neg_g), pos_b, neg_b);
					}

					uint32_t best_inten_table[2] = { 0, 0 };

					// Pick each subset's intensity table by error against the DECODED UASTC pixels.
					for (uint32_t subset = 0; subset < 2; subset++)
					{
						uint64_t best_subset_err = UINT64_MAX;

						// All 8 tables at the VerySlow level; otherwise only the first 2, 4 or 8 depending on the subset's range.
						const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 4 : 2));

						for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++)
						{
							trial_block.set_inten_table(subset, inten_table);

							color_rgba color_table[4];
							trial_block.get_block_colors(color_table, subset);

							ycbcr color_table_ycbcr[4];
							for (uint32_t i = 0; i < 4; i++)
								rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]);

							uint64_t total_error = 0;
							if (flip)
							{
								// Flipped: each subset is a 4x2 (wide) half, rows subset*2 .. subset*2+1.
								for (uint32_t y = 0; y < 2; y++)
								{
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][0];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][1];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][2];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][3];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
									// Early out of the row loop only: this table cannot beat the best so far.
									if (total_error >= best_subset_err)
										break;
								}
							}
							else
							{
								// Not flipped: each subset is a 2x4 (tall) half, columns subset*2 .. subset*2+1.
								for (uint32_t y = 0; y < 4; y++)
								{
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 0];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
									{
										const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 1];
										total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c));
									}
								}
								// NOTE(review): this check sits AFTER the row loop, so the break leaves the inten_table loop
								// and ends the table search for this subset as soon as one table fails to improve. The flipped
								// path above only abandons the current table. Confirm whether this asymmetry is intentional.
								if (total_error >= best_subset_err)
									break;
							}

							if (total_error < best_subset_err)
							{
								best_subset_err = total_error;
								best_inten_table[subset] = inten_table;
							}

						} // inten_table

					} // subset

					trial_block.set_inten_table(0, best_inten_table[0]);
					trial_block.set_inten_table(1, best_inten_table[1]);

					// Compute error against the ORIGINAL block.
					// Each pixel's index is still chosen against the decoded UASTC pixel; only the error that is
					// accumulated is measured against the original pixel.
					uint64_t err = 0;

					for (uint32_t subset = 0; subset < 2; subset++)
					{
						color_rgba color_table[4];
						trial_block.get_block_colors(color_table, subset);

						ycbcr color_table_ycbcr[4];
						for (uint32_t i = 0; i < 4; i++)
							rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]);

						if (flip)
						{
							for (uint32_t y = 0; y < 2; y++)
							{
								for (uint32_t x = 0; x < 4; x++)
								{
									const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][x];
									// The table index is packed into the low 2 bits so a single minimum() yields the best index.
									const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3);

									const uint32_t best_index = (uint32_t)best_index_err & 3;
									err += color_diff(block_ycbcr[subset * 2 + y][x], color_table_ycbcr[best_index]);
								}
								// Early out: this trial is already no better than the best candidate.
								if (err >= best_err)
									break;
							}
						}
						else
						{
							for (uint32_t y = 0; y < 4; y++)
							{
								for (uint32_t x = 0; x < 2; x++)
								{
									const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + x];
									// The table index is packed into the low 2 bits so a single minimum() yields the best index.
									const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3);

									const uint32_t best_index = (uint32_t)best_index_err & 3;
									err += color_diff(block_ycbcr[y][subset * 2 + x], color_table_ycbcr[best_index]);
								}
								// Early out: this trial is already no better than the best candidate.
								if (err >= best_err)
									break;
							}
						}

					} // subset

					// Keep the trial only when it is strictly better than everything seen so far.
					if (err < best_err)
					{
						best_err = err;

						best_etc1_blk = trial_block;
						best_etc1_bias = bias;
					}

				} // bias_iter

			} // individ

		} // flip
	}
3005
3006 struct uastc_pack_eac_a8_results
3007 {
3008 uint32_t m_base;
3009 uint32_t m_table;
3010 uint32_t m_multiplier;
3011 };
3012
3013 static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask)
3014 {
3015 assert(num_pixels <= 16);
3016
3017 uint32_t min_alpha = 255, max_alpha = 0;
3018 for (uint32_t i = 0; i < num_pixels; i++)
3019 {
3020 const uint32_t a = pPixels[i];
3021 if (a < min_alpha) min_alpha = a;
3022 if (a > max_alpha) max_alpha = a;
3023 }
3024
3025 if (min_alpha == max_alpha)
3026 {
3027 results.m_base = min_alpha;
3028 results.m_table = 13;
3029 results.m_multiplier = 1;
3030 return 0;
3031 }
3032
3033 const uint32_t alpha_range = max_alpha - min_alpha;
3034
3035 uint64_t best_err = UINT64_MAX;
3036
3037 for (uint32_t table = 0; table < 16; table++)
3038 {
3039 if ((table_mask & (1U << table)) == 0)
3040 continue;
3041
3042 const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]);
3043 const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range));
3044
3045 const int base_min = clamp255(center - base_search_rad);
3046 const int base_max = clamp255(center + base_search_rad);
3047
3048 const int mul = (int)roundf(alpha_range / range);
3049 const int mul_low = clamp<int>(mul - mul_search_rad, 1, 15);
3050 const int mul_high = clamp<int>(mul + mul_search_rad, 1, 15);
3051
3052 for (int base = base_min; base <= base_max; base++)
3053 {
3054 for (int multiplier = mul_low; multiplier <= mul_high; multiplier++)
3055 {
3056 uint64_t total_err = 0;
3057
3058 for (uint32_t i = 0; i < num_pixels; i++)
3059 {
3060 const int a = pPixels[i];
3061
3062 uint32_t best_s_err = UINT32_MAX;
3063 //uint32_t best_s = 0;
3064 for (uint32_t s = 0; s < 8; s++)
3065 {
3066 const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base);
3067
3068 uint32_t err = iabs(a - v);
3069 if (err < best_s_err)
3070 {
3071 best_s_err = err;
3072 //best_s = s;
3073 }
3074 }
3075
3076 total_err += best_s_err * best_s_err;
3077 if (total_err >= best_err)
3078 break;
3079 }
3080
3081 if (total_err < best_err)
3082 {
3083 best_err = total_err;
3084 results.m_base = base;
3085 results.m_multiplier = multiplier;
3086 results.m_table = table;
3087 if (!best_err)
3088 return best_err;
3089 }
3090
3091 } // table
3092
3093 } // multiplier
3094
3095 } // base
3096
3097 return best_err;
3098 }
3099
	// BC7 error weight used by encode_uastc() when neither cPackUASTCFavorUASTCError nor cPackUASTCFavorBC7Error
	// is set (those flags select a BC7 weight of 0 and 100 respectively).
	const int32_t DEFAULT_BC7_ERROR_WEIGHT = 50;
	// NOTE(review): presumably a tolerance factor on UASTC error when trading it against BC7 error;
	// its use is not visible near this definition, so confirm against encode_uastc().
	const float UASTC_ERROR_THRESH = 1.3f;
3102
3103 // TODO: This is a quick hack to favor certain modes when we know we'll be followed up with an RDO postprocess.
3104 static inline float get_uastc_mode_weight(uint32_t mode)
3105 {
3106 const float FAVORED_MODE_WEIGHT = .8f;
3107
3108 switch (mode)
3109 {
3110 case 0:
3111 case 10:
3112 return FAVORED_MODE_WEIGHT;
3113 default:
3114 break;
3115 }
3116
3117 return 1.0f;
3118 }
3119
3120 void encode_uastc(const uint8_t* pRGBAPixels, uastc_block& output_block, uint32_t flags)
3121 {
3122// printf("encode_uastc: \n");
3123// for (int i = 0; i < 16; i++)
3124// printf("[%u %u %u %u] ", pRGBAPixels[i * 4 + 0], pRGBAPixels[i * 4 + 1], pRGBAPixels[i * 4 + 2], pRGBAPixels[i * 4 + 3]);
3125// printf("\n");
3126
3127 const color_rgba(*block)[4] = reinterpret_cast<const color_rgba(*)[4]>(pRGBAPixels);
3128
3129 bool solid_color = true, has_alpha = false, is_la = true;
3130
3131 const color_rgba first_color(block[0][0]);
3132 for (uint32_t y = 0; y < 4; y++)
3133 {
3134 for (uint32_t x = 0; x < 4; x++)
3135 {
3136 if (block[y][x].a < 255)
3137 has_alpha = true;
3138
3139 if (block[y][x] != first_color)
3140 solid_color = false;
3141
3142 if ((block[y][x].r != block[y][x].g) || (block[y][x].r != block[y][x].b))
3143 is_la = false;
3144 }
3145 }
3146
3147 if (solid_color)
3148 {
3149 // Solid color blocks are so common that we handle them specially and as quickly as we can.
3150 uastc_encode_results solid_results;
3151 solid_results.m_uastc_mode = UASTC_MODE_INDEX_SOLID_COLOR;
3152 solid_results.m_astc_err = 0;
3153 solid_results.m_common_pattern = 0;
3154 solid_results.m_solid_color = first_color;
3155 memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc));
3156
3157 etc_block etc1_blk;
3158 uint32_t etc1_bias = 0;
3159
3160 pack_etc1_block_solid_color(etc1_blk, &first_color.m_comps[0]);
3161
3162 eac_a8_block eac_a8_blk;
3163 eac_a8_blk.m_table = 0;
3164 eac_a8_blk.m_multiplier = 1;
3165
3166 pack_uastc(output_block, solid_results, etc1_blk, etc1_bias, eac_a8_blk, false, false);
3167
3168// printf(" Solid\n");
3169
3170 return;
3171 }
3172
3173 int level = flags & 7;
3174 const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0;
3175 const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0);
3176 //const bool etc1_perceptual = true;
3177
3178 uastc_encode_results results[MAX_ENCODE_RESULTS];
3179
3180 level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow);
3181
3182 // Set all options to slowest, then configure from there depending on the selected level.
3183 uint32_t mode_mask = UINT32_MAX;
3184 uint32_t uber_level = 6;
3185 bool estimate_partition = false;
3186 bool always_try_alpha_modes = true;
3187 uint32_t eac_a8_mul_search_rad = 3;
3188 uint32_t eac_a8_table_mask = UINT32_MAX;
3189 uint32_t least_squares_passes = 2;
3190 bool bc1_hints = true;
3191 bool only_use_la_on_transparent_blocks = false;
3192
3193 switch (level)
3194 {
3195 case cPackUASTCLevelFastest:
3196 {
3197 mode_mask = (1 << 0) | (1 << 8) |
3198 (1 << 11) | (1 << 12) |
3199 (1 << 15);
3200 always_try_alpha_modes = false;
3201 eac_a8_mul_search_rad = 0;
3202 eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3203 uber_level = 0;
3204 least_squares_passes = 1;
3205 bc1_hints = false;
3206 estimate_partition = true;
3207 only_use_la_on_transparent_blocks = true;
3208 break;
3209 }
3210 case cPackUASTCLevelFaster:
3211 {
3212 mode_mask = (1 << 0) | (1 << 4) | (1 << 6) | (1 << 8) |
3213 (1 << 9) | (1 << 11) | (1 << 12) |
3214 (1 << 15) | (1 << 17);
3215 always_try_alpha_modes = false;
3216 eac_a8_mul_search_rad = 0;
3217 eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3218 uber_level = 0;
3219 least_squares_passes = 1;
3220 estimate_partition = true;
3221 break;
3222 }
3223 case cPackUASTCLevelDefault:
3224 {
3225 mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) |
3226 (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) |
3227 (1 << 15) | (1 << 16) | (1 << 17);
3228 always_try_alpha_modes = false;
3229 eac_a8_mul_search_rad = 1;
3230 eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13);
3231 uber_level = 1;
3232 least_squares_passes = 1;
3233 estimate_partition = true;
3234 break;
3235 }
3236 case cPackUASTCLevelSlower:
3237 {
3238 always_try_alpha_modes = false;
3239 eac_a8_mul_search_rad = 2;
3240 uber_level = 3;
3241 estimate_partition = true;
3242 break;
3243 }
3244 case cPackUASTCLevelVerySlow:
3245 {
3246 break;
3247 }
3248 }
3249
3250#if BASISU_SUPPORT_FORCE_MODE
3251 static int force_mode = -1;
3252 force_mode = (force_mode + 1) % TOTAL_UASTC_MODES;
3253 mode_mask = UINT32_MAX;
3254 always_try_alpha_modes = true;
3255 only_use_la_on_transparent_blocks = false;
3256#endif
3257
3258 // HACK HACK
3259 //mode_mask &= ~(1 << 18);
3260 //mode_mask = (1 << 18)| (1 << 10);
3261
3262 uint32_t total_results = 0;
3263
3264 if (only_use_la_on_transparent_blocks)
3265 {
3266 if ((is_la) && (!has_alpha))
3267 is_la = false;
3268 }
3269
3270 const bool try_alpha_modes = has_alpha || always_try_alpha_modes;
3271
3272 bc7enc_compress_block_params comp_params;
3273 memset(&comp_params, 0, sizeof(comp_params));
3274 comp_params.m_max_partitions_mode1 = 64;
3275 comp_params.m_least_squares_passes = least_squares_passes;
3276 comp_params.m_weights[0] = 1;
3277 comp_params.m_weights[1] = 1;
3278 comp_params.m_weights[2] = 1;
3279 comp_params.m_weights[3] = 1;
3280 comp_params.m_uber_level = uber_level;
3281
3282 if (is_la)
3283 {
3284 if (mode_mask & (1U << 15))
3285 astc_mode15(block, results, total_results, comp_params);
3286
3287 if (mode_mask & (1U << 16))
3288 astc_mode9_or_16(16, block, results, total_results, comp_params, estimate_partition ? 4 : 0);
3289
3290 if (mode_mask & (1U << 17))
3291 astc_mode11_or_17(17, block, results, total_results, comp_params);
3292 }
3293
3294 if (!has_alpha)
3295 {
3296 if (mode_mask & (1U << 0))
3297 astc_mode0_or_18(0, block, results, total_results, comp_params);
3298
3299 if (mode_mask & (1U << 1))
3300 astc_mode1(block, results, total_results, comp_params);
3301
3302 if (mode_mask & (1U << 2))
3303 astc_mode2(block, results, total_results, comp_params, estimate_partition);
3304
3305 if (mode_mask & (1U << 3))
3306 astc_mode3(block, results, total_results, comp_params, estimate_partition);
3307
3308 if (mode_mask & (1U << 4))
3309 astc_mode4(block, results, total_results, comp_params, estimate_partition);
3310
3311 if (mode_mask & (1U << 5))
3312 astc_mode5(block, results, total_results, comp_params);
3313
3314 if (mode_mask & (1U << 6))
3315 astc_mode6(block, results, total_results, comp_params);
3316
3317 if (mode_mask & (1U << 7))
3318 astc_mode7(block, results, total_results, comp_params, estimate_partition);
3319
3320 if (mode_mask & (1U << 18))
3321 astc_mode0_or_18(18, block, results, total_results, comp_params);
3322 }
3323
3324 if (try_alpha_modes)
3325 {
3326 if (mode_mask & (1U << 9))
3327 astc_mode9_or_16(9, block, results, total_results, comp_params, estimate_partition ? 4 : 0);
3328
3329 if (mode_mask & (1U << 10))
3330 astc_mode10(block, results, total_results, comp_params);
3331
3332 if (mode_mask & (1U << 11))
3333 astc_mode11_or_17(11, block, results, total_results, comp_params);
3334
3335 if (mode_mask & (1U << 12))
3336 astc_mode12(block, results, total_results, comp_params);
3337
3338 if (mode_mask & (1U << 13))
3339 astc_mode13(block, results, total_results, comp_params);
3340
3341 if (mode_mask & (1U << 14))
3342 astc_mode14(block, results, total_results, comp_params);
3343 }
3344
3345 assert(total_results);
3346
3347 // Fix up the errors so we consistently have LA, RGB, or RGBA error.
3348 for (uint32_t i = 0; i < total_results; i++)
3349 {
3350 uastc_encode_results& r = results[i];
3351 if (!is_la)
3352 {
3353 if (g_uastc_mode_is_la[r.m_uastc_mode])
3354 {
3355 color_rgba unpacked_block[16];
3356 unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false);
3357
3358 uint64_t total_err = 0;
3359 for (uint32_t j = 0; j < 16; j++)
3360 total_err += color_distance(unpacked_block[j], ((const color_rgba*)block)[j], true);
3361
3362 r.m_astc_err = total_err;
3363 }
3364 }
3365 else
3366 {
3367 if (!g_uastc_mode_is_la[r.m_uastc_mode])
3368 {
3369 color_rgba unpacked_block[16];
3370 unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false);
3371
3372 uint64_t total_err = 0;
3373 for (uint32_t j = 0; j < 16; j++)
3374 total_err += color_distance_la(unpacked_block[j], ((const color_rgba*)block)[j]);
3375
3376 r.m_astc_err = total_err;
3377 }
3378 }
3379 }
3380
3381 unpacked_uastc_block unpacked_ublock;
3382 memset(&unpacked_ublock, 0, sizeof(unpacked_ublock));
3383
3384 uint64_t total_overall_err[MAX_ENCODE_RESULTS];
3385 float uastc_err_f[MAX_ENCODE_RESULTS];
3386 double best_uastc_err_f = 1e+20f;
3387
3388 int best_index = -1;
3389
3390 if (total_results == 1)
3391 {
3392 best_index = 0;
3393 }
3394 else
3395 {
3396 const uint32_t bc7_err_weight = favor_bc7_error ? 100 : ((favor_uastc_error ? 0 : DEFAULT_BC7_ERROR_WEIGHT));
3397 const uint32_t uastc_err_weight = favor_bc7_error ? 0 : 100;
3398
3399 // Find best overall results, balancing UASTC and UASTC->BC7 error.
3400 // We purposely allow UASTC error to increase a little, if doing so lowers the BC7 error.
3401 for (uint32_t i = 0; i < total_results; i++)
3402 {
3403#if BASISU_SUPPORT_FORCE_MODE
3404 if (results[i].m_uastc_mode == force_mode)
3405 {
3406 best_index = i;
3407 break;
3408 }
3409#endif
3410
3411 unpacked_ublock.m_mode = results[i].m_uastc_mode;
3412 unpacked_ublock.m_astc = results[i].m_astc;
3413 unpacked_ublock.m_common_pattern = results[i].m_common_pattern;
3414 unpacked_ublock.m_solid_color = results[i].m_solid_color.get_color32();
3415
3416 color_rgba decoded_uastc_block[4][4];
3417 bool success = unpack_uastc(results[i].m_uastc_mode, results[i].m_common_pattern, results[i].m_solid_color.get_color32(), results[i].m_astc, (basist::color32 *)&decoded_uastc_block[0][0], false);
3418 (void)success;
3419 VALIDATE(success);
3420
3421 uint64_t total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err;
3422 compute_block_error(block, decoded_uastc_block, total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err);
3423
3424 // Validate the computed error, or we're go mad if it's inaccurate.
3425 if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3426 {
3427 VALIDATE(total_uastc_rgba_err == 0);
3428 }
3429 else if (is_la)
3430 {
3431 VALIDATE(total_uastc_la_err == results[i].m_astc_err);
3432 }
3433 else if (g_uastc_mode_has_alpha[results[i].m_uastc_mode])
3434 {
3435 VALIDATE(total_uastc_rgba_err == results[i].m_astc_err);
3436 }
3437 else
3438 {
3439 VALIDATE(total_uastc_rgb_err == results[i].m_astc_err);
3440 }
3441
3442 // Transcode to BC7
3443 bc7_optimization_results bc7_results;
3444 transcode_uastc_to_bc7(unpacked_ublock, bc7_results);
3445
3446 bc7_block bc7_data;
3447 encode_bc7_block(&bc7_data, &bc7_results);
3448
3449 color_rgba decoded_bc7_block[4][4];
3450 unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]);
3451
3452 // Compute BC7 error
3453 uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err;
3454 compute_block_error(block, decoded_bc7_block, total_bc7_rgb_err, total_bc7_rgba_err, total_bc7_la_err);
3455
3456 if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3457 {
3458 VALIDATE(total_bc7_rgba_err == 0);
3459
3460 best_index = i;
3461 break;
3462 }
3463
3464 uint64_t total_uastc_err = 0, total_bc7_err = 0;
3465 if (is_la)
3466 {
3467 total_bc7_err = total_bc7_la_err;
3468 total_uastc_err = total_uastc_la_err;
3469 }
3470 else if (has_alpha)
3471 {
3472 total_bc7_err = total_bc7_rgba_err;
3473 total_uastc_err = total_uastc_rgba_err;
3474 }
3475 else
3476 {
3477 total_bc7_err = total_bc7_rgb_err;
3478 total_uastc_err = total_uastc_rgb_err;
3479 }
3480
3481 total_overall_err[i] = ((total_bc7_err * bc7_err_weight) / 100) + ((total_uastc_err * uastc_err_weight) / 100);
3482 if (!total_overall_err[i])
3483 {
3484 best_index = i;
3485 break;
3486 }
3487
3488 uastc_err_f[i] = sqrtf((float)total_uastc_err);
3489
3490 if (uastc_err_f[i] < best_uastc_err_f)
3491 {
3492 best_uastc_err_f = uastc_err_f[i];
3493 }
3494
3495 } // total_results
3496
3497 if (best_index < 0)
3498 {
3499 uint64_t best_err = UINT64_MAX;
3500
3501 if ((best_uastc_err_f == 0.0f) || (favor_bc7_error))
3502 {
3503 for (uint32_t i = 0; i < total_results; i++)
3504 {
3505 // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression.
3506 const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f;
3507
3508 const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight);
3509 if (w < best_err)
3510 {
3511 best_err = w;
3512 best_index = i;
3513 if (!best_err)
3514 break;
3515 }
3516 } // i
3517 }
3518 else
3519 {
3520 // Scan the UASTC results, and consider all results within a window that has the best UASTC+BC7 error.
3521 for (uint32_t i = 0; i < total_results; i++)
3522 {
3523 double err_delta = uastc_err_f[i] / best_uastc_err_f;
3524
3525 if (err_delta <= UASTC_ERROR_THRESH)
3526 {
3527 // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression.
3528 const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f;
3529
3530 const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight);
3531 if (w < best_err)
3532 {
3533 best_err = w;
3534 best_index = i;
3535 if (!best_err)
3536 break;
3537 }
3538 }
3539 } // i
3540 }
3541 }
3542 }
3543
3544 const uastc_encode_results& best_results = results[best_index];
3545 const uint32_t best_mode = best_results.m_uastc_mode;
3546 const astc_block_desc& best_astc_results = best_results.m_astc;
3547
3548 color_rgba decoded_uastc_block[4][4];
3549 bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false);
3550 (void)success;
3551 VALIDATE(success);
3552
3553#if BASISU_VALIDATE_UASTC_ENC
3554 // Make sure that the UASTC block unpacks to the same exact pixels as the ASTC block does, using two different decoders.
3555 {
3556 // Round trip to packed UASTC and back, then decode to pixels.
3557 etc_block etc1_blk;
3558 memset(&etc1_blk, 0, sizeof(etc1_blk));
3559 eac_a8_block etc_eac_a8_blk;
3560 memset(&etc_eac_a8_blk, 0, sizeof(etc_eac_a8_blk));
3561 etc_eac_a8_blk.m_multiplier = 1;
3562
3563 basist::uastc_block temp_block;
3564 pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false);
3565
3566 basist::color32 temp_block_unpacked[4][4];
3567 success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false);
3568 VALIDATE(success);
3569
3570#if BASISU_USE_ASTC_DECOMPRESS
3571 // Now round trip to packed ASTC and back, then decode to pixels.
3572 uint32_t astc_data[4];
3573
3574 if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR)
3575 pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color);
3576 else
3577 {
3578 success = pack_astc_block(astc_data, &best_astc_results, best_results.m_uastc_mode);
3579 VALIDATE(success);
3580 }
3581
3582 color_rgba decoded_astc_block[4][4];
3583 success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4);
3584 VALIDATE(success);
3585
3586 for (uint32_t y = 0; y < 4; y++)
3587 {
3588 for (uint32_t x = 0; x < 4; x++)
3589 {
3590 VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]);
3591
3592 VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r);
3593 VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g);
3594 VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b);
3595 VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a);
3596 }
3597 }
3598#endif
3599 }
3600#endif
3601
3602 // Compute BC1 hints
3603 bool bc1_hint0 = false, bc1_hint1 = false;
3604 if (bc1_hints)
3605 compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block);
3606
3607 eac_a8_block eac_a8_blk;
3608 if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR))
3609 {
3610 // Compute ETC2 hints
3611 uint8_t decoded_uastc_block_alpha[16];
3612 for (uint32_t i = 0; i < 16; i++)
3613 decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a;
3614
3615 uastc_pack_eac_a8_results eac8_a8_results;
3616 memset(&eac8_a8_results, 0, sizeof(eac8_a8_results));
3617 uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask);
3618
3619 // All we care about for hinting is the table and multiplier.
3620 eac_a8_blk.m_table = eac8_a8_results.m_table;
3621 eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier;
3622 }
3623 else
3624 {
3625 memset(&eac_a8_blk, 0, sizeof(eac_a8_blk));
3626 }
3627
3628 // Compute ETC1 hints
3629 etc_block etc1_blk;
3630 uint32_t etc1_bias = 0;
3631 compute_etc1_hints(etc1_blk, etc1_bias, best_results, block, decoded_uastc_block, level, flags);
3632
3633 // Finally, pack the UASTC block with its hints and we're done.
3634 pack_uastc(output_block, best_results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1);
3635
3636// printf(" Packed: ");
3637// for (int i = 0; i < 16; i++)
3638// printf("%X ", output_block.m_bytes[i]);
3639// printf("\n");
3640 }
3641
3642 static bool uastc_recompute_hints(basist::uastc_block* pBlock, const color_rgba* pBlock_pixels, uint32_t flags, const unpacked_uastc_block *pUnpacked_blk)
3643 {
3644 unpacked_uastc_block unpacked_blk;
3645
3646 if (pUnpacked_blk)
3647 unpacked_blk = *pUnpacked_blk;
3648 else
3649 {
3650 if (!unpack_uastc(*pBlock, unpacked_blk, false, true))
3651 return false;
3652 }
3653 color_rgba decoded_uastc_block[4][4];
3654 if (!unpack_uastc(unpacked_blk, (basist::color32 *)decoded_uastc_block, false))
3655 return false;
3656 uastc_encode_results results;
3657 results.m_uastc_mode = unpacked_blk.m_mode;
3658 results.m_common_pattern = unpacked_blk.m_common_pattern;
3659 results.m_astc = unpacked_blk.m_astc;
3660 results.m_solid_color = unpacked_blk.m_solid_color;
3661 results.m_astc_err = 0;
3662 bool bc1_hints = true;
3663 uint32_t eac_a8_mul_search_rad = 3;
3664 uint32_t eac_a8_table_mask = UINT32_MAX;
3665 const uint32_t level = flags & cPackUASTCLevelMask;
3666 switch (level)
3667 {
3668 case cPackUASTCLevelFastest:
3669 {
3670 eac_a8_mul_search_rad = 0;
3671 eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3672 bc1_hints = false;
3673 break;
3674 }
3675 case cPackUASTCLevelFaster:
3676 {
3677 eac_a8_mul_search_rad = 0;
3678 eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13);
3679 break;
3680 }
3681 case cPackUASTCLevelDefault:
3682 {
3683 eac_a8_mul_search_rad = 1;
3684 eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13);
3685 break;
3686 }
3687 case cPackUASTCLevelSlower:
3688 {
3689 eac_a8_mul_search_rad = 2;
3690 break;
3691 }
3692 case cPackUASTCLevelVerySlow:
3693 {
3694 break;
3695 }
3696 }
3697 bool bc1_hint0 = false, bc1_hint1 = false;
3698 if (bc1_hints)
3699 compute_bc1_hints(bc1_hint0, bc1_hint1, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block);
3700 const uint32_t best_mode = unpacked_blk.m_mode;
3701 eac_a8_block eac_a8_blk;
3702 if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR))
3703 {
3704 uint8_t decoded_uastc_block_alpha[16];
3705 for (uint32_t i = 0; i < 16; i++)
3706 decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a;
3707 uastc_pack_eac_a8_results eac8_a8_results;
3708 memset(&eac8_a8_results, 0, sizeof(eac8_a8_results));
3709 uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask);
3710 eac_a8_blk.m_table = eac8_a8_results.m_table;
3711 eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier;
3712 }
3713 else
3714 {
3715 memset(&eac_a8_blk, 0, sizeof(eac_a8_blk));
3716 }
3717 etc_block etc1_blk;
3718 uint32_t etc1_bias = 0;
3719 compute_etc1_hints(etc1_blk, etc1_bias, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block, level, flags);
3720 pack_uastc(*pBlock, results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1);
3721 return true;
3722 }
3723
// Per-UASTC-mode location of the selector bits inside a packed UASTC block.
// [mode][0] is the bit offset of the first selector bit, [mode][1] is the total number of selector bits.
// uastc_rdo_blocks() reads both values and asserts that offset + count <= 128 and count > 0.
// The { 0, 0 } entry at index 8 would fail that assert; presumably mode 8 is UASTC_MODE_INDEX_SOLID_COLOR,
// which uastc_rdo_blocks() skips before indexing this table - TODO confirm.
static const uint8_t g_uastc_mode_selector_bits[TOTAL_UASTC_MODES][2] =
{
    { 65, 63 }, { 69, 31 }, { 73, 46 }, { 89, 29 },
    { 89, 30 }, { 68, 47 }, { 66, 62 }, { 89, 30 },
    { 0, 0 }, { 97, 30 }, { 65, 63 }, { 66, 62 },
    { 81, 47 }, { 94, 30 }, { 92, 31 }, { 62, 63 },
    { 98, 30 }, { 61, 62 }, { 49, 79 }
};
3732
3733 static inline uint32_t set_block_bits(uint8_t* pBytes, uint64_t val, uint32_t num_bits, uint32_t cur_ofs)
3734 {
3735 assert(num_bits <= 64);
3736 assert((num_bits == 64) || (val < (1ULL << num_bits)));
3737 uint64_t mask = (num_bits == 64) ? UINT64_MAX : ((1ULL << num_bits) - 1);
3738 while (num_bits)
3739 {
3740 const uint32_t n = basisu::minimum<uint32_t>(8U - (cur_ofs & 7U), num_bits);
3741 pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7U));
3742 pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7U));
3743 val >>= n;
3744 mask >>= n;
3745 num_bits -= n;
3746 cur_ofs += n;
3747 }
3748 return cur_ofs;
3749 }
3750
// Extra distance cost added by compute_match_cost_estimate() for match distances below 512, indexed directly by the distance.
// The value is 0 for distances 0-3, 1 for 4-7, 2 for 8-15, and so on, up to 7 for distances 256-511.
// NOTE(review): the name suggests these are the DEFLATE distance extra-bit counts as used by miniz's tdefl - confirm.
static const uint8_t g_tdefl_small_dist_extra[512] =
{
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7
};
3762
// Extra distance cost added by compute_match_cost_estimate() for match distances of 512 and above.
// It is indexed by (distance >> 8), with the distance first clamped to 32767, so only indices 2..127 are reached from there
// (entries 0 and 1 are never selected because that path requires a distance >= 512).
// NOTE(review): the name suggests these are the DEFLATE distance extra-bit counts as used by miniz's tdefl - confirm.
static const uint8_t g_tdefl_large_dist_extra[128] =
{
    0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
};
3769
3770 static inline uint32_t compute_match_cost_estimate(uint32_t dist)
3771 {
3772 uint32_t len_cost = 7;
3773 uint32_t dist_cost = 5;
3774 if (dist < 512)
3775 dist_cost += g_tdefl_small_dist_extra[dist & 511];
3776 else
3777 {
3778 dist_cost += g_tdefl_large_dist_extra[basisu::minimum<uint32_t>(dist, 32767) >> 8];
3779 while (dist >= 32768)
3780 {
3781 dist_cost++;
3782 dist >>= 1;
3783 }
3784 }
3785 return len_cost + dist_cost;
3786 }
3787
// Key identifying a run of selector bits: the bit offset at which the run starts inside a block (m_ofs)
// and the selector bits themselves (m_sel, at most 64 of them).
// Keys are ordered by offset first, then by selector bits.
struct selector_bitsequence
{
    uint64_t m_sel;
    uint32_t m_ofs;

    // Default-constructed keys are zeroed so that comparing or hashing them never reads indeterminate values.
    selector_bitsequence() : m_sel(0), m_ofs(0) { }

    selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { }

    // Two keys are equal when both the offset and the selector bits match.
    bool operator== (const selector_bitsequence& other) const
    {
        return (m_ofs == other.m_ofs) && (m_sel == other.m_sel);
    }

    // Lexicographic ordering on (m_ofs, m_sel).
    bool operator< (const selector_bitsequence& other) const
    {
        if (m_ofs < other.m_ofs)
            return true;
        else if (m_ofs == other.m_ofs)
            return m_sel < other.m_sel;

        return false;
    }
};
3809
3810 struct selector_bitsequence_hash
3811 {
3812 std::size_t operator()(selector_bitsequence const& s) const noexcept
3813 {
3814 return static_cast<std::size_t>(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel);
3815 }
3816 };
3817
// Accumulates a running count, sum and sum of squares of unsigned values,
// from which the average, (population) standard deviation and variance are derived.
class tracked_stat
{
public:
    tracked_stat() { clear(); }

    // Forgets all values added so far.
    void clear() { m_num = 0; m_total = 0; m_total2 = 0; }

    // Adds one value to the statistics.
    void update(uint32_t val)
    {
        m_num++;
        m_total += val;
        // Square in 64 bits: a 32-bit val * val would wrap for val >= 65536.
        m_total2 += static_cast<uint64_t>(val) * val;
    }

    tracked_stat& operator += (uint32_t val) { update(val); return *this; }

    // Number of values added since the last clear().
    uint32_t get_number_of_values() const { return m_num; }
    // Sum of the values.
    uint64_t get_total() const { return m_total; }
    // Sum of the squared values.
    uint64_t get_total2() const { return m_total2; }

    // All three return 0 when no values have been added.
    float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }
    // sqrt(n * sum(x^2) - sum(x)^2) / n
    float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; }
    float get_variance() const { float s = get_std_dev(); return s * s; }

private:
    uint32_t m_num;
    uint64_t m_total;
    uint64_t m_total2;
};
3842
// Runs the RDO pass over the blocks [first_index, last_index) of pBlocks, modifying them in place.
//
// For every block that is not a solid color block, this:
//  1. Measures the block's current error as the average of its UASTC error and its error after transcoding to BC7.
//     Blocks whose RMS error is already >= params.m_skip_block_rms_thresh are left alone (counted in total_skipped).
//  2. Tries replacing the block's selector bits with the selector bits of previous blocks (at most
//     params.m_lz_dict_size / sizeof(uastc_block) of them, never reaching before first_index), rejecting candidates whose
//     RMS error exceeds the current one by more than params.m_max_allowed_rms_increase_ratio.
//  3. Keeps the candidate minimizing (mean squared error * smooth block scale) + (estimated bits * params.m_lambda).
//  4. If the selectors changed: optionally refines the endpoints (mode 0 only, when params.m_endpoint_refinement is set),
//     recomputes the hints and writes the block back (counted in total_modified, and total_refined when the refinement was accepted).
//
// pBlock_pixels holds the source pixels, 16 per block, indexed by block index.
// The four counters are only ever incremented, never reset.
// Returns false if a block fails to unpack, decode, transcode to BC7, or have its hints recomputed.
static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags,
    uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth)
{
    debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index);

    // How many previous blocks to scan for reusable selectors: the LZ dictionary size expressed in blocks (at least 1).
    const int total_blocks_to_check = basisu::maximum<uint32_t>(1U, params.m_lz_dict_size / sizeof(basist::uastc_block));
    const bool perceptual = false;

    // Maps (first selector bit offset, first <= 64 selector bits) to the index of the most recent block seen with that pattern.
    std::unordered_map<selector_bitsequence, uint32_t, selector_bitsequence_hash> selector_history;

    for (uint32_t block_index = first_index; block_index < last_index; block_index++)
    {
        const basist::uastc_block& blk = pBlocks[block_index];
        const color_rgba* pPixels = &pBlock_pixels[16 * block_index];

        unpacked_uastc_block unpacked_blk;
        if (!unpack_uastc(blk, unpacked_blk, false, true))
            return false;

        // Solid color blocks are left untouched.
        const uint32_t block_mode = unpacked_blk.m_mode;
        if (block_mode == UASTC_MODE_INDEX_SOLID_COLOR)
            continue;

        // Gather per-channel statistics of the source pixels.
        tracked_stat r_stats, g_stats, b_stats, a_stats;

        for (uint32_t i = 0; i < 16; i++)
        {
            r_stats.update(pPixels[i].r);
            g_stats.update(pPixels[i].g);
            b_stats.update(pPixels[i].b);
            a_stats.update(pPixels[i].a);
        }

        const float max_std_dev = basisu::maximum<float>(basisu::maximum<float>(basisu::maximum(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev());

        // Error scale for smooth blocks, driven by the largest channel std dev relative to m_max_smooth_block_std_dev.
        // Assumes lerp(a, b, t) returns a at t == 0 and b at t == 1, i.e. the flattest blocks get m_smooth_block_max_error_scale - TODO confirm.
        float yl = clamp<float>(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f);
        yl = yl * yl;
        const float smooth_block_error_scale = lerp<float>(params.m_smooth_block_max_error_scale, 1.0f, yl);
        if (smooth_block_error_scale > 1.0f)
            total_smooth++;

        color_rgba decoded_uastc_block[4][4];
        if (!unpack_uastc(unpacked_blk, (basist::color32*)decoded_uastc_block, false))
            return false;

        uint64_t uastc_err = 0;
        for (uint32_t i = 0; i < 16; i++)
            uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_uastc_block)[i], true);

        // Transcode to BC7
        bc7_optimization_results b7_results;
        if (!transcode_uastc_to_bc7(unpacked_blk, b7_results))
            return false;

        basist::bc7_block b7_block;
        basist::encode_bc7_block(&b7_block, &b7_results);

        color_rgba decoded_b7_blk[4][4];
        unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]);

        uint64_t bc7_err = 0;
        for (uint32_t i = 0; i < 16; i++)
            bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true);

        // The block's error is the average of its UASTC error and its BC7-transcoded error.
        uint64_t cur_err = (uastc_err + bc7_err) / 2;

        // Divide by 16*4 to compute RMS error
        const float cur_ms_err = (float)cur_err * (1.0f / 64.0f);
        const float cur_rms_err = sqrt(cur_ms_err);

        const uint32_t first_sel_bit = g_uastc_mode_selector_bits[block_mode][0];
        const uint32_t total_sel_bits = g_uastc_mode_selector_bits[block_mode][1];
        assert(first_sel_bit + total_sel_bits <= 128);
        assert(total_sel_bits > 0);

        // Only the first (up to) 64 selector bits are used as the history key.
        uint32_t cur_bit_offset = first_sel_bit;
        uint64_t cur_sel_bits = read_bits((const uint8_t*)&blk, cur_bit_offset, basisu::minimum(64U, total_sel_bits));

        if (cur_rms_err >= params.m_skip_block_rms_thresh)
        {
            // Still record this block's selectors in the history (or refresh the entry to point at this block).
            auto cur_search_res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, cur_sel_bits), block_index));

            // Block already has too much error, so don't mess with it.
            if (!cur_search_res.second)
                (*cur_search_res.first).second = block_index;

            total_skipped++;
            continue;
        }

        // Estimate the cost, in bits, of coding the block's current selectors.
        int cur_bits;
        auto cur_find_res = selector_history.find(selector_bitsequence(first_sel_bit, cur_sel_bits));
        if (cur_find_res == selector_history.end())
        {
            // Wasn't found - wildly estimate literal cost
            //cur_bits = (total_sel_bits * 5) / 4;
            cur_bits = (total_sel_bits * params.m_lz_literal_cost) / 100;
        }
        else
        {
            // Was found - wildly estimate match cost
            uint32_t match_block_index = cur_find_res->second;
            const int block_dist_in_bytes = (block_index - match_block_index) * 16;
            cur_bits = compute_match_cost_estimate(block_dist_in_bytes);
        }

        // Candidate range: the previous total_blocks_to_check blocks, never reaching before first_index.
        int first_block_to_check = basisu::maximum<int>(first_index, block_index - total_blocks_to_check);
        int last_block_to_check = block_index - 1;

        basist::uastc_block best_block(blk);
        uint32_t best_block_index = block_index;

        // Cost of keeping the block as-is; a candidate must beat this to be accepted.
        float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda;

        // Now scan through previous blocks, insert their selector bit patterns into the current block, and find
        // selector bit patterns which don't increase the overall block error too much.
        for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index)
        {
            const basist::uastc_block& prev_blk = pBlocks[prev_block_index];

            // Read the candidate bits from the previous block at the current block's selector position.
            uint32_t bit_offset = first_sel_bit;
            uint64_t sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, basisu::minimum(64U, total_sel_bits));

            int match_block_index = prev_block_index;
            auto res = selector_history.find(selector_bitsequence(first_sel_bit, sel_bits));
            if (res != selector_history.end())
                match_block_index = res->second;
            // Have we already checked this bit pattern? If so then skip this block.
            if (match_block_index > prev_block_index)
                continue;

            // Fail if the previous block doesn't unpack; the unpacked result itself is not used below.
            unpacked_uastc_block unpacked_prev_blk;
            if (!unpack_uastc(prev_blk, unpacked_prev_blk, false, true))
                return false;

            basist::uastc_block trial_blk(blk);

            set_block_bits((uint8_t*)&trial_blk, sel_bits, basisu::minimum(64U, total_sel_bits), first_sel_bit);

            if (total_sel_bits > 64)
            {
                // Copy the selector bits beyond the first 64.
                // NOTE(review): relies on read_bits() above having advanced bit_offset past the first 64 bits - confirm.
                sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, total_sel_bits - 64U);

                set_block_bits((uint8_t*)&trial_blk, sel_bits, total_sel_bits - 64U, first_sel_bit + basisu::minimum(64U, total_sel_bits));
            }

            // A trial block that doesn't unpack or decode is simply not a usable candidate.
            unpacked_uastc_block unpacked_trial_blk;
            if (!unpack_uastc(trial_blk, unpacked_trial_blk, false, true))
                continue;

            color_rgba decoded_trial_uastc_block[4][4];
            if (!unpack_uastc(unpacked_trial_blk, (basist::color32*)decoded_trial_uastc_block, false))
                continue;

            uint64_t trial_uastc_err = 0;
            for (uint32_t i = 0; i < 16; i++)
                trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true);

            // Transcode trial to BC7, compute error
            bc7_optimization_results trial_b7_results;
            if (!transcode_uastc_to_bc7(unpacked_trial_blk, trial_b7_results))
                return false;

            basist::bc7_block trial_b7_block;
            basist::encode_bc7_block(&trial_b7_block, &trial_b7_results);

            color_rgba decoded_trial_b7_blk[4][4];
            unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]);

            uint64_t trial_bc7_err = 0;
            for (uint32_t i = 0; i < 16; i++)
                trial_bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_b7_blk)[i], true);

            uint64_t trial_err = (trial_uastc_err + trial_bc7_err) / 2;

            const float trial_ms_err = (float)trial_err * (1.0f / 64.0f);
            const float trial_rms_err = sqrtf(trial_ms_err);

            // Reject candidates that raise the RMS error beyond the allowed ratio.
            if (trial_rms_err > cur_rms_err * params.m_max_allowed_rms_increase_ratio)
                continue;

            const int block_dist_in_bytes = (block_index - match_block_index) * 16;
            const int match_bits = compute_match_cost_estimate(block_dist_in_bytes);

            // Rate-distortion cost of this candidate: scaled error plus lambda-weighted estimated bits.
            float t = trial_ms_err * smooth_block_error_scale + match_bits * params.m_lambda;
            if (t < best_t)
            {
                best_t = t;
                best_block_index = prev_block_index;

                best_block = trial_blk;
            }

        } // prev_block_index

        if (best_block_index != block_index)
        {
            total_modified++;

            unpacked_uastc_block unpacked_best_blk;
            if (!unpack_uastc(best_block, unpacked_best_blk, false, false))
                return false;

            if ((params.m_endpoint_refinement) && (block_mode == 0))
            {
                // Attempt to refine mode 0 block's endpoints, using the new selectors. This doesn't help much, but it does help.
                // TODO: We could do this with the other modes too.
                color_rgba decoded_best_uastc_block[4][4];
                if (!unpack_uastc(unpacked_best_blk, (basist::color32*)decoded_best_uastc_block, false))
                    return false;

                // Compute the block's current error (with the modified selectors).
                uint64_t best_uastc_err = 0;
                for (uint32_t i = 0; i < 16; i++)
                    best_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_best_uastc_block)[i], true);

                bc7enc_compress_block_params comp_params;
                memset(&comp_params, 0, sizeof(comp_params));
                comp_params.m_max_partitions_mode1 = 64;
                comp_params.m_least_squares_passes = 1;
                comp_params.m_weights[0] = 1;
                comp_params.m_weights[1] = 1;
                comp_params.m_weights[2] = 1;
                comp_params.m_weights[3] = 1;
                comp_params.m_uber_level = 0;

                // Re-encode as mode 0, passing in the block's (new) weights.
                uastc_encode_results results;
                uint32_t total_results = 0;
                astc_mode0_or_18(0, (color_rgba(*)[4])pPixels, &results, total_results, comp_params, unpacked_best_blk.m_astc.m_weights);
                assert(total_results == 1);

                // See if the overall error has actually gone down.

                color_rgba decoded_trial_uastc_block[4][4];
                bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false);
                assert(success);

                BASISU_NOTE_UNUSED(success);

                uint64_t trial_uastc_err = 0;
                for (uint32_t i = 0; i < 16; i++)
                    trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true);

                if (trial_uastc_err < best_uastc_err)
                {
                    // The error went down, so accept the new endpoints.

                    // Ensure the selectors haven't changed, otherwise we'll invalidate the LZ matches.
                    for (uint32_t i = 0; i < 16; i++)
                        assert(unpacked_best_blk.m_astc.m_weights[i] == results.m_astc.m_weights[i]);

                    unpacked_best_blk.m_astc = results.m_astc;

                    total_refined++;
                }
            } // if ((params.m_endpoint_refinement) && (block_mode == 0))

            // The selectors have changed, so go recompute the block hints.
            if (!uastc_recompute_hints(&best_block, pPixels, flags, &unpacked_best_blk))
                return false;

            // Write the modified block
            pBlocks[block_index] = best_block;

        } // if (best_block_index != block_index)

        // Record the selectors this block ended up with, so later blocks can estimate match costs against it.
        {
            uint32_t bit_offset = first_sel_bit;
            uint64_t sel_bits = read_bits((const uint8_t*)&best_block, bit_offset, basisu::minimum(64U, total_sel_bits));

            auto res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, sel_bits), block_index));
            if (!res.second)
                (*res.first).second = block_index;
        }

    } // block_index

    return true;
}
4122
4123 // This function implements a basic form of rate distortion optimization (RDO) for UASTC.
4124 // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation.
4125 // There's A LOT that can be done better in here, but it's a start.
4126 // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses.
4127 bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, job_pool* pJob_pool, uint32_t total_jobs)
4128 {
4129 assert(params.m_max_allowed_rms_increase_ratio > 1.0f);
4130 assert(params.m_lz_dict_size > 0);
4131 assert(params.m_lambda > 0.0f);
4132
4133 uint32_t total_skipped = 0, total_modified = 0, total_refined = 0, total_smooth = 0;
4134
4135 uint32_t blocks_per_job = total_jobs ? (num_blocks / total_jobs) : 0;
4136
4137 std::mutex stat_mutex;
4138
4139 bool status = false;
4140
4141 if ((!pJob_pool) || (total_jobs <= 1) || (blocks_per_job <= 8))
4142 {
4143 status = uastc_rdo_blocks(0, num_blocks, pBlocks, pBlock_pixels, params, flags, total_skipped, total_refined, total_modified, total_smooth);
4144 }
4145 else
4146 {
4147 bool all_succeeded = true;
4148
4149 for (uint32_t block_index_iter = 0; block_index_iter < num_blocks; block_index_iter += blocks_per_job)
4150 {
4151 const uint32_t first_index = block_index_iter;
4152 const uint32_t last_index = minimum<uint32_t>(num_blocks, block_index_iter + blocks_per_job);
4153
4154#ifndef __EMSCRIPTEN__
4155 pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, &params, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] {
4156#endif
4157
4158 uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0;
4159
4160 bool status = uastc_rdo_blocks(first_index, last_index, pBlocks, pBlock_pixels, params, flags, job_skipped, job_refined, job_modified, job_smooth);
4161
4162 {
4163 std::lock_guard<std::mutex> lck(stat_mutex);
4164
4165 all_succeeded = all_succeeded && status;
4166 total_skipped += job_skipped;
4167 total_modified += job_modified;
4168 total_refined += job_refined;
4169 total_smooth += job_smooth;
4170 }
4171
4172#ifndef __EMSCRIPTEN__
4173 }
4174 );
4175#endif
4176
4177 } // block_index_iter
4178
4179#ifndef __EMSCRIPTEN__
4180 pJob_pool->wait_for_all();
4181#endif
4182
4183 status = all_succeeded;
4184 }
4185
4186 debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks);
4187
4188 return status;
4189 }
4190} // namespace basisu
4191
4192
4193
4194
4195
4196