1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2023 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/**
19 * @brief Functions for the library entrypoint.
20 */
21
22#include <array>
23#include <cstring>
24#include <new>
25
26#include "astcenc.h"
27#include "astcenc_internal_entry.h"
28#include "astcenc_diagnostic_trace.h"
29
/**
 * @brief Record of the quality tuning parameter values.
 *
 * See the @c astcenc_config structure for detailed parameter documentation.
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 *
 * The preset tables in this file fill this structure using positional aggregate initialization,
 * so the member order below must not be changed without updating every table row.
 */
struct astcenc_preset_config
{
    /** @brief The quality level this row describes; rows are selected or interpolated by it. */
    float quality;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_partition_count_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_2partition_index_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_3partition_index_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_4partition_index_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_block_mode_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_refinement_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_candidate_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_2partitioning_candidate_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_3partitioning_candidate_limit;

    /** @brief Preset value for the config field of the same name. */
    unsigned int tune_4partitioning_candidate_limit;

    /** @brief First dB limit base; reduced by 35 * log10(texels) when the config is built. */
    float tune_db_limit_a_base;

    /** @brief Second dB limit base; reduced by 19 * log10(texels) when the config is built. */
    float tune_db_limit_b_base;

    /** @brief Preset value for the config field of the same name. */
    float tune_mse_overshoot;

    /** @brief Preset value for the config field of the same name. */
    float tune_2partition_early_out_limit_factor;

    /** @brief Preset value for the config field of the same name. */
    float tune_3partition_early_out_limit_factor;

    /** @brief Preset value for the config field of the same name. */
    float tune_2plane_early_out_limit_correlation;
};
59
60/**
61 * @brief The static presets for high bandwidth encodings (x < 25 texels per block).
62 */
63static const std::array<astcenc_preset_config, 6> preset_configs_high {{
64 {
65 ASTCENC_PRE_FASTEST,
66 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
67 }, {
68 ASTCENC_PRE_FAST,
69 3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f
70 }, {
71 ASTCENC_PRE_MEDIUM,
72 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f
73 }, {
74 ASTCENC_PRE_THOROUGH,
75 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f
76 }, {
77 ASTCENC_PRE_VERYTHOROUGH,
78 4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
79 }, {
80 ASTCENC_PRE_EXHAUSTIVE,
81 4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
82 }
83}};
84
85/**
86 * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
87 */
88static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
89 {
90 ASTCENC_PRE_FASTEST,
91 2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f
92 }, {
93 ASTCENC_PRE_FAST,
94 3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f
95 }, {
96 ASTCENC_PRE_MEDIUM,
97 4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f
98 }, {
99 ASTCENC_PRE_THOROUGH,
100 4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f
101 }, {
102 ASTCENC_PRE_VERYTHOROUGH,
103 4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
104 }, {
105 ASTCENC_PRE_EXHAUSTIVE,
106 4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
107 }
108}};
109
110/**
111 * @brief The static presets for low bandwidth encodings (64 <= x texels per block).
112 */
113static const std::array<astcenc_preset_config, 6> preset_configs_low {{
114 {
115 ASTCENC_PRE_FASTEST,
116 2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f
117 }, {
118 ASTCENC_PRE_FAST,
119 2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f
120 }, {
121 ASTCENC_PRE_MEDIUM,
122 3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f
123 }, {
124 ASTCENC_PRE_THOROUGH,
125 4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f
126 }, {
127 ASTCENC_PRE_VERYTHOROUGH,
128 4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f
129 }, {
130 ASTCENC_PRE_EXHAUSTIVE,
131 4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f
132 }
133}};
134
135/**
136 * @brief Validate CPU floating point meets assumptions made in the codec.
137 *
138 * The codec is written with the assumption that a float threaded through the @c if32 union will be
139 * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the
140 * case in an IEEE-754 compliant system, however not every system or compilation mode is actually
141 * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled.
142 *
143 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
144 */
145static astcenc_error validate_cpu_float()
146{
147 if32 p;
148 volatile float xprec_testval = 2.51f;
149 p.f = xprec_testval + 12582912.0f;
150 float q = p.f - 12582912.0f;
151
152 if (q != 3.0f)
153 {
154 return ASTCENC_ERR_BAD_CPU_FLOAT;
155 }
156
157 return ASTCENC_SUCCESS;
158}
159
160/**
161 * @brief Validate config profile.
162 *
163 * @param profile The profile to check.
164 *
165 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
166 */
167static astcenc_error validate_profile(
168 astcenc_profile profile
169) {
170 // Values in this enum are from an external user, so not guaranteed to be
171 // bounded to the enum values
172 switch (static_cast<int>(profile))
173 {
174 case ASTCENC_PRF_LDR_SRGB:
175 case ASTCENC_PRF_LDR:
176 case ASTCENC_PRF_HDR_RGB_LDR_A:
177 case ASTCENC_PRF_HDR:
178 return ASTCENC_SUCCESS;
179 default:
180 return ASTCENC_ERR_BAD_PROFILE;
181 }
182}
183
184/**
185 * @brief Validate block size.
186 *
187 * @param block_x The block x dimensions.
188 * @param block_y The block y dimensions.
189 * @param block_z The block z dimensions.
190 *
191 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
192 */
193static astcenc_error validate_block_size(
194 unsigned int block_x,
195 unsigned int block_y,
196 unsigned int block_z
197) {
198 // Test if this is a legal block size at all
199 bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) ||
200 ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z)));
201 if (!is_legal)
202 {
203 return ASTCENC_ERR_BAD_BLOCK_SIZE;
204 }
205
206 // Test if this build has sufficient capacity for this block size
207 bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
208 if (!have_capacity)
209 {
210 return ASTCENC_ERR_NOT_IMPLEMENTED;
211 }
212
213 return ASTCENC_SUCCESS;
214}
215
216/**
217 * @brief Validate flags.
218 *
219 * @param flags The flags to check.
220 *
221 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
222 */
223static astcenc_error validate_flags(
224 unsigned int flags
225) {
226 // Flags field must not contain any unknown flag bits
227 unsigned int exMask = ~ASTCENC_ALL_FLAGS;
228 if (popcount(flags & exMask) != 0)
229 {
230 return ASTCENC_ERR_BAD_FLAGS;
231 }
232
233 // Flags field must only contain at most a single map type
234 exMask = ASTCENC_FLG_MAP_NORMAL
235 | ASTCENC_FLG_MAP_RGBM;
236 if (popcount(flags & exMask) > 1)
237 {
238 return ASTCENC_ERR_BAD_FLAGS;
239 }
240
241 return ASTCENC_SUCCESS;
242}
243
244#if !defined(ASTCENC_DECOMPRESS_ONLY)
245
246/**
247 * @brief Validate single channel compression swizzle.
248 *
249 * @param swizzle The swizzle to check.
250 *
251 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
252 */
253static astcenc_error validate_compression_swz(
254 astcenc_swz swizzle
255) {
256 // Not all enum values are handled; SWZ_Z is invalid for compression
257 switch (static_cast<int>(swizzle))
258 {
259 case ASTCENC_SWZ_R:
260 case ASTCENC_SWZ_G:
261 case ASTCENC_SWZ_B:
262 case ASTCENC_SWZ_A:
263 case ASTCENC_SWZ_0:
264 case ASTCENC_SWZ_1:
265 return ASTCENC_SUCCESS;
266 default:
267 return ASTCENC_ERR_BAD_SWIZZLE;
268 }
269}
270
271/**
272 * @brief Validate overall compression swizzle.
273 *
274 * @param swizzle The swizzle to check.
275 *
276 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
277 */
278static astcenc_error validate_compression_swizzle(
279 const astcenc_swizzle& swizzle
280) {
281 if (validate_compression_swz(swizzle.r) ||
282 validate_compression_swz(swizzle.g) ||
283 validate_compression_swz(swizzle.b) ||
284 validate_compression_swz(swizzle.a))
285 {
286 return ASTCENC_ERR_BAD_SWIZZLE;
287 }
288
289 return ASTCENC_SUCCESS;
290}
291#endif
292
293/**
294 * @brief Validate single channel decompression swizzle.
295 *
296 * @param swizzle The swizzle to check.
297 *
298 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
299 */
300static astcenc_error validate_decompression_swz(
301 astcenc_swz swizzle
302) {
303 // Values in this enum are from an external user, so not guaranteed to be
304 // bounded to the enum values
305 switch (static_cast<int>(swizzle))
306 {
307 case ASTCENC_SWZ_R:
308 case ASTCENC_SWZ_G:
309 case ASTCENC_SWZ_B:
310 case ASTCENC_SWZ_A:
311 case ASTCENC_SWZ_0:
312 case ASTCENC_SWZ_1:
313 case ASTCENC_SWZ_Z:
314 return ASTCENC_SUCCESS;
315 default:
316 return ASTCENC_ERR_BAD_SWIZZLE;
317 }
318}
319
320/**
321 * @brief Validate overall decompression swizzle.
322 *
323 * @param swizzle The swizzle to check.
324 *
325 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
326 */
327static astcenc_error validate_decompression_swizzle(
328 const astcenc_swizzle& swizzle
329) {
330 if (validate_decompression_swz(swizzle.r) ||
331 validate_decompression_swz(swizzle.g) ||
332 validate_decompression_swz(swizzle.b) ||
333 validate_decompression_swz(swizzle.a))
334 {
335 return ASTCENC_ERR_BAD_SWIZZLE;
336 }
337
338 return ASTCENC_SUCCESS;
339}
340
341/**
342 * Validate that an incoming configuration is in-spec.
343 *
344 * This function can respond in two ways:
345 *
346 * * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
347 * for out-of-range inputs in this case.
348 * * Numerical inputs and logic inputs are are logically invalid and which make no sense
349 * algorithmically will return an error.
350 *
351 * @param[in,out] config The input compressor configuration.
352 *
353 * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
354 */
355static astcenc_error validate_config(
356 astcenc_config &config
357) {
358 astcenc_error status;
359
360 status = validate_profile(config.profile);
361 if (status != ASTCENC_SUCCESS)
362 {
363 return status;
364 }
365
366 status = validate_flags(config.flags);
367 if (status != ASTCENC_SUCCESS)
368 {
369 return status;
370 }
371
372 status = validate_block_size(config.block_x, config.block_y, config.block_z);
373 if (status != ASTCENC_SUCCESS)
374 {
375 return status;
376 }
377
378#if defined(ASTCENC_DECOMPRESS_ONLY)
379 // Decompress-only builds only support decompress-only contexts
380 if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
381 {
382 return ASTCENC_ERR_BAD_PARAM;
383 }
384#endif
385
386 config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f);
387
388 config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u);
389 config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
390 config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
391 config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS);
392 config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u);
393 config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u);
394 config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES);
395 config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
396 config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
397 config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES);
398 config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f);
399 config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f);
400 config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, 0.0f);
401 config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
402 config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);
403
404 // Specifying a zero weight color component is not allowed; force to small value
405 float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
406 astc::max(config.cw_b_weight, config.cw_a_weight));
407 if (max_weight > 0.0f)
408 {
409 max_weight /= 1000.0f;
410 config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
411 config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
412 config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
413 config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
414 }
415 // If all color components error weights are zero then return an error
416 else
417 {
418 return ASTCENC_ERR_BAD_PARAM;
419 }
420
421 return ASTCENC_SUCCESS;
422}
423
424/* See header for documentation. */
425astcenc_error astcenc_config_init(
426 astcenc_profile profile,
427 unsigned int block_x,
428 unsigned int block_y,
429 unsigned int block_z,
430 float quality,
431 unsigned int flags,
432 astcenc_config* configp
433) {
434 astcenc_error status;
435
436 status = validate_cpu_float();
437 if (status != ASTCENC_SUCCESS)
438 {
439 return status;
440 }
441
442 // Zero init all config fields; although most of will be over written
443 astcenc_config& config = *configp;
444 std::memset(&config, 0, sizeof(config));
445
446 // Process the block size
447 block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
448 status = validate_block_size(block_x, block_y, block_z);
449 if (status != ASTCENC_SUCCESS)
450 {
451 return status;
452 }
453
454 config.block_x = block_x;
455 config.block_y = block_y;
456 config.block_z = block_z;
457
458 float texels = static_cast<float>(block_x * block_y * block_z);
459 float ltexels = logf(texels) / logf(10.0f);
460
461 // Process the performance quality level or preset; note that this must be done before we
462 // process any additional settings, such as color profile and flags, which may replace some of
463 // these settings with more use case tuned values
464 if (quality < ASTCENC_PRE_FASTEST ||
465 quality > ASTCENC_PRE_EXHAUSTIVE)
466 {
467 return ASTCENC_ERR_BAD_QUALITY;
468 }
469
470 static const std::array<astcenc_preset_config, 6>* preset_configs;
471 int texels_int = block_x * block_y * block_z;
472 if (texels_int < 25)
473 {
474 preset_configs = &preset_configs_high;
475 }
476 else if (texels_int < 64)
477 {
478 preset_configs = &preset_configs_mid;
479 }
480 else
481 {
482 preset_configs = &preset_configs_low;
483 }
484
485 // Determine which preset to use, or which pair to interpolate
486 size_t start;
487 size_t end;
488 for (end = 0; end < preset_configs->size(); end++)
489 {
490 if ((*preset_configs)[end].quality >= quality)
491 {
492 break;
493 }
494 }
495
496 start = end == 0 ? 0 : end - 1;
497
498 // Start and end node are the same - so just transfer the values.
499 if (start == end)
500 {
501 config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
502 config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
503 config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
504 config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
505 config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
506 config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
507 config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit, TUNE_MAX_TRIAL_CANDIDATES);
508 config.tune_2partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_2partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
509 config.tune_3partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_3partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
510 config.tune_4partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_4partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
511 config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels,
512 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels);
513
514 config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
515
516 config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
517 config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
518 config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
519 }
520 // Start and end node are not the same - so interpolate between them
521 else
522 {
523 auto& node_a = (*preset_configs)[start];
524 auto& node_b = (*preset_configs)[end];
525
526 float wt_range = node_b.quality - node_a.quality;
527 assert(wt_range > 0);
528
529 // Compute interpolation factors
530 float wt_node_a = (node_b.quality - quality) / wt_range;
531 float wt_node_b = (quality - node_a.quality) / wt_range;
532
533 #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
534 #define LERPI(param) astc::flt2int_rtn(\
535 (static_cast<float>(node_a.param) * wt_node_a) + \
536 (static_cast<float>(node_b.param) * wt_node_b))
537 #define LERPUI(param) static_cast<unsigned int>(LERPI(param))
538
539 config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
540 config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
541 config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
542 config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
543 config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
544 config.tune_refinement_limit = LERPI(tune_refinement_limit);
545 config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit),
546 TUNE_MAX_TRIAL_CANDIDATES);
547 config.tune_2partitioning_candidate_limit = astc::min(LERPUI(tune_2partitioning_candidate_limit),
548 BLOCK_MAX_PARTITIONINGS);
549 config.tune_3partitioning_candidate_limit = astc::min(LERPUI(tune_3partitioning_candidate_limit),
550 BLOCK_MAX_PARTITIONINGS);
551 config.tune_4partitioning_candidate_limit = astc::min(LERPUI(tune_4partitioning_candidate_limit),
552 BLOCK_MAX_PARTITIONINGS);
553 config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels,
554 LERP(tune_db_limit_b_base) - 19 * ltexels);
555
556 config.tune_mse_overshoot = LERP(tune_mse_overshoot);
557
558 config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
559 config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
560 config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
561 #undef LERP
562 #undef LERPI
563 #undef LERPUI
564 }
565
566 // Set heuristics to the defaults for each color profile
567 config.cw_r_weight = 1.0f;
568 config.cw_g_weight = 1.0f;
569 config.cw_b_weight = 1.0f;
570 config.cw_a_weight = 1.0f;
571
572 config.a_scale_radius = 0;
573
574 config.rgbm_m_scale = 0.0f;
575
576 config.profile = profile;
577
578 // Values in this enum are from an external user, so not guaranteed to be
579 // bounded to the enum values
580 switch (static_cast<int>(profile))
581 {
582 case ASTCENC_PRF_LDR:
583 case ASTCENC_PRF_LDR_SRGB:
584 break;
585 case ASTCENC_PRF_HDR_RGB_LDR_A:
586 case ASTCENC_PRF_HDR:
587 config.tune_db_limit = 999.0f;
588 break;
589 default:
590 return ASTCENC_ERR_BAD_PROFILE;
591 }
592
593 // Flags field must not contain any unknown flag bits
594 status = validate_flags(flags);
595 if (status != ASTCENC_SUCCESS)
596 {
597 return status;
598 }
599
600 if (flags & ASTCENC_FLG_MAP_NORMAL)
601 {
602 // Normal map encoding uses L+A blocks, so allow one more partitioning
603 // than normal. We need need fewer bits for endpoints, so more likely
604 // to be able to use more partitions than an RGB/RGBA block
605 config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u);
606
607 config.cw_g_weight = 0.0f;
608 config.cw_b_weight = 0.0f;
609 config.tune_2partition_early_out_limit_factor *= 1.5f;
610 config.tune_3partition_early_out_limit_factor *= 1.5f;
611 config.tune_2plane_early_out_limit_correlation = 0.99f;
612
613 // Normals are prone to blocking artifacts on smooth curves
614 // so force compressor to try harder here ...
615 config.tune_db_limit *= 1.03f;
616 }
617 else if (flags & ASTCENC_FLG_MAP_RGBM)
618 {
619 config.rgbm_m_scale = 5.0f;
620 config.cw_a_weight = 2.0f * config.rgbm_m_scale;
621 }
622 else // (This is color data)
623 {
624 // This is a very basic perceptual metric for RGB color data, which weights error
625 // significance by the perceptual luminance contribution of each color channel. For
626 // luminance the usual weights to compute luminance from a linear RGB value are as
627 // follows:
628 //
629 // l = r * 0.3 + g * 0.59 + b * 0.11
630 //
631 // ... but we scale these up to keep a better balance between color and alpha. Note
632 // that if the content is using alpha we'd recommend using the -a option to weight
633 // the color contribution by the alpha transparency.
634 if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
635 {
636 config.cw_r_weight = 0.30f * 2.25f;
637 config.cw_g_weight = 0.59f * 2.25f;
638 config.cw_b_weight = 0.11f * 2.25f;
639 }
640 }
641 config.flags = flags;
642
643 return ASTCENC_SUCCESS;
644}
645
646/* See header for documentation. */
647astcenc_error astcenc_context_alloc(
648 const astcenc_config* configp,
649 unsigned int thread_count,
650 astcenc_context** context
651) {
652 astcenc_error status;
653 const astcenc_config& config = *configp;
654
655 status = validate_cpu_float();
656 if (status != ASTCENC_SUCCESS)
657 {
658 return status;
659 }
660
661 if (thread_count == 0)
662 {
663 return ASTCENC_ERR_BAD_PARAM;
664 }
665
666#if defined(ASTCENC_DIAGNOSTICS)
667 // Force single threaded compressor use in diagnostic mode.
668 if (thread_count != 1)
669 {
670 return ASTCENC_ERR_BAD_PARAM;
671 }
672#endif
673
674 astcenc_context* ctxo = new astcenc_context;
675 astcenc_contexti* ctx = &ctxo->context;
676 ctx->thread_count = thread_count;
677 ctx->config = config;
678 ctx->working_buffers = nullptr;
679
680 // These are allocated per-compress, as they depend on image size
681 ctx->input_alpha_averages = nullptr;
682
683 // Copy the config first and validate the copy (we may modify it)
684 status = validate_config(ctx->config);
685 if (status != ASTCENC_SUCCESS)
686 {
687 delete ctxo;
688 return status;
689 }
690
691 ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
692 bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
693 init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
694 can_omit_modes,
695 config.tune_partition_count_limit,
696 static_cast<float>(config.tune_block_mode_limit) / 100.0f,
697 *ctx->bsd);
698
699#if !defined(ASTCENC_DECOMPRESS_ONLY)
700 // Do setup only needed by compression
701 if (!(status & ASTCENC_FLG_DECOMPRESS_ONLY))
702 {
703 // Turn a dB limit into a per-texel error for faster use later
704 if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
705 {
706 ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f;
707 }
708 else
709 {
710 ctx->config.tune_db_limit = 0.0f;
711 }
712
713 size_t worksize = sizeof(compression_working_buffers) * thread_count;
714 ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
715 static_assert((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0,
716 "compression_working_buffers size must be multiple of vector alignment");
717 if (!ctx->working_buffers)
718 {
719 aligned_free<block_size_descriptor>(ctx->bsd);
720 delete ctxo;
721 *context = nullptr;
722 return ASTCENC_ERR_OUT_OF_MEM;
723 }
724 }
725#endif
726
727#if defined(ASTCENC_DIAGNOSTICS)
728 ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
729 if (!ctx->trace_log->m_file)
730 {
731 return ASTCENC_ERR_DTRACE_FAILURE;
732 }
733
734 trace_add_data("block_x", config.block_x);
735 trace_add_data("block_y", config.block_y);
736 trace_add_data("block_z", config.block_z);
737#endif
738
739 *context = ctxo;
740
741#if !defined(ASTCENC_DECOMPRESS_ONLY)
742 prepare_angular_tables();
743#endif
744
745 return ASTCENC_SUCCESS;
746}
747
748/* See header dor documentation. */
749void astcenc_context_free(
750 astcenc_context* ctxo
751) {
752 if (ctxo)
753 {
754 astcenc_contexti* ctx = &ctxo->context;
755 aligned_free<compression_working_buffers>(ctx->working_buffers);
756 aligned_free<block_size_descriptor>(ctx->bsd);
757#if defined(ASTCENC_DIAGNOSTICS)
758 delete ctx->trace_log;
759#endif
760 delete ctxo;
761 }
762}
763
764#if !defined(ASTCENC_DECOMPRESS_ONLY)
765
766/**
767 * @brief Compress an image, after any preflight has completed.
768 *
769 * @param[out] ctxo The compressor context.
770 * @param thread_index The thread index.
771 * @param image The intput image.
772 * @param swizzle The input swizzle.
773 * @param[out] buffer The output array for the compressed data.
774 */
775static void compress_image(
776 astcenc_context& ctxo,
777 unsigned int thread_index,
778 const astcenc_image& image,
779 const astcenc_swizzle& swizzle,
780 uint8_t* buffer
781) {
782 astcenc_contexti& ctx = ctxo.context;
783 const block_size_descriptor& bsd = *ctx.bsd;
784 astcenc_profile decode_mode = ctx.config.profile;
785
786 image_block blk;
787
788 int block_x = bsd.xdim;
789 int block_y = bsd.ydim;
790 int block_z = bsd.zdim;
791 blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
792
793 int dim_x = image.dim_x;
794 int dim_y = image.dim_y;
795 int dim_z = image.dim_z;
796
797 int xblocks = (dim_x + block_x - 1) / block_x;
798 int yblocks = (dim_y + block_y - 1) / block_y;
799 int zblocks = (dim_z + block_z - 1) / block_z;
800 int block_count = zblocks * yblocks * xblocks;
801
802 int row_blocks = xblocks;
803 int plane_blocks = xblocks * yblocks;
804
805 // Populate the block channel weights
806 blk.channel_weight = vfloat4(ctx.config.cw_r_weight,
807 ctx.config.cw_g_weight,
808 ctx.config.cw_b_weight,
809 ctx.config.cw_a_weight);
810
811 // Use preallocated scratch buffer
812 auto& temp_buffers = ctx.working_buffers[thread_index];
813
814 // Only the first thread actually runs the initializer
815 ctxo.manage_compress.init(block_count);
816
817 // Determine if we can use an optimized load function
818 bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) ||
819 (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A);
820
821 bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) ||
822 (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
823
824 bool use_fast_load = !needs_swz && !needs_hdr &&
825 block_z == 1 && image.data_type == ASTCENC_TYPE_U8;
826
827 auto load_func = load_image_block;
828 if (use_fast_load)
829 {
830 load_func = load_image_block_fast_ldr;
831 }
832
833 // All threads run this processing loop until there is no work remaining
834 while (true)
835 {
836 unsigned int count;
837 unsigned int base = ctxo.manage_compress.get_task_assignment(16, count);
838 if (!count)
839 {
840 break;
841 }
842
843 for (unsigned int i = base; i < base + count; i++)
844 {
845 // Decode i into x, y, z block indices
846 int z = i / plane_blocks;
847 unsigned int rem = i - (z * plane_blocks);
848 int y = rem / row_blocks;
849 int x = rem - (y * row_blocks);
850
851 // Test if we can apply some basic alpha-scale RDO
852 bool use_full_block = true;
853 if (ctx.config.a_scale_radius != 0 && block_z == 1)
854 {
855 int start_x = x * block_x;
856 int end_x = astc::min(dim_x, start_x + block_x);
857
858 int start_y = y * block_y;
859 int end_y = astc::min(dim_y, start_y + block_y);
860
861 // SATs accumulate error, so don't test exactly zero. Test for
862 // less than 1 alpha in the expanded block footprint that
863 // includes the alpha radius.
864 int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1);
865
866 int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1);
867
868 float footprint = static_cast<float>(x_footprint * y_footprint);
869 float threshold = 0.9f / (255.0f * footprint);
870
871 // Do we have any alpha values?
872 use_full_block = false;
873 for (int ay = start_y; ay < end_y; ay++)
874 {
875 for (int ax = start_x; ax < end_x; ax++)
876 {
877 float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
878 if (a_avg > threshold)
879 {
880 use_full_block = true;
881 ax = end_x;
882 ay = end_y;
883 }
884 }
885 }
886 }
887
888 // Fetch the full block for compression
889 if (use_full_block)
890 {
891 load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
892
893 // Scale RGB error contribution by the maximum alpha in the block
894 // This encourages preserving alpha accuracy in regions with high
895 // transparency, and can buy up to 0.5 dB PSNR.
896 if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
897 {
898 float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f);
899 blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale,
900 ctx.config.cw_g_weight * alpha_scale,
901 ctx.config.cw_b_weight * alpha_scale,
902 ctx.config.cw_a_weight);
903 }
904 }
905 // Apply alpha scale RDO - substitute constant color block
906 else
907 {
908 blk.origin_texel = vfloat4::zero();
909 blk.data_min = vfloat4::zero();
910 blk.data_mean = vfloat4::zero();
911 blk.data_max = vfloat4::zero();
912 blk.grayscale = true;
913 }
914
915 int offset = ((z * yblocks + y) * xblocks + x) * 16;
916 uint8_t *bp = buffer + offset;
917 physical_compressed_block* pcb = reinterpret_cast<physical_compressed_block*>(bp);
918 compress_block(ctx, blk, *pcb, temp_buffers);
919 }
920
921 ctxo.manage_compress.complete_task_assignment(count);
922 }
923}
924
925/**
926 * @brief Compute regional averages in an image.
927 *
928 * This function can be called by multiple threads, but only after a single
929 * thread calls the setup function @c init_compute_averages().
930 *
931 * Results are written back into @c img->input_alpha_averages.
932 *
933 * @param[out] ctx The context.
934 * @param ag The average and variance arguments created during setup.
935 */
936static void compute_averages(
937 astcenc_context& ctx,
938 const avg_args &ag
939) {
940 pixel_region_args arg = ag.arg;
941 arg.work_memory = new vfloat4[ag.work_memory_size];
942
943 int size_x = ag.img_size_x;
944 int size_y = ag.img_size_y;
945 int size_z = ag.img_size_z;
946
947 int step_xy = ag.blk_size_xy;
948 int step_z = ag.blk_size_z;
949
950 int y_tasks = (size_y + step_xy - 1) / step_xy;
951
952 // All threads run this processing loop until there is no work remaining
953 while (true)
954 {
955 unsigned int count;
956 unsigned int base = ctx.manage_avg.get_task_assignment(16, count);
957 if (!count)
958 {
959 break;
960 }
961
962 for (unsigned int i = base; i < base + count; i++)
963 {
964 int z = (i / (y_tasks)) * step_z;
965 int y = (i - (z * y_tasks)) * step_xy;
966
967 arg.size_z = astc::min(step_z, size_z - z);
968 arg.offset_z = z;
969
970 arg.size_y = astc::min(step_xy, size_y - y);
971 arg.offset_y = y;
972
973 for (int x = 0; x < size_x; x += step_xy)
974 {
975 arg.size_x = astc::min(step_xy, size_x - x);
976 arg.offset_x = x;
977 compute_pixel_region_variance(ctx.context, arg);
978 }
979 }
980
981 ctx.manage_avg.complete_task_assignment(count);
982 }
983
984 delete[] arg.work_memory;
985}
986
987#endif
988
989/* See header for documentation. */
990astcenc_error astcenc_compress_image(
991 astcenc_context* ctxo,
992 astcenc_image* imagep,
993 const astcenc_swizzle* swizzle,
994 uint8_t* data_out,
995 size_t data_len,
996 unsigned int thread_index
997) {
998#if defined(ASTCENC_DECOMPRESS_ONLY)
999 (void)ctxo;
1000 (void)imagep;
1001 (void)swizzle;
1002 (void)data_out;
1003 (void)data_len;
1004 (void)thread_index;
1005 return ASTCENC_ERR_BAD_CONTEXT;
1006#else
1007 astcenc_contexti* ctx = &ctxo->context;
1008 astcenc_error status;
1009 astcenc_image& image = *imagep;
1010
1011 if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1012 {
1013 return ASTCENC_ERR_BAD_CONTEXT;
1014 }
1015
1016 status = validate_compression_swizzle(*swizzle);
1017 if (status != ASTCENC_SUCCESS)
1018 {
1019 return status;
1020 }
1021
1022 if (thread_index >= ctx->thread_count)
1023 {
1024 return ASTCENC_ERR_BAD_PARAM;
1025 }
1026
1027 unsigned int block_x = ctx->config.block_x;
1028 unsigned int block_y = ctx->config.block_y;
1029 unsigned int block_z = ctx->config.block_z;
1030
1031 unsigned int xblocks = (image.dim_x + block_x - 1) / block_x;
1032 unsigned int yblocks = (image.dim_y + block_y - 1) / block_y;
1033 unsigned int zblocks = (image.dim_z + block_z - 1) / block_z;
1034
1035 // Check we have enough output space (16 bytes per block)
1036 size_t size_needed = xblocks * yblocks * zblocks * 16;
1037 if (data_len < size_needed)
1038 {
1039 return ASTCENC_ERR_OUT_OF_MEM;
1040 }
1041
1042 // If context thread count is one then implicitly reset
1043 if (ctx->thread_count == 1)
1044 {
1045 astcenc_compress_reset(ctxo);
1046 }
1047
1048 if (ctx->config.a_scale_radius != 0)
1049 {
1050 // First thread to enter will do setup, other threads will subsequently
1051 // enter the critical section but simply skip over the initialization
1052 auto init_avg = [ctx, &image, swizzle]() {
1053 // Perform memory allocations for the destination buffers
1054 size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
1055 ctx->input_alpha_averages = new float[texel_count];
1056
1057 return init_compute_averages(
1058 image, ctx->config.a_scale_radius, *swizzle,
1059 ctx->avg_preprocess_args);
1060 };
1061
1062 // Only the first thread actually runs the initializer
1063 ctxo->manage_avg.init(init_avg);
1064
1065 // All threads will enter this function and dynamically grab work
1066 compute_averages(*ctxo, ctx->avg_preprocess_args);
1067 }
1068
1069 // Wait for compute_averages to complete before compressing
1070 ctxo->manage_avg.wait();
1071
1072 compress_image(*ctxo, thread_index, image, *swizzle, data_out);
1073
1074 // Wait for compress to complete before freeing memory
1075 ctxo->manage_compress.wait();
1076
1077 auto term_compress = [ctx]() {
1078 delete[] ctx->input_alpha_averages;
1079 ctx->input_alpha_averages = nullptr;
1080 };
1081
1082 // Only the first thread to arrive actually runs the term
1083 ctxo->manage_compress.term(term_compress);
1084
1085 return ASTCENC_SUCCESS;
1086#endif
1087}
1088
1089/* See header for documentation. */
1090astcenc_error astcenc_compress_reset(
1091 astcenc_context* ctxo
1092) {
1093#if defined(ASTCENC_DECOMPRESS_ONLY)
1094 (void)ctxo;
1095 return ASTCENC_ERR_BAD_CONTEXT;
1096#else
1097 astcenc_contexti* ctx = &ctxo->context;
1098 if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1099 {
1100 return ASTCENC_ERR_BAD_CONTEXT;
1101 }
1102
1103 ctxo->manage_avg.reset();
1104 ctxo->manage_compress.reset();
1105 return ASTCENC_SUCCESS;
1106#endif
1107}
1108
1109/* See header for documentation. */
1110astcenc_error astcenc_decompress_image(
1111 astcenc_context* ctxo,
1112 const uint8_t* data,
1113 size_t data_len,
1114 astcenc_image* image_outp,
1115 const astcenc_swizzle* swizzle,
1116 unsigned int thread_index
1117) {
1118 astcenc_error status;
1119 astcenc_image& image_out = *image_outp;
1120 astcenc_contexti* ctx = &ctxo->context;
1121
1122 // Today this doesn't matter (working set on stack) but might in future ...
1123 if (thread_index >= ctx->thread_count)
1124 {
1125 return ASTCENC_ERR_BAD_PARAM;
1126 }
1127
1128 status = validate_decompression_swizzle(*swizzle);
1129 if (status != ASTCENC_SUCCESS)
1130 {
1131 return status;
1132 }
1133
1134 unsigned int block_x = ctx->config.block_x;
1135 unsigned int block_y = ctx->config.block_y;
1136 unsigned int block_z = ctx->config.block_z;
1137
1138 unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x;
1139 unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y;
1140 unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z;
1141
1142 int row_blocks = xblocks;
1143 int plane_blocks = xblocks * yblocks;
1144
1145 // Check we have enough output space (16 bytes per block)
1146 size_t size_needed = xblocks * yblocks * zblocks * 16;
1147 if (data_len < size_needed)
1148 {
1149 return ASTCENC_ERR_OUT_OF_MEM;
1150 }
1151
1152 image_block blk;
1153 blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
1154
1155 // If context thread count is one then implicitly reset
1156 if (ctx->thread_count == 1)
1157 {
1158 astcenc_decompress_reset(ctxo);
1159 }
1160
1161 // Only the first thread actually runs the initializer
1162 ctxo->manage_decompress.init(zblocks * yblocks * xblocks);
1163
1164 // All threads run this processing loop until there is no work remaining
1165 while (true)
1166 {
1167 unsigned int count;
1168 unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count);
1169 if (!count)
1170 {
1171 break;
1172 }
1173
1174 for (unsigned int i = base; i < base + count; i++)
1175 {
1176 // Decode i into x, y, z block indices
1177 int z = i / plane_blocks;
1178 unsigned int rem = i - (z * plane_blocks);
1179 int y = rem / row_blocks;
1180 int x = rem - (y * row_blocks);
1181
1182 unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16;
1183 const uint8_t* bp = data + offset;
1184
1185 const physical_compressed_block& pcb = *reinterpret_cast<const physical_compressed_block*>(bp);
1186 symbolic_compressed_block scb;
1187
1188 physical_to_symbolic(*ctx->bsd, pcb, scb);
1189
1190 decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
1191 x * block_x, y * block_y, z * block_z,
1192 scb, blk);
1193
1194 store_image_block(image_out, blk, *ctx->bsd,
1195 x * block_x, y * block_y, z * block_z, *swizzle);
1196 }
1197
1198 ctxo->manage_decompress.complete_task_assignment(count);
1199 }
1200
1201 return ASTCENC_SUCCESS;
1202}
1203
1204/* See header for documentation. */
1205astcenc_error astcenc_decompress_reset(
1206 astcenc_context* ctxo
1207) {
1208 ctxo->manage_decompress.reset();
1209 return ASTCENC_SUCCESS;
1210}
1211
1212/* See header for documentation. */
1213astcenc_error astcenc_get_block_info(
1214 astcenc_context* ctxo,
1215 const uint8_t data[16],
1216 astcenc_block_info* info
1217) {
1218#if defined(ASTCENC_DECOMPRESS_ONLY)
1219 (void)ctxo;
1220 (void)data;
1221 (void)info;
1222 return ASTCENC_ERR_BAD_CONTEXT;
1223#else
1224 astcenc_contexti* ctx = &ctxo->context;
1225
1226 // Decode the compressed data into a symbolic form
1227 const physical_compressed_block&pcb = *reinterpret_cast<const physical_compressed_block*>(data);
1228 symbolic_compressed_block scb;
1229 physical_to_symbolic(*ctx->bsd, pcb, scb);
1230
1231 // Fetch the appropriate partition and decimation tables
1232 block_size_descriptor& bsd = *ctx->bsd;
1233
1234 // Start from a clean slate
1235 memset(info, 0, sizeof(*info));
1236
1237 // Basic info we can always populate
1238 info->profile = ctx->config.profile;
1239
1240 info->block_x = ctx->config.block_x;
1241 info->block_y = ctx->config.block_y;
1242 info->block_z = ctx->config.block_z;
1243 info->texel_count = bsd.texel_count;
1244
1245 // Check for error blocks first
1246 info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
1247 if (info->is_error_block)
1248 {
1249 return ASTCENC_SUCCESS;
1250 }
1251
1252 // Check for constant color blocks second
1253 info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 ||
1254 scb.block_type == SYM_BTYPE_CONST_U16;
1255 if (info->is_constant_block)
1256 {
1257 return ASTCENC_SUCCESS;
1258 }
1259
1260 // Otherwise handle a full block ; known to be valid after conditions above have been checked
1261 int partition_count = scb.partition_count;
1262 const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
1263
1264 const block_mode& bm = bsd.get_block_mode(scb.block_mode);
1265 const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
1266
1267 info->weight_x = di.weight_x;
1268 info->weight_y = di.weight_y;
1269 info->weight_z = di.weight_z;
1270
1271 info->is_dual_plane_block = bm.is_dual_plane != 0;
1272
1273 info->partition_count = scb.partition_count;
1274 info->partition_index = scb.partition_index;
1275 info->dual_plane_component = scb.plane2_component;
1276
1277 info->color_level_count = get_quant_level(scb.get_color_quant_mode());
1278 info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
1279
1280 // Unpack color endpoints for each active partition
1281 for (unsigned int i = 0; i < scb.partition_count; i++)
1282 {
1283 bool rgb_hdr;
1284 bool a_hdr;
1285 vint4 endpnt[2];
1286
1287 unpack_color_endpoints(ctx->config.profile,
1288 scb.color_formats[i],
1289 scb.color_values[i],
1290 rgb_hdr, a_hdr,
1291 endpnt[0], endpnt[1]);
1292
1293 // Store the color endpoint mode info
1294 info->color_endpoint_modes[i] = scb.color_formats[i];
1295 info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr;
1296
1297 // Store the unpacked and decoded color endpoint
1298 vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
1299 for (int j = 0; j < 2; j++)
1300 {
1301 vint4 color_lns = lns_to_sf16(endpnt[j]);
1302 vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
1303 vint4 datai = select(color_unorm, color_lns, hdr_mask);
1304 store(float16_to_float(datai), info->color_endpoints[i][j]);
1305 }
1306 }
1307
1308 // Unpack weights for each texel
1309 int weight_plane1[BLOCK_MAX_TEXELS];
1310 int weight_plane2[BLOCK_MAX_TEXELS];
1311
1312 unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
1313 for (unsigned int i = 0; i < bsd.texel_count; i++)
1314 {
1315 info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1316 if (info->is_dual_plane_block)
1317 {
1318 info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM);
1319 }
1320 }
1321
1322 // Unpack partition assignments for each texel
1323 for (unsigned int i = 0; i < bsd.texel_count; i++)
1324 {
1325 info->partition_assignment[i] = pi.partition_of_texel[i];
1326 }
1327
1328 return ASTCENC_SUCCESS;
1329#endif
1330}
1331
1332/* See header for documentation. */
1333const char* astcenc_get_error_string(
1334 astcenc_error status
1335) {
1336 // Values in this enum are from an external user, so not guaranteed to be
1337 // bounded to the enum values
1338 switch (static_cast<int>(status))
1339 {
1340 case ASTCENC_SUCCESS:
1341 return "ASTCENC_SUCCESS";
1342 case ASTCENC_ERR_OUT_OF_MEM:
1343 return "ASTCENC_ERR_OUT_OF_MEM";
1344 case ASTCENC_ERR_BAD_CPU_FLOAT:
1345 return "ASTCENC_ERR_BAD_CPU_FLOAT";
1346 case ASTCENC_ERR_BAD_PARAM:
1347 return "ASTCENC_ERR_BAD_PARAM";
1348 case ASTCENC_ERR_BAD_BLOCK_SIZE:
1349 return "ASTCENC_ERR_BAD_BLOCK_SIZE";
1350 case ASTCENC_ERR_BAD_PROFILE:
1351 return "ASTCENC_ERR_BAD_PROFILE";
1352 case ASTCENC_ERR_BAD_QUALITY:
1353 return "ASTCENC_ERR_BAD_QUALITY";
1354 case ASTCENC_ERR_BAD_FLAGS:
1355 return "ASTCENC_ERR_BAD_FLAGS";
1356 case ASTCENC_ERR_BAD_SWIZZLE:
1357 return "ASTCENC_ERR_BAD_SWIZZLE";
1358 case ASTCENC_ERR_BAD_CONTEXT:
1359 return "ASTCENC_ERR_BAD_CONTEXT";
1360 case ASTCENC_ERR_NOT_IMPLEMENTED:
1361 return "ASTCENC_ERR_NOT_IMPLEMENTED";
1362#if defined(ASTCENC_DIAGNOSTICS)
1363 case ASTCENC_ERR_DTRACE_FAILURE:
1364 return "ASTCENC_ERR_DTRACE_FAILURE";
1365#endif
1366 default:
1367 return nullptr;
1368 }
1369}
1370