astcenc_entry.cpp source code [Godot/thirdparty/astcenc/astcenc_entry.cpp]

1	// SPDX-License-Identifier: Apache-2.0
2	// ----------------------------------------------------------------------------
3	// Copyright 2011-2023 Arm Limited
4	//
5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6	// use this file except in compliance with the License. You may obtain a copy
7	// of the License at:
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing, software
12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14	// License for the specific language governing permissions and limitations
15	// under the License.
16	// ----------------------------------------------------------------------------
17
18	/**
19	* @brief Functions for the library entrypoint.
20	*/
21
22	#include <array>
23	#include <cstring>
24	#include <new>
25
26	#include "astcenc.h"
27	#include "astcenc_internal_entry.h"
28	#include "astcenc_diagnostic_trace.h"
29
/**
 * @brief Record of the quality tuning parameter values for a single preset.
 *
 * See the @c astcenc_config structure for detailed parameter documentation.
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 *
 * Member order matters: the preset tables in this file initialize this struct positionally.
 */
struct astcenc_preset_config
{
	/** @brief The quality level this preset row is keyed on. */
	float quality;

	/** @brief Source value for @c astcenc_config::tune_partition_count_limit. */
	unsigned int tune_partition_count_limit;

	/** @brief Source value for @c astcenc_config::tune_2partition_index_limit. */
	unsigned int tune_2partition_index_limit;

	/** @brief Source value for @c astcenc_config::tune_3partition_index_limit. */
	unsigned int tune_3partition_index_limit;

	/** @brief Source value for @c astcenc_config::tune_4partition_index_limit. */
	unsigned int tune_4partition_index_limit;

	/** @brief Source value for @c astcenc_config::tune_block_mode_limit. */
	unsigned int tune_block_mode_limit;

	/** @brief Source value for @c astcenc_config::tune_refinement_limit. */
	unsigned int tune_refinement_limit;

	/** @brief Source value for @c astcenc_config::tune_candidate_limit. */
	unsigned int tune_candidate_limit;

	/** @brief Source value for @c astcenc_config::tune_2partitioning_candidate_limit. */
	unsigned int tune_2partitioning_candidate_limit;

	/** @brief Source value for @c astcenc_config::tune_3partitioning_candidate_limit. */
	unsigned int tune_3partitioning_candidate_limit;

	/** @brief Source value for @c astcenc_config::tune_4partitioning_candidate_limit. */
	unsigned int tune_4partitioning_candidate_limit;

	/** @brief First dB limit base; config init subtracts 35 * log10(texels per block). */
	float tune_db_limit_a_base;

	/** @brief Second dB limit base; config init subtracts 19 * log10(texels per block). */
	float tune_db_limit_b_base;

	/** @brief Source value for @c astcenc_config::tune_mse_overshoot. */
	float tune_mse_overshoot;

	/** @brief Source value for @c astcenc_config::tune_2partition_early_out_limit_factor. */
	float tune_2partition_early_out_limit_factor;

	/** @brief Source value for @c astcenc_config::tune_3partition_early_out_limit_factor. */
	float tune_3partition_early_out_limit_factor;

	/** @brief Source value for @c astcenc_config::tune_2plane_early_out_limit_correlation. */
	float tune_2plane_early_out_limit_correlation;
};
59
60	/**
61	* @brief The static presets for high bandwidth encodings (x < 25 texels per block).
62	*/
63	static const std::array<astcenc_preset_config, `6`> preset_configs_high {{
64	{
65	ASTCENC_PRE_FASTEST,
66	`2`, `10`, `6`, `4`, `43`, `2`, `2`, `2`, `2`, `2`, `85.2f`, `63.2f`, `3.5f`, `1.0f`, `1.0f`, `0.85f`
67	}, {
68	ASTCENC_PRE_FAST,
69	`3`, `18`, `10`, `8`, `55`, `3`, `3`, `2`, `2`, `2`, `85.2f`, `63.2f`, `3.5f`, `1.0f`, `1.0f`, `0.90f`
70	}, {
71	ASTCENC_PRE_MEDIUM,
72	`4`, `34`, `28`, `16`, `77`, `3`, `3`, `2`, `2`, `2`, `95.0f`, `70.0f`, `2.5f`, `1.1f`, `1.05f`, `0.95f`
73	}, {
74	ASTCENC_PRE_THOROUGH,
75	`4`, `82`, `60`, `30`, `94`, `4`, `4`, `3`, `2`, `2`, `105.0f`, `77.0f`, `10.0f`, `1.35f`, `1.15f`, `0.97f`
76	}, {
77	ASTCENC_PRE_VERYTHOROUGH,
78	`4`, `256`, `128`, `64`, `98`, `4`, `6`, `20`, `14`, `8`, `200.0f`, `200.0f`, `10.0f`, `1.6f`, `1.4f`, `0.98f`
79	}, {
80	ASTCENC_PRE_EXHAUSTIVE,
81	`4`, `512`, `512`, `512`, `100`, `4`, `8`, `32`, `32`, `32`, `200.0f`, `200.0f`, `10.0f`, `2.0f`, `2.0f`, `0.99f`
82	}
83	}};
84
85	/**
86	* @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block).
87	*/
88	static const std::array<astcenc_preset_config, `6`> preset_configs_mid {{
89	{
90	ASTCENC_PRE_FASTEST,
91	`2`, `10`, `6`, `4`, `43`, `2`, `2`, `2`, `2`, `2`, `85.2f`, `63.2f`, `3.5f`, `1.0f`, `1.0f`, `0.80f`
92	}, {
93	ASTCENC_PRE_FAST,
94	`3`, `18`, `12`, `10`, `55`, `3`, `3`, `2`, `2`, `2`, `85.2f`, `63.2f`, `3.5f`, `1.0f`, `1.0f`, `0.85f`
95	}, {
96	ASTCENC_PRE_MEDIUM,
97	`4`, `34`, `28`, `16`, `77`, `3`, `3`, `2`, `2`, `2`, `95.0f`, `70.0f`, `3.0f`, `1.1f`, `1.05f`, `0.90f`
98	}, {
99	ASTCENC_PRE_THOROUGH,
100	`4`, `82`, `60`, `30`, `94`, `4`, `4`, `3`, `2`, `2`, `105.0f`, `77.0f`, `10.0f`, `1.4f`, `1.2f`, `0.95f`
101	}, {
102	ASTCENC_PRE_VERYTHOROUGH,
103	`4`, `256`, `128`, `64`, `98`, `4`, `6`, `12`, `8`, `3`, `200.0f`, `200.0f`, `10.0f`, `1.6f`, `1.4f`, `0.98f`
104	}, {
105	ASTCENC_PRE_EXHAUSTIVE,
106	`4`, `256`, `256`, `256`, `100`, `4`, `8`, `32`, `32`, `32`, `200.0f`, `200.0f`, `10.0f`, `2.0f`, `2.0f`, `0.99f`
107	}
108	}};
109
110	/**
111	* @brief The static presets for low bandwidth encodings (64 <= x texels per block).
112	*/
113	static const std::array<astcenc_preset_config, `6`> preset_configs_low {{
114	{
115	ASTCENC_PRE_FASTEST,
116	`2`, `10`, `6`, `4`, `40`, `2`, `2`, `2`, `2`, `2`, `85.0f`, `63.0f`, `3.5f`, `1.0f`, `1.0f`, `0.80f`
117	}, {
118	ASTCENC_PRE_FAST,
119	`2`, `18`, `12`, `10`, `55`, `3`, `3`, `2`, `2`, `2`, `85.0f`, `63.0f`, `3.5f`, `1.0f`, `1.0f`, `0.85f`
120	}, {
121	ASTCENC_PRE_MEDIUM,
122	`3`, `34`, `28`, `16`, `77`, `3`, `3`, `2`, `2`, `2`, `95.0f`, `70.0f`, `3.5f`, `1.1f`, `1.05f`, `0.90f`
123	}, {
124	ASTCENC_PRE_THOROUGH,
125	`4`, `82`, `60`, `30`, `93`, `4`, `4`, `3`, `2`, `2`, `105.0f`, `77.0f`, `10.0f`, `1.3f`, `1.2f`, `0.97f`
126	}, {
127	ASTCENC_PRE_VERYTHOROUGH,
128	`4`, `256`, `128`, `64`, `98`, `4`, `6`, `9`, `5`, `2`, `200.0f`, `200.0f`, `10.0f`, `1.6f`, `1.4f`, `0.98f`
129	}, {
130	ASTCENC_PRE_EXHAUSTIVE,
131	`4`, `256`, `256`, `256`, `100`, `4`, `8`, `32`, `32`, `32`, `200.0f`, `200.0f`, `10.0f`, `2.0f`, `2.0f`, `0.99f`
132	}
133	}};
134
135	/**
136	* @brief Validate CPU floating point meets assumptions made in the codec.
137	*
138	* The codec is written with the assumption that a float threaded through the @c if32 union will be
139	* stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the
140	* case in an IEEE-754 compliant system, however not every system or compilation mode is actually
141	* IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled.
142	*
143	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
144	*/
145	static astcenc_error validate_cpu_float()
146	{
147	if32 p;
148	volatile float xprec_testval = `2.51f`;
149	p.f = xprec_testval + `12582912.0f`;
150	float q = p.f - `12582912.0f`;
151
152	if (q != `3.0f`)
153	{
154	return ASTCENC_ERR_BAD_CPU_FLOAT;
155	}
156
157	return ASTCENC_SUCCESS;
158	}
159
160	/**
161	* @brief Validate config profile.
162	*
163	* @param profile The profile to check.
164	*
165	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
166	*/
167	static astcenc_error validate_profile(
168	astcenc_profile profile
169	) {
170	// Values in this enum are from an external user, so not guaranteed to be
171	// bounded to the enum values
172	switch (static_cast<int>(profile))
173	{
174	case ASTCENC_PRF_LDR_SRGB:
175	case ASTCENC_PRF_LDR:
176	case ASTCENC_PRF_HDR_RGB_LDR_A:
177	case ASTCENC_PRF_HDR:
178	return ASTCENC_SUCCESS;
179	default:
180	return ASTCENC_ERR_BAD_PROFILE;
181	}
182	}
183
184	/**
185	* @brief Validate block size.
186	*
187	* @param block_x The block x dimensions.
188	* @param block_y The block y dimensions.
189	* @param block_z The block z dimensions.
190	*
191	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
192	*/
193	static astcenc_error validate_block_size(
194	unsigned int block_x,
195	unsigned int block_y,
196	unsigned int block_z
197	) {
198	// Test if this is a legal block size at all
199	bool is_legal = (((block_z <= `1`) && is_legal_2d_block_size(block_x, block_y)) \|\|
200	((block_z >= `2`) && is_legal_3d_block_size(block_x, block_y, block_z)));
201	if (!is_legal)
202	{
203	return ASTCENC_ERR_BAD_BLOCK_SIZE;
204	}
205
206	// Test if this build has sufficient capacity for this block size
207	bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS;
208	if (!have_capacity)
209	{
210	return ASTCENC_ERR_NOT_IMPLEMENTED;
211	}
212
213	return ASTCENC_SUCCESS;
214	}
215
216	/**
217	* @brief Validate flags.
218	*
219	* @param flags The flags to check.
220	*
221	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
222	*/
223	static astcenc_error validate_flags(
224	unsigned int flags
225	) {
226	// Flags field must not contain any unknown flag bits
227	unsigned int exMask = ~ASTCENC_ALL_FLAGS;
228	if (popcount(flags & exMask) != `0`)
229	{
230	return ASTCENC_ERR_BAD_FLAGS;
231	}
232
233	// Flags field must only contain at most a single map type
234	exMask = ASTCENC_FLG_MAP_NORMAL
235	\| ASTCENC_FLG_MAP_RGBM;
236	if (popcount(flags & exMask) > `1`)
237	{
238	return ASTCENC_ERR_BAD_FLAGS;
239	}
240
241	return ASTCENC_SUCCESS;
242	}
243
244	#if !defined(ASTCENC_DECOMPRESS_ONLY)
245
246	/**
247	* @brief Validate single channel compression swizzle.
248	*
249	* @param swizzle The swizzle to check.
250	*
251	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
252	*/
253	static astcenc_error validate_compression_swz(
254	astcenc_swz swizzle
255	) {
256	// Not all enum values are handled; SWZ_Z is invalid for compression
257	switch (static_cast<int>(swizzle))
258	{
259	case ASTCENC_SWZ_R:
260	case ASTCENC_SWZ_G:
261	case ASTCENC_SWZ_B:
262	case ASTCENC_SWZ_A:
263	case ASTCENC_SWZ_0:
264	case ASTCENC_SWZ_1:
265	return ASTCENC_SUCCESS;
266	default:
267	return ASTCENC_ERR_BAD_SWIZZLE;
268	}
269	}
270
271	/**
272	* @brief Validate overall compression swizzle.
273	*
274	* @param swizzle The swizzle to check.
275	*
276	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
277	*/
278	static astcenc_error validate_compression_swizzle(
279	const astcenc_swizzle& swizzle
280	) {
281	if (validate_compression_swz(swizzle.r) \|\|
282	validate_compression_swz(swizzle.g) \|\|
283	validate_compression_swz(swizzle.b) \|\|
284	validate_compression_swz(swizzle.a))
285	{
286	return ASTCENC_ERR_BAD_SWIZZLE;
287	}
288
289	return ASTCENC_SUCCESS;
290	}
291	#endif
292
293	/**
294	* @brief Validate single channel decompression swizzle.
295	*
296	* @param swizzle The swizzle to check.
297	*
298	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
299	*/
300	static astcenc_error validate_decompression_swz(
301	astcenc_swz swizzle
302	) {
303	// Values in this enum are from an external user, so not guaranteed to be
304	// bounded to the enum values
305	switch (static_cast<int>(swizzle))
306	{
307	case ASTCENC_SWZ_R:
308	case ASTCENC_SWZ_G:
309	case ASTCENC_SWZ_B:
310	case ASTCENC_SWZ_A:
311	case ASTCENC_SWZ_0:
312	case ASTCENC_SWZ_1:
313	case ASTCENC_SWZ_Z:
314	return ASTCENC_SUCCESS;
315	default:
316	return ASTCENC_ERR_BAD_SWIZZLE;
317	}
318	}
319
320	/**
321	* @brief Validate overall decompression swizzle.
322	*
323	* @param swizzle The swizzle to check.
324	*
325	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
326	*/
327	static astcenc_error validate_decompression_swizzle(
328	const astcenc_swizzle& swizzle
329	) {
330	if (validate_decompression_swz(swizzle.r) \|\|
331	validate_decompression_swz(swizzle.g) \|\|
332	validate_decompression_swz(swizzle.b) \|\|
333	validate_decompression_swz(swizzle.a))
334	{
335	return ASTCENC_ERR_BAD_SWIZZLE;
336	}
337
338	return ASTCENC_SUCCESS;
339	}
340
341	/**
342	* Validate that an incoming configuration is in-spec.
343	*
344	* This function can respond in two ways:
345	*
346	* * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown
347	* for out-of-range inputs in this case.
348	* * Numerical inputs and logic inputs are are logically invalid and which make no sense
349	* algorithmically will return an error.
350	*
351	* @param[in,out] config The input compressor configuration.
352	*
353	* @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure.
354	*/
355	static astcenc_error validate_config(
356	astcenc_config &config
357	) {
358	astcenc_error status;
359
360	status = validate_profile(config.profile);
361	if (status != ASTCENC_SUCCESS)
362	{
363	return status;
364	}
365
366	status = validate_flags(config.flags);
367	if (status != ASTCENC_SUCCESS)
368	{
369	return status;
370	}
371
372	status = validate_block_size(config.block_x, config.block_y, config.block_z);
373	if (status != ASTCENC_SUCCESS)
374	{
375	return status;
376	}
377
378	#if defined(ASTCENC_DECOMPRESS_ONLY)
379	// Decompress-only builds only support decompress-only contexts
380	if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY))
381	{
382	return ASTCENC_ERR_BAD_PARAM;
383	}
384	#endif
385
386	config.rgbm_m_scale = astc::max(config.rgbm_m_scale, `1.0f`);
387
388	config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, `1u`, `4u`);
389	config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, `1u`, BLOCK_MAX_PARTITIONINGS);
390	config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, `1u`, BLOCK_MAX_PARTITIONINGS);
391	config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, `1u`, BLOCK_MAX_PARTITIONINGS);
392	config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, `1u`, `100u`);
393	config.tune_refinement_limit = astc::max(config.tune_refinement_limit, `1u`);
394	config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, `1u`, TUNE_MAX_TRIAL_CANDIDATES);
395	config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, `1u`, TUNE_MAX_PARTITIONING_CANDIDATES);
396	config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, `1u`, TUNE_MAX_PARTITIONING_CANDIDATES);
397	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, `1u`, TUNE_MAX_PARTITIONING_CANDIDATES);
398	config.tune_db_limit = astc::max(config.tune_db_limit, `0.0f`);
399	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, `1.0f`);
400	config.tune_2partition_early_out_limit_factor = astc::max(config.tune_2partition_early_out_limit_factor, `0.0f`);
401	config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, `0.0f`);
402	config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, `0.0f`);
403
404	// Specifying a zero weight color component is not allowed; force to small value
405	float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
406	astc::max(config.cw_b_weight, config.cw_a_weight));
407	if (max_weight > `0.0f`)
408	{
409	max_weight /= `1000.0f`;
410	config.cw_r_weight = astc::max(config.cw_r_weight, max_weight);
411	config.cw_g_weight = astc::max(config.cw_g_weight, max_weight);
412	config.cw_b_weight = astc::max(config.cw_b_weight, max_weight);
413	config.cw_a_weight = astc::max(config.cw_a_weight, max_weight);
414	}
415	// If all color components error weights are zero then return an error
416	else
417	{
418	return ASTCENC_ERR_BAD_PARAM;
419	}
420
421	return ASTCENC_SUCCESS;
422	}
423
424	/ See header for documentation. /
425	astcenc_error astcenc_config_init(
426	astcenc_profile profile,
427	unsigned int block_x,
428	unsigned int block_y,
429	unsigned int block_z,
430	float quality,
431	unsigned int flags,
432	astcenc_config* configp
433	) {
434	astcenc_error status;
435
436	status = validate_cpu_float();
437	if (status != ASTCENC_SUCCESS)
438	{
439	return status;
440	}
441
442	// Zero init all config fields; although most of will be over written
443	astcenc_config& config = *configp;
444	std::memset(&config, `0`, sizeof(config));
445
446	// Process the block size
447	block_z = astc::max(block_z, `1u`); // For 2D blocks Z==0 is accepted, but convert to 1
448	status = validate_block_size(block_x, block_y, block_z);
449	if (status != ASTCENC_SUCCESS)
450	{
451	return status;
452	}
453
454	config.block_x = block_x;
455	config.block_y = block_y;
456	config.block_z = block_z;
457
458	float texels = static_cast<float>(block_x * block_y * block_z);
459	float ltexels = logf(texels) / logf(`10.0f`);
460
461	// Process the performance quality level or preset; note that this must be done before we
462	// process any additional settings, such as color profile and flags, which may replace some of
463	// these settings with more use case tuned values
464	if (quality < ASTCENC_PRE_FASTEST \|\|
465	quality > ASTCENC_PRE_EXHAUSTIVE)
466	{
467	return ASTCENC_ERR_BAD_QUALITY;
468	}
469
470	static const std::array<astcenc_preset_config, `6`>* preset_configs;
471	int texels_int = block_x * block_y * block_z;
472	if (texels_int < `25`)
473	{
474	preset_configs = &preset_configs_high;
475	}
476	else if (texels_int < `64`)
477	{
478	preset_configs = &preset_configs_mid;
479	}
480	else
481	{
482	preset_configs = &preset_configs_low;
483	}
484
485	// Determine which preset to use, or which pair to interpolate
486	size_t start;
487	size_t end;
488	for (end = `0`; end < preset_configs->size(); end++)
489	{
490	if ((*preset_configs)[end].quality >= quality)
491	{
492	break;
493	}
494	}
495
496	start = end == `0` ? `0` : end - `1`;
497
498	// Start and end node are the same - so just transfer the values.
499	if (start == end)
500	{
501	config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit;
502	config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit;
503	config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit;
504	config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit;
505	config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit;
506	config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit;
507	config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit, TUNE_MAX_TRIAL_CANDIDATES);
508	config.tune_2partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_2partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
509	config.tune_3partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_3partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
510	config.tune_4partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_4partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES);
511	config.tune_db_limit = astc::max((preset_configs)[start].tune_db_limit_a_base - `35` ltexels,
512	(preset_configs)[start].tune_db_limit_b_base - `19` ltexels);
513
514	config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot;
515
516	config.tune_2partition_early_out_limit_factor = (*preset_configs)[start].tune_2partition_early_out_limit_factor;
517	config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
518	config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
519	}
520	// Start and end node are not the same - so interpolate between them
521	else
522	{
523	auto& node_a = (*preset_configs)[start];
524	auto& node_b = (*preset_configs)[end];
525
526	float wt_range = node_b.quality - node_a.quality;
527	assert(wt_range > `0`);
528
529	// Compute interpolation factors
530	float wt_node_a = (node_b.quality - quality) / wt_range;
531	float wt_node_b = (quality - node_a.quality) / wt_range;
532
533	#define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
534	#define LERPI(param) astc::flt2int_rtn(\
535	(static_cast<float>(node_a.param) * wt_node_a) + \
536	(static_cast<float>(node_b.param) * wt_node_b))
537	#define LERPUI(param) static_cast<unsigned int>(LERPI(param))
538
539	config.tune_partition_count_limit = LERPI(tune_partition_count_limit);
540	config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit);
541	config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit);
542	config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit);
543	config.tune_block_mode_limit = LERPI(tune_block_mode_limit);
544	config.tune_refinement_limit = LERPI(tune_refinement_limit);
545	config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit),
546	TUNE_MAX_TRIAL_CANDIDATES);
547	config.tune_2partitioning_candidate_limit = astc::min(LERPUI(tune_2partitioning_candidate_limit),
548	BLOCK_MAX_PARTITIONINGS);
549	config.tune_3partitioning_candidate_limit = astc::min(LERPUI(tune_3partitioning_candidate_limit),
550	BLOCK_MAX_PARTITIONINGS);
551	config.tune_4partitioning_candidate_limit = astc::min(LERPUI(tune_4partitioning_candidate_limit),
552	BLOCK_MAX_PARTITIONINGS);
553	config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - `35` * ltexels,
554	LERP(tune_db_limit_b_base) - `19` * ltexels);
555
556	config.tune_mse_overshoot = LERP(tune_mse_overshoot);
557
558	config.tune_2partition_early_out_limit_factor = LERP(tune_2partition_early_out_limit_factor);
559	config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
560	config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
561	#undef LERP
562	#undef LERPI
563	#undef LERPUI
564	}
565
566	// Set heuristics to the defaults for each color profile
567	config.cw_r_weight = `1.0f`;
568	config.cw_g_weight = `1.0f`;
569	config.cw_b_weight = `1.0f`;
570	config.cw_a_weight = `1.0f`;
571
572	config.a_scale_radius = `0`;
573
574	config.rgbm_m_scale = `0.0f`;
575
576	config.profile = profile;
577
578	// Values in this enum are from an external user, so not guaranteed to be
579	// bounded to the enum values
580	switch (static_cast<int>(profile))
581	{
582	case ASTCENC_PRF_LDR:
583	case ASTCENC_PRF_LDR_SRGB:
584	break;
585	case ASTCENC_PRF_HDR_RGB_LDR_A:
586	case ASTCENC_PRF_HDR:
587	config.tune_db_limit = `999.0f`;
588	break;
589	default:
590	return ASTCENC_ERR_BAD_PROFILE;
591	}
592
593	// Flags field must not contain any unknown flag bits
594	status = validate_flags(flags);
595	if (status != ASTCENC_SUCCESS)
596	{
597	return status;
598	}
599
600	if (flags & ASTCENC_FLG_MAP_NORMAL)
601	{
602	// Normal map encoding uses L+A blocks, so allow one more partitioning
603	// than normal. We need need fewer bits for endpoints, so more likely
604	// to be able to use more partitions than an RGB/RGBA block
605	config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + `1u`, `4u`);
606
607	config.cw_g_weight = `0.0f`;
608	config.cw_b_weight = `0.0f`;
609	config.tune_2partition_early_out_limit_factor *= `1.5f`;
610	config.tune_3partition_early_out_limit_factor *= `1.5f`;
611	config.tune_2plane_early_out_limit_correlation = `0.99f`;
612
613	// Normals are prone to blocking artifacts on smooth curves
614	// so force compressor to try harder here ...
615	config.tune_db_limit *= `1.03f`;
616	}
617	else if (flags & ASTCENC_FLG_MAP_RGBM)
618	{
619	config.rgbm_m_scale = `5.0f`;
620	config.cw_a_weight = `2.0f` * config.rgbm_m_scale;
621	}
622	else // (This is color data)
623	{
624	// This is a very basic perceptual metric for RGB color data, which weights error
625	// significance by the perceptual luminance contribution of each color channel. For
626	// luminance the usual weights to compute luminance from a linear RGB value are as
627	// follows:
628	//
629	// l = r 0.3 + g * 0.59 + b * 0.11*
630	//
631	// ... but we scale these up to keep a better balance between color and alpha. Note
632	// that if the content is using alpha we'd recommend using the -a option to weight
633	// the color contribution by the alpha transparency.
634	if (flags & ASTCENC_FLG_USE_PERCEPTUAL)
635	{
636	config.cw_r_weight = `0.30f` * `2.25f`;
637	config.cw_g_weight = `0.59f` * `2.25f`;
638	config.cw_b_weight = `0.11f` * `2.25f`;
639	}
640	}
641	config.flags = flags;
642
643	return ASTCENC_SUCCESS;
644	}
645
646	/ See header for documentation. /
647	astcenc_error astcenc_context_alloc(
648	const astcenc_config* configp,
649	unsigned int thread_count,
650	astcenc_context** context
651	) {
652	astcenc_error status;
653	const astcenc_config& config = *configp;
654
655	status = validate_cpu_float();
656	if (status != ASTCENC_SUCCESS)
657	{
658	return status;
659	}
660
661	if (thread_count == `0`)
662	{
663	return ASTCENC_ERR_BAD_PARAM;
664	}
665
666	#if defined(ASTCENC_DIAGNOSTICS)
667	// Force single threaded compressor use in diagnostic mode.
668	if (thread_count != `1`)
669	{
670	return ASTCENC_ERR_BAD_PARAM;
671	}
672	#endif
673
674	astcenc_context* ctxo = new astcenc_context;
675	astcenc_contexti* ctx = &ctxo->context;
676	ctx->thread_count = thread_count;
677	ctx->config = config;
678	ctx->working_buffers = nullptr;
679
680	// These are allocated per-compress, as they depend on image size
681	ctx->input_alpha_averages = nullptr;
682
683	// Copy the config first and validate the copy (we may modify it)
684	status = validate_config(ctx->config);
685	if (status != ASTCENC_SUCCESS)
686	{
687	delete ctxo;
688	return status;
689	}
690
691	ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN);
692	bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY);
693	init_block_size_descriptor(config.block_x, config.block_y, config.block_z,
694	can_omit_modes,
695	config.tune_partition_count_limit,
696	static_cast<float>(config.tune_block_mode_limit) / `100.0f`,
697	*ctx->bsd);
698
699	#if !defined(ASTCENC_DECOMPRESS_ONLY)
700	// Do setup only needed by compression
701	if (!(status & ASTCENC_FLG_DECOMPRESS_ONLY))
702	{
703	// Turn a dB limit into a per-texel error for faster use later
704	if ((ctx->config.profile == ASTCENC_PRF_LDR) \|\| (ctx->config.profile == ASTCENC_PRF_LDR_SRGB))
705	{
706	ctx->config.tune_db_limit = astc::pow(`0.1f`, ctx->config.tune_db_limit * `0.1f`) * `65535.0f` * `65535.0f`;
707	}
708	else
709	{
710	ctx->config.tune_db_limit = `0.0f`;
711	}
712
713	size_t worksize = sizeof(compression_working_buffers) * thread_count;
714	ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN);
715	static_assert((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == `0`,
716	"compression_working_buffers size must be multiple of vector alignment");
717	if (!ctx->working_buffers)
718	{
719	aligned_free<block_size_descriptor>(ctx->bsd);
720	delete ctxo;
721	context = nullptr*;
722	return ASTCENC_ERR_OUT_OF_MEM;
723	}
724	}
725	#endif
726
727	#if defined(ASTCENC_DIAGNOSTICS)
728	ctx->trace_log = new TraceLog(ctx->config.trace_file_path);
729	if (!ctx->trace_log->m_file)
730	{
731	return ASTCENC_ERR_DTRACE_FAILURE;
732	}
733
734	trace_add_data("block_x", config.block_x);
735	trace_add_data("block_y", config.block_y);
736	trace_add_data("block_z", config.block_z);
737	#endif
738
739	*context = ctxo;
740
741	#if !defined(ASTCENC_DECOMPRESS_ONLY)
742	prepare_angular_tables();
743	#endif
744
745	return ASTCENC_SUCCESS;
746	}
747
748	/ See header dor documentation. /
749	void astcenc_context_free(
750	astcenc_context* ctxo
751	) {
752	if (ctxo)
753	{
754	astcenc_contexti* ctx = &ctxo->context;
755	aligned_free<compression_working_buffers>(ctx->working_buffers);
756	aligned_free<block_size_descriptor>(ctx->bsd);
757	#if defined(ASTCENC_DIAGNOSTICS)
758	delete ctx->trace_log;
759	#endif
760	delete ctxo;
761	}
762	}
763
764	#if !defined(ASTCENC_DECOMPRESS_ONLY)
765
766	/**
767	* @brief Compress an image, after any preflight has completed.
768	*
769	* @param[out] ctxo The compressor context.
770	* @param thread_index The thread index.
771	* @param image The intput image.
772	* @param swizzle The input swizzle.
773	* @param[out] buffer The output array for the compressed data.
774	*/
775	static void compress_image(
776	astcenc_context& ctxo,
777	unsigned int thread_index,
778	const astcenc_image& image,
779	const astcenc_swizzle& swizzle,
780	uint8_t* buffer
781	) {
782	astcenc_contexti& ctx = ctxo.context;
783	const block_size_descriptor& bsd = *ctx.bsd;
784	astcenc_profile decode_mode = ctx.config.profile;
785
786	image_block blk;
787
788	int block_x = bsd.xdim;
789	int block_y = bsd.ydim;
790	int block_z = bsd.zdim;
791	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
792
793	int dim_x = image.dim_x;
794	int dim_y = image.dim_y;
795	int dim_z = image.dim_z;
796
797	int xblocks = (dim_x + block_x - `1`) / block_x;
798	int yblocks = (dim_y + block_y - `1`) / block_y;
799	int zblocks = (dim_z + block_z - `1`) / block_z;
800	int block_count = zblocks * yblocks * xblocks;
801
802	int row_blocks = xblocks;
803	int plane_blocks = xblocks * yblocks;
804
805	// Populate the block channel weights
806	blk.channel_weight = vfloat4 (ctx.config.cw_r_weight,
807	ctx.config.cw_g_weight,
808	ctx.config.cw_b_weight,
809	ctx.config.cw_a_weight);
810
811	// Use preallocated scratch buffer
812	auto& temp_buffers = ctx.working_buffers[thread_index];
813
814	// Only the first thread actually runs the initializer
815	ctxo.manage_compress.init(block_count);
816
817	// Determine if we can use an optimized load function
818	bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) \|\| (swizzle.g != ASTCENC_SWZ_G) \|\|
819	(swizzle.b != ASTCENC_SWZ_B) \|\| (swizzle.a != ASTCENC_SWZ_A);
820
821	bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) \|\|
822	(decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A);
823
824	bool use_fast_load = !needs_swz && !needs_hdr &&
825	block_z == `1` && image.data_type == ASTCENC_TYPE_U8;
826
827	auto load_func = load_image_block;
828	if (use_fast_load)
829	{
830	load_func = load_image_block_fast_ldr;
831	}
832
833	// All threads run this processing loop until there is no work remaining
834	while (true)
835	{
836	unsigned int count;
837	unsigned int base = ctxo.manage_compress.get_task_assignment(`16`, count);
838	if (!count)
839	{
840	break;
841	}
842
843	for (unsigned int i = base; i < base + count; i++)
844	{
845	// Decode i into x, y, z block indices
846	int z = i / plane_blocks;
847	unsigned int rem = i - (z * plane_blocks);
848	int y = rem / row_blocks;
849	int x = rem - (y * row_blocks);
850
851	// Test if we can apply some basic alpha-scale RDO
852	bool use_full_block = true;
853	if (ctx.config.a_scale_radius != `0` && block_z == `1`)
854	{
855	int start_x = x * block_x;
856	int end_x = astc::min(dim_x, start_x + block_x);
857
858	int start_y = y * block_y;
859	int end_y = astc::min(dim_y, start_y + block_y);
860
861	// SATs accumulate error, so don't test exactly zero. Test for
862	// less than 1 alpha in the expanded block footprint that
863	// includes the alpha radius.
864	int x_footprint = block_x + `2` * (ctx.config.a_scale_radius - `1`);
865
866	int y_footprint = block_y + `2` * (ctx.config.a_scale_radius - `1`);
867
868	float footprint = static_cast<float>(x_footprint * y_footprint);
869	float threshold = `0.9f` / (`255.0f` * footprint);
870
871	// Do we have any alpha values?
872	use_full_block = false;
873	for (int ay = start_y; ay < end_y; ay++)
874	{
875	for (int ax = start_x; ax < end_x; ax++)
876	{
877	float a_avg = ctx.input_alpha_averages[ay * dim_x + ax];
878	if (a_avg > threshold)
879	{
880	use_full_block = true;
881	ax = end_x;
882	ay = end_y;
883	}
884	}
885	}
886	}
887
888	// Fetch the full block for compression
889	if (use_full_block)
890	{
891	load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle);
892
893	// Scale RGB error contribution by the maximum alpha in the block
894	// This encourages preserving alpha accuracy in regions with high
895	// transparency, and can buy up to 0.5 dB PSNR.
896	if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT)
897	{
898	float alpha_scale = blk.data_max.lane<`3`>() * (`1.0f` / `65535.0f`);
899	blk.channel_weight = vfloat4 (ctx.config.cw_r_weight * alpha_scale,
900	ctx.config.cw_g_weight * alpha_scale,
901	ctx.config.cw_b_weight * alpha_scale,
902	ctx.config.cw_a_weight);
903	}
904	}
905	// Apply alpha scale RDO - substitute constant color block
906	else
907	{
908	blk.origin_texel = vfloat4::zero();
909	blk.data_min = vfloat4::zero();
910	blk.data_mean = vfloat4::zero();
911	blk.data_max = vfloat4::zero();
912	blk.grayscale = true;
913	}
914
915	int offset = ((z * yblocks + y) * xblocks + x) * `16`;
916	uint8_t *bp = buffer + offset;
917	physical_compressed_block* pcb = reinterpret_cast<physical_compressed_block*>(bp);
918	compress_block(ctx, blk, *pcb, temp_buffers);
919	}
920
921	ctxo.manage_compress.complete_task_assignment(count);
922	}
923	}
924
925	/**
926	* @brief Compute regional averages in an image.
927	*
928	* This function can be called by multiple threads, but only after a single
929	* thread calls the setup function @c init_compute_averages().
930	*
931	* Results are written back into @c img->input_alpha_averages.
932	*
933	* @param[out] ctx The context.
934	* @param ag The average and variance arguments created during setup.
935	*/
936	static void compute_averages(
937	astcenc_context& ctx,
938	const avg_args &ag
939	) {
940	pixel_region_args arg = ag.arg;
941	arg.work_memory = new vfloat4[ag.work_memory_size];
942
943	int size_x = ag.img_size_x;
944	int size_y = ag.img_size_y;
945	int size_z = ag.img_size_z;
946
947	int step_xy = ag.blk_size_xy;
948	int step_z = ag.blk_size_z;
949
950	int y_tasks = (size_y + step_xy - `1`) / step_xy;
951
952	// All threads run this processing loop until there is no work remaining
953	while (true)
954	{
955	unsigned int count;
956	unsigned int base = ctx.manage_avg.get_task_assignment(`16`, count);
957	if (!count)
958	{
959	break;
960	}
961
962	for (unsigned int i = base; i < base + count; i++)
963	{
964	int z = (i / (y_tasks)) * step_z;
965	int y = (i - (z * y_tasks)) * step_xy;
966
967	arg.size_z = astc::min(step_z, size_z - z);
968	arg.offset_z = z;
969
970	arg.size_y = astc::min(step_xy, size_y - y);
971	arg.offset_y = y;
972
973	for (int x = `0`; x < size_x; x += step_xy)
974	{
975	arg.size_x = astc::min(step_xy, size_x - x);
976	arg.offset_x = x;
977	compute_pixel_region_variance(ctx.context, arg);
978	}
979	}
980
981	ctx.manage_avg.complete_task_assignment(count);
982	}
983
984	delete[] arg.work_memory;
985	}
986
987	#endif
988
989	/ See header for documentation. /
990	astcenc_error astcenc_compress_image(
991	astcenc_context* ctxo,
992	astcenc_image* imagep,
993	const astcenc_swizzle* swizzle,
994	uint8_t* data_out,
995	size_t data_len,
996	unsigned int thread_index
997	) {
998	#if defined(ASTCENC_DECOMPRESS_ONLY)
999	(void)ctxo;
1000	(void)imagep;
1001	(void)swizzle;
1002	(void)data_out;
1003	(void)data_len;
1004	(void)thread_index;
1005	return ASTCENC_ERR_BAD_CONTEXT;
1006	#else
1007	astcenc_contexti* ctx = &ctxo->context;
1008	astcenc_error status;
1009	astcenc_image& image = *imagep;
1010
1011	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1012	{
1013	return ASTCENC_ERR_BAD_CONTEXT;
1014	}
1015
1016	status = validate_compression_swizzle(*swizzle);
1017	if (status != ASTCENC_SUCCESS)
1018	{
1019	return status;
1020	}
1021
1022	if (thread_index >= ctx->thread_count)
1023	{
1024	return ASTCENC_ERR_BAD_PARAM;
1025	}
1026
1027	unsigned int block_x = ctx->config.block_x;
1028	unsigned int block_y = ctx->config.block_y;
1029	unsigned int block_z = ctx->config.block_z;
1030
1031	unsigned int xblocks = (image.dim_x + block_x - `1`) / block_x;
1032	unsigned int yblocks = (image.dim_y + block_y - `1`) / block_y;
1033	unsigned int zblocks = (image.dim_z + block_z - `1`) / block_z;
1034
1035	// Check we have enough output space (16 bytes per block)
1036	size_t size_needed = xblocks * yblocks * zblocks * `16`;
1037	if (data_len < size_needed)
1038	{
1039	return ASTCENC_ERR_OUT_OF_MEM;
1040	}
1041
1042	// If context thread count is one then implicitly reset
1043	if (ctx->thread_count == `1`)
1044	{
1045	astcenc_compress_reset(ctxo);
1046	}
1047
1048	if (ctx->config.a_scale_radius != `0`)
1049	{
1050	// First thread to enter will do setup, other threads will subsequently
1051	// enter the critical section but simply skip over the initialization
1052	auto init_avg = [ctx, &image, swizzle]() {
1053	// Perform memory allocations for the destination buffers
1054	size_t texel_count = image.dim_x * image.dim_y * image.dim_z;
1055	ctx->input_alpha_averages = new float[texel_count];
1056
1057	return init_compute_averages(
1058	image, ctx->config.a_scale_radius, *swizzle,
1059	ctx->avg_preprocess_args);
1060	};
1061
1062	// Only the first thread actually runs the initializer
1063	ctxo->manage_avg.init(init_avg);
1064
1065	// All threads will enter this function and dynamically grab work
1066	compute_averages(*ctxo, ctx->avg_preprocess_args);
1067	}
1068
1069	// Wait for compute_averages to complete before compressing
1070	ctxo->manage_avg.wait();
1071
1072	compress_image(ctxo, thread_index, image, swizzle, data_out);
1073
1074	// Wait for compress to complete before freeing memory
1075	ctxo->manage_compress.wait();
1076
1077	auto term_compress = [ctx]() {
1078	delete[] ctx->input_alpha_averages;
1079	ctx->input_alpha_averages = nullptr;
1080	};
1081
1082	// Only the first thread to arrive actually runs the term
1083	ctxo->manage_compress.term(term_compress);
1084
1085	return ASTCENC_SUCCESS;
1086	#endif
1087	}
1088
1089	/ See header for documentation. /
1090	astcenc_error astcenc_compress_reset(
1091	astcenc_context* ctxo
1092	) {
1093	#if defined(ASTCENC_DECOMPRESS_ONLY)
1094	(void)ctxo;
1095	return ASTCENC_ERR_BAD_CONTEXT;
1096	#else
1097	astcenc_contexti* ctx = &ctxo->context;
1098	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)
1099	{
1100	return ASTCENC_ERR_BAD_CONTEXT;
1101	}
1102
1103	ctxo->manage_avg.reset();
1104	ctxo->manage_compress.reset();
1105	return ASTCENC_SUCCESS;
1106	#endif
1107	}
1108
1109	/ See header for documentation. /
1110	astcenc_error astcenc_decompress_image(
1111	astcenc_context* ctxo,
1112	const uint8_t* data,
1113	size_t data_len,
1114	astcenc_image* image_outp,
1115	const astcenc_swizzle* swizzle,
1116	unsigned int thread_index
1117	) {
1118	astcenc_error status;
1119	astcenc_image& image_out = *image_outp;
1120	astcenc_contexti* ctx = &ctxo->context;
1121
1122	// Today this doesn't matter (working set on stack) but might in future ...
1123	if (thread_index >= ctx->thread_count)
1124	{
1125	return ASTCENC_ERR_BAD_PARAM;
1126	}
1127
1128	status = validate_decompression_swizzle(*swizzle);
1129	if (status != ASTCENC_SUCCESS)
1130	{
1131	return status;
1132	}
1133
1134	unsigned int block_x = ctx->config.block_x;
1135	unsigned int block_y = ctx->config.block_y;
1136	unsigned int block_z = ctx->config.block_z;
1137
1138	unsigned int xblocks = (image_out.dim_x + block_x - `1`) / block_x;
1139	unsigned int yblocks = (image_out.dim_y + block_y - `1`) / block_y;
1140	unsigned int zblocks = (image_out.dim_z + block_z - `1`) / block_z;
1141
1142	int row_blocks = xblocks;
1143	int plane_blocks = xblocks * yblocks;
1144
1145	// Check we have enough output space (16 bytes per block)
1146	size_t size_needed = xblocks * yblocks * zblocks * `16`;
1147	if (data_len < size_needed)
1148	{
1149	return ASTCENC_ERR_OUT_OF_MEM;
1150	}
1151
1152	image_block blk;
1153	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z);
1154
1155	// If context thread count is one then implicitly reset
1156	if (ctx->thread_count == `1`)
1157	{
1158	astcenc_decompress_reset(ctxo);
1159	}
1160
1161	// Only the first thread actually runs the initializer
1162	ctxo->manage_decompress.init(zblocks * yblocks * xblocks);
1163
1164	// All threads run this processing loop until there is no work remaining
1165	while (true)
1166	{
1167	unsigned int count;
1168	unsigned int base = ctxo->manage_decompress.get_task_assignment(`128`, count);
1169	if (!count)
1170	{
1171	break;
1172	}
1173
1174	for (unsigned int i = base; i < base + count; i++)
1175	{
1176	// Decode i into x, y, z block indices
1177	int z = i / plane_blocks;
1178	unsigned int rem = i - (z * plane_blocks);
1179	int y = rem / row_blocks;
1180	int x = rem - (y * row_blocks);
1181
1182	unsigned int offset = (((z * yblocks + y) * xblocks) + x) * `16`;
1183	const uint8_t* bp = data + offset;
1184
1185	const physical_compressed_block& pcb = *reinterpret_cast<const physical_compressed_block*>(bp);
1186	symbolic_compressed_block scb;
1187
1188	physical_to_symbolic(*ctx->bsd, pcb, scb);
1189
1190	decompress_symbolic_block(ctx->config.profile, *ctx->bsd,
1191	x * block_x, y * block_y, z * block_z,
1192	scb, blk);
1193
1194	store_image_block(image_out, blk, *ctx->bsd,
1195	x * block_x, y * block_y, z * block_z, *swizzle);
1196	}
1197
1198	ctxo->manage_decompress.complete_task_assignment(count);
1199	}
1200
1201	return ASTCENC_SUCCESS;
1202	}
1203
1204	/ See header for documentation. /
1205	astcenc_error astcenc_decompress_reset(
1206	astcenc_context* ctxo
1207	) {
1208	ctxo->manage_decompress.reset();
1209	return ASTCENC_SUCCESS;
1210	}
1211
1212	/ See header for documentation. /
1213	astcenc_error astcenc_get_block_info(
1214	astcenc_context* ctxo,
1215	const uint8_t data[`16`],
1216	astcenc_block_info* info
1217	) {
1218	#if defined(ASTCENC_DECOMPRESS_ONLY)
1219	(void)ctxo;
1220	(void)data;
1221	(void)info;
1222	return ASTCENC_ERR_BAD_CONTEXT;
1223	#else
1224	astcenc_contexti* ctx = &ctxo->context;
1225
1226	// Decode the compressed data into a symbolic form
1227	const physical_compressed_block&pcb = *reinterpret_cast<const physical_compressed_block*>(data);
1228	symbolic_compressed_block scb;
1229	physical_to_symbolic(*ctx->bsd, pcb, scb);
1230
1231	// Fetch the appropriate partition and decimation tables
1232	block_size_descriptor& bsd = *ctx->bsd;
1233
1234	// Start from a clean slate
1235	memset(info, `0`, sizeof(*info));
1236
1237	// Basic info we can always populate
1238	info->profile = ctx->config.profile;
1239
1240	info->block_x = ctx->config.block_x;
1241	info->block_y = ctx->config.block_y;
1242	info->block_z = ctx->config.block_z;
1243	info->texel_count = bsd.texel_count;
1244
1245	// Check for error blocks first
1246	info->is_error_block = scb.block_type == SYM_BTYPE_ERROR;
1247	if (info->is_error_block)
1248	{
1249	return ASTCENC_SUCCESS;
1250	}
1251
1252	// Check for constant color blocks second
1253	info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 \|\|
1254	scb.block_type == SYM_BTYPE_CONST_U16;
1255	if (info->is_constant_block)
1256	{
1257	return ASTCENC_SUCCESS;
1258	}
1259
1260	// Otherwise handle a full block ; known to be valid after conditions above have been checked
1261	int partition_count = scb.partition_count;
1262	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
1263
1264	const block_mode& bm = bsd.get_block_mode(scb.block_mode);
1265	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
1266
1267	info->weight_x = di.weight_x;
1268	info->weight_y = di.weight_y;
1269	info->weight_z = di.weight_z;
1270
1271	info->is_dual_plane_block = bm.is_dual_plane != `0`;
1272
1273	info->partition_count = scb.partition_count;
1274	info->partition_index = scb.partition_index;
1275	info->dual_plane_component = scb.plane2_component;
1276
1277	info->color_level_count = get_quant_level(scb.get_color_quant_mode());
1278	info->weight_level_count = get_quant_level(bm.get_weight_quant_mode());
1279
1280	// Unpack color endpoints for each active partition
1281	for (unsigned int i = `0`; i < scb.partition_count; i++)
1282	{
1283	bool rgb_hdr;
1284	bool a_hdr;
1285	vint4 endpnt[`2`];
1286
1287	unpack_color_endpoints(ctx->config.profile,
1288	scb.color_formats[i],
1289	scb.color_values[i],
1290	rgb_hdr, a_hdr,
1291	endpnt[`0`], endpnt[`1`]);
1292
1293	// Store the color endpoint mode info
1294	info->color_endpoint_modes[i] = scb.color_formats[i];
1295	info->is_hdr_block = info->is_hdr_block \|\| rgb_hdr \|\| a_hdr;
1296
1297	// Store the unpacked and decoded color endpoint
1298	vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr);
1299	for (int j = `0`; j < `2`; j++)
1300	{
1301	vint4 color_lns = lns_to_sf16(endpnt[j]);
1302	vint4 color_unorm = unorm16_to_sf16(endpnt[j]);
1303	vint4 datai = select(color_unorm, color_lns, hdr_mask);
1304	store(float16_to_float(datai), info->color_endpoints[i][j]);
1305	}
1306	}
1307
1308	// Unpack weights for each texel
1309	int weight_plane1[BLOCK_MAX_TEXELS];
1310	int weight_plane2[BLOCK_MAX_TEXELS];
1311
1312	unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2);
1313	for (unsigned int i = `0`; i < bsd.texel_count; i++)
1314	{
1315	info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (`1.0f` / WEIGHTS_TEXEL_SUM);
1316	if (info->is_dual_plane_block)
1317	{
1318	info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (`1.0f` / WEIGHTS_TEXEL_SUM);
1319	}
1320	}
1321
1322	// Unpack partition assignments for each texel
1323	for (unsigned int i = `0`; i < bsd.texel_count; i++)
1324	{
1325	info->partition_assignment[i] = pi.partition_of_texel[i];
1326	}
1327
1328	return ASTCENC_SUCCESS;
1329	#endif
1330	}
1331
1332	/ See header for documentation. /
1333	const char* astcenc_get_error_string(
1334	astcenc_error status
1335	) {
1336	// Values in this enum are from an external user, so not guaranteed to be
1337	// bounded to the enum values
1338	switch (static_cast<int>(status))
1339	{
1340	case ASTCENC_SUCCESS:
1341	return "ASTCENC_SUCCESS";
1342	case ASTCENC_ERR_OUT_OF_MEM:
1343	return "ASTCENC_ERR_OUT_OF_MEM";
1344	case ASTCENC_ERR_BAD_CPU_FLOAT:
1345	return "ASTCENC_ERR_BAD_CPU_FLOAT";
1346	case ASTCENC_ERR_BAD_PARAM:
1347	return "ASTCENC_ERR_BAD_PARAM";
1348	case ASTCENC_ERR_BAD_BLOCK_SIZE:
1349	return "ASTCENC_ERR_BAD_BLOCK_SIZE";
1350	case ASTCENC_ERR_BAD_PROFILE:
1351	return "ASTCENC_ERR_BAD_PROFILE";
1352	case ASTCENC_ERR_BAD_QUALITY:
1353	return "ASTCENC_ERR_BAD_QUALITY";
1354	case ASTCENC_ERR_BAD_FLAGS:
1355	return "ASTCENC_ERR_BAD_FLAGS";
1356	case ASTCENC_ERR_BAD_SWIZZLE:
1357	return "ASTCENC_ERR_BAD_SWIZZLE";
1358	case ASTCENC_ERR_BAD_CONTEXT:
1359	return "ASTCENC_ERR_BAD_CONTEXT";
1360	case ASTCENC_ERR_NOT_IMPLEMENTED:
1361	return "ASTCENC_ERR_NOT_IMPLEMENTED";
1362	#if defined(ASTCENC_DIAGNOSTICS)
1363	case ASTCENC_ERR_DTRACE_FAILURE:
1364	return "ASTCENC_ERR_DTRACE_FAILURE";
1365	#endif
1366	default:
1367	return nullptr;
1368	}
1369	}
1370

Browse the source code of Godot/thirdparty/astcenc/astcenc_entry.cpp