astcenc_internal.h source code [Godot/thirdparty/astcenc/astcenc_internal.h]

1	// SPDX-License-Identifier: Apache-2.0
2	// ----------------------------------------------------------------------------
3	// Copyright 2011-2023 Arm Limited
4	//
5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6	// use this file except in compliance with the License. You may obtain a copy
7	// of the License at:
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing, software
12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14	// License for the specific language governing permissions and limitations
15	// under the License.
16	// ----------------------------------------------------------------------------
17
18	/**
19	* @brief Functions and data declarations.
20	*/
21
22	#ifndef ASTCENC_INTERNAL_INCLUDED
23	#define ASTCENC_INTERNAL_INCLUDED
24
25	#include <algorithm>
26	#include <cstddef>
27	#include <cstdint>
28	#if defined(ASTCENC_DIAGNOSTICS)
29	#include <cstdio>
30	#endif
31	#include <cstdlib>
32
33	#include "astcenc.h"
34	#include "astcenc_mathlib.h"
35	#include "astcenc_vecmathlib.h"
36
/**
 * @brief Make a promise to the compiler's optimizer.
 *
 * A promise is an expression that the optimizer can assume is true, to help it generate
 * faster code. Common use cases for this are to promise that a for loop will iterate more than
 * once, or that the loop iteration count is a multiple of a vector length, which avoids pre-loop
 * checks and can avoid loop tails if loops are unrolled by the auto-vectorizer.
 *
 * When NDEBUG is not defined the promise is checked with assert() instead of being assumed.
 *
 * The statement-style expansions are wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. when used as the unbraced body of an if / else.
 */
#if defined(NDEBUG)
	#if !defined(__clang__) && defined(_MSC_VER)
		#define promise(cond) __assume(cond)
	#elif defined(__clang__)
		#if __has_builtin(__builtin_assume)
			#define promise(cond) __builtin_assume(cond)
		#elif __has_builtin(__builtin_unreachable)
			#define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
		#else
			#define promise(cond)
		#endif
	#else // Assume GCC
		#define promise(cond) do { if (!(cond)) { __builtin_unreachable(); } } while (0)
	#endif
#else
	#define promise(cond) assert(cond)
#endif
62
/* ============================================================================
  Constants
============================================================================ */
#if !defined(ASTCENC_BLOCK_MAX_TEXELS)
	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
#endif

/** @brief The maximum number of texels a block can support (6x6x6 block). */
static constexpr unsigned int BLOCK_MAX_TEXELS { ASTCENC_BLOCK_MAX_TEXELS };

/** @brief The maximum number of components a block can support. */
static constexpr unsigned int BLOCK_MAX_COMPONENTS { 4 };

/** @brief The maximum number of partitions a block can support. */
static constexpr unsigned int BLOCK_MAX_PARTITIONS { 4 };

/** @brief The number of partitionings, per partition count, supported by the ASTC format. */
static constexpr unsigned int BLOCK_MAX_PARTITIONINGS { 1024 };

/** @brief The maximum number of texels used during partition selection for texel clustering. */
static constexpr uint8_t BLOCK_MAX_KMEANS_TEXELS { 64 };

/** @brief The maximum number of weights a block can support. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS { 64 };

/** @brief The maximum number of weights a block can support per plane in 2 plane mode. */
static constexpr unsigned int BLOCK_MAX_WEIGHTS_2PLANE { BLOCK_MAX_WEIGHTS / 2 };

/** @brief The minimum number of weight bits a candidate encoding must encode. */
static constexpr unsigned int BLOCK_MIN_WEIGHT_BITS { 24 };

/** @brief The maximum number of weight bits a candidate encoding can encode. */
static constexpr unsigned int BLOCK_MAX_WEIGHT_BITS { 96 };

/** @brief The index indicating a bad (unused) block mode in the remap array. */
static constexpr uint16_t BLOCK_BAD_BLOCK_MODE { 0xFFFFu };

/** @brief The index indicating a bad (unused) partitioning in the remap array. */
static constexpr uint16_t BLOCK_BAD_PARTITIONING { 0xFFFFu };

/** @brief The number of partition index bits supported by the ASTC format. */
static constexpr unsigned int PARTITION_INDEX_BITS { 10 };

/** @brief The offset of the plane 2 weights in shared weight arrays. */
static constexpr unsigned int WEIGHTS_PLANE2_OFFSET { BLOCK_MAX_WEIGHTS_2PLANE };

/** @brief The sum of quantized weights for one texel. */
static constexpr float WEIGHTS_TEXEL_SUM { 16.0f };

/** @brief The number of block modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_BLOCK_MODES { 2048 };

/** @brief The number of weight grid decimation modes supported by the ASTC format. */
static constexpr unsigned int WEIGHTS_MAX_DECIMATION_MODES { 87 };

/** @brief The high default error used to initialize error trackers. */
static constexpr float ERROR_CALC_DEFAULT { 1e30f };

/**
 * @brief The minimum texel count for a block to use the one partition fast path.
 *
 * This setting skips 4x4 and 5x4 block sizes.
 */
static constexpr unsigned int TUNE_MIN_TEXELS_MODE0_FASTPATH { 24 };

/**
 * @brief The maximum number of candidate encodings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_TRIAL_CANDIDATES { 8 };

/**
 * @brief The maximum number of candidate partitionings tested for each encoding mode.
 *
 * This can be dynamically reduced by the compression quality preset.
 */
static constexpr unsigned int TUNE_MAX_PARTITIONING_CANDIDATES { 32 };

/**
 * @brief The maximum quant level using full angular endpoint search method.
 *
 * The angular endpoint search is used to find the min/max weight that should
 * be used for a given quantization level. It is effective but expensive, so
 * we only use it where it has the most value - low quant levels with wide
 * spacing. It is used below TUNE_MAX_ANGULAR_QUANT (inclusive). Above this we
 * assume the min weight is 0.0f, and the max weight is 1.0f.
 *
 * Note the angular algorithm is vectorized, and using QUANT_12 exactly fills
 * one 8-wide vector. Decreasing by one doesn't buy much performance, and
 * increasing by one is disproportionately expensive.
 */
static constexpr unsigned int TUNE_MAX_ANGULAR_QUANT { 7 }; /* QUANT_12 */
156
157	static_assert((BLOCK_MAX_TEXELS % ASTCENC_SIMD_WIDTH) == `0`,
158	"BLOCK_MAX_TEXELS must be multiple of ASTCENC_SIMD_WIDTH");
159
160	static_assert(BLOCK_MAX_TEXELS <= `216`,
161	"BLOCK_MAX_TEXELS must not be greater than 216");
162
163	static_assert((BLOCK_MAX_WEIGHTS % ASTCENC_SIMD_WIDTH) == `0`,
164	"BLOCK_MAX_WEIGHTS must be multiple of ASTCENC_SIMD_WIDTH");
165
166	static_assert((WEIGHTS_MAX_BLOCK_MODES % ASTCENC_SIMD_WIDTH) == `0`,
167	"WEIGHTS_MAX_BLOCK_MODES must be multiple of ASTCENC_SIMD_WIDTH");
168
169
/* ============================================================================
  Commonly used data structures
============================================================================ */
173
/**
 * @brief The ASTC endpoint formats.
 *
 * Note, the values here are used directly in the encoding in the format so do not rearrange.
 */
enum endpoint_formats
{
	FMT_LUMINANCE = 0,
	FMT_LUMINANCE_DELTA = 1,
	FMT_HDR_LUMINANCE_LARGE_RANGE = 2,
	FMT_HDR_LUMINANCE_SMALL_RANGE = 3,
	FMT_LUMINANCE_ALPHA = 4,
	FMT_LUMINANCE_ALPHA_DELTA = 5,
	FMT_RGB_SCALE = 6,
	FMT_HDR_RGB_SCALE = 7,
	FMT_RGB = 8,
	FMT_RGB_DELTA = 9,
	FMT_RGB_SCALE_ALPHA = 10,
	FMT_HDR_RGB = 11,
	FMT_RGBA = 12,
	FMT_RGBA_DELTA = 13,
	FMT_HDR_RGB_LDR_ALPHA = 14,
	FMT_HDR_RGBA = 15
};
198
/**
 * @brief The ASTC quantization methods.
 *
 * Note, the values here are used directly in the encoding in the format so do not rearrange.
 */
enum quant_method
{
	QUANT_2 = 0,
	QUANT_3 = 1,
	QUANT_4 = 2,
	QUANT_5 = 3,
	QUANT_6 = 4,
	QUANT_8 = 5,
	QUANT_10 = 6,
	QUANT_12 = 7,
	QUANT_16 = 8,
	QUANT_20 = 9,
	QUANT_24 = 10,
	QUANT_32 = 11,
	QUANT_40 = 12,
	QUANT_48 = 13,
	QUANT_64 = 14,
	QUANT_80 = 15,
	QUANT_96 = 16,
	QUANT_128 = 17,
	QUANT_160 = 18,
	QUANT_192 = 19,
	QUANT_256 = 20
};

/**
 * @brief The number of levels used by an ASTC quantization method.
 *
 * @param method   The quantization method
 *
 * @return   The number of levels used by @c method, or zero if @c method is not a named
 *           enumerator of @c quant_method.
 */
static inline unsigned int get_quant_level(quant_method method)
{
	// A switch without a default case lets the compiler warn if an enumerator is not handled
	switch (method)
	{
	case QUANT_2:   return   2;
	case QUANT_3:   return   3;
	case QUANT_4:   return   4;
	case QUANT_5:   return   5;
	case QUANT_6:   return   6;
	case QUANT_8:   return   8;
	case QUANT_10:  return  10;
	case QUANT_12:  return  12;
	case QUANT_16:  return  16;
	case QUANT_20:  return  20;
	case QUANT_24:  return  24;
	case QUANT_32:  return  32;
	case QUANT_40:  return  40;
	case QUANT_48:  return  48;
	case QUANT_64:  return  64;
	case QUANT_80:  return  80;
	case QUANT_96:  return  96;
	case QUANT_128: return 128;
	case QUANT_160: return 160;
	case QUANT_192: return 192;
	case QUANT_256: return 256;
	}

	// Unreachable - the enum is fully described
	return 0;
}
266
267	/**
268	* @brief Computed metrics about a partition in a block.
269	*/
270	struct partition_metrics
271	{
272	/* @brief The error-weighted average color in the partition. /
273	vfloat4 avg;
274
275	/* @brief The dominant error-weighted direction in the partition. /
276	vfloat4 dir;
277	};
278
279	/**
280	* @brief Computed lines for a a three component analysis.
281	*/
282	struct partition_lines3
283	{
284	/* @brief Line for uncorrelated chroma. /
285	line3 uncor_line;
286
287	/* @brief Line for correlated chroma, passing though the origin. /
288	line3 samec_line;
289
290	/* @brief Post-processed line for uncorrelated chroma. /
291	processed_line3 uncor_pline;
292
293	/* @brief Post-processed line for correlated chroma, passing though the origin. /
294	processed_line3 samec_pline;
295
296	/**
297	* @brief The length of the line for uncorrelated chroma.
298	*
299	* This is used for both the uncorrelated and same chroma lines - they are normally very similar
300	* and only used for the relative ranking of partitionings against one another.
301	*/
302	float line_length;
303	};
304
305	/**
306	* @brief The partition information for a single partition.
307	*
308	* ASTC has a total of 1024 candidate partitions for each of 2/3/4 partition counts, although this
309	* 1024 includes seeds that generate duplicates of other seeds and seeds that generate completely
310	* empty partitions. These are both valid encodings, but astcenc will skip both during compression
311	* as they are not useful.
312	*/
313	struct partition_info
314	{
315	/* @brief The number of partitions in this partitioning. /
316	uint16_t partition_count;
317
318	/* @brief The index (seed) of this partitioning. /
319	uint16_t partition_index;
320
321	/**
322	* @brief The number of texels in each partition.
323	*
324	* Note that some seeds result in zero texels assigned to a partition. These are valid, but are
325	* skipped by this compressor as there is no point spending bits encoding an unused endpoints.
326	*/
327	uint8_t partition_texel_count[BLOCK_MAX_PARTITIONS];
328
329	/* @brief The partition of each texel in the block. /
330	uint8_t partition_of_texel[BLOCK_MAX_TEXELS];
331
332	/* @brief The list of texels in each partition. /
333	uint8_t texels_of_partition[BLOCK_MAX_PARTITIONS][BLOCK_MAX_TEXELS];
334	};
335
336	/**
337	* @brief The weight grid information for a single decimation pattern.
338	*
339	* ASTC can store one weight per texel, but is also capable of storing lower resolution weight grids
340	* that are interpolated during decompression to assign a with to a texel. Storing fewer weights
341	* can free up a substantial amount of bits that we can then spend on more useful things, such as
342	* more accurate endpoints and weights, or additional partitions.
343	*
344	* This data structure is used to store information about a single weight grid decimation pattern,
345	* for a single block size.
346	*/
347	struct decimation_info
348	{
349	/* @brief The total number of texels in the block. /
350	uint8_t texel_count;
351
352	/* @brief The maximum number of stored weights that contribute to each texel, between 1 and 4. /
353	uint8_t max_texel_weight_count;
354
355	/* @brief The total number of weights stored. /
356	uint8_t weight_count;
357
358	/* @brief The number of stored weights in the X dimension. /
359	uint8_t weight_x;
360
361	/* @brief The number of stored weights in the Y dimension. /
362	uint8_t weight_y;
363
364	/* @brief The number of stored weights in the Z dimension. /
365	uint8_t weight_z;
366
367	/**
368	* @brief The number of weights that contribute to each texel.
369	* Value is between 1 and 4.
370	*/
371	uint8_t texel_weight_count[BLOCK_MAX_TEXELS];
372
373	/**
374	* @brief The weight index of the N weights that are interpolated for each texel.
375	* Stored transposed to improve vectorization.
376	*/
377	uint8_t texel_weights_tr[`4`][BLOCK_MAX_TEXELS];
378
379	/**
380	* @brief The bilinear contribution of the N weights that are interpolated for each texel.
381	* Value is between 0 and 16, stored transposed to improve vectorization.
382	*/
383	uint8_t texel_weight_contribs_int_tr[`4`][BLOCK_MAX_TEXELS];
384
385	/**
386	* @brief The bilinear contribution of the N weights that are interpolated for each texel.
387	* Value is between 0 and 1, stored transposed to improve vectorization.
388	*/
389	alignas(ASTCENC_VECALIGN) float texel_weight_contribs_float_tr[`4`][BLOCK_MAX_TEXELS];
390
391	/* @brief The number of texels that each stored weight contributes to. /
392	uint8_t weight_texel_count[BLOCK_MAX_WEIGHTS];
393
394	/**
395	* @brief The list of texels that use a specific weight index.
396	* Stored transposed to improve vectorization.
397	*/
398	uint8_t weight_texels_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
399
400	/**
401	* @brief The bilinear contribution to the N texels that use each weight.
402	* Value is between 0 and 1, stored transposed to improve vectorization.
403	*/
404	alignas(ASTCENC_VECALIGN) float weights_texel_contribs_tr[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
405
406	/**
407	* @brief The bilinear contribution to the Nth texel that uses each weight.
408	* Value is between 0 and 1, stored transposed to improve vectorization.
409	*/
410	float texel_contrib_for_weight[BLOCK_MAX_TEXELS][BLOCK_MAX_WEIGHTS];
411	};
412
413	/**
414	* @brief Metadata for single block mode for a specific block size.
415	*/
416	struct block_mode
417	{
418	/* @brief The block mode index in the ASTC encoded form. /
419	uint16_t mode_index;
420
421	/* @brief The decimation mode index in the compressor reindexed list. /
422	uint8_t decimation_mode;
423
424	/* @brief The weight quantization used by this block mode. /
425	uint8_t quant_mode;
426
427	/* @brief The weight quantization used by this block mode. /
428	uint8_t weight_bits;
429
430	/* @brief Is a dual weight plane used by this block mode? /
431	uint8_t is_dual_plane : `1`;
432
433	/**
434	* @brief Get the weight quantization used by this block mode.
435	*
436	* @return The quantization level.
437	*/
438	inline quant_method get_weight_quant_mode() const
439	{
440	return static_cast<quant_method>(this->quant_mode);
441	}
442	};
443
444	/**
445	* @brief Metadata for single decimation mode for a specific block size.
446	*/
447	struct decimation_mode
448	{
449	/* @brief The max weight precision for 1 plane, or -1 if not supported. /
450	int8_t maxprec_1plane;
451
452	/* @brief The max weight precision for 2 planes, or -1 if not supported. /
453	int8_t maxprec_2planes;
454
455	/**
456	* @brief Bitvector indicating weight quant modes used by active 1 plane block modes.
457	*
458	* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
459	*/
460	uint16_t refprec_1plane;
461
462	/**
463	* @brief Bitvector indicating weight quant methods used by active 2 plane block modes.
464	*
465	* Bit 0 = QUANT_2, Bit 1 = QUANT_3, etc.
466	*/
467	uint16_t refprec_2planes;
468
469	/**
470	* @brief Set a 1 plane weight quant as active.
471	*
472	* @param weight_quant The quant method to set.
473	*/
474	void set_ref_1plane(quant_method weight_quant)
475	{
476	refprec_1plane \|= (`1` << weight_quant);
477	}
478
479	/**
480	* @brief Test if this mode is active below a given 1 plane weight quant (inclusive).
481	*
482	* @param max_weight_quant The max quant method to test.
483	*/
484	bool is_ref_1plane(quant_method max_weight_quant) const
485	{
486	uint16_t mask = static_cast<uint16_t>((`1` << (max_weight_quant + `1`)) - `1`);
487	return (refprec_1plane & mask) != `0`;
488	}
489
490	/**
491	* @brief Set a 2 plane weight quant as active.
492	*
493	* @param weight_quant The quant method to set.
494	*/
495	void set_ref_2plane(quant_method weight_quant)
496	{
497	refprec_2planes \|= static_cast<uint16_t>(`1` << weight_quant);
498	}
499
500	/**
501	* @brief Test if this mode is active below a given 2 plane weight quant (inclusive).
502	*
503	* @param max_weight_quant The max quant method to test.
504	*/
505	bool is_ref_2plane(quant_method max_weight_quant) const
506	{
507	uint16_t mask = static_cast<uint16_t>((`1` << (max_weight_quant + `1`)) - `1`);
508	return (refprec_2planes & mask) != `0`;
509	}
510	};
511
512	/**
513	* @brief Data tables for a single block size.
514	*
515	* The decimation tables store the information to apply weight grid dimension reductions. We only
516	* store the decimation modes that are actually needed by the current context; many of the possible
517	* modes will be unused (too many weights for the current block size or disabled by heuristics). The
518	* actual number of weights stored is @c decimation_mode_count, and the @c decimation_modes and
519	* @c decimation_tables arrays store the active modes contiguously at the start of the array. These
520	* entries are not stored in any particular order.
521	*
522	* The block mode tables store the unpacked block mode settings. Block modes are stored in the
523	* compressed block as an 11 bit field, but for any given block size and set of compressor
524	* heuristics, only a subset of the block modes will be used. The actual number of block modes
525	* stored is indicated in @c block_mode_count, and the @c block_modes array store the active modes
526	* contiguously at the start of the array. These entries are stored in incrementing "packed" value
527	* order, which doesn't mean much once unpacked. To allow decompressors to reference the packed data
528	* efficiently the @c block_mode_packed_index array stores the mapping between physical ID and the
529	* actual remapped array index.
530	*/
531	struct block_size_descriptor
532	{
533	/* @brief The block X dimension, in texels. /
534	uint8_t xdim;
535
536	/* @brief The block Y dimension, in texels. /
537	uint8_t ydim;
538
539	/* @brief The block Z dimension, in texels. /
540	uint8_t zdim;
541
542	/* @brief The block total texel count. /
543	uint8_t texel_count;
544
545	/**
546	* @brief The number of stored decimation modes which are "always" modes.
547	*
548	* Always modes are stored at the start of the decimation_modes list.
549	*/
550	unsigned int decimation_mode_count_always;
551
552	/* @brief The number of stored decimation modes for selected encodings. /
553	unsigned int decimation_mode_count_selected;
554
555	/* @brief The number of stored decimation modes for any encoding. /
556	unsigned int decimation_mode_count_all;
557
558	/**
559	* @brief The number of stored block modes which are "always" modes.
560	*
561	* Always modes are stored at the start of the block_modes list.
562	*/
563	unsigned int block_mode_count_1plane_always;
564
565	/* @brief The number of stored block modes for active 1 plane encodings. /
566	unsigned int block_mode_count_1plane_selected;
567
568	/* @brief The number of stored block modes for active 1 and 2 plane encodings. /
569	unsigned int block_mode_count_1plane_2plane_selected;
570
571	/* @brief The number of stored block modes for any encoding. /
572	unsigned int block_mode_count_all;
573
574	/* @brief The number of selected partitionings for 1/2/3/4 partitionings. /
575	unsigned int partitioning_count_selected[BLOCK_MAX_PARTITIONS];
576
577	/* @brief The number of partitionings for 1/2/3/4 partitionings. /
578	unsigned int partitioning_count_all[BLOCK_MAX_PARTITIONS];
579
580	/* @brief The active decimation modes, stored in low indices. /
581	decimation_mode decimation_modes[WEIGHTS_MAX_DECIMATION_MODES];
582
583	/* @brief The active decimation tables, stored in low indices. /
584	alignas(ASTCENC_VECALIGN) decimation_info decimation_tables[WEIGHTS_MAX_DECIMATION_MODES];
585
586	/* @brief The packed block mode array index, or @c BLOCK_BAD_BLOCK_MODE if not active. /
587	uint16_t block_mode_packed_index[WEIGHTS_MAX_BLOCK_MODES];
588
589	/* @brief The active block modes, stored in low indices. /
590	block_mode block_modes[WEIGHTS_MAX_BLOCK_MODES];
591
592	/* @brief The active partition tables, stored in low indices per-count. /
593	partition_info partitionings[(`3` * BLOCK_MAX_PARTITIONINGS) + `1`];
594
595	/**
596	* @brief The packed partition table array index, or @c BLOCK_BAD_PARTITIONING if not active.
597	*
598	* Indexed by partition_count - 2, containing 2, 3 and 4 partitions.
599	*/
600	uint16_t partitioning_packed_index[`3`][BLOCK_MAX_PARTITIONINGS];
601
602	/* @brief The active texels for k-means partition selection. /
603	uint8_t kmeans_texels[BLOCK_MAX_KMEANS_TEXELS];
604
605	/**
606	* @brief The canonical 2-partition coverage pattern used during block partition search.
607	*
608	* Indexed by remapped index, not physical index.
609	*/
610	uint64_t coverage_bitmaps_2[BLOCK_MAX_PARTITIONINGS][`2`];
611
612	/**
613	* @brief The canonical 3-partition coverage pattern used during block partition search.
614	*
615	* Indexed by remapped index, not physical index.
616	*/
617	uint64_t coverage_bitmaps_3[BLOCK_MAX_PARTITIONINGS][`3`];
618
619	/**
620	* @brief The canonical 4-partition coverage pattern used during block partition search.
621	*
622	* Indexed by remapped index, not physical index.
623	*/
624	uint64_t coverage_bitmaps_4[BLOCK_MAX_PARTITIONINGS][`4`];
625
626	/**
627	* @brief Get the block mode structure for index @c block_mode.
628	*
629	* This function can only return block modes that are enabled by the current compressor config.
630	* Decompression from an arbitrary source should not use this without first checking that the
631	* packed block mode index is not @c BLOCK_BAD_BLOCK_MODE.
632	*
633	* @param block_mode The packed block mode index.
634	*
635	* @return The block mode structure.
636	*/
637	const block_mode& get_block_mode(unsigned int block_mode) const
638	{
639	unsigned int packed_index = this->block_mode_packed_index[block_mode];
640	assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all);
641	return this->block_modes[packed_index];
642	}
643
644	/**
645	* @brief Get the decimation mode structure for index @c decimation_mode.
646	*
647	* This function can only return decimation modes that are enabled by the current compressor
648	* config. The mode array is stored packed, but this is only ever indexed by the packed index
649	* stored in the @c block_mode and never exists in an unpacked form.
650	*
651	* @param decimation_mode The packed decimation mode index.
652	*
653	* @return The decimation mode structure.
654	*/
655	const decimation_mode& get_decimation_mode(unsigned int decimation_mode) const
656	{
657	return this->decimation_modes[decimation_mode];
658	}
659
660	/**
661	* @brief Get the decimation info structure for index @c decimation_mode.
662	*
663	* This function can only return decimation modes that are enabled by the current compressor
664	* config. The mode array is stored packed, but this is only ever indexed by the packed index
665	* stored in the @c block_mode and never exists in an unpacked form.
666	*
667	* @param decimation_mode The packed decimation mode index.
668	*
669	* @return The decimation info structure.
670	*/
671	const decimation_info& get_decimation_info(unsigned int decimation_mode) const
672	{
673	return this->decimation_tables[decimation_mode];
674	}
675
676	/**
677	* @brief Get the partition info table for a given partition count.
678	*
679	* @param partition_count The number of partitions we want the table for.
680	*
681	* @return The pointer to the table of 1024 entries (for 2/3/4 parts) or 1 entry (for 1 part).
682	*/
683	const partition_info* get_partition_table(unsigned int partition_count) const
684	{
685	if (partition_count == `1`)
686	{
687	partition_count = `5`;
688	}
689	unsigned int index = (partition_count - `2`) * BLOCK_MAX_PARTITIONINGS;
690	return this->partitionings + index;
691	}
692
693	/**
694	* @brief Get the partition info structure for a given partition count and seed.
695	*
696	* @param partition_count The number of partitions we want the info for.
697	* @param index The partition seed (between 0 and 1023).
698	*
699	* @return The partition info structure.
700	*/
701	const partition_info& get_partition_info(unsigned int partition_count, unsigned int index) const
702	{
703	unsigned int packed_index = `0`;
704	if (partition_count >= `2`)
705	{
706	packed_index = this->partitioning_packed_index[partition_count - `2`][index];
707	}
708
709	assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - `1`]);
710	auto& result = get_partition_table(partition_count)[packed_index];
711	assert(index == result.partition_index);
712	return result;
713	}
714
715	/**
716	* @brief Get the partition info structure for a given partition count and seed.
717	*
718	* @param partition_count The number of partitions we want the info for.
719	* @param packed_index The raw array offset.
720	*
721	* @return The partition info structure.
722	*/
723	const partition_info& get_raw_partition_info(unsigned int partition_count, unsigned int packed_index) const
724	{
725	assert(packed_index != BLOCK_BAD_PARTITIONING && packed_index < this->partitioning_count_all[partition_count - `1`]);
726	auto& result = get_partition_table(partition_count)[packed_index];
727	return result;
728	}
729	};
730
731	/**
732	* @brief The image data for a single block.
733	*
734	* The @c data_[rgba] fields store the image data in an encoded SoA float form designed for easy
735	* vectorization. Input data is converted to float and stored as values between 0 and 65535. LDR
736	* data is stored as direct UNORM data, HDR data is stored as LNS data.
737	*
738	* The @c rgb_lns and @c alpha_lns fields that assigned a per-texel use of HDR are only used during
739	* decompression. The current compressor will always use HDR endpoint formats when in HDR mode.
740	*/
741	struct image_block
742	{
743	/* @brief The input (compress) or output (decompress) data for the red color component. /
744	alignas(ASTCENC_VECALIGN) float data_r[BLOCK_MAX_TEXELS];
745
746	/* @brief The input (compress) or output (decompress) data for the green color component. /
747	alignas(ASTCENC_VECALIGN) float data_g[BLOCK_MAX_TEXELS];
748
749	/* @brief The input (compress) or output (decompress) data for the blue color component. /
750	alignas(ASTCENC_VECALIGN) float data_b[BLOCK_MAX_TEXELS];
751
752	/* @brief The input (compress) or output (decompress) data for the alpha color component. /
753	alignas(ASTCENC_VECALIGN) float data_a[BLOCK_MAX_TEXELS];
754
755	/* @brief The number of texels in the block. /
756	uint8_t texel_count;
757
758	/* @brief The original data for texel 0 for constant color block encoding. /
759	vfloat4 origin_texel;
760
761	/* @brief The min component value of all texels in the block. /
762	vfloat4 data_min;
763
764	/* @brief The mean component value of all texels in the block. /
765	vfloat4 data_mean;
766
767	/* @brief The max component value of all texels in the block. /
768	vfloat4 data_max;
769
770	/* @brief The relative error significance of the color channels. /
771	vfloat4 channel_weight;
772
773	/* @brief Is this grayscale block where R == G == B for all texels? /
774	bool grayscale;
775
776	/* @brief Set to 1 if a texel is using HDR RGB endpoints (decompression only). /
777	uint8_t rgb_lns[BLOCK_MAX_TEXELS];
778
779	/* @brief Set to 1 if a texel is using HDR alpha endpoints (decompression only). /
780	uint8_t alpha_lns[BLOCK_MAX_TEXELS];
781
782	/* @brief The X position of this block in the input or output image. /
783	unsigned int xpos;
784
785	/* @brief The Y position of this block in the input or output image. /
786	unsigned int ypos;
787
788	/* @brief The Z position of this block in the input or output image. /
789	unsigned int zpos;
790
791	/**
792	* @brief Get an RGBA texel value from the data.
793	*
794	* @param index The texel index.
795	*
796	* @return The texel in RGBA component ordering.
797	*/
798	inline vfloat4 texel(unsigned int index) const
799	{
800	return vfloat4 (data_r[index],
801	data_g[index],
802	data_b[index],
803	data_a[index]);
804	}
805
806	/**
807	* @brief Get an RGB texel value from the data.
808	*
809	* @param index The texel index.
810	*
811	* @return The texel in RGB0 component ordering.
812	*/
813	inline vfloat4 texel3(unsigned int index) const
814	{
815	return vfloat3(data_r[index],
816	data_g[index],
817	data_b[index]);
818	}
819
820	/**
821	* @brief Get the default alpha value for endpoints that don't store it.
822	*
823	* The default depends on whether the alpha endpoint is LDR or HDR.
824	*
825	* @return The alpha value in the scaled range used by the compressor.
826	*/
827	inline float get_default_alpha() const
828	{
829	return this->alpha_lns[`0`] ? static_cast<float>(`0x7800`) : static_cast<float>(`0xFFFF`);
830	}
831
832	/**
833	* @brief Test if a single color channel is constant across the block.
834	*
835	* Constant color channels are easier to compress as interpolating between two identical colors
836	* always returns the same value, irrespective of the weight used. They therefore can be ignored
837	* for the purposes of weight selection and use of a second weight plane.
838	*
839	* @return @c true if the channel is constant across the block, @c false otherwise.
840	*/
841	inline bool is_constant_channel(int channel) const
842	{
843	vmask4 lane_mask = vint4::lane_id() == vint4 (channel);
844	vmask4 color_mask = this->data_min == this->data_max;
845	return any(lane_mask & color_mask);
846	}
847
848	/**
849	* @brief Test if this block is a luminance block with constant 1.0 alpha.
850	*
851	* @return @c true if the block is a luminance block , @c false otherwise.
852	*/
853	inline bool is_luminance() const
854	{
855	float default_alpha = this->get_default_alpha();
856	bool alpha1 = (this->data_min.lane<`3`>() == default_alpha) &&
857	(this->data_max.lane<`3`>() == default_alpha);
858	return this->grayscale && alpha1;
859	}
860
861	/**
862	* @brief Test if this block is a luminance block with variable alpha.
863	*
864	* @return @c true if the block is a luminance + alpha block , @c false otherwise.
865	*/
866	inline bool is_luminancealpha() const
867	{
868	float default_alpha = this->get_default_alpha();
869	bool alpha1 = (this->data_min.lane<`3`>() == default_alpha) &&
870	(this->data_max.lane<`3`>() == default_alpha);
871	return this->grayscale && !alpha1;
872	}
873	};
874
875	/**
876	* @brief Data structure storing the color endpoints for a block.
877	*/
878	struct endpoints
879	{
880	/* @brief The number of partition endpoints stored. /
881	unsigned int partition_count;
882
883	/* @brief The colors for endpoint 0. /
884	vfloat4 endpt0[BLOCK_MAX_PARTITIONS];
885
886	/* @brief The colors for endpoint 1. /
887	vfloat4 endpt1[BLOCK_MAX_PARTITIONS];
888	};
889
890	/**
891	* @brief Data structure storing the color endpoints and weights.
892	*/
893	struct endpoints_and_weights
894	{
895	/* @brief True if all active values in weight_error_scale are the same. /
896	bool is_constant_weight_error_scale;
897
898	/* @brief The color endpoints. /
899	endpoints ep;
900
901	/* @brief The ideal weight for each texel; may be undecimated or decimated. /
902	alignas(ASTCENC_VECALIGN) float weights[BLOCK_MAX_TEXELS];
903
904	/* @brief The ideal weight error scaling for each texel; may be undecimated or decimated. /
905	alignas(ASTCENC_VECALIGN) float weight_error_scale[BLOCK_MAX_TEXELS];
906	};
907
/**
 * @brief Utility storing estimated errors from choosing particular endpoint encodings.
 */
struct encoding_choice_errors
{
    /** @brief Error of using LDR RGB-scale instead of complete endpoints. */
    float rgb_scale_error;

    /** @brief Error of using HDR RGB-scale instead of complete endpoints. */
    float rgb_luma_error;

    /** @brief Error of using luminance instead of RGB. */
    float luminance_error;

    /** @brief Error of discarding alpha and using a constant 1.0 alpha. */
    float alpha_drop_error;

    /** @brief Can we use delta offset encoding? */
    bool can_offset_encode;

    /** @brief Can we use blue contraction encoding? */
    bool can_blue_contract;
};
931
932	/**
933	* @brief Preallocated working buffers, allocated per thread during context creation.
934	*/
935	struct alignas(ASTCENC_VECALIGN) compression_working_buffers
936	{
937	/* @brief Ideal endpoints and weights for plane 1. /
938	endpoints_and_weights ei1;
939
940	/* @brief Ideal endpoints and weights for plane 2. /
941	endpoints_and_weights ei2;
942
943	/**
944	* @brief Decimated ideal weight values in the ~0-1 range.
945	*
946	* Note that values can be slightly below zero or higher than one due to
947	* endpoint extents being inside the ideal color representation.
948	*
949	* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
950	*/
951	alignas(ASTCENC_VECALIGN) float dec_weights_ideal[WEIGHTS_MAX_DECIMATION_MODES * BLOCK_MAX_WEIGHTS];
952
953	/**
954	* @brief Decimated quantized weight values in the unquantized 0-64 range.
955	*
956	* For two planes, second plane starts at @c WEIGHTS_PLANE2_OFFSET offsets.
957	*/
958	uint8_t dec_weights_uquant[WEIGHTS_MAX_BLOCK_MODES * BLOCK_MAX_WEIGHTS];
959
960	/* @brief Error of the best encoding combination for each block mode. /
961	alignas(ASTCENC_VECALIGN) float errors_of_best_combination[WEIGHTS_MAX_BLOCK_MODES];
962
963	/* @brief The best color quant for each block mode. /
964	uint8_t best_quant_levels[WEIGHTS_MAX_BLOCK_MODES];
965
966	/* @brief The best color quant for each block mode if modes are the same and we have spare bits. /
967	uint8_t best_quant_levels_mod[WEIGHTS_MAX_BLOCK_MODES];
968
969	/* @brief The best endpoint format for each partition. /
970	uint8_t best_ep_formats[WEIGHTS_MAX_BLOCK_MODES][BLOCK_MAX_PARTITIONS];
971
972	/* @brief The total bit storage needed for quantized weights for each block mode. /
973	int8_t qwt_bitcounts[WEIGHTS_MAX_BLOCK_MODES];
974
975	/* @brief The cumulative error for quantized weights for each block mode. /
976	float qwt_errors[WEIGHTS_MAX_BLOCK_MODES];
977
978	/* @brief The low weight value in plane 1 for each block mode. /
979	float weight_low_value1[WEIGHTS_MAX_BLOCK_MODES];
980
981	/* @brief The high weight value in plane 1 for each block mode. /
982	float weight_high_value1[WEIGHTS_MAX_BLOCK_MODES];
983
984	/* @brief The low weight value in plane 1 for each quant level and decimation mode. /
985	float weight_low_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + `1`];
986
987	/* @brief The high weight value in plane 1 for each quant level and decimation mode. /
988	float weight_high_values1[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + `1`];
989
990	/* @brief The low weight value in plane 2 for each block mode. /
991	float weight_low_value2[WEIGHTS_MAX_BLOCK_MODES];
992
993	/* @brief The high weight value in plane 2 for each block mode. /
994	float weight_high_value2[WEIGHTS_MAX_BLOCK_MODES];
995
996	/* @brief The low weight value in plane 2 for each quant level and decimation mode. /
997	float weight_low_values2[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + `1`];
998
999	/* @brief The high weight value in plane 2 for each quant level and decimation mode. /
1000	float weight_high_values2[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + `1`];
1001	};
1002
1003	struct dt_init_working_buffers
1004	{
1005	uint8_t weight_count_of_texel[BLOCK_MAX_TEXELS];
1006	uint8_t grid_weights_of_texel[BLOCK_MAX_TEXELS][`4`];
1007	uint8_t weights_of_texel[BLOCK_MAX_TEXELS][`4`];
1008
1009	uint8_t texel_count_of_weight[BLOCK_MAX_WEIGHTS];
1010	uint8_t texels_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS];
1011	uint8_t texel_weights_of_weight[BLOCK_MAX_WEIGHTS][BLOCK_MAX_TEXELS];
1012	};
1013
/**
 * @brief Weight quantization transfer table.
 *
 * ASTC can store texel weights at many quantization levels, so for performance we store essential
 * information about each level as a precomputed data structure. Unquantized weights are integers
 * or floats in the range [0, 64].
 *
 * This structure provides tables used to estimate the closest quantized weight for a given
 * floating-point weight. For each quantized weight it stores the corresponding unquantized
 * value, and for each unquantized value it stores a previous-value and a next-value.
 */
struct quant_and_transfer_table
{
    /** @brief The unscrambled unquantized value. */
    int8_t quant_to_unquant[32];

    /** @brief The scrambling order: scrambled_quant = map[unscrambled_quant]. */
    int8_t scramble_map[32];

    /** @brief The unscrambling order: unscrambled_unquant = map[scrambled_quant]. */
    int8_t unscramble_and_unquant_map[32];

    /**
     * @brief A table of previous-and-next weights, indexed by the current unquantized value.
     *  * bits 7:0 = previous-index, unquantized
     *  * bits 15:8 = next-index, unquantized
     */
    uint16_t prev_next_values[65];
};
1043
1044	/* @brief The precomputed quant and transfer table. /
1045	extern const quant_and_transfer_table quant_and_xfer_tables[`12`];
1046
/** @brief The block is an error block, and will return error color or NaN. */
static constexpr uint8_t SYM_BTYPE_ERROR { 0 };

/** @brief The block is a constant color block using FP16 colors. */
static constexpr uint8_t SYM_BTYPE_CONST_F16 { 1 };

/** @brief The block is a constant color block using UNORM16 colors. */
static constexpr uint8_t SYM_BTYPE_CONST_U16 { 2 };

/** @brief The block is a normal non-constant color block. */
static constexpr uint8_t SYM_BTYPE_NONCONST { 3 };
1058
1059	/**
1060	* @brief A symbolic representation of a compressed block.
1061	*
1062	* The symbolic representation stores the unpacked content of a single
1063	* @c physical_compressed_block, in a form which is much easier to access for
1064	* the rest of the compressor code.
1065	*/
1066	struct symbolic_compressed_block
1067	{
1068	/* @brief The block type, one of the @c SYM_BTYPE_* constants. /
1069	uint8_t block_type;
1070
1071	/* @brief The number of partitions; valid for @c NONCONST blocks. /
1072	uint8_t partition_count;
1073
1074	/* @brief Non-zero if the color formats matched; valid for @c NONCONST blocks. /
1075	uint8_t color_formats_matched;
1076
1077	/* @brief The plane 2 color component, or -1 if single plane; valid for @c NONCONST blocks. /
1078	int8_t plane2_component;
1079
1080	/* @brief The block mode; valid for @c NONCONST blocks. /
1081	uint16_t block_mode;
1082
1083	/* @brief The partition index; valid for @c NONCONST blocks if 2 or more partitions. /
1084	uint16_t partition_index;
1085
1086	/* @brief The endpoint color formats for each partition; valid for @c NONCONST blocks. /
1087	uint8_t color_formats[BLOCK_MAX_PARTITIONS];
1088
1089	/* @brief The endpoint color quant mode; valid for @c NONCONST blocks. /
1090	quant_method quant_mode;
1091
1092	/* @brief The error of the current encoding; valid for @c NONCONST blocks. /
1093	float errorval;
1094
1095	// We can't have both of these at the same time
1096	union {
1097	/* @brief The constant color; valid for @c CONST blocks. /
1098	int constant_color[BLOCK_MAX_COMPONENTS];
1099
1100	/* @brief The quantized endpoint color pairs; valid for @c NONCONST blocks. /
1101	uint8_t color_values[BLOCK_MAX_PARTITIONS][`8`];
1102	};
1103
1104	/* @brief The quantized and decimated weights.*
1105	*
1106	* Weights are stored in the 0-64 unpacked range allowing them to be used
1107	* directly in encoding passes without per-use unpacking. Packing happens
1108	* when converting to/from the physical bitstream encoding.
1109	*
1110	* If dual plane, the second plane starts at @c weights[WEIGHTS_PLANE2_OFFSET].
1111	*/
1112	uint8_t weights[BLOCK_MAX_WEIGHTS];
1113
1114	/**
1115	* @brief Get the weight quantization used by this block mode.
1116	*
1117	* @return The quantization level.
1118	*/
1119	inline quant_method get_color_quant_mode() const
1120	{
1121	return this->quant_mode;
1122	}
1123	};
1124
/**
 * @brief A physical representation of a compressed block.
 *
 * The physical representation stores the raw bytes of the format in memory.
 */
struct physical_compressed_block
{
    /** @brief The ASTC encoded data for a single block (16 bytes). */
    uint8_t data[16];
};
1135
1136
1137	/**
1138	* @brief Parameter structure for @c compute_pixel_region_variance().
1139	*
1140	* This function takes a structure to avoid spilling arguments to the stack on every function
1141	* invocation, as there are a lot of parameters.
1142	*/
1143	struct pixel_region_args
1144	{
1145	/* @brief The image to analyze. /
1146	const astcenc_image* img;
1147
1148	/* @brief The component swizzle pattern. /
1149	astcenc_swizzle swz;
1150
1151	/* @brief Should the algorithm bother with Z axis processing? /
1152	bool have_z;
1153
1154	/* @brief The kernel radius for alpha processing. /
1155	unsigned int alpha_kernel_radius;
1156
1157	/* @brief The X dimension of the working data to process. /
1158	unsigned int size_x;
1159
1160	/* @brief The Y dimension of the working data to process. /
1161	unsigned int size_y;
1162
1163	/* @brief The Z dimension of the working data to process. /
1164	unsigned int size_z;
1165
1166	/* @brief The X position of first src and dst data in the data set. /
1167	unsigned int offset_x;
1168
1169	/* @brief The Y position of first src and dst data in the data set. /
1170	unsigned int offset_y;
1171
1172	/* @brief The Z position of first src and dst data in the data set. /
1173	unsigned int offset_z;
1174
1175	/* @brief The working memory buffer. /
1176	vfloat4 *work_memory;
1177	};
1178
1179	/**
1180	* @brief Parameter structure for @c compute_averages_proc().
1181	*/
1182	struct avg_args
1183	{
1184	/* @brief The arguments for the nested variance computation. /
1185	pixel_region_args arg;
1186
1187	/* @brief The image X dimensions. /
1188	unsigned int img_size_x;
1189
1190	/* @brief The image Y dimensions. /
1191	unsigned int img_size_y;
1192
1193	/* @brief The image Z dimensions. /
1194	unsigned int img_size_z;
1195
1196	/* @brief The maximum working block dimensions in X and Y dimensions. /
1197	unsigned int blk_size_xy;
1198
1199	/* @brief The maximum working block dimensions in Z dimensions. /
1200	unsigned int blk_size_z;
1201
1202	/* @brief The working block memory size. /
1203	unsigned int work_memory_size;
1204	};
1205
#if defined(ASTCENC_DIAGNOSTICS)
/* Forward declaration only; see the astcenc_diagnostic_trace header for details. */
class TraceLog;
#endif
1210
1211	/**
1212	* @brief The astcenc compression context.
1213	*/
1214	struct astcenc_contexti
1215	{
1216	/* @brief The configuration this context was created with. /
1217	astcenc_config config;
1218
1219	/* @brief The thread count supported by this context. /
1220	unsigned int thread_count;
1221
1222	/* @brief The block size descriptor this context was created with. /
1223	block_size_descriptor* bsd;
1224
1225	/*
1226	* Fields below here are not needed in a decompress-only build, but some remain as they are
1227	* small and it avoids littering the code with #ifdefs. The most significant contributors to
1228	* large structure size are omitted.
1229	*/
1230
1231	/* @brief The input image alpha channel averages table, may be @c nullptr if not needed. /
1232	float* input_alpha_averages;
1233
1234	/* @brief The scratch working buffers, one per thread (see @c thread_count). /
1235	compression_working_buffers* working_buffers;
1236
1237	#if !defined(ASTCENC_DECOMPRESS_ONLY)
1238	/* @brief The pixel region and variance worker arguments. /
1239	avg_args avg_preprocess_args;
1240	#endif
1241
1242	#if defined(ASTCENC_DIAGNOSTICS)
1243	/**
1244	* @brief The diagnostic trace logger.
1245	*
1246	* Note that this is a singleton, so can only be used in single threaded mode. It only exists
1247	* here so we have a reference to close the file at the end of the capture.
1248	*/
1249	TraceLog* trace_log;
1250	#endif
1251	};
1252
/* ============================================================================
  Functionality for managing block sizes and partition tables.
============================================================================ */
1256
/**
 * @brief Populate the block size descriptor for the target block size.
 *
 * This will also initialize the partition table metadata, which is stored as part of the BSD
 * structure.
 *
 * @param      x_texels                 The number of texels in the block X dimension.
 * @param      y_texels                 The number of texels in the block Y dimension.
 * @param      z_texels                 The number of texels in the block Z dimension.
 * @param      can_omit_modes           Can we discard modes and partitionings that astcenc won't use?
 * @param      partition_count_cutoff   The partition count cutoff to use, if we can omit partitionings.
 * @param      mode_cutoff              The block mode percentile cutoff [0-1].
 * @param[out] bsd                      The descriptor to initialize.
 */
void init_block_size_descriptor(
    unsigned int x_texels,
    unsigned int y_texels,
    unsigned int z_texels,
    bool can_omit_modes,
    unsigned int partition_count_cutoff,
    float mode_cutoff,
    block_size_descriptor& bsd);
1279
/**
 * @brief Populate the partition tables for the target block size.
 *
 * Note the @c bsd descriptor must be initialized by calling @c init_block_size_descriptor() before
 * calling this function.
 *
 * @param[out] bsd                      The block size information structure to populate.
 * @param      can_omit_partitionings   True if we can drop partitionings that astcenc won't use.
 * @param      partition_count_cutoff   The partition count cutoff to use, if we can omit partitionings.
 */
void init_partition_tables(
    block_size_descriptor& bsd,
    bool can_omit_partitionings,
    unsigned int partition_count_cutoff);
1294
/**
 * @brief Get the percentile table for 2D block modes.
 *
 * This is an empirically determined prioritization of which block modes to use in the search in
 * terms of their centile (lower centiles = more useful).
 *
 * Returns a dynamically allocated array; the caller owns it and must free it with @c delete[].
 *
 * @param xdim   The block x size.
 * @param ydim   The block y size.
 *
 * @return The unpacked table.
 */
const float* get_2d_percentile_table(
    unsigned int xdim,
    unsigned int ydim);
1311
/**
 * @brief Query if a 2D block size is legal.
 *
 * @param xdim   The block x size.
 * @param ydim   The block y size.
 *
 * @return True if legal, false otherwise.
 */
bool is_legal_2d_block_size(
    unsigned int xdim,
    unsigned int ydim);
1320
/**
 * @brief Query if a 3D block size is legal.
 *
 * @param xdim   The block x size.
 * @param ydim   The block y size.
 * @param zdim   The block z size.
 *
 * @return True if legal, false otherwise.
 */
bool is_legal_3d_block_size(
    unsigned int xdim,
    unsigned int ydim,
    unsigned int zdim);
1330
/* ============================================================================
  Functionality for managing BISE quantization and unquantization.
============================================================================ */
1334
/**
 * @brief The precomputed table for quantizing color values.
 *
 * Converts unquant value in 0-255 range into quant value in 0-255 range.
 * No BISE scrambling is applied at this stage.
 *
 * The BISE encoding results in ties where available quant<256> values are
 * equidistant from the available quant<BISE> values. This table stores two values
 * for each input - one for use with a negative residual, and one for use with
 * a positive residual.
 *
 * Indexed by [quant_mode - 4][data_value * 2 + residual].
 */
extern const uint8_t color_unquant_to_uquant_tables[17][512];
1349
/**
 * @brief The precomputed table for packing quantized color values.
 *
 * Converts quant value in 0-255 range into packed quant value in 0-N range,
 * with BISE scrambling applied.
 *
 * Indexed by [quant_mode - 4][data_value].
 */
extern const uint8_t color_uquant_to_scrambled_pquant_tables[17][256];
1359
/**
 * @brief The precomputed table for unpacking color values.
 *
 * Converts quant value in 0-N range into unpacked value in 0-255 range,
 * with BISE unscrambling applied.
 *
 * Indexed by [quant_mode - 4][data_value]; the outer array stores one table pointer per
 * quant mode.
 */
extern const uint8_t* color_scrambled_pquant_to_uquant_tables[17];
1369
/**
 * @brief The precomputed quant mode storage table.
 *
 * Indexing by [integer_count/2][bits] gives us the quantization level for a given integer count and
 * number of compressed storage bits. Returns -1 for cases where the requested integer count cannot
 * ever fit in the supplied storage size.
 */
extern const int8_t quant_mode_table[10][128];
1378
/**
 * @brief Encode a packed string using BISE.
 *
 * Note that BISE can return strings that are not a whole number of bytes in length, and ASTC can
 * start storing strings in a block at arbitrary bit offsets in the encoded data.
 *
 * @param         quant_level       The BISE alphabet size.
 * @param         character_count   The number of characters in the string.
 * @param         input_data        The unpacked string, one byte per character.
 * @param[in,out] output_data       The output packed string.
 * @param         bit_offset        The starting bit offset in the output storage.
 */
void encode_ise(
    quant_method quant_level,
    unsigned int character_count,
    const uint8_t* input_data,
    uint8_t* output_data,
    unsigned int bit_offset);
1397
/**
 * @brief Decode a packed string using BISE.
 *
 * Note that BISE input strings are not a whole number of bytes in length, and ASTC can start
 * strings at arbitrary bit offsets in the encoded data.
 *
 * @param         quant_level       The BISE alphabet size.
 * @param         character_count   The number of characters in the string.
 * @param         input_data        The packed string.
 * @param[in,out] output_data       The output storage, one byte per character.
 * @param         bit_offset        The starting bit offset of the string in the encoded data.
 *                                  NOTE(review): previously described as an offset into the
 *                                  output storage; confirm against the implementation.
 */
void decode_ise(
    quant_method quant_level,
    unsigned int character_count,
    const uint8_t* input_data,
    uint8_t* output_data,
    unsigned int bit_offset);
1416
/**
 * @brief Return the number of bits needed to encode an ISE sequence.
 *
 * This implementation assumes that the @c quant_level is untrusted, given it may come from random
 * data being decompressed, so we return an arbitrary unencodable size if that is the case.
 *
 * @param character_count   The number of items in the sequence.
 * @param quant_level       The desired quantization level.
 *
 * @return The number of bits needed to encode the BISE string.
 */
unsigned int get_ise_sequence_bitcount(
    unsigned int character_count,
    quant_method quant_level);
1431
/* ============================================================================
  Functionality for managing color partitioning.
============================================================================ */
1435
/**
 * @brief Compute averages and dominant directions for each partition in a 2 component texture.
 *
 * @param      pi           The partition info for the current trial.
 * @param      blk          The image block color data to be compressed.
 * @param      component1   The first component included in the analysis.
 * @param      component2   The second component included in the analysis.
 * @param[out] pm           The output partition metrics.
 *                          - Only pi.partition_count array entries actually get initialized.
 *                          - Direction vectors @c pm.dir are not normalized.
 */
void compute_avgs_and_dirs_2_comp(
    const partition_info& pi,
    const image_block& blk,
    unsigned int component1,
    unsigned int component2,
    partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1453
/**
 * @brief Compute averages and dominant directions for each partition in a 3 component texture.
 *
 * @param      pi                  The partition info for the current trial.
 * @param      blk                 The image block color data to be compressed.
 * @param      omitted_component   The component excluded from the analysis.
 * @param[out] pm                  The output partition metrics.
 *                                 - Only pi.partition_count array entries actually get initialized.
 *                                 - Direction vectors @c pm.dir are not normalized.
 */
void compute_avgs_and_dirs_3_comp(
    const partition_info& pi,
    const image_block& blk,
    unsigned int omitted_component,
    partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1469
/**
 * @brief Compute averages and dominant directions for each partition in a 3 component texture.
 *
 * This is a specialization of @c compute_avgs_and_dirs_3_comp where the omitted component is
 * always alpha, a common case during partition search.
 *
 * @param      pi    The partition info for the current trial.
 * @param      blk   The image block color data to be compressed.
 * @param[out] pm    The output partition metrics.
 *                   - Only pi.partition_count array entries actually get initialized.
 *                   - Direction vectors @c pm.dir are not normalized.
 */
void compute_avgs_and_dirs_3_comp_rgb(
    const partition_info& pi,
    const image_block& blk,
    partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1486
/**
 * @brief Compute averages and dominant directions for each partition in a 4 component texture.
 *
 * @param      pi    The partition info for the current trial.
 * @param      blk   The image block color data to be compressed.
 * @param[out] pm    The output partition metrics.
 *                   - Only pi.partition_count array entries actually get initialized.
 *                   - Direction vectors @c pm.dir are not normalized.
 */
void compute_avgs_and_dirs_4_comp(
    const partition_info& pi,
    const image_block& blk,
    partition_metrics pm[BLOCK_MAX_PARTITIONS]);
1500
/**
 * @brief Compute the RGB error for uncorrelated and same chroma projections.
 *
 * The output of compute averages and dirs is post processed to define two lines, both of which go
 * through the mean-color-value. One line has a direction defined by the dominant direction; this
 * is used to assess the error from using an uncorrelated color representation. The other line goes
 * through (0,0,0) and is used to assess the error from using an RGBS color representation.
 *
 * This function computes the squared error when using these two representations.
 *
 * @param         pi            The partition info for the current trial.
 * @param         blk           The image block color data to be compressed.
 * @param[in,out] plines        Processed line inputs, and line length outputs.
 * @param[out]    uncor_error   The cumulative error for using the uncorrelated line.
 * @param[out]    samec_error   The cumulative error for using the same chroma line.
 */
void compute_error_squared_rgb(
    const partition_info& pi,
    const image_block& blk,
    partition_lines3 plines[BLOCK_MAX_PARTITIONS],
    float& uncor_error,
    float& samec_error);
1523
/**
 * @brief Compute the RGBA error for uncorrelated and same chroma projections.
 *
 * The output of compute averages and dirs is post processed to define two lines, both of which go
 * through the mean-color-value. One line has a direction defined by the dominant direction; this
 * is used to assess the error from using an uncorrelated color representation. The other line goes
 * through (0,0,0,1) and is used to assess the error from using an RGBS color representation.
 *
 * This function computes the squared error when using these two representations.
 *
 * @param      pi             The partition info for the current trial.
 * @param      blk            The image block color data to be compressed.
 * @param      uncor_plines   Processed uncorrelated partition lines for each partition.
 * @param      samec_plines   Processed same chroma partition lines for each partition.
 * @param[out] line_lengths   The length of each component's deviation from the line.
 * @param[out] uncor_error    The cumulative error for using the uncorrelated line.
 * @param[out] samec_error    The cumulative error for using the same chroma line.
 */
void compute_error_squared_rgba(
    const partition_info& pi,
    const image_block& blk,
    const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
    const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
    float line_lengths[BLOCK_MAX_PARTITIONS],
    float& uncor_error,
    float& samec_error);
1550
/**
 * @brief Find the best set of partitions to trial for a given block.
 *
 * On return the @c best_partitions list will contain the two best partition
 * candidates; one assuming data has uncorrelated chroma and one assuming the
 * data has correlated chroma. The best candidate is returned first in the list.
 *
 * @param      bsd                      The block size information.
 * @param      blk                      The image block color data to compress.
 * @param      partition_count          The number of partitions in the block.
 * @param      partition_search_limit   The number of candidate partition encodings to trial.
 * @param[out] best_partitions          The best partition candidates.
 * @param      requested_candidates     The number of requested partitionings. May return fewer if
 *                                      candidates are not available.
 *
 * @return The actual number of candidates returned.
 */
unsigned int find_best_partition_candidates(
    const block_size_descriptor& bsd,
    const image_block& blk,
    unsigned int partition_count,
    unsigned int partition_search_limit,
    unsigned int best_partitions[TUNE_MAX_PARTITIONING_CANDIDATES],
    unsigned int requested_candidates);
1575
/* ============================================================================
  Functionality for managing images and image related data.
============================================================================ */
1579
/**
 * @brief Setup computation of regional averages in an image.
 *
 * This must be done by only a single thread per image, before any thread calls
 * @c compute_averages().
 *
 * Results are written back into @c img->input_alpha_averages.
 *
 * @param      img                   The input image data, also holds output data.
 * @param      alpha_kernel_radius   The kernel radius (in pixels) for alpha mods.
 * @param      swz                   Input data component swizzle.
 * @param[out] ag                    The average variance arguments to init.
 *
 * @return The number of tasks in the processing stage.
 */
unsigned int init_compute_averages(
    const astcenc_image& img,
    unsigned int alpha_kernel_radius,
    const astcenc_swizzle& swz,
    avg_args& ag);
1600
/**
 * @brief Compute averages for a pixel region.
 *
 * The routine works in a single pass, using a summed-area table to decouple the running
 * time from the averaging/variance kernel size.
 *
 * @param[out] ctx   The compressor context storing the output data.
 * @param      arg   The input parameter structure.
 */
void compute_pixel_region_variance(
    astcenc_contexti& ctx,
    const pixel_region_args& arg);
/**
 * @brief Load a single image block from the input image.
 *
 * @param      decode_mode   The compression color profile.
 * @param      img           The input image data.
 * @param[out] blk           The image block to populate.
 * @param      bsd           The block size information.
 * @param      xpos          The block X coordinate in the input image.
 * @param      ypos          The block Y coordinate in the input image.
 * @param      zpos          The block Z coordinate in the input image.
 * @param      swz           The swizzle to apply on load.
 */
void load_image_block(
    astcenc_profile decode_mode,
    const astcenc_image& img,
    image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz);
1634
/**
 * @brief Load a single image block from the input image.
 *
 * This specialized variant can be used only if the block is 2D LDR U8 data,
 * with no swizzle.
 *
 * @param      decode_mode   The compression color profile.
 * @param      img           The input image data.
 * @param[out] blk           The image block to populate.
 * @param      bsd           The block size information.
 * @param      xpos          The block X coordinate in the input image.
 * @param      ypos          The block Y coordinate in the input image.
 * @param      zpos          The block Z coordinate in the input image.
 * @param      swz           The swizzle to apply on load.
 */
void load_image_block_fast_ldr(
    astcenc_profile decode_mode,
    const astcenc_image& img,
    image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz);
1659
/**
 * @brief Store a single image block to the output image.
 *
 * @param[out] img    The output image data.
 * @param      blk    The image block to export.
 * @param      bsd    The block size information.
 * @param      xpos   The block X coordinate in the input image.
 * @param      ypos   The block Y coordinate in the input image.
 * @param      zpos   The block Z coordinate in the input image.
 * @param      swz    The swizzle to apply on store.
 */
void store_image_block(
    astcenc_image& img,
    const image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz);
1679
/* ============================================================================
  Functionality for computing endpoint colors and weights for a block.
============================================================================ */
1683
/**
 * @brief Compute ideal endpoint colors and weights for 1 plane of weights.
 *
 * The ideal endpoints define a color line for the partition. For each texel the ideal weight
 * defines an exact position on the partition color line. We can then use these to assess the error
 * introduced by removing and quantizing the weight grid.
 *
 * @param      blk   The image block color data to compress.
 * @param      pi    The partition info for the current trial.
 * @param[out] ei    The endpoint and weight values.
 */
void compute_ideal_colors_and_weights_1plane(
    const image_block& blk,
    const partition_info& pi,
    endpoints_and_weights& ei);
1699
1700	/**
1701	* @brief Compute ideal endpoint colors and weights for 2 planes of weights.
1702	*
1703	* The ideal endpoints define a color line for the partition. For each texel the ideal weight
1704	* defines an exact position on the partition color line. We can then use these to assess the error
1705	* introduced by removing and quantizing the weight grid.
1706	*
1707	* @param bsd The block size information.
1708	* @param blk The image block color data to compress.
1709	* @param plane2_component The component assigned to plane 2.
1710	* @param[out] ei1 The endpoint and weight values for plane 1.
1711	* @param[out] ei2 The endpoint and weight values for plane 2.
1712	*/
1713	void compute_ideal_colors_and_weights_2planes(
1714	const block_size_descriptor& bsd,
1715	const image_block& blk,
1716	unsigned int plane2_component,
1717	endpoints_and_weights& ei1,
1718	endpoints_and_weights& ei2);
1719
1720	/**
1721	* @brief Compute the optimal unquantized weights for a decimation table.
1722	*
1723	* After computing ideal weights for the case of a complete weight grid, we want to compute the
1724	* ideal weights for the case where weights exist only for some texels. We do this with a
1725	* steepest-descent grid solver which works as follows:
1726	*
1727	* First, for each actual weight, perform a weighted averaging of the texels affected by the weight.
1728	* Then, set step size to <some initial value> and attempt one step towards the original ideal
1729	* weight if it helps to reduce error.
1730	*
1731	* @param ei The non-decimated endpoints and weights.
1732	* @param di The selected weight decimation.
1733	* @param[out] dec_weight_ideal_value The ideal values for the decimated weight set.
1734	*/
1735	void compute_ideal_weights_for_decimation(
1736	const endpoints_and_weights& ei,
1737	const decimation_info& di,
1738	float* dec_weight_ideal_value);
1739
1740	/**
1741	* @brief Compute the optimal quantized weights for a decimation table.
1742	*
1743	* We test the two closest weight indices in the allowed quantization range and keep the weight that
1744	* is the closest match.
1745	*
1746	* @param di The selected weight decimation.
1747	* @param low_bound The lowest weight allowed.
1748	* @param high_bound The highest weight allowed.
1749	* @param dec_weight_ideal_value The ideal weight set.
1750	* @param[out] dec_weight_quant_uvalue The output quantized weight as a float.
1751	* @param[out] dec_weight_uquant The output quantized weight as encoded int.
1752	* @param quant_level The desired weight quant level.
1753	*/
1754	void compute_quantized_weights_for_decimation(
1755	const decimation_info& di,
1756	float low_bound,
1757	float high_bound,
1758	const float* dec_weight_ideal_value,
1759	float* dec_weight_quant_uvalue,
1760	uint8_t* dec_weight_uquant,
1761	quant_method quant_level);
1762
1763	/**
1764	* @brief Compute the error of a decimated weight set for 1 plane.
1765	*
1766	* After computing ideal weights for the case with one weight per texel, we want to compute the
1767	* error for decimated weight grids where weights are stored at a lower resolution. This function
1768	* computes the error of the reduced grid, compared to the full grid.
1769	*
1770	* @param eai The ideal weights for the full grid.
1771	* @param di The selected weight decimation.
1772	* @param dec_weight_quant_uvalue The quantized weights for the decimated grid.
1773	*
1774	* @return The accumulated error.
1775	*/
1776	float compute_error_of_weight_set_1plane(
1777	const endpoints_and_weights& eai,
1778	const decimation_info& di,
1779	const float* dec_weight_quant_uvalue);
1780
1781	/**
1782	* @brief Compute the error of a decimated weight set for 2 planes.
1783	*
1784	* After computing ideal weights for the case with one weight per texel, we want to compute the
1785	* error for decimated weight grids where weights are stored at a lower resolution. This function
1786	* computes the error of the reduced grid, compared to the full grid.
1787	*
1788	* @param eai1 The ideal weights for the full grid and plane 1.
1789	* @param eai2 The ideal weights for the full grid and plane 2.
1790	* @param di The selected weight decimation.
1791	* @param dec_weight_quant_uvalue_plane1 The quantized weights for the decimated grid plane 1.
1792	* @param dec_weight_quant_uvalue_plane2 The quantized weights for the decimated grid plane 2.
1793	*
1794	* @return The accumulated error.
1795	*/
1796	float compute_error_of_weight_set_2planes(
1797	const endpoints_and_weights& eai1,
1798	const endpoints_and_weights& eai2,
1799	const decimation_info& di,
1800	const float* dec_weight_quant_uvalue_plane1,
1801	const float* dec_weight_quant_uvalue_plane2);
1802
1803	/**
1804	* @brief Pack a single pair of color endpoints as effectively as possible.
1805	*
1806	* The user requests a base color endpoint mode in @c format, but the quantizer may choose a
1807	* delta-based representation. It will report back the format variant it actually used.
1808	*
1809	* @param color0 The input unquantized color0 endpoint for absolute endpoint pairs.
1810	* @param color1 The input unquantized color1 endpoint for absolute endpoint pairs.
1811	* @param rgbs_color The input unquantized RGBS variant endpoint for same chroma endpoints.
1812	* @param rgbo_color The input unquantized RGBO variant endpoint for HDR endpoints.
1813	* @param format The desired base format.
1814	* @param[out] output The output storage for the quantized colors.
1815	* @param quant_level The quantization level requested.
1816	*
1817	* @return The actual endpoint mode used.
1818	*/
1819	uint8_t pack_color_endpoints(
1820	vfloat4 color0,
1821	vfloat4 color1,
1822	vfloat4 rgbs_color,
1823	vfloat4 rgbo_color,
1824	int format,
1825	uint8_t* output,
1826	quant_method quant_level);
1827
1828	/**
1829	* @brief Unpack a single pair of encoded endpoints.
1830	*
1831	* Endpoints must be unscrambled and converted into the 0-255 range before calling this function.
1832	*
1833	* @param decode_mode The decode mode (LDR, HDR).
1834	* @param format The color endpoint mode used.
1835	* @param input The raw array of encoded input integers. The length of this array
1836	* depends on @c format; it can be safely assumed to be large enough.
1837	* @param[out] rgb_hdr Is the endpoint using HDR for the RGB channels?
1838	* @param[out] alpha_hdr Is the endpoint using HDR for the A channel?
1839	* @param[out] output0 The output color for endpoint 0.
1840	* @param[out] output1 The output color for endpoint 1.
1841	*/
1842	void unpack_color_endpoints(
1843	astcenc_profile decode_mode,
1844	int format,
1845	const uint8_t* input,
1846	bool& rgb_hdr,
1847	bool& alpha_hdr,
1848	vint4& output0,
1849	vint4& output1);
1850
1851	/**
1852	* @brief Unpack a set of quantized and decimated weights.
1853	*
1854	* TODO: Can we skip this for non-decimated weights now that the @c scb is
1855	* already storing unquantized weights?
1856	*
1857	* @param bsd The block size information.
1858	* @param scb The symbolic compressed encoding.
1859	* @param di The weight grid decimation table.
1860	* @param is_dual_plane @c true if this is a dual plane block, @c false otherwise.
1861	* @param[out] weights_plane1 The output array for storing the plane 1 weights.
1862	* @param[out] weights_plane2 The output array for storing the plane 2 weights.
1863	*/
1864	void unpack_weights(
1865	const block_size_descriptor& bsd,
1866	const symbolic_compressed_block& scb,
1867	const decimation_info& di,
1868	bool is_dual_plane,
1869	int weights_plane1[BLOCK_MAX_TEXELS],
1870	int weights_plane2[BLOCK_MAX_TEXELS]);
1871
1872	/**
1873	* @brief Identify, for each mode, which set of color endpoint produces the best result.
1874	*
1875	* Returns the @c tune_candidate_limit best looking modes, along with the ideal color encoding
1876	* combination for each. The modified quantization level can be used when all formats are the same,
1877	* as this frees up two additional bits of storage.
1878	*
1879	* @param pi The partition info for the current trial.
1880	* @param blk The image block color data to compress.
1881	* @param ep The ideal endpoints.
1882	* @param qwt_bitcounts Bit counts for different quantization methods.
1883	* @param qwt_errors Errors for different quantization methods.
1884	* @param tune_candidate_limit The max number of candidates to return, may be less.
1885	* @param start_block_mode The first block mode to inspect.
1886	* @param end_block_mode The last block mode to inspect.
1887	* @param[out] partition_format_specifiers The best formats per partition.
1888	* @param[out] block_mode The best packed block mode indexes.
1889	* @param[out] quant_level The best color quant level.
1890	* @param[out] quant_level_mod The best color quant level if endpoints are the same.
1891	* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
1892	*
1893	* @return The actual number of candidate matches returned.
1894	*/
1895	unsigned int compute_ideal_endpoint_formats(
1896	const partition_info& pi,
1897	const image_block& blk,
1898	const endpoints& ep,
1899	const int8_t* qwt_bitcounts,
1900	const float* qwt_errors,
1901	unsigned int tune_candidate_limit,
1902	unsigned int start_block_mode,
1903	unsigned int end_block_mode,
1904	uint8_t partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][BLOCK_MAX_PARTITIONS],
1905	int block_mode[TUNE_MAX_TRIAL_CANDIDATES],
1906	quant_method quant_level[TUNE_MAX_TRIAL_CANDIDATES],
1907	quant_method quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES],
1908	compression_working_buffers& tmpbuf);
1909
1910	/**
1911	* @brief For a given 1 plane weight set recompute the endpoint colors.
1912	*
1913	* As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
1914	* recompute the ideal colors for a specific weight set.
1915	*
1916	* @param blk The image block color data to compress.
1917	* @param pi The partition info for the current trial.
1918	* @param di The weight grid decimation table.
1919	* @param dec_weights_uquant The quantized weight set.
1920	* @param[in,out] ep The color endpoints (modified in place).
1921	* @param[out] rgbs_vectors The RGB+scale vectors for LDR blocks.
1922	* @param[out] rgbo_vectors The RGB+offset vectors for HDR blocks.
1923	*/
1924	void recompute_ideal_colors_1plane(
1925	const image_block& blk,
1926	const partition_info& pi,
1927	const decimation_info& di,
1928	const uint8_t* dec_weights_uquant,
1929	endpoints& ep,
1930	vfloat4 rgbs_vectors[BLOCK_MAX_PARTITIONS],
1931	vfloat4 rgbo_vectors[BLOCK_MAX_PARTITIONS]);
1932
1933	/**
1934	* @brief For a given 2 plane weight set recompute the endpoint colors.
1935	*
1936	* As we quantize and decimate weights the optimal endpoint colors may change slightly, so we must
1937	* recompute the ideal colors for a specific weight set.
1938	*
1939	* @param blk The image block color data to compress.
1940	* @param bsd The block_size descriptor.
1941	* @param di The weight grid decimation table.
1942	* @param dec_weights_uquant_plane1 The quantized weight set for plane 1.
1943	* @param dec_weights_uquant_plane2 The quantized weight set for plane 2.
1944	* @param[in,out] ep The color endpoints (modified in place).
1945	* @param[out] rgbs_vector The RGB+scale color for LDR blocks.
1946	* @param[out] rgbo_vector The RGB+offset color for HDR blocks.
1947	* @param plane2_component The component assigned to plane 2.
1948	*/
1949	void recompute_ideal_colors_2planes(
1950	const image_block& blk,
1951	const block_size_descriptor& bsd,
1952	const decimation_info& di,
1953	const uint8_t* dec_weights_uquant_plane1,
1954	const uint8_t* dec_weights_uquant_plane2,
1955	endpoints& ep,
1956	vfloat4& rgbs_vector,
1957	vfloat4& rgbo_vector,
1958	int plane2_component);
1959
1960	/**
1961	* @brief Expand the angular tables needed for the alternative to PCA that we use.
1962	*/
1963	void prepare_angular_tables();
1964
1965	/**
1966	* @brief Compute the angular endpoints for one plane for each block mode.
1967	*
1968	* @param only_always Only consider block modes that are always enabled.
1969	* @param bsd The block size descriptor for the current trial.
1970	* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
1971	* @param max_weight_quant The maximum block mode weight quantization allowed.
1972	* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
1973	*/
1974	void compute_angular_endpoints_1plane(
1975	bool only_always,
1976	const block_size_descriptor& bsd,
1977	const float* dec_weight_ideal_value,
1978	unsigned int max_weight_quant,
1979	compression_working_buffers& tmpbuf);
1980
1981	/**
1982	* @brief Compute the angular endpoints for two planes for each block mode.
1983	*
1984	* @param bsd The block size descriptor for the current trial.
1985	* @param dec_weight_ideal_value The ideal decimated unquantized weight values.
1986	* @param max_weight_quant The maximum block mode weight quantization allowed.
1987	* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
1988	*/
1989	void compute_angular_endpoints_2planes(
1990	const block_size_descriptor& bsd,
1991	const float* dec_weight_ideal_value,
1992	unsigned int max_weight_quant,
1993	compression_working_buffers& tmpbuf);
1994
1995	/* ============================================================================
1996	Functionality for high level compression and decompression access.
1997	============================================================================ */
1998
1999	/**
2000	* @brief Compress an image block into a physical block.
2001	*
2002	* @param ctx The compressor context and configuration.
2003	* @param blk The image block color data to compress.
2004	* @param[out] pcb The physical compressed block output.
2005	* @param[out] tmpbuf Preallocated scratch buffers for the compressor.
2006	*/
2007	void compress_block(
2008	const astcenc_contexti& ctx,
2009	const image_block& blk,
2010	physical_compressed_block& pcb,
2011	compression_working_buffers& tmpbuf);
2012
2013	/**
2014	* @brief Decompress a symbolic block into an image block.
2015	*
2016	* @param decode_mode The decode mode (LDR, HDR, etc).
2017	* @param bsd The block size information.
2018	* @param xpos The X coordinate of the block in the overall image.
2019	* @param ypos The Y coordinate of the block in the overall image.
2020	* @param zpos The Z coordinate of the block in the overall image.
	* @param scb The symbolic compressed encoding.
2021	* @param[out] blk The decompressed image block color data.
2022	*/
2023	void decompress_symbolic_block(
2024	astcenc_profile decode_mode,
2025	const block_size_descriptor& bsd,
2026	int xpos,
2027	int ypos,
2028	int zpos,
2029	const symbolic_compressed_block& scb,
2030	image_block& blk);
2031
2032	/**
2033	* @brief Compute the error between a symbolic block and the original input data.
2034	*
2035	* This function is specialized for 2 plane and 1 partition search.
2036	*
2037	* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2038	*
2039	* @param config The compressor config.
2040	* @param bsd The block size information.
2041	* @param scb The symbolic compressed encoding.
2042	* @param blk The original image block color data.
2043	*
2044	* @return Returns the computed error, or a negative value if the encoding
2045	* should be rejected for any reason.
2046	*/
2047	float compute_symbolic_block_difference_2plane(
2048	const astcenc_config& config,
2049	const block_size_descriptor& bsd,
2050	const symbolic_compressed_block& scb,
2051	const image_block& blk);
2052
2053	/**
2054	* @brief Compute the error between a symbolic block and the original input data.
2055	*
2056	* This function is specialized for 1 plane and N partition search.
2057	*
2058	* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2059	*
2060	* @param config The compressor config.
2061	* @param bsd The block size information.
2062	* @param scb The symbolic compressed encoding.
2063	* @param blk The original image block color data.
2064	*
2065	* @return Returns the computed error, or a negative value if the encoding
2066	* should be rejected for any reason.
2067	*/
2068	float compute_symbolic_block_difference_1plane(
2069	const astcenc_config& config,
2070	const block_size_descriptor& bsd,
2071	const symbolic_compressed_block& scb,
2072	const image_block& blk);
2073
2074	/**
2075	* @brief Compute the error between a symbolic block and the original input data.
2076	*
2077	* This function is specialized for 1 plane and 1 partition search.
2078	*
2079	* In RGBM mode this will reject blocks that attempt to encode a zero M value.
2080	*
2081	* @param config The compressor config.
2082	* @param bsd The block size information.
2083	* @param scb The symbolic compressed encoding.
2084	* @param blk The original image block color data.
2085	*
2086	* @return Returns the computed error, or a negative value if the encoding
2087	* should be rejected for any reason.
2088	*/
2089	float compute_symbolic_block_difference_1plane_1partition(
2090	const astcenc_config& config,
2091	const block_size_descriptor& bsd,
2092	const symbolic_compressed_block& scb,
2093	const image_block& blk);
2094
2095	/**
2096	* @brief Convert a symbolic representation into a binary physical encoding.
2097	*
2098	* It is assumed that the symbolic encoding is valid and encodable, or
2099	* previously flagged as an error block if an error color is to be encoded.
2100	*
2101	* @param bsd The block size information.
2102	* @param scb The symbolic representation.
2103	* @param[out] pcb The binary encoded data.
2104	*/
2105	void symbolic_to_physical(
2106	const block_size_descriptor& bsd,
2107	const symbolic_compressed_block& scb,
2108	physical_compressed_block& pcb);
2109
2110	/**
2111	* @brief Convert a binary physical encoding into a symbolic representation.
2112	*
2113	* This function can cope with arbitrary input data; output blocks will be
2114	* flagged as an error block if the encoding is invalid.
2115	*
2116	* @param bsd The block size information.
2117	* @param pcb The binary encoded data.
2118	* @param[out] scb The output symbolic representation.
2119	*/
2120	void physical_to_symbolic(
2121	const block_size_descriptor& bsd,
2122	const physical_compressed_block& pcb,
2123	symbolic_compressed_block& scb);
2124
2125	/* ============================================================================
2126	Platform-specific functions.
2127	============================================================================ */
/**
 * @brief Allocate an aligned memory buffer.
 *
 * Allocated memory must be freed by aligned_free.
 *
 * On Windows the request is forwarded to @c _aligned_malloc; on all other platforms it is
 * forwarded to @c posix_memalign.
 *
 * @param size    The desired buffer size, in bytes.
 * @param align   The desired buffer alignment; must be 2^N. Note that @c posix_memalign
 *                additionally requires the alignment to be a multiple of @c sizeof(void*).
 *
 * @return The memory buffer pointer or nullptr on allocation failure.
 */
template<typename T>
T* aligned_malloc(size_t size, size_t align)
{
	// Start from nullptr so the failure check below never inspects an indeterminate value
	void* ptr = nullptr;
	int error = 0;

#if defined(_WIN32)
	ptr = _aligned_malloc(size, align);
#else
	// posix_memalign reports failure through its return code, not through the out pointer
	error = posix_memalign(&ptr, align, size);
#endif

	if (error || (!ptr))
	{
		return nullptr;
	}

	return static_cast<T*>(ptr);
}
2157
/**
 * @brief Free an aligned memory buffer.
 *
 * On Windows the buffer is released with @c _aligned_free; on all other platforms it is
 * released with @c free.
 *
 * @param ptr   The buffer to free.
 */
template<typename T>
void aligned_free(T* ptr)
{
	// Object pointer to void* is a standard conversion, so static_cast is sufficient here
	void* raw = static_cast<void*>(ptr);

#if defined(_WIN32)
	_aligned_free(raw);
#else
	free(raw);
#endif
}
2172
2173	#endif
2174

Definitions

BLOCK_MAX_TEXELS
BLOCK_MAX_COMPONENTS
BLOCK_MAX_PARTITIONS
BLOCK_MAX_PARTITIONINGS
BLOCK_MAX_KMEANS_TEXELS
BLOCK_MAX_WEIGHTS
BLOCK_MAX_WEIGHTS_2PLANE
BLOCK_MIN_WEIGHT_BITS
BLOCK_MAX_WEIGHT_BITS
BLOCK_BAD_BLOCK_MODE
BLOCK_BAD_PARTITIONING
PARTITION_INDEX_BITS
WEIGHTS_PLANE2_OFFSET
WEIGHTS_TEXEL_SUM
WEIGHTS_MAX_BLOCK_MODES
WEIGHTS_MAX_DECIMATION_MODES
ERROR_CALC_DEFAULT
TUNE_MIN_TEXELS_MODE0_FASTPATH
TUNE_MAX_TRIAL_CANDIDATES
TUNE_MAX_PARTITIONING_CANDIDATES
TUNE_MAX_ANGULAR_QUANT
endpoint_formats
quant_method
get_quant_level
partition_metrics
partition_lines3
partition_info
decimation_info
block_mode
get_weight_quant_mode
decimation_mode
set_ref_1plane
is_ref_1plane
set_ref_2plane
is_ref_2plane
block_size_descriptor
get_block_mode
get_decimation_mode
get_decimation_info
get_partition_table
get_partition_info
get_raw_partition_info
image_block
texel
texel3
get_default_alpha
is_constant_channel
is_luminance
is_luminancealpha
endpoints
endpoints_and_weights
encoding_choice_errors
compression_working_buffers
dt_init_working_buffers
quant_and_transfer_table
SYM_BTYPE_ERROR
SYM_BTYPE_CONST_F16
SYM_BTYPE_CONST_U16
SYM_BTYPE_NONCONST
symbolic_compressed_block
get_color_quant_mode
physical_compressed_block
pixel_region_args
avg_args
astcenc_contexti
aligned_malloc

Browse the source code of Godot/thirdparty/astcenc/astcenc_internal.h

Definitions