astcenc_color_quantize.cpp source code [Godot/thirdparty/astcenc/astcenc_color_quantize.cpp]

1	// SPDX-License-Identifier: Apache-2.0
2	// ----------------------------------------------------------------------------
3	// Copyright 2011-2023 Arm Limited
4	//
5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6	// use this file except in compliance with the License. You may obtain a copy
7	// of the License at:
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing, software
12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14	// License for the specific language governing permissions and limitations
15	// under the License.
16	// ----------------------------------------------------------------------------
17
18	#if !defined(ASTCENC_DECOMPRESS_ONLY)
19
20	/**
21	* @brief Functions for color quantization.
22	*
23	* The design of the color quantization functionality requires the caller to use higher level error
24	* analysis to determine the base encoding that should be used. This earlier analysis will select
25	* the basic type of the endpoint that should be used:
26	*
27	* * Mode: LDR or HDR
28	* * Quantization level
29	* * Channel count: L, LA, RGB, or RGBA
30	* * Endpoint 2 type: Direct color encode, or scaled from endpoint 1.
31	*
32	* However, this leaves a number of decisions about exactly how to pack the endpoints open. In
33	* particular we need to determine if blue contraction can be used, or/and if delta encoding can be
34	* used. If they can be applied these will allow us to maintain higher precision in the endpoints
35	* without needing additional storage.
36	*/
37
38	#include <stdio.h>
39	#include <assert.h>
40
41	#include "astcenc_internal.h"
42
43	/**
44	* @brief Determine the quantized value given a quantization level.
45	*
46	* @param quant_level The quantization level to use.
47	* @param value The value to convert. This must be in the 0-255 range.
48	*
49	* @return The unpacked quantized value, returned in 0-255 range.
50	*/
51	static inline uint8_t quant_color(
52	quant_method quant_level,
53	int value
54	) {
55	int index = value * `2` + `1`;
56	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
57	}
58
59	/**
60	* @brief Determine the quantized value given a quantization level and residual.
61	*
62	* @param quant_level The quantization level to use.
63	* @param value The value to convert. This must be in the 0-255 range.
64	* @param valuef The original value before rounding, used to compute a residual.
65	*
66	* @return The unpacked quantized value, returned in 0-255 range.
67	*/
68	static inline uint8_t quant_color(
69	quant_method quant_level,
70	int value,
71	float valuef
72	) {
73	int index = value * `2`;
74
75	// Compute the residual to determine if we should round down or up ties.
76	// Test should be residual >= 0, but empirical testing shows small bias helps.
77	float residual = valuef - static_cast<float>(value);
78	if (residual >= -`0.1f`)
79	{
80	index++;
81	}
82
83	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
84	}
85
86	/**
87	* @brief Quantize an LDR RGB color.
88	*
89	* Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
90	* For this encoding @c color0 cannot be larger than @c color1. If @c color0 is actually larger
91	* than @c color1, @c color0 is reduced and @c color1 is increased until the constraint is met.
92	*
93	* @param color0 The input unquantized color0 endpoint.
94	* @param color1 The input unquantized color1 endpoint.
95	* @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1).
96	* @param quant_level The quantization level to use.
97	*/
98	static void quantize_rgb(
99	vfloat4 color0,
100	vfloat4 color1,
101	uint8_t output[`6`],
102	quant_method quant_level
103	) {
104	float scale = `1.0f` / `257.0f`;
105
106	float r0 = astc::clamp255f(color0.lane<`0`>() * scale);
107	float g0 = astc::clamp255f(color0.lane<`1`>() * scale);
108	float b0 = astc::clamp255f(color0.lane<`2`>() * scale);
109
110	float r1 = astc::clamp255f(color1.lane<`0`>() * scale);
111	float g1 = astc::clamp255f(color1.lane<`1`>() * scale);
112	float b1 = astc::clamp255f(color1.lane<`2`>() * scale);
113
114	int ri0, gi0, bi0, ri1, gi1, bi1;
115	float rgb0_addon = `0.0f`;
116	float rgb1_addon = `0.0f`;
117	do
118	{
119	ri0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(r0 + rgb0_addon), `0`), r0 + rgb0_addon);
120	gi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(g0 + rgb0_addon), `0`), g0 + rgb0_addon);
121	bi0 = quant_color(quant_level, astc::max(astc::flt2int_rtn(b0 + rgb0_addon), `0`), b0 + rgb0_addon);
122	ri1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(r1 + rgb1_addon), `255`), r1 + rgb1_addon);
123	gi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(g1 + rgb1_addon), `255`), g1 + rgb1_addon);
124	bi1 = quant_color(quant_level, astc::min(astc::flt2int_rtn(b1 + rgb1_addon), `255`), b1 + rgb1_addon);
125
126	rgb0_addon -= `0.2f`;
127	rgb1_addon += `0.2f`;
128	} while (ri0 + gi0 + bi0 > ri1 + gi1 + bi1);
129
130	output[`0`] = static_cast<uint8_t>(ri0);
131	output[`1`] = static_cast<uint8_t>(ri1);
132	output[`2`] = static_cast<uint8_t>(gi0);
133	output[`3`] = static_cast<uint8_t>(gi1);
134	output[`4`] = static_cast<uint8_t>(bi0);
135	output[`5`] = static_cast<uint8_t>(bi1);
136	}
137
138	/**
139	* @brief Quantize an LDR RGBA color.
140	*
141	* Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
142	* For this encoding @c color0.rgb cannot be larger than @c color1.rgb (this indicates blue
143	* contraction). If @c color0.rgb is actually larger than @c color1.rgb, @c color0.rgb is reduced
144	* and @c color1.rgb is increased until the constraint is met.
145	*
146	* @param color0 The input unquantized color0 endpoint.
147	* @param color1 The input unquantized color1 endpoint.
148	* @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1, a0, a1).
149	* @param quant_level The quantization level to use.
150	*/
151	static void quantize_rgba(
152	vfloat4 color0,
153	vfloat4 color1,
154	uint8_t output[`8`],
155	quant_method quant_level
156	) {
157	float scale = `1.0f` / `257.0f`;
158
159	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
160	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
161
162	output[`6`] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
163	output[`7`] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
164
165	quantize_rgb(color0, color1, output, quant_level);
166	}
167
168	/**
169	* @brief Try to quantize an LDR RGB color using blue-contraction.
170	*
171	* Blue-contraction is only usable if encoded color 1 is larger than color 0.
172	*
173	* @param color0 The input unquantized color0 endpoint.
174	* @param color1 The input unquantized color1 endpoint.
175	* @param[out] output The output endpoints, returned as (r1, r0, g1, g0, b1, b0).
176	* @param quant_level The quantization level to use.
177	*
178	* @return Returns @c false on failure, @c true on success.
179	*/
180	static bool try_quantize_rgb_blue_contract(
181	vfloat4 color0,
182	vfloat4 color1,
183	uint8_t output[`6`],
184	quant_method quant_level
185	) {
186	float scale = `1.0f` / `257.0f`;
187
188	float r0 = color0.lane<`0`>() * scale;
189	float g0 = color0.lane<`1`>() * scale;
190	float b0 = color0.lane<`2`>() * scale;
191
192	float r1 = color1.lane<`0`>() * scale;
193	float g1 = color1.lane<`1`>() * scale;
194	float b1 = color1.lane<`2`>() * scale;
195
196	// Apply inverse blue-contraction. This can produce an overflow; which means BC cannot be used.
197	r0 += (r0 - b0);
198	g0 += (g0 - b0);
199	r1 += (r1 - b1);
200	g1 += (g1 - b1);
201
202	if (r0 < `0.0f` \|\| r0 > `255.0f` \|\| g0 < `0.0f` \|\| g0 > `255.0f` \|\| b0 < `0.0f` \|\| b0 > `255.0f` \|\|
203	r1 < `0.0f` \|\| r1 > `255.0f` \|\| g1 < `0.0f` \|\| g1 > `255.0f` \|\| b1 < `0.0f` \|\| b1 > `255.0f`)
204	{
205	return false;
206	}
207
208	// Quantize the inverse-blue-contracted color
209	int ri0 = quant_color(quant_level, astc::flt2int_rtn(r0), r0);
210	int gi0 = quant_color(quant_level, astc::flt2int_rtn(g0), g0);
211	int bi0 = quant_color(quant_level, astc::flt2int_rtn(b0), b0);
212
213	int ri1 = quant_color(quant_level, astc::flt2int_rtn(r1), r1);
214	int gi1 = quant_color(quant_level, astc::flt2int_rtn(g1), g1);
215	int bi1 = quant_color(quant_level, astc::flt2int_rtn(b1), b1);
216
217	// If color #1 is not larger than color #0 then blue-contraction cannot be used. Note that
218	// blue-contraction and quantization change this order, which is why we must test afterwards.
219	if (ri1 + gi1 + bi1 <= ri0 + gi0 + bi0)
220	{
221	return false;
222	}
223
224	output[`0`] = static_cast<uint8_t>(ri1);
225	output[`1`] = static_cast<uint8_t>(ri0);
226	output[`2`] = static_cast<uint8_t>(gi1);
227	output[`3`] = static_cast<uint8_t>(gi0);
228	output[`4`] = static_cast<uint8_t>(bi1);
229	output[`5`] = static_cast<uint8_t>(bi0);
230
231	return true;
232	}
233
234	/**
235	* @brief Try to quantize an LDR RGBA color using blue-contraction.
236	*
237	* Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
238	*
239	* @param color0 The input unquantized color0 endpoint.
240	* @param color1 The input unquantized color1 endpoint.
241	* @param[out] output The output endpoints, returned as (r1, r0, g1, g0, b1, b0, a1, a0).
242	* @param quant_level The quantization level to use.
243	*
244	* @return Returns @c false on failure, @c true on success.
245	*/
246	static bool try_quantize_rgba_blue_contract(
247	vfloat4 color0,
248	vfloat4 color1,
249	uint8_t output[`8`],
250	quant_method quant_level
251	) {
252	float scale = `1.0f` / `257.0f`;
253
254	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
255	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
256
257	output[`6`] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
258	output[`7`] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
259
260	return try_quantize_rgb_blue_contract(color0, color1, output, quant_level);
261	}
262
263	/**
264	* @brief Try to quantize an LDR RGB color using delta encoding.
265	*
266	* At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
267	* we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
268	* non-negative, then we encode a regular delta.
269	*
270	* @param color0 The input unquantized color0 endpoint.
271	* @param color1 The input unquantized color1 endpoint.
272	* @param[out] output The output endpoints, returned as (r0, r1, g0, g1, b0, b1).
273	* @param quant_level The quantization level to use.
274	*
275	* @return Returns @c false on failure, @c true on success.
276	*/
277	static bool try_quantize_rgb_delta(
278	vfloat4 color0,
279	vfloat4 color1,
280	uint8_t output[`6`],
281	quant_method quant_level
282	) {
283	float scale = `1.0f` / `257.0f`;
284
285	float r0 = astc::clamp255f(color0.lane<`0`>() * scale);
286	float g0 = astc::clamp255f(color0.lane<`1`>() * scale);
287	float b0 = astc::clamp255f(color0.lane<`2`>() * scale);
288
289	float r1 = astc::clamp255f(color1.lane<`0`>() * scale);
290	float g1 = astc::clamp255f(color1.lane<`1`>() * scale);
291	float b1 = astc::clamp255f(color1.lane<`2`>() * scale);
292
293	// Transform r0 to unorm9
294	int r0a = astc::flt2int_rtn(r0);
295	int g0a = astc::flt2int_rtn(g0);
296	int b0a = astc::flt2int_rtn(b0);
297
298	r0a <<= `1`;
299	g0a <<= `1`;
300	b0a <<= `1`;
301
302	// Mask off the top bit
303	int r0b = r0a & `0xFF`;
304	int g0b = g0a & `0xFF`;
305	int b0b = b0a & `0xFF`;
306
307	// Quantize then unquantize in order to get a value that we take differences against
308	int r0be = quant_color(quant_level, r0b);
309	int g0be = quant_color(quant_level, g0b);
310	int b0be = quant_color(quant_level, b0b);
311
312	r0b = r0be \| (r0a & `0x100`);
313	g0b = g0be \| (g0a & `0x100`);
314	b0b = b0be \| (b0a & `0x100`);
315
316	// Get hold of the second value
317	int r1d = astc::flt2int_rtn(r1);
318	int g1d = astc::flt2int_rtn(g1);
319	int b1d = astc::flt2int_rtn(b1);
320
321	r1d <<= `1`;
322	g1d <<= `1`;
323	b1d <<= `1`;
324
325	// ... and take differences
326	r1d -= r0b;
327	g1d -= g0b;
328	b1d -= b0b;
329
330	// Check if the difference is too large to be encodable
331	if (r1d > `63` \|\| g1d > `63` \|\| b1d > `63` \|\| r1d < -`64` \|\| g1d < -`64` \|\| b1d < -`64`)
332	{
333	return false;
334	}
335
336	// Insert top bit of the base into the offset
337	r1d &= `0x7F`;
338	g1d &= `0x7F`;
339	b1d &= `0x7F`;
340
341	r1d \|= (r0b & `0x100`) >> `1`;
342	g1d \|= (g0b & `0x100`) >> `1`;
343	b1d \|= (b0b & `0x100`) >> `1`;
344
345	// Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
346	// since we have then corrupted either the top bit of the base or the sign bit of the offset
347	int r1de = quant_color(quant_level, r1d);
348	int g1de = quant_color(quant_level, g1d);
349	int b1de = quant_color(quant_level, b1d);
350
351	if (((r1d ^ r1de) \| (g1d ^ g1de) \| (b1d ^ b1de)) & `0xC0`)
352	{
353	return false;
354	}
355
356	// If the sum of offsets triggers blue-contraction then encoding fails
357	vint4 ep0(r0be, g0be, b0be, `0`);
358	vint4 ep1(r1de, g1de, b1de, `0`);
359	bit_transfer_signed(ep1, ep0);
360	if (hadd_rgb_s(ep1) < `0`)
361	{
362	return false;
363	}
364
365	// Check that the offsets produce legitimate sums as well
366	ep0 = ep0 + ep1;
367	if (any((ep0 < vint4 (`0`)) \| (ep0 > vint4 (`0xFF`))))
368	{
369	return false;
370	}
371
372	output[`0`] = static_cast<uint8_t>(r0be);
373	output[`1`] = static_cast<uint8_t>(r1de);
374	output[`2`] = static_cast<uint8_t>(g0be);
375	output[`3`] = static_cast<uint8_t>(g1de);
376	output[`4`] = static_cast<uint8_t>(b0be);
377	output[`5`] = static_cast<uint8_t>(b1de);
378
379	return true;
380	}
381
382	static bool try_quantize_rgb_delta_blue_contract(
383	vfloat4 color0,
384	vfloat4 color1,
385	uint8_t output[`6`],
386	quant_method quant_level
387	) {
388	// Note: Switch around endpoint colors already at start
389	float scale = `1.0f` / `257.0f`;
390
391	float r1 = color0.lane<`0`>() * scale;
392	float g1 = color0.lane<`1`>() * scale;
393	float b1 = color0.lane<`2`>() * scale;
394
395	float r0 = color1.lane<`0`>() * scale;
396	float g0 = color1.lane<`1`>() * scale;
397	float b0 = color1.lane<`2`>() * scale;
398
399	// Apply inverse blue-contraction. This can produce an overflow; which means BC cannot be used.
400	r0 += (r0 - b0);
401	g0 += (g0 - b0);
402	r1 += (r1 - b1);
403	g1 += (g1 - b1);
404
405	if (r0 < `0.0f` \|\| r0 > `255.0f` \|\| g0 < `0.0f` \|\| g0 > `255.0f` \|\| b0 < `0.0f` \|\| b0 > `255.0f` \|\|
406	r1 < `0.0f` \|\| r1 > `255.0f` \|\| g1 < `0.0f` \|\| g1 > `255.0f` \|\| b1 < `0.0f` \|\| b1 > `255.0f`)
407	{
408	return false;
409	}
410
411	// Transform r0 to unorm9
412	int r0a = astc::flt2int_rtn(r0);
413	int g0a = astc::flt2int_rtn(g0);
414	int b0a = astc::flt2int_rtn(b0);
415	r0a <<= `1`;
416	g0a <<= `1`;
417	b0a <<= `1`;
418
419	// Mask off the top bit
420	int r0b = r0a & `0xFF`;
421	int g0b = g0a & `0xFF`;
422	int b0b = b0a & `0xFF`;
423
424	// Quantize, then unquantize in order to get a value that we take differences against.
425	int r0be = quant_color(quant_level, r0b);
426	int g0be = quant_color(quant_level, g0b);
427	int b0be = quant_color(quant_level, b0b);
428
429	r0b = r0be \| (r0a & `0x100`);
430	g0b = g0be \| (g0a & `0x100`);
431	b0b = b0be \| (b0a & `0x100`);
432
433	// Get hold of the second value
434	int r1d = astc::flt2int_rtn(r1);
435	int g1d = astc::flt2int_rtn(g1);
436	int b1d = astc::flt2int_rtn(b1);
437
438	r1d <<= `1`;
439	g1d <<= `1`;
440	b1d <<= `1`;
441
442	// .. and take differences!
443	r1d -= r0b;
444	g1d -= g0b;
445	b1d -= b0b;
446
447	// Check if the difference is too large to be encodable
448	if (r1d > `63` \|\| g1d > `63` \|\| b1d > `63` \|\| r1d < -`64` \|\| g1d < -`64` \|\| b1d < -`64`)
449	{
450	return false;
451	}
452
453	// Insert top bit of the base into the offset
454	r1d &= `0x7F`;
455	g1d &= `0x7F`;
456	b1d &= `0x7F`;
457
458	r1d \|= (r0b & `0x100`) >> `1`;
459	g1d \|= (g0b & `0x100`) >> `1`;
460	b1d \|= (b0b & `0x100`) >> `1`;
461
462	// Then quantize and unquantize; if this causes any of the top two bits to flip,
463	// then encoding fails, since we have then corrupted either the top bit of the base
464	// or the sign bit of the offset.
465	int r1de = quant_color(quant_level, r1d);
466	int g1de = quant_color(quant_level, g1d);
467	int b1de = quant_color(quant_level, b1d);
468
469	if (((r1d ^ r1de) \| (g1d ^ g1de) \| (b1d ^ b1de)) & `0xC0`)
470	{
471	return false;
472	}
473
474	// If the sum of offsets does not trigger blue-contraction then encoding fails
475	vint4 ep0(r0be, g0be, b0be, `0`);
476	vint4 ep1(r1de, g1de, b1de, `0`);
477	bit_transfer_signed(ep1, ep0);
478	if (hadd_rgb_s(ep1) >= `0`)
479	{
480	return false;
481	}
482
483	// Check that the offsets produce legitimate sums as well
484	ep0 = ep0 + ep1;
485	if (any((ep0 < vint4 (`0`)) \| (ep0 > vint4 (`0xFF`))))
486	{
487	return false;
488	}
489
490	output[`0`] = static_cast<uint8_t>(r0be);
491	output[`1`] = static_cast<uint8_t>(r1de);
492	output[`2`] = static_cast<uint8_t>(g0be);
493	output[`3`] = static_cast<uint8_t>(g1de);
494	output[`4`] = static_cast<uint8_t>(b0be);
495	output[`5`] = static_cast<uint8_t>(b1de);
496
497	return true;
498	}
499
500	/**
501	* @brief Try to quantize an LDR A color using delta encoding.
502	*
503	* At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
504	* we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
505	* non-negative, then we encode a regular delta.
506	*
507	* This function only compressed the alpha - the other elements in the output array are not touched.
508	*
509	* @param color0 The input unquantized color0 endpoint.
510	* @param color1 The input unquantized color1 endpoint.
511	* @param[out] output The output endpoints, returned as (x, x, x, x, x, x, a0, a1).
512	* @param quant_level The quantization level to use.
513	*
514	* @return Returns @c false on failure, @c true on success.
515	*/
516	static bool try_quantize_alpha_delta(
517	vfloat4 color0,
518	vfloat4 color1,
519	uint8_t output[`8`],
520	quant_method quant_level
521	) {
522	float scale = `1.0f` / `257.0f`;
523
524	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
525	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
526
527	int a0a = astc::flt2int_rtn(a0);
528	a0a <<= `1`;
529	int a0b = a0a & `0xFF`;
530	int a0be = quant_color(quant_level, a0b);
531	a0b = a0be;
532	a0b \|= a0a & `0x100`;
533	int a1d = astc::flt2int_rtn(a1);
534	a1d <<= `1`;
535	a1d -= a0b;
536
537	if (a1d > `63` \|\| a1d < -`64`)
538	{
539	return false;
540	}
541
542	a1d &= `0x7F`;
543	a1d \|= (a0b & `0x100`) >> `1`;
544
545	int a1de = quant_color(quant_level, a1d);
546	int a1du = a1de;
547	if ((a1d ^ a1du) & `0xC0`)
548	{
549	return false;
550	}
551
552	a1du &= `0x7F`;
553	if (a1du & `0x40`)
554	{
555	a1du -= `0x80`;
556	}
557
558	a1du += a0b;
559	if (a1du < `0` \|\| a1du > `0x1FF`)
560	{
561	return false;
562	}
563
564	output[`6`] = static_cast<uint8_t>(a0be);
565	output[`7`] = static_cast<uint8_t>(a1de);
566
567	return true;
568	}
569
570	/**
571	* @brief Try to quantize an LDR LA color using delta encoding.
572	*
573	* At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
574	* we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
575	* non-negative, then we encode a regular delta.
576	*
577	* This function only compressed the alpha - the other elements in the output array are not touched.
578	*
579	* @param color0 The input unquantized color0 endpoint.
580	* @param color1 The input unquantized color1 endpoint.
581	* @param[out] output The output endpoints, returned as (l0, l1, a0, a1).
582	* @param quant_level The quantization level to use.
583	*
584	* @return Returns @c false on failure, @c true on success.
585	*/
586	static bool try_quantize_luminance_alpha_delta(
587	vfloat4 color0,
588	vfloat4 color1,
589	uint8_t output[`4`],
590	quant_method quant_level
591	) {
592	float scale = `1.0f` / `257.0f`;
593
594	float l0 = astc::clamp255f(hadd_rgb_s(color0) * ((`1.0f` / `3.0f`) * scale));
595	float l1 = astc::clamp255f(hadd_rgb_s(color1) * ((`1.0f` / `3.0f`) * scale));
596
597	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
598	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
599
600	int l0a = astc::flt2int_rtn(l0);
601	int a0a = astc::flt2int_rtn(a0);
602	l0a <<= `1`;
603	a0a <<= `1`;
604
605	int l0b = l0a & `0xFF`;
606	int a0b = a0a & `0xFF`;
607	int l0be = quant_color(quant_level, l0b);
608	int a0be = quant_color(quant_level, a0b);
609	l0b = l0be;
610	a0b = a0be;
611	l0b \|= l0a & `0x100`;
612	a0b \|= a0a & `0x100`;
613
614	int l1d = astc::flt2int_rtn(l1);
615	int a1d = astc::flt2int_rtn(a1);
616	l1d <<= `1`;
617	a1d <<= `1`;
618	l1d -= l0b;
619	a1d -= a0b;
620
621	if (l1d > `63` \|\| l1d < -`64`)
622	{
623	return false;
624	}
625
626	if (a1d > `63` \|\| a1d < -`64`)
627	{
628	return false;
629	}
630
631	l1d &= `0x7F`;
632	a1d &= `0x7F`;
633	l1d \|= (l0b & `0x100`) >> `1`;
634	a1d \|= (a0b & `0x100`) >> `1`;
635
636	int l1de = quant_color(quant_level, l1d);
637	int a1de = quant_color(quant_level, a1d);
638	int l1du = l1de;
639	int a1du = a1de;
640
641	if ((l1d ^ l1du) & `0xC0`)
642	{
643	return false;
644	}
645
646	if ((a1d ^ a1du) & `0xC0`)
647	{
648	return false;
649	}
650
651	l1du &= `0x7F`;
652	a1du &= `0x7F`;
653
654	if (l1du & `0x40`)
655	{
656	l1du -= `0x80`;
657	}
658
659	if (a1du & `0x40`)
660	{
661	a1du -= `0x80`;
662	}
663
664	l1du += l0b;
665	a1du += a0b;
666
667	if (l1du < `0` \|\| l1du > `0x1FF`)
668	{
669	return false;
670	}
671
672	if (a1du < `0` \|\| a1du > `0x1FF`)
673	{
674	return false;
675	}
676
677	output[`0`] = static_cast<uint8_t>(l0be);
678	output[`1`] = static_cast<uint8_t>(l1de);
679	output[`2`] = static_cast<uint8_t>(a0be);
680	output[`3`] = static_cast<uint8_t>(a1de);
681
682	return true;
683	}
684
685	/**
686	* @brief Try to quantize an LDR RGBA color using delta encoding.
687	*
688	* At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
689	* we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
690	* non-negative, then we encode a regular delta.
691	*
692	* This function only compressed the alpha - the other elements in the output array are not touched.
693	*
694	* @param color0 The input unquantized color0 endpoint.
695	* @param color1 The input unquantized color1 endpoint.
696	* @param[out] output The output endpoints, returned as (r0, r1, b0, b1, g0, g1, a0, a1).
697	* @param quant_level The quantization level to use.
698	*
699	* @return Returns @c false on failure, @c true on success.
700	*/
701	static bool try_quantize_rgba_delta(
702	vfloat4 color0,
703	vfloat4 color1,
704	uint8_t output[`8`],
705	quant_method quant_level
706	) {
707	return try_quantize_rgb_delta(color0, color1, output, quant_level) &&
708	try_quantize_alpha_delta(color0, color1, output, quant_level);
709	}
710
711
712	/**
713	* @brief Try to quantize an LDR RGBA color using delta and blue contract encoding.
714	*
715	* At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
716	* we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
717	* non-negative, then we encode a regular delta.
718	*
719	* This function only compressed the alpha - the other elements in the output array are not touched.
720	*
721	* @param color0 The input unquantized color0 endpoint.
722	* @param color1 The input unquantized color1 endpoint.
723	* @param[out] output The output endpoints, returned as (r0, r1, b0, b1, g0, g1, a0, a1).
724	* @param quant_level The quantization level to use.
725	*
726	* @return Returns @c false on failure, @c true on success.
727	*/
728	static bool try_quantize_rgba_delta_blue_contract(
729	vfloat4 color0,
730	vfloat4 color1,
731	uint8_t output[`8`],
732	quant_method quant_level
733	) {
734	// Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract
735	return try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level) &&
736	try_quantize_alpha_delta(color1, color0, output, quant_level);
737	}
738
739	/**
740	* @brief Quantize an LDR RGB color using scale encoding.
741	*
742	* @param color The input unquantized color endpoint and scale factor.
743	* @param[out] output The output endpoints, returned as (r0, g0, b0, s).
744	* @param quant_level The quantization level to use.
745	*/
746	static void quantize_rgbs(
747	vfloat4 color,
748	uint8_t output[`4`],
749	quant_method quant_level
750	) {
751	float scale = `1.0f` / `257.0f`;
752
753	float r = astc::clamp255f(color.lane<`0`>() * scale);
754	float g = astc::clamp255f(color.lane<`1`>() * scale);
755	float b = astc::clamp255f(color.lane<`2`>() * scale);
756
757	int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
758	int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
759	int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
760
761	float oldcolorsum = hadd_rgb_s(color) * scale;
762	float newcolorsum = static_cast<float>(ri + gi + bi);
763
764	float scalea = astc::clamp1f(color.lane<`3`>() * (oldcolorsum + `1e-10f`) / (newcolorsum + `1e-10f`));
765	int scale_idx = astc::flt2int_rtn(scalea * `256.0f`);
766	scale_idx = astc::clamp(scale_idx, `0`, `255`);
767
768	output[`0`] = static_cast<uint8_t>(ri);
769	output[`1`] = static_cast<uint8_t>(gi);
770	output[`2`] = static_cast<uint8_t>(bi);
771	output[`3`] = quant_color(quant_level, scale_idx);
772	}
773
774	/**
775	* @brief Quantize an LDR RGBA color using scale encoding.
776	*
777	* @param color The input unquantized color endpoint and scale factor.
778	* @param[out] output The output endpoints, returned as (r0, g0, b0, s, a0, a1).
779	* @param quant_level The quantization level to use.
780	*/
781	static void quantize_rgbs_alpha(
782	vfloat4 color0,
783	vfloat4 color1,
784	vfloat4 color,
785	uint8_t output[`6`],
786	quant_method quant_level
787	) {
788	float scale = `1.0f` / `257.0f`;
789
790	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
791	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
792
793	output[`4`] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
794	output[`5`] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
795
796	quantize_rgbs(color, output, quant_level);
797	}
798
799	/**
800	* @brief Quantize a LDR L color.
801	*
802	* @param color0 The input unquantized color0 endpoint.
803	* @param color1 The input unquantized color1 endpoint.
804	* @param[out] output The output endpoints, returned as (l0, l1).
805	* @param quant_level The quantization level to use.
806	*/
807	static void quantize_luminance(
808	vfloat4 color0,
809	vfloat4 color1,
810	uint8_t output[`2`],
811	quant_method quant_level
812	) {
813	float scale = `1.0f` / `257.0f`;
814
815	color0 = color0 * scale;
816	color1 = color1 * scale;
817
818	float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (`1.0f` / `3.0f`));
819	float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (`1.0f` / `3.0f`));
820
821	if (lum0 > lum1)
822	{
823	float avg = (lum0 + lum1) * `0.5f`;
824	lum0 = avg;
825	lum1 = avg;
826	}
827
828	output[`0`] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
829	output[`1`] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
830	}
831
832	/**
833	* @brief Quantize a LDR LA color.
834	*
835	* @param color0 The input unquantized color0 endpoint.
836	* @param color1 The input unquantized color1 endpoint.
837	* @param[out] output The output endpoints, returned as (l0, l1, a0, a1).
838	* @param quant_level The quantization level to use.
839	*/
840	static void quantize_luminance_alpha(
841	vfloat4 color0,
842	vfloat4 color1,
843	uint8_t output[`4`],
844	quant_method quant_level
845	) {
846	float scale = `1.0f` / `257.0f`;
847
848	color0 = color0 * scale;
849	color1 = color1 * scale;
850
851	float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (`1.0f` / `3.0f`));
852	float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (`1.0f` / `3.0f`));
853
854	float a0 = astc::clamp255f(color0.lane<`3`>());
855	float a1 = astc::clamp255f(color1.lane<`3`>());
856
857	output[`0`] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
858	output[`1`] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
859	output[`2`] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
860	output[`3`] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
861	}
862
863	/**
864	* @brief Quantize and unquantize a value ensuring top two bits are the same.
865	*
866	* @param quant_level The quantization level to use.
867	* @param value The input unquantized value.
868	* @param[out] quant_value The quantized value.
869	*/
870	static inline void quantize_and_unquantize_retain_top_two_bits(
871	quant_method quant_level,
872	uint8_t value,
873	uint8_t& quant_value
874	) {
875	int perform_loop;
876	uint8_t quantval;
877
878	do
879	{
880	quantval = quant_color(quant_level, value);
881
882	// Perform looping if the top two bits were modified by quant/unquant
883	perform_loop = (value & `0xC0`) != (quantval & `0xC0`);
884
885	if ((quantval & `0xC0`) > (value & `0xC0`))
886	{
887	// Quant/unquant rounded UP so that the top two bits changed;
888	// decrement the input in hopes that this will avoid rounding up.
889	value--;
890	}
891	else if ((quantval & `0xC0`) < (value & `0xC0`))
892	{
893	// Quant/unquant rounded DOWN so that the top two bits changed;
894	// decrement the input in hopes that this will avoid rounding down.
895	value--;
896	}
897	} while (perform_loop);
898
899	quant_value = quantval;
900	}
901
902	/**
903	* @brief Quantize and unquantize a value ensuring top four bits are the same.
904	*
905	* @param quant_level The quantization level to use.
906	* @param value The input unquantized value.
907	* @param[out] quant_value The quantized value in 0-255 range.
908	*/
909	static inline void quantize_and_unquantize_retain_top_four_bits(
910	quant_method quant_level,
911	uint8_t value,
912	uint8_t& quant_value
913	) {
914	uint8_t perform_loop;
915	uint8_t quantval;
916
917	do
918	{
919	quantval = quant_color(quant_level, value);
920	// Perform looping if the top four bits were modified by quant/unquant
921	perform_loop = (value & `0xF0`) != (quantval & `0xF0`);
922
923	if ((quantval & `0xF0`) > (value & `0xF0`))
924	{
925	// Quant/unquant rounded UP so that the top four bits changed;
926	// decrement the input value in hopes that this will avoid rounding up.
927	value--;
928	}
929	else if ((quantval & `0xF0`) < (value & `0xF0`))
930	{
931	// Quant/unquant rounded DOWN so that the top four bits changed;
932	// decrement the input value in hopes that this will avoid rounding down.
933	value--;
934	}
935	} while (perform_loop);
936
937	quant_value = quantval;
938	}
939
940	/**
941	* @brief Quantize a HDR RGB color using RGB + offset.
942	*
	* The offset (lane 3) is first added to each of the RGB lanes and the result is clamped to
	* [0, 65535]. The largest component is swapped into lane 0, then each of the five candidate
	* modes (0-4) is tried in ascending order; the first mode whose range checks all pass is written
	* to @c output. If no mode passes, a fallback encoding (called mode #5 in the body) is used.
	*
943	* @param color The input unquantized color endpoint and offset.
944	* @param[out] output The output endpoints, returned as packed RGBS with some mode bits.
945	* @param quant_level The quantization level to use.
946	*/
947	static void quantize_hdr_rgbo(
948	vfloat4 color,
949	uint8_t output[`4`],
950	quant_method quant_level
951	) {
	// Fold the offset into the three color lanes before any further processing.
952	color.set_lane<`0`>(color.lane<`0`>() + color.lane<`3`>());
953	color.set_lane<`1`>(color.lane<`1`>() + color.lane<`3`>());
954	color.set_lane<`2`>(color.lane<`2`>() + color.lane<`3`>());
955
956	color = clamp(`0.0f`, `65535.0f`, color);
957
	// Keep the unswizzled value; the fallback encoding at the end of the function reads it.
958	vfloat4 color_bak = color;
959
	// Pick the major component. Ties resolve toward the later channel: red is chosen only when
	// strictly greater than both green and blue, green only when strictly greater than blue.
960	int majcomp;
961	if (color.lane<`0`>() > color.lane<`1`>() && color.lane<`0`>() > color.lane<`2`>())
962	{
963	majcomp = `0`; // red is largest component
964	}
965	else if (color.lane<`1`>() > color.lane<`2`>())
966	{
967	majcomp = `1`; // green is largest component
968	}
969	else
970	{
971	majcomp = `2`; // blue is largest component
972	}
973
974	// swap around the red component and the largest component.
975	switch (majcomp)
976	{
977	case `1`:
978	color = color.swz<`1`, `0`, `2`, `3`>();
979	break;
980	case `2`:
981	color = color.swz<`2`, `1`, `0`, `3`>();
982	break;
983	default:
984	break;
985	}
986
	// Per-mode bit counts. Column 1 gives the G/B integer cutoff and column 2 the S integer
	// cutoff used in the loop below; column 0 is not read in this function.
987	static const int mode_bits[`5`][`3`] {
988	{`11`, `5`, `7`},
989	{`11`, `6`, `5`},
990	{`10`, `5`, `8`},
991	{`9`, `6`, `7`},
992	{`8`, `7`, `6`}
993	};
994
	// Per-mode pre-check limits: column 0 is compared against g_base and b_base, column 1
	// against s_base.
995	static const float mode_cutoffs[`5`][`2`] {
996	{`1024`, `4096`},
997	{`2048`, `1024`},
998	{`2048`, `16384`},
999	{`8192`, `16384`},
1000	{`32768`, `16384`}
1001	};
1002
	// Multiplier that converts a mode-scaled integer back to the input range; each entry is the
	// reciprocal of the matching mode_scales entry.
1003	static const float mode_rscales[`5`] {
1004	`32.0f`,
1005	`32.0f`,
1006	`64.0f`,
1007	`128.0f`,
1008	`256.0f`,
1009	};
1010
	// Multiplier applied to a value before it is rounded to an integer in each mode.
1011	static const float mode_scales[`5`] {
1012	`1.0f` / `32.0f`,
1013	`1.0f` / `32.0f`,
1014	`1.0f` / `64.0f`,
1015	`1.0f` / `128.0f`,
1016	`1.0f` / `256.0f`,
1017	};
1018
	// After the swizzle lane 0 holds the major component; G and B are expressed as the distance
	// below it, and S is the offset lane.
1019	float r_base = color.lane<`0`>();
1020	float g_base = color.lane<`0`>() - color.lane<`1`>() ;
1021	float b_base = color.lane<`0`>() - color.lane<`2`>() ;
1022	float s_base = color.lane<`3`>() ;
1023
	// Try the modes in ascending order; any failed range check moves on to the next mode.
1024	for (int mode = `0`; mode < `5`; mode++)
1025	{
1026	if (g_base > mode_cutoffs[mode][`0`] \|\| b_base > mode_cutoffs[mode][`0`] \|\| s_base > mode_cutoffs[mode][`1`])
1027	{
1028	continue;
1029	}
1030
1031	// Encode the mode into a 4-bit vector
1032	int mode_enc = mode < `4` ? (mode \| (majcomp << `2`)) : (majcomp \| `0xC`);
1033
1034	float mode_scale = mode_scales[mode];
1035	float mode_rscale = mode_rscales[mode];
1036
1037	int gb_intcutoff = `1` << mode_bits[mode][`1`];
1038	int s_intcutoff = `1` << mode_bits[mode][`2`];
1039
1040	// Quantize and unquantize R
1041	int r_intval = astc::flt2int_rtn(r_base * mode_scale);
1042
1043	int r_lowbits = r_intval & `0x3f`;
1044
	// The low two bits of the mode encoding occupy the top two bits of the first output byte.
1045	r_lowbits \|= (mode_enc & `3`) << `6`;
1046
1047	uint8_t r_quantval;
1048	quantize_and_unquantize_retain_top_two_bits(
1049	quant_level, static_cast<uint8_t>(r_lowbits), r_quantval);
1050
	// Fold the quantized low six bits back in so later steps see the value that is stored.
1051	r_intval = (r_intval & ~`0x3f`) \| (r_quantval & `0x3f`);
1052	float r_fval = static_cast<float>(r_intval) * mode_rscale;
1053
1054	// Recompute G and B, then quantize and unquantize them
1055	float g_fval = r_fval - color.lane<`1`>() ;
1056	float b_fval = r_fval - color.lane<`2`>() ;
1057
1058	g_fval = astc::clamp(g_fval, `0.0f`, `65535.0f`);
1059	b_fval = astc::clamp(b_fval, `0.0f`, `65535.0f`);
1060
1061	int g_intval = astc::flt2int_rtn(g_fval * mode_scale);
1062	int b_intval = astc::flt2int_rtn(b_fval * mode_scale);
1063
1064	if (g_intval >= gb_intcutoff \|\| b_intval >= gb_intcutoff)
1065	{
1066	continue;
1067	}
1068
1069	int g_lowbits = g_intval & `0x1f`;
1070	int b_lowbits = b_intval & `0x1f`;
1071
1072	int bit0 = `0`;
1073	int bit1 = `0`;
1074	int bit2 = `0`;
1075	int bit3 = `0`;
1076
	// Select the mode-dependent spare bits that are packed into the G and B bytes.
	// NOTE: mode is always in the range 0-4 inside this loop, so the `case 5` labels in the
	// switches below are never reached.
1077	switch (mode)
1078	{
1079	case `0`:
1080	case `2`:
1081	bit0 = (r_intval >> `9`) & `1`;
1082	break;
1083	case `1`:
1084	case `3`:
1085	bit0 = (r_intval >> `8`) & `1`;
1086	break;
1087	case `4`:
1088	case `5`:
1089	bit0 = (g_intval >> `6`) & `1`;
1090	break;
1091	}
1092
1093	switch (mode)
1094	{
1095	case `0`:
1096	case `1`:
1097	case `2`:
1098	case `3`:
1099	bit2 = (r_intval >> `7`) & `1`;
1100	break;
1101	case `4`:
1102	case `5`:
1103	bit2 = (b_intval >> `6`) & `1`;
1104	break;
1105	}
1106
1107	switch (mode)
1108	{
1109	case `0`:
1110	case `2`:
1111	bit1 = (r_intval >> `8`) & `1`;
1112	break;
1113	case `1`:
1114	case `3`:
1115	case `4`:
1116	case `5`:
1117	bit1 = (g_intval >> `5`) & `1`;
1118	break;
1119	}
1120
1121	switch (mode)
1122	{
1123	case `0`:
1124	bit3 = (r_intval >> `10`) & `1`;
1125	break;
1126	case `2`:
1127	bit3 = (r_intval >> `6`) & `1`;
1128	break;
1129	case `1`:
1130	case `3`:
1131	case `4`:
1132	case `5`:
1133	bit3 = (b_intval >> `5`) & `1`;
1134	break;
1135	}
1136
	// Bits 2 and 3 of the mode encoding become bit 7 of the G and B bytes respectively.
1137	g_lowbits \|= (mode_enc & `0x4`) << `5`;
1138	b_lowbits \|= (mode_enc & `0x8`) << `4`;
1139
1140	g_lowbits \|= bit0 << `6`;
1141	g_lowbits \|= bit1 << `5`;
1142	b_lowbits \|= bit2 << `6`;
1143	b_lowbits \|= bit3 << `5`;
1144
1145	uint8_t g_quantval;
1146	uint8_t b_quantval;
1147
1148	quantize_and_unquantize_retain_top_four_bits(
1149	quant_level, static_cast<uint8_t>(g_lowbits), g_quantval);
1150	quantize_and_unquantize_retain_top_four_bits(
1151	quant_level, static_cast<uint8_t>(b_lowbits), b_quantval);
1152
1153	g_intval = (g_intval & ~`0x1f`) \| (g_quantval & `0x1f`);
1154	b_intval = (b_intval & ~`0x1f`) \| (b_quantval & `0x1f`);
1155
1156	g_fval = static_cast<float>(g_intval) * mode_rscale;
1157	b_fval = static_cast<float>(b_intval) * mode_rscale;
1158
1159	// Recompute the scale value, based on the errors introduced to red, green and blue
1160
1161	// If the error is positive, then the R,G,B errors combined have raised the color
1162	// value overall; as such, the scale value needs to be increased.
1163	float rgb_errorsum = (r_fval - color.lane<`0`>() ) + (r_fval - g_fval - color.lane<`1`>() ) + (r_fval - b_fval - color.lane<`2`>() );
1164
1165	float s_fval = s_base + rgb_errorsum * (`1.0f` / `3.0f`);
1166	s_fval = astc::clamp(s_fval, `0.0f`, `1e9f`);
1167
1168	int s_intval = astc::flt2int_rtn(s_fval * mode_scale);
1169
1170	if (s_intval >= s_intcutoff)
1171	{
1172	continue;
1173	}
1174
1175	int s_lowbits = s_intval & `0x1f`;
1176
	// Every switch below has a default label, so bit4, bit5 and bit6 are always assigned
	// before they are read.
1177	int bit4;
1178	int bit5;
1179	int bit6;
1180	switch (mode)
1181	{
1182	case `1`:
1183	bit6 = (r_intval >> `9`) & `1`;
1184	break;
1185	default:
1186	bit6 = (s_intval >> `5`) & `1`;
1187	break;
1188	}
1189
1190	switch (mode)
1191	{
1192	case `4`:
1193	bit5 = (r_intval >> `7`) & `1`;
1194	break;
1195	case `1`:
1196	bit5 = (r_intval >> `10`) & `1`;
1197	break;
1198	default:
1199	bit5 = (s_intval >> `6`) & `1`;
1200	break;
1201	}
1202
1203	switch (mode)
1204	{
1205	case `2`:
1206	bit4 = (s_intval >> `7`) & `1`;
1207	break;
1208	default:
1209	bit4 = (r_intval >> `6`) & `1`;
1210	break;
1211	}
1212
	// Note the placement: bit6 lands in bit position 5, bit5 in position 6, bit4 in position 7.
1213	s_lowbits \|= bit6 << `5`;
1214	s_lowbits \|= bit5 << `6`;
1215	s_lowbits \|= bit4 << `7`;
1216
1217	uint8_t s_quantval;
1218
1219	quantize_and_unquantize_retain_top_four_bits(
1220	quant_level, static_cast<uint8_t>(s_lowbits), s_quantval);
1221
	// All range checks passed for this mode; emit the four bytes and stop searching.
1222	output[`0`] = r_quantval;
1223	output[`1`] = g_quantval;
1224	output[`2`] = b_quantval;
1225	output[`3`] = s_quantval;
1226	return;
1227	}
1228
1229	// Failed to encode any of the modes above? In that case encode using mode #5
	// This path works from the unswizzled copy taken before the major-component swap.
1230	float vals[`4`];
1231	vals[`0`] = color_bak.lane<`0`>();
1232	vals[`1`] = color_bak.lane<`1`>();
1233	vals[`2`] = color_bak.lane<`2`>();
1234	vals[`3`] = color_bak.lane<`3`>();
1235
1236	int ivals[`4`];
1237	float cvals[`3`];
1238
	// Only the three color lanes are handled here; each is clamped, scaled by 1/512 and rounded,
	// and the value it maps back to is kept in cvals for the error feedback below.
1239	for (int i = `0`; i < `3`; i++)
1240	{
1241	vals[i] = astc::clamp(vals[i], `0.0f`, `65020.0f`);
1242	ivals[i] = astc::flt2int_rtn(vals[i] * (`1.0f` / `512.0f`));
1243	cvals[i] = static_cast<float>(ivals[i]) * `512.0f`;
1244	}
1245
	// Adjust the offset lane by one third of the summed rounding error of the color lanes.
1246	float rgb_errorsum = (cvals[`0`] - vals[`0`]) + (cvals[`1`] - vals[`1`]) + (cvals[`2`] - vals[`2`]);
1247	vals[`3`] += rgb_errorsum * (`1.0f` / `3.0f`);
1248
1249	vals[`3`] = astc::clamp(vals[`3`], `0.0f`, `65020.0f`);
1250	ivals[`3`] = astc::flt2int_rtn(vals[`3`] * (`1.0f` / `512.0f`));
1251
	// Byte 0 carries six bits of the first value with its top two bits forced on; bytes 1 and 2
	// carry seven bits each with bit 7 forced on; byte 3 carries seven bits of the offset plus
	// bit 6 of the first value in its bit 7.
1252	int encvals[`4`];
1253	encvals[`0`] = (ivals[`0`] & `0x3f`) \| `0xC0`;
1254	encvals[`1`] = (ivals[`1`] & `0x7f`) \| `0x80`;
1255	encvals[`2`] = (ivals[`2`] & `0x7f`) \| `0x80`;
1256	encvals[`3`] = (ivals[`3`] & `0x7f`) \| ((ivals[`0`] & `0x40`) << `1`);
1257
1258	for (uint8_t i = `0`; i < `4`; i++)
1259	{
1260	quantize_and_unquantize_retain_top_four_bits(
1261	quant_level, static_cast<uint8_t>(encvals[i]), output[i]);
1262	}
1263
1264	return;
1265	}
1266
1267	/**
1268	* @brief Quantize a HDR RGB color using direct RGB encoding.
1269	*
	* Both endpoints are clamped to [0, 65535] and swizzled so that the largest component of
	* @c color1 sits in lane 0. The eight candidate modes are then tried from mode 7 down to mode 0;
	* the first mode whose range checks all pass is written to @c output. If no mode passes, a flat
	* fallback encoding built from the unswizzled endpoints is used.
	*
1270	* @param color0 The input unquantized color0 endpoint.
1271	* @param color1 The input unquantized color1 endpoint.
1272	* @param[out] output The output endpoints, returned as packed RGB+RGB pairs with mode bits.
1273	* @param quant_level The quantization level to use.
1274	*/
1275	static void quantize_hdr_rgb(
1276	vfloat4 color0,
1277	vfloat4 color1,
1278	uint8_t output[`6`],
1279	quant_method quant_level
1280	) {
1281	// Note: color.lane<3> is not used so we can ignore it
1282	color0 = clamp(`0.0f`, `65535.0f`, color0);
1283	color1 = clamp(`0.0f`, `65535.0f`, color1);
1284
	// Keep the unswizzled endpoints; the flat fallback at the end of the function reads them.
1285	vfloat4 color0_bak = color0;
1286	vfloat4 color1_bak = color1;
1287
	// The major component is chosen from color1 only. Ties resolve toward the later channel:
	// red needs to be strictly greater than both others, green strictly greater than blue.
1288	int majcomp;
1289	if (color1.lane<`0`>() > color1.lane<`1`>() && color1.lane<`0`>() > color1.lane<`2`>())
1290	{
1291	majcomp = `0`;
1292	}
1293	else if (color1.lane<`1`>() > color1.lane<`2`>())
1294	{
1295	majcomp = `1`;
1296	}
1297	else
1298	{
1299	majcomp = `2`;
1300	}
1301
1302	// Swizzle the components
	// The same swap is applied to both endpoints so their lanes stay paired.
1303	switch (majcomp)
1304	{
1305	case `1`: // red-green swap
1306	color0 = color0.swz<`1`, `0`, `2`, `3`>();
1307	color1 = color1.swz<`1`, `0`, `2`, `3`>();
1308	break;
1309	case `2`: // red-blue swap
1310	color0 = color0.swz<`2`, `1`, `0`, `3`>();
1311	color1 = color1.swz<`2`, `1`, `0`, `3`>();
1312	break;
1313	default:
1314	break;
1315	}
1316
	// color1 was already clamped to this range above, so this clamp does not change the value.
1317	float a_base = color1.lane<`0`>();
1318	a_base = astc::clamp(a_base, `0.0f`, `65535.0f`);
1319
	// Derived quantities, all relative to A (the major component of color1):
	//   B0/B1 - how far the other two color1 components sit below A
	//   C     - how far the major component of color0 sits below A
	//   D0/D1 - residual for the other two color0 components after removing B and C
1320	float b0_base = a_base - color1.lane<`1`>();
1321	float b1_base = a_base - color1.lane<`2`>();
1322	float c_base = a_base - color0.lane<`0`>();
1323	float d0_base = a_base - b0_base - c_base - color0.lane<`1`>();
1324	float d1_base = a_base - b1_base - c_base - color0.lane<`2`>();
1325
1326	// Number of bits in the various fields in the various modes
	// Columns 1, 2 and 3 give the B, C and D integer cutoffs used in the loop below; column 0 is
	// not read in this function.
1327	static const int mode_bits[`8`][`4`] {
1328	{`9`, `7`, `6`, `7`},
1329	{`9`, `8`, `6`, `6`},
1330	{`10`, `6`, `7`, `7`},
1331	{`10`, `7`, `7`, `6`},
1332	{`11`, `8`, `6`, `5`},
1333	{`11`, `6`, `8`, `6`},
1334	{`12`, `7`, `7`, `5`},
1335	{`12`, `6`, `7`, `6`}
1336	};
1337
1338	// Cutoffs to use for the computed values of a,b,c,d, assuming the
1339	// range 0..65535 are LNS values corresponding to fp16.
	// Columns 0, 1 and 2 are read below as the B, C and D cutoffs; column 3 is not read in this
	// function.
1340	static const float mode_cutoffs[`8`][`4`] {
1341	{`16384`, `8192`, `8192`, `8`}, // mode 0: 9,7,6,7
1342	{`32768`, `8192`, `4096`, `8`}, // mode 1: 9,8,6,6
1343	{`4096`, `8192`, `4096`, `4`}, // mode 2: 10,6,7,7
1344	{`8192`, `8192`, `2048`, `4`}, // mode 3: 10,7,7,6
1345	{`8192`, `2048`, `512`, `2`}, // mode 4: 11,8,6,5
1346	{`2048`, `8192`, `1024`, `2`}, // mode 5: 11,6,8,6
1347	{`2048`, `2048`, `256`, `1`}, // mode 6: 12,7,7,5
1348	{`1024`, `2048`, `512`, `1`}, // mode 7: 12,6,7,6
1349	};
1350
	// Multiplier applied to a value before it is rounded to an integer in each mode.
1351	static const float mode_scales[`8`] {
1352	`1.0f` / `128.0f`,
1353	`1.0f` / `128.0f`,
1354	`1.0f` / `64.0f`,
1355	`1.0f` / `64.0f`,
1356	`1.0f` / `32.0f`,
1357	`1.0f` / `32.0f`,
1358	`1.0f` / `16.0f`,
1359	`1.0f` / `16.0f`,
1360	};
1361
1362	// Scaling factors when going from what was encoded in the mode to 16 bits.
1363	static const float mode_rscales[`8`] {
1364	`128.0f`,
1365	`128.0f`,
1366	`64.0f`,
1367	`64.0f`,
1368	`32.0f`,
1369	`32.0f`,
1370	`16.0f`,
1371	`16.0f`
1372	};
1373
1374	// Try modes one by one, with the highest-precision mode first.
1375	for (int mode = `7`; mode >= `0`; mode--)
1376	{
1377	// For each mode, test if we can in fact accommodate the computed b, c, and d values.
1378	// If we clearly can't, then we skip to the next mode.
1379
1380	float b_cutoff = mode_cutoffs[mode][`0`];
1381	float c_cutoff = mode_cutoffs[mode][`1`];
1382	float d_cutoff = mode_cutoffs[mode][`2`];
1383
1384	if (b0_base > b_cutoff \|\| b1_base > b_cutoff \|\| c_base > c_cutoff \|\| fabsf(d0_base) > d_cutoff \|\| fabsf(d1_base) > d_cutoff)
1385	{
1386	continue;
1387	}
1388
1389	float mode_scale = mode_scales[mode];
1390	float mode_rscale = mode_rscales[mode];
1391
	// D is signed and is range-checked by magnitude further down, hence one bit fewer.
1392	int b_intcutoff = `1` << mode_bits[mode][`1`];
1393	int c_intcutoff = `1` << mode_bits[mode][`2`];
1394	int d_intcutoff = `1` << (mode_bits[mode][`3`] - `1`);
1395
1396	// Quantize and unquantize A, with the assumption that its high bits can be handled safely.
1397	int a_intval = astc::flt2int_rtn(a_base * mode_scale);
1398	int a_lowbits = a_intval & `0xFF`;
1399
1400	int a_quantval = quant_color(quant_level, a_lowbits);
1401	int a_uquantval = a_quantval;
1402	a_intval = (a_intval & ~`0xFF`) \| a_uquantval;
1403	float a_fval = static_cast<float>(a_intval) * mode_rscale;
1404
1405	// Recompute C, then quantize and unquantize it
1406	float c_fval = a_fval - color0.lane<`0`>();
1407	c_fval = astc::clamp(c_fval, `0.0f`, `65535.0f`);
1408
1409	int c_intval = astc::flt2int_rtn(c_fval * mode_scale);
1410
1411	if (c_intval >= c_intcutoff)
1412	{
1413	continue;
1414	}
1415
1416	int c_lowbits = c_intval & `0x3f`;
1417
	// Bit 7 of the C byte carries mode bit 0; bit 6 carries bit 8 of A.
1418	c_lowbits \|= (mode & `1`) << `7`;
1419	c_lowbits \|= (a_intval & `0x100`) >> `2`;
1420
1421	uint8_t c_quantval;
1422
1423	quantize_and_unquantize_retain_top_two_bits(
1424	quant_level, static_cast<uint8_t>(c_lowbits), c_quantval);
1425
1426	c_intval = (c_intval & ~`0x3F`) \| (c_quantval & `0x3F`);
1427	c_fval = static_cast<float>(c_intval) * mode_rscale;
1428
1429	// Recompute B0 and B1, then quantize and unquantize them
1430	float b0_fval = a_fval - color1.lane<`1`>();
1431	float b1_fval = a_fval - color1.lane<`2`>();
1432
1433	b0_fval = astc::clamp(b0_fval, `0.0f`, `65535.0f`);
1434	b1_fval = astc::clamp(b1_fval, `0.0f`, `65535.0f`);
1435	int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale);
1436	int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale);
1437
1438	if (b0_intval >= b_intcutoff \|\| b1_intval >= b_intcutoff)
1439	{
1440	continue;
1441	}
1442
1443	int b0_lowbits = b0_intval & `0x3f`;
1444	int b1_lowbits = b1_intval & `0x3f`;
1445
	// Bit 6 of each B byte carries a mode-dependent spare bit taken from B, A or C.
1446	int bit0 = `0`;
1447	int bit1 = `0`;
1448	switch (mode)
1449	{
1450	case `0`:
1451	case `1`:
1452	case `3`:
1453	case `4`:
1454	case `6`:
1455	bit0 = (b0_intval >> `6`) & `1`;
1456	break;
1457	case `2`:
1458	case `5`:
1459	case `7`:
1460	bit0 = (a_intval >> `9`) & `1`;
1461	break;
1462	}
1463
1464	switch (mode)
1465	{
1466	case `0`:
1467	case `1`:
1468	case `3`:
1469	case `4`:
1470	case `6`:
1471	bit1 = (b1_intval >> `6`) & `1`;
1472	break;
1473	case `2`:
1474	bit1 = (c_intval >> `6`) & `1`;
1475	break;
1476	case `5`:
1477	case `7`:
1478	bit1 = (a_intval >> `10`) & `1`;
1479	break;
1480	}
1481
1482	b0_lowbits \|= bit0 << `6`;
1483	b1_lowbits \|= bit1 << `6`;
1484
	// Bit 7 of the two B bytes carries mode bits 1 and 2.
1485	b0_lowbits \|= ((mode >> `1`) & `1`) << `7`;
1486	b1_lowbits \|= ((mode >> `2`) & `1`) << `7`;
1487
1488	uint8_t b0_quantval;
1489	uint8_t b1_quantval;
1490
1491	quantize_and_unquantize_retain_top_two_bits(
1492	quant_level, static_cast<uint8_t>(b0_lowbits), b0_quantval);
1493	quantize_and_unquantize_retain_top_two_bits(
1494	quant_level, static_cast<uint8_t>(b1_lowbits), b1_quantval);
1495
1496	b0_intval = (b0_intval & ~`0x3f`) \| (b0_quantval & `0x3f`);
1497	b1_intval = (b1_intval & ~`0x3f`) \| (b1_quantval & `0x3f`);
1498	b0_fval = static_cast<float>(b0_intval) * mode_rscale;
1499	b1_fval = static_cast<float>(b1_intval) * mode_rscale;
1500
1501	// Recompute D0 and D1, then quantize and unquantize them
1502	float d0_fval = a_fval - b0_fval - c_fval - color0.lane<`1`>();
1503	float d1_fval = a_fval - b1_fval - c_fval - color0.lane<`2`>();
1504
1505	d0_fval = astc::clamp(d0_fval, -`65535.0f`, `65535.0f`);
1506	d1_fval = astc::clamp(d1_fval, -`65535.0f`, `65535.0f`);
1507
1508	int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale);
1509	int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale);
1510
1511	if (abs(d0_intval) >= d_intcutoff \|\| abs(d1_intval) >= d_intcutoff)
1512	{
1513	continue;
1514	}
1515
	// d0_intval and d1_intval may be negative here; the masks and shifts below operate on the
	// signed int representation directly.
1516	int d0_lowbits = d0_intval & `0x1f`;
1517	int d1_lowbits = d1_intval & `0x1f`;
1518
	// bit4 and bit5 are assigned on every path of the third switch below (it has a default).
1519	int bit2 = `0`;
1520	int bit3 = `0`;
1521	int bit4;
1522	int bit5;
1523	switch (mode)
1524	{
1525	case `0`:
1526	case `2`:
1527	bit2 = (d0_intval >> `6`) & `1`;
1528	break;
1529	case `1`:
1530	case `4`:
1531	bit2 = (b0_intval >> `7`) & `1`;
1532	break;
1533	case `3`:
1534	bit2 = (a_intval >> `9`) & `1`;
1535	break;
1536	case `5`:
1537	bit2 = (c_intval >> `7`) & `1`;
1538	break;
1539	case `6`:
1540	case `7`:
1541	bit2 = (a_intval >> `11`) & `1`;
1542	break;
1543	}
1544	switch (mode)
1545	{
1546	case `0`:
1547	case `2`:
1548	bit3 = (d1_intval >> `6`) & `1`;
1549	break;
1550	case `1`:
1551	case `4`:
1552	bit3 = (b1_intval >> `7`) & `1`;
1553	break;
1554	case `3`:
1555	case `5`:
1556	case `6`:
1557	case `7`:
1558	bit3 = (c_intval >> `6`) & `1`;
1559	break;
1560	}
1561
1562	switch (mode)
1563	{
1564	case `4`:
1565	case `6`:
1566	bit4 = (a_intval >> `9`) & `1`;
1567	bit5 = (a_intval >> `10`) & `1`;
1568	break;
1569	default:
1570	bit4 = (d0_intval >> `5`) & `1`;
1571	bit5 = (d1_intval >> `5`) & `1`;
1572	break;
1573	}
1574
1575	d0_lowbits \|= bit2 << `6`;
1576	d1_lowbits \|= bit3 << `6`;
1577	d0_lowbits \|= bit4 << `5`;
1578	d1_lowbits \|= bit5 << `5`;
1579
	// Bit 7 of the two D bytes carries the two bits of the major component index.
1580	d0_lowbits \|= (majcomp & `1`) << `7`;
1581	d1_lowbits \|= ((majcomp >> `1`) & `1`) << `7`;
1582
1583	uint8_t d0_quantval;
1584	uint8_t d1_quantval;
1585
1586	quantize_and_unquantize_retain_top_four_bits(
1587	quant_level, static_cast<uint8_t>(d0_lowbits), d0_quantval);
1588	quantize_and_unquantize_retain_top_four_bits(
1589	quant_level, static_cast<uint8_t>(d1_lowbits), d1_quantval);
1590
	// All range checks passed for this mode; emit the six bytes and stop searching.
1591	output[`0`] = static_cast<uint8_t>(a_quantval);
1592	output[`1`] = c_quantval;
1593	output[`2`] = b0_quantval;
1594	output[`3`] = b1_quantval;
1595	output[`4`] = d0_quantval;
1596	output[`5`] = d1_quantval;
1597	return;
1598	}
1599
1600	// If none of the modes fit we will use a flat representation for storing data, using 8 bits
1601	// for red and green, and 7 bits for blue. This gives color accuracy roughly similar to LDR
1602	// 4:4:3 which is not at all great but usable. This representation is used if the light color is
1603	// more than 4x the color value of the dark color.
	// The values are interleaved per channel (color0 then color1) and come from the unswizzled
	// copies taken before the major-component swap.
1604	float vals[`6`];
1605	vals[`0`] = color0_bak.lane<`0`>();
1606	vals[`1`] = color1_bak.lane<`0`>();
1607	vals[`2`] = color0_bak.lane<`1`>();
1608	vals[`3`] = color1_bak.lane<`1`>();
1609	vals[`4`] = color0_bak.lane<`2`>();
1610	vals[`5`] = color1_bak.lane<`2`>();
1611
1612	for (int i = `0`; i < `6`; i++)
1613	{
1614	vals[i] = astc::clamp(vals[i], `0.0f`, `65020.0f`);
1615	}
1616
	// First four values (lanes 0 and 1 of both endpoints): scale by 1/256 and quantize directly.
	// The expression evaluates left to right, i.e. (vals[i] * 1.0f) / 256.0f.
1617	for (int i = `0`; i < `4`; i++)
1618	{
1619	int idx = astc::flt2int_rtn(vals[i] * `1.0f` / `256.0f`);
1620	output[i] = quant_color(quant_level, idx);
1621	}
1622
	// Last two values (lane 2 of both endpoints): scale by 1/512 and add 128, which sets bit 7 of
	// the byte; the quantizer is then asked to keep the top two bits intact.
1623	for (int i = `4`; i < `6`; i++)
1624	{
1625	int idx = astc::flt2int_rtn(vals[i] * `1.0f` / `512.0f`) + `128`;
1626	quantize_and_unquantize_retain_top_two_bits(
1627	quant_level, static_cast<uint8_t>(idx), output[i]);
1628	}
1629
1630	return;
1631	}
1632
1633	/**
1634	* @brief Quantize a HDR RGB + LDR A color using direct RGBA encoding.
1635	*
1636	* @param color0 The input unquantized color0 endpoint.
1637	* @param color1 The input unquantized color1 endpoint.
1638	* @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1639	* @param quant_level The quantization level to use.
1640	*/
1641	static void quantize_hdr_rgb_ldr_alpha(
1642	vfloat4 color0,
1643	vfloat4 color1,
1644	uint8_t output[`8`],
1645	quant_method quant_level
1646	) {
1647	float scale = `1.0f` / `257.0f`;
1648
1649	float a0 = astc::clamp255f(color0.lane<`3`>() * scale);
1650	float a1 = astc::clamp255f(color1.lane<`3`>() * scale);
1651
1652	output[`6`] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
1653	output[`7`] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
1654
1655	quantize_hdr_rgb(color0, color1, output, quant_level);
1656	}
1657
1658	/**
1659	* @brief Quantize a HDR L color using the large range encoding.
1660	*
1661	* @param color0 The input unquantized color0 endpoint.
1662	* @param color1 The input unquantized color1 endpoint.
1663	* @param[out] output The output endpoints, returned as packed (l0, l1).
1664	* @param quant_level The quantization level to use.
1665	*/
1666	static void quantize_hdr_luminance_large_range(
1667	vfloat4 color0,
1668	vfloat4 color1,
1669	uint8_t output[`2`],
1670	quant_method quant_level
1671	) {
1672	float lum0 = hadd_rgb_s(color0) * (`1.0f` / `3.0f`);
1673	float lum1 = hadd_rgb_s(color1) * (`1.0f` / `3.0f`);
1674
1675	if (lum1 < lum0)
1676	{
1677	float avg = (lum0 + lum1) * `0.5f`;
1678	lum0 = avg;
1679	lum1 = avg;
1680	}
1681
1682	int ilum1 = astc::flt2int_rtn(lum1);
1683	int ilum0 = astc::flt2int_rtn(lum0);
1684
1685	// Find the closest encodable point in the upper half of the code-point space
1686	int upper_v0 = (ilum0 + `128`) >> `8`;
1687	int upper_v1 = (ilum1 + `128`) >> `8`;
1688
1689	upper_v0 = astc::clamp(upper_v0, `0`, `255`);
1690	upper_v1 = astc::clamp(upper_v1, `0`, `255`);
1691
1692	// Find the closest encodable point in the lower half of the code-point space
1693	int lower_v0 = (ilum1 + `256`) >> `8`;
1694	int lower_v1 = ilum0 >> `8`;
1695
1696	lower_v0 = astc::clamp(lower_v0, `0`, `255`);
1697	lower_v1 = astc::clamp(lower_v1, `0`, `255`);
1698
1699	// Determine the distance between the point in code-point space and the input value
1700	int upper0_dec = upper_v0 << `8`;
1701	int upper1_dec = upper_v1 << `8`;
1702	int lower0_dec = (lower_v1 << `8`) + `128`;
1703	int lower1_dec = (lower_v0 << `8`) - `128`;
1704
1705	int upper0_diff = upper0_dec - ilum0;
1706	int upper1_diff = upper1_dec - ilum1;
1707	int lower0_diff = lower0_dec - ilum0;
1708	int lower1_diff = lower1_dec - ilum1;
1709
1710	int upper_error = (upper0_diff * upper0_diff) + (upper1_diff * upper1_diff);
1711	int lower_error = (lower0_diff * lower0_diff) + (lower1_diff * lower1_diff);
1712
1713	int v0, v1;
1714	if (upper_error < lower_error)
1715	{
1716	v0 = upper_v0;
1717	v1 = upper_v1;
1718	}
1719	else
1720	{
1721	v0 = lower_v0;
1722	v1 = lower_v1;
1723	}
1724
1725	// OK; encode
1726	output[`0`] = quant_color(quant_level, v0);
1727	output[`1`] = quant_color(quant_level, v1);
1728	}
1729
1730	/**
1731	* @brief Quantize a HDR L color using the small range encoding.
1732	*
1733	* @param color0 The input unquantized color0 endpoint.
1734	* @param color1 The input unquantized color1 endpoint.
1735	* @param[out] output The output endpoints, returned as packed (l0, l1) with mode bits.
1736	* @param quant_level The quantization level to use.
1737	*
1738	* @return Returns @c false on failure, @c true on success.
1739	*/
1740	static bool try_quantize_hdr_luminance_small_range(
1741	vfloat4 color0,
1742	vfloat4 color1,
1743	uint8_t output[`2`],
1744	quant_method quant_level
1745	) {
1746	float lum0 = hadd_rgb_s(color0) * (`1.0f` / `3.0f`);
1747	float lum1 = hadd_rgb_s(color1) * (`1.0f` / `3.0f`);
1748
1749	if (lum1 < lum0)
1750	{
1751	float avg = (lum0 + lum1) * `0.5f`;
1752	lum0 = avg;
1753	lum1 = avg;
1754	}
1755
1756	int ilum1 = astc::flt2int_rtn(lum1);
1757	int ilum0 = astc::flt2int_rtn(lum0);
1758
1759	// Difference of more than a factor-of-2 results in immediate failure
1760	if (ilum1 - ilum0 > `2048`)
1761	{
1762	return false;
1763	}
1764
1765	int lowval, highval, diffval;
1766	int v0, v1;
1767	int v0e, v1e;
1768	int v0d, v1d;
1769
1770	// Try to encode the high-precision submode
1771	lowval = (ilum0 + `16`) >> `5`;
1772	highval = (ilum1 + `16`) >> `5`;
1773
1774	lowval = astc::clamp(lowval, `0`, `2047`);
1775	highval = astc::clamp(highval, `0`, `2047`);
1776
1777	v0 = lowval & `0x7F`;
1778	v0e = quant_color(quant_level, v0);
1779	v0d = v0e;
1780
1781	if (v0d < `0x80`)
1782	{
1783	lowval = (lowval & ~`0x7F`) \| v0d;
1784	diffval = highval - lowval;
1785	if (diffval >= `0` && diffval <= `15`)
1786	{
1787	v1 = ((lowval >> `3`) & `0xF0`) \| diffval;
1788	v1e = quant_color(quant_level, v1);
1789	v1d = v1e;
1790	if ((v1d & `0xF0`) == (v1 & `0xF0`))
1791	{
1792	output[`0`] = static_cast<uint8_t>(v0e);
1793	output[`1`] = static_cast<uint8_t>(v1e);
1794	return true;
1795	}
1796	}
1797	}
1798
1799	// Try to encode the low-precision submode
1800	lowval = (ilum0 + `32`) >> `6`;
1801	highval = (ilum1 + `32`) >> `6`;
1802
1803	lowval = astc::clamp(lowval, `0`, `1023`);
1804	highval = astc::clamp(highval, `0`, `1023`);
1805
1806	v0 = (lowval & `0x7F`) \| `0x80`;
1807	v0e = quant_color(quant_level, v0);
1808	v0d = v0e;
1809	if ((v0d & `0x80`) == `0`)
1810	{
1811	return false;
1812	}
1813
1814	lowval = (lowval & ~`0x7F`) \| (v0d & `0x7F`);
1815	diffval = highval - lowval;
1816	if (diffval < `0` \|\| diffval > `31`)
1817	{
1818	return false;
1819	}
1820
1821	v1 = ((lowval >> `2`) & `0xE0`) \| diffval;
1822	v1e = quant_color(quant_level, v1);
1823	v1d = v1e;
1824	if ((v1d & `0xE0`) != (v1 & `0xE0`))
1825	{
1826	return false;
1827	}
1828
1829	output[`0`] = static_cast<uint8_t>(v0e);
1830	output[`1`] = static_cast<uint8_t>(v1e);
1831	return true;
1832	}
1833
1834	/**
1835	* @brief Quantize a HDR A color using either delta or direct RGBA encoding.
1836	*
1837	* @param alpha0 The input unquantized color0 endpoint.
1838	* @param alpha1 The input unquantized color1 endpoint.
1839	* @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1840	* @param quant_level The quantization level to use.
1841	*/
1842	static void quantize_hdr_alpha(
1843	float alpha0,
1844	float alpha1,
1845	uint8_t output[`2`],
1846	quant_method quant_level
1847	) {
1848	alpha0 = astc::clamp(alpha0, `0.0f`, `65280.0f`);
1849	alpha1 = astc::clamp(alpha1, `0.0f`, `65280.0f`);
1850
1851	int ialpha0 = astc::flt2int_rtn(alpha0);
1852	int ialpha1 = astc::flt2int_rtn(alpha1);
1853
1854	int val0, val1, diffval;
1855	int v6, v7;
1856	int v6e, v7e;
1857	int v6d, v7d;
1858
1859	// Try to encode one of the delta submodes, in decreasing-precision order
1860	for (int i = `2`; i >= `0`; i--)
1861	{
1862	val0 = (ialpha0 + (`128` >> i)) >> (`8` - i);
1863	val1 = (ialpha1 + (`128` >> i)) >> (`8` - i);
1864
1865	v6 = (val0 & `0x7F`) \| ((i & `1`) << `7`);
1866	v6e = quant_color(quant_level, v6);
1867	v6d = v6e;
1868
1869	if ((v6 ^ v6d) & `0x80`)
1870	{
1871	continue;
1872	}
1873
1874	val0 = (val0 & ~`0x7f`) \| (v6d & `0x7f`);
1875	diffval = val1 - val0;
1876	int cutoff = `32` >> i;
1877	int mask = `2` * cutoff - `1`;
1878
1879	if (diffval < -cutoff \|\| diffval >= cutoff)
1880	{
1881	continue;
1882	}
1883
1884	v7 = ((i & `2`) << `6`) \| ((val0 >> `7`) << (`6` - i)) \| (diffval & mask);
1885	v7e = quant_color(quant_level, v7);
1886	v7d = v7e;
1887
1888	static const int testbits[`3`] { `0xE0`, `0xF0`, `0xF8` };
1889
1890	if ((v7 ^ v7d) & testbits[i])
1891	{
1892	continue;
1893	}
1894
1895	output[`0`] = static_cast<uint8_t>(v6e);
1896	output[`1`] = static_cast<uint8_t>(v7e);
1897	return;
1898	}
1899
1900	// Could not encode any of the delta modes; instead encode a flat value
1901	val0 = (ialpha0 + `256`) >> `9`;
1902	val1 = (ialpha1 + `256`) >> `9`;
1903	v6 = val0 \| `0x80`;
1904	v7 = val1 \| `0x80`;
1905
1906	output[`0`] = quant_color(quant_level, v6);
1907	output[`1`] = quant_color(quant_level, v7);
1908
1909	return;
1910	}
1911
1912	/**
1913	* @brief Quantize a HDR RGBA color using either delta or direct RGBA encoding.
1914	*
1915	* @param color0 The input unquantized color0 endpoint.
1916	* @param color1 The input unquantized color1 endpoint.
1917	* @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1918	* @param quant_level The quantization level to use.
1919	*/
1920	static void quantize_hdr_rgb_alpha(
1921	vfloat4 color0,
1922	vfloat4 color1,
1923	uint8_t output[`8`],
1924	quant_method quant_level
1925	) {
1926	quantize_hdr_rgb(color0, color1, output, quant_level);
1927	quantize_hdr_alpha(color0.lane<`3`>(), color1.lane<`3`>(), output + `6`, quant_level);
1928	}
1929
1930	/ See header for documentation. /
1931	uint8_t pack_color_endpoints(
1932	vfloat4 color0,
1933	vfloat4 color1,
1934	vfloat4 rgbs_color,
1935	vfloat4 rgbo_color,
1936	int format,
1937	uint8_t* output,
1938	quant_method quant_level
1939	) {
1940	assert(QUANT_6 <= quant_level && quant_level <= QUANT_256);
1941
1942	// We do not support negative colors
1943	color0 = max(color0, `0.0f`);
1944	color1 = max(color1, `0.0f`);
1945
1946	uint8_t retval = `0`;
1947
1948	switch (format)
1949	{
1950	case FMT_RGB:
1951	if (quant_level <= QUANT_160)
1952	{
1953	if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level))
1954	{
1955	retval = FMT_RGB_DELTA;
1956	break;
1957	}
1958	if (try_quantize_rgb_delta(color0, color1, output, quant_level))
1959	{
1960	retval = FMT_RGB_DELTA;
1961	break;
1962	}
1963	}
1964	if (quant_level < QUANT_256 && try_quantize_rgb_blue_contract(color0, color1, output, quant_level))
1965	{
1966	retval = FMT_RGB;
1967	break;
1968	}
1969	quantize_rgb(color0, color1, output, quant_level);
1970	retval = FMT_RGB;
1971	break;
1972
1973	case FMT_RGBA:
1974	if (quant_level <= QUANT_160)
1975	{
1976	if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level))
1977	{
1978	retval = FMT_RGBA_DELTA;
1979	break;
1980	}
1981	if (try_quantize_rgba_delta(color0, color1, output, quant_level))
1982	{
1983	retval = FMT_RGBA_DELTA;
1984	break;
1985	}
1986	}
1987	if (quant_level < QUANT_256 && try_quantize_rgba_blue_contract(color0, color1, output, quant_level))
1988	{
1989	retval = FMT_RGBA;
1990	break;
1991	}
1992	quantize_rgba(color0, color1, output, quant_level);
1993	retval = FMT_RGBA;
1994	break;
1995
1996	case FMT_RGB_SCALE:
1997	quantize_rgbs(rgbs_color, output, quant_level);
1998	retval = FMT_RGB_SCALE;
1999	break;
2000
2001	case FMT_HDR_RGB_SCALE:
2002	quantize_hdr_rgbo(rgbo_color, output, quant_level);
2003	retval = FMT_HDR_RGB_SCALE;
2004	break;
2005
2006	case FMT_HDR_RGB:
2007	quantize_hdr_rgb(color0, color1, output, quant_level);
2008	retval = FMT_HDR_RGB;
2009	break;
2010
2011	case FMT_RGB_SCALE_ALPHA:
2012	quantize_rgbs_alpha(color0, color1, rgbs_color, output, quant_level);
2013	retval = FMT_RGB_SCALE_ALPHA;
2014	break;
2015
2016	case FMT_HDR_LUMINANCE_SMALL_RANGE:
2017	case FMT_HDR_LUMINANCE_LARGE_RANGE:
2018	if (try_quantize_hdr_luminance_small_range(color0, color1, output, quant_level))
2019	{
2020	retval = FMT_HDR_LUMINANCE_SMALL_RANGE;
2021	break;
2022	}
2023	quantize_hdr_luminance_large_range(color0, color1, output, quant_level);
2024	retval = FMT_HDR_LUMINANCE_LARGE_RANGE;
2025	break;
2026
2027	case FMT_LUMINANCE:
2028	quantize_luminance(color0, color1, output, quant_level);
2029	retval = FMT_LUMINANCE;
2030	break;
2031
2032	case FMT_LUMINANCE_ALPHA:
2033	if (quant_level <= `18`)
2034	{
2035	if (try_quantize_luminance_alpha_delta(color0, color1, output, quant_level))
2036	{
2037	retval = FMT_LUMINANCE_ALPHA_DELTA;
2038	break;
2039	}
2040	}
2041	quantize_luminance_alpha(color0, color1, output, quant_level);
2042	retval = FMT_LUMINANCE_ALPHA;
2043	break;
2044
2045	case FMT_HDR_RGB_LDR_ALPHA:
2046	quantize_hdr_rgb_ldr_alpha(color0, color1, output, quant_level);
2047	retval = FMT_HDR_RGB_LDR_ALPHA;
2048	break;
2049
2050	case FMT_HDR_RGBA:
2051	quantize_hdr_rgb_alpha(color0, color1, output, quant_level);
2052	retval = FMT_HDR_RGBA;
2053	break;
2054	}
2055
2056	return retval;
2057	}
2058
2059	#endif
2060

Browse the source code of Godot/thirdparty/astcenc/astcenc_color_quantize.cpp