1 | // SPDX-License-Identifier: Apache-2.0 |
2 | // ---------------------------------------------------------------------------- |
3 | // Copyright 2011-2022 Arm Limited |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | // use this file except in compliance with the License. You may obtain a copy |
7 | // of the License at: |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, software |
12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | // License for the specific language governing permissions and limitations |
15 | // under the License. |
16 | // ---------------------------------------------------------------------------- |
17 | |
18 | /** |
19 | * @brief Functions for creating in-memory ASTC image structures. |
20 | */ |
21 | |
22 | #include <cassert> |
23 | #include <cstring> |
24 | |
25 | #include "astcenc_internal.h" |
26 | |
27 | /** |
28 | * @brief Loader pipeline function type for data fetch from memory. |
29 | */ |
30 | using pixel_loader = vfloat4(*)(const void*, int); |
31 | |
32 | /** |
33 | * @brief Loader pipeline function type for swizzling data in a vector. |
34 | */ |
35 | using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&); |
36 | |
37 | /** |
38 | * @brief Loader pipeline function type for converting data in a vector to LNS. |
39 | */ |
40 | using pixel_converter = vfloat4(*)(vfloat4, vmask4); |
41 | |
42 | /** |
43 | * @brief Load a 8-bit UNORM texel from a data array. |
44 | * |
45 | * @param data The data pointer. |
46 | * @param base_offset The index offset to the start of the pixel. |
47 | */ |
48 | static vfloat4 load_texel_u8( |
49 | const void* data, |
50 | int base_offset |
51 | ) { |
52 | const uint8_t* data8 = static_cast<const uint8_t*>(data); |
53 | return int_to_float(vint4(data8 + base_offset)) / 255.0f; |
54 | } |
55 | |
56 | /** |
57 | * @brief Load a 16-bit fp16 texel from a data array. |
58 | * |
59 | * @param data The data pointer. |
60 | * @param base_offset The index offset to the start of the pixel. |
61 | */ |
62 | static vfloat4 load_texel_f16( |
63 | const void* data, |
64 | int base_offset |
65 | ) { |
66 | const uint16_t* data16 = static_cast<const uint16_t*>(data); |
67 | int r = data16[base_offset ]; |
68 | int g = data16[base_offset + 1]; |
69 | int b = data16[base_offset + 2]; |
70 | int a = data16[base_offset + 3]; |
71 | return float16_to_float(vint4(r, g, b, a)); |
72 | } |
73 | |
74 | /** |
75 | * @brief Load a 32-bit float texel from a data array. |
76 | * |
77 | * @param data The data pointer. |
78 | * @param base_offset The index offset to the start of the pixel. |
79 | */ |
80 | static vfloat4 load_texel_f32( |
81 | const void* data, |
82 | int base_offset |
83 | ) { |
84 | const float* data32 = static_cast<const float*>(data); |
85 | return vfloat4(data32 + base_offset); |
86 | } |
87 | |
88 | /** |
89 | * @brief Dummy no-op swizzle function. |
90 | * |
91 | * @param data The source RGBA vector to swizzle. |
92 | * @param swz The swizzle to use. |
93 | */ |
94 | static vfloat4 swz_texel_skip( |
95 | vfloat4 data, |
96 | const astcenc_swizzle& swz |
97 | ) { |
98 | (void)swz; |
99 | return data; |
100 | } |
101 | |
102 | /** |
103 | * @brief Swizzle a texel into a new arrangement. |
104 | * |
105 | * @param data The source RGBA vector to swizzle. |
106 | * @param swz The swizzle to use. |
107 | */ |
108 | static vfloat4 swz_texel( |
109 | vfloat4 data, |
110 | const astcenc_swizzle& swz |
111 | ) { |
112 | alignas(16) float datas[6]; |
113 | |
114 | storea(data, datas); |
115 | datas[ASTCENC_SWZ_0] = 0.0f; |
116 | datas[ASTCENC_SWZ_1] = 1.0f; |
117 | |
118 | return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]); |
119 | } |
120 | |
121 | /** |
122 | * @brief Encode a texel that is entirely LDR linear. |
123 | * |
124 | * @param data The RGBA data to encode. |
125 | * @param lns_mask The mask for the HDR channels than need LNS encoding. |
126 | */ |
127 | static vfloat4 encode_texel_unorm( |
128 | vfloat4 data, |
129 | vmask4 lns_mask |
130 | ) { |
131 | (void)lns_mask; |
132 | return data * 65535.0f; |
133 | } |
134 | |
135 | /** |
136 | * @brief Encode a texel that includes at least some HDR LNS texels. |
137 | * |
138 | * @param data The RGBA data to encode. |
139 | * @param lns_mask The mask for the HDR channels than need LNS encoding. |
140 | */ |
141 | static vfloat4 encode_texel_lns( |
142 | vfloat4 data, |
143 | vmask4 lns_mask |
144 | ) { |
145 | vfloat4 datav_unorm = data * 65535.0f; |
146 | vfloat4 datav_lns = float_to_lns(data); |
147 | return select(datav_unorm, datav_lns, lns_mask); |
148 | } |
149 | |
/* See header for documentation. */
void load_image_block(
	astcenc_profile decode_mode,
	const astcenc_image& img,
	image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	unsigned int xsize = img.dim_x;
	unsigned int ysize = img.dim_y;
	unsigned int zsize = img.dim_z;

	blk.xpos = xpos;
	blk.ypos = ypos;
	blk.zpos = zpos;

	// True if any non-identity swizzle
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

	int idx = 0;

	// Per-block metadata accumulators; the mean is accumulated pre-scaled by
	// 1/texel_count so no final division is needed
	vfloat4 data_min(1e38f);
	vfloat4 data_mean(0.0f);
	vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
	vfloat4 data_max(-1e38f);
	vmask4 grayscalev(true);

	// This works because we impose the same choice everywhere during encode
	uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
	                  (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
	uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
	vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
	vmask4 lns_mask = use_lns != vint4::zero();

	// Set up the function pointers for loading pipeline as needed; the
	// pipeline is load -> swizzle -> convert, selected once per block so the
	// inner loop stays branch-free
	pixel_loader loader = load_texel_u8;
	if (img.data_type == ASTCENC_TYPE_F16)
	{
		loader = load_texel_f16;
	}
	else if (img.data_type == ASTCENC_TYPE_F32)
	{
		loader = load_texel_f32;
	}

	pixel_swizzler swizzler = swz_texel_skip;
	if (needs_swz)
	{
		swizzler = swz_texel;
	}

	pixel_converter converter = encode_texel_unorm;
	if (any(lns_mask))
	{
		converter = encode_texel_lns;
	}

	for (unsigned int z = 0; z < bsd.zdim; z++)
	{
		// Clamp to the last texel for blocks overhanging the image edge
		unsigned int zi = astc::min(zpos + z, zsize - 1);
		void* plane = img.data[zi];

		for (unsigned int y = 0; y < bsd.ydim; y++)
		{
			unsigned int yi = astc::min(ypos + y, ysize - 1);

			for (unsigned int x = 0; x < bsd.xdim; x++)
			{
				unsigned int xi = astc::min(xpos + x, xsize - 1);

				// Offsets are in component units; 4 components per texel
				vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
				datav = swizzler(datav, swz);
				datav = converter(datav, lns_mask);

				// Compute block metadata
				data_min = min(data_min, datav);
				data_mean += datav * data_mean_scale;
				data_max = max(data_max, datav);

				// Lanes compare r == g and r == b; the texel is gray only if
				// all lanes pass, tracked across the block in grayscalev
				grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

				blk.data_r[idx] = datav.lane<0>();
				blk.data_g[idx] = datav.lane<1>();
				blk.data_b[idx] = datav.lane<2>();
				blk.data_a[idx] = datav.lane<3>();

				blk.rgb_lns[idx] = rgb_lns;
				blk.alpha_lns[idx] = a_lns;

				idx++;
			}
		}
	}

	// Reverse the encoding so we store origin block in the original format;
	// UNORM channels undo the 65535 scale, LNS channels round-trip via sf16
	vfloat4 data_enc = blk.texel(0);
	vfloat4 data_enc_unorm = data_enc / 65535.0f;
	vfloat4 data_enc_lns = vfloat4::zero();

	if (rgb_lns || a_lns)
	{
		data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
	}

	blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);

	// Store block metadata
	blk.data_min = data_min;
	blk.data_mean = data_mean;
	blk.data_max = data_max;
	blk.grayscale = all(grayscalev);
}
266 | |
/* See header for documentation. */
void load_image_block_fast_ldr(
	astcenc_profile decode_mode,
	const astcenc_image& img,
	image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	// Fast path: reads only plane 0 as 8-bit UNORM data with no swizzle or
	// LNS handling, so decode_mode and swz are unused here; presumably the
	// caller only selects this path for 2D LDR identity-swizzle inputs —
	// confirm against the call site
	(void)swz;
	(void)decode_mode;

	unsigned int xsize = img.dim_x;
	unsigned int ysize = img.dim_y;

	blk.xpos = xpos;
	blk.ypos = ypos;
	blk.zpos = zpos;

	// Per-block metadata accumulators
	vfloat4 data_min(1e38f);
	vfloat4 data_mean = vfloat4::zero();
	vfloat4 data_max(-1e38f);
	vmask4 grayscalev(true);
	int idx = 0;

	const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
	for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
	{
		// Clamp to the last texel for blocks overhanging the image edge
		unsigned int yi = astc::min(y, ysize - 1);

		for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
		{
			unsigned int xi = astc::min(x, xsize - 1);

			// Load 4 bytes (RGBA) and scale 8-bit UNORM up to the 16-bit
			// UNORM working range in one multiply
			vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi));
			vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);

			// Compute block metadata
			data_min = min(data_min, datav);
			data_mean += datav;
			data_max = max(data_max, datav);

			// Lanes compare r == g and r == b; gray only if all lanes pass
			grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

			blk.data_r[idx] = datav.lane<0>();
			blk.data_g[idx] = datav.lane<1>();
			blk.data_b[idx] = datav.lane<2>();
			blk.data_a[idx] = datav.lane<3>();

			idx++;
		}
	}

	// Reverse the encoding so we store origin block in the original format
	blk.origin_texel = blk.texel(0) / 65535.0f;

	// Store block metadata; only entry 0 of the lns arrays is written as the
	// whole block is LDR here
	blk.rgb_lns[0] = 0;
	blk.alpha_lns[0] = 0;
	blk.data_min = data_min;
	blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
	blk.data_max = data_max;
	blk.grayscale = all(grayscalev);
}
333 | |
/* See header for documentation. */
void store_image_block(
	astcenc_image& img,
	const image_block& blk,
	const block_size_descriptor& bsd,
	unsigned int xpos,
	unsigned int ypos,
	unsigned int zpos,
	const astcenc_swizzle& swz
) {
	// Compute the in-bounds texel range for this block; blocks overhanging
	// the image edge store only the valid region, and the "nudge" values are
	// the block-local index increments needed to skip the clipped texels
	unsigned int x_size = img.dim_x;
	unsigned int x_start = xpos;
	unsigned int x_end = astc::min(x_size, xpos + bsd.xdim);
	unsigned int x_count = x_end - x_start;
	unsigned int x_nudge = bsd.xdim - x_count;

	unsigned int y_size = img.dim_y;
	unsigned int y_start = ypos;
	unsigned int y_end = astc::min(y_size, ypos + bsd.ydim);
	unsigned int y_count = y_end - y_start;
	unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim;

	unsigned int z_size = img.dim_z;
	unsigned int z_start = zpos;
	unsigned int z_end = astc::min(z_size, zpos + bsd.zdim);

	// True if any non-identity swizzle
	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

	// True if any swizzle uses Z reconstruct
	bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
	               (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);

	int idx = 0;
	if (img.data_type == ASTCENC_TYPE_U8)
	{
		// 8-bit UNORM output path: processed SIMD_WIDTH texels at a time
		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start);

				for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH)
				{
					unsigned int max_texels = ASTCENC_SIMD_WIDTH;
					unsigned int used_texels = astc::min(x_count - x, max_texels);

					// Unaligned load as rows are not always SIMD_WIDTH long
					vfloat data_r(blk.data_r + idx);
					vfloat data_g(blk.data_g + idx);
					vfloat data_b(blk.data_b + idx);
					vfloat data_a(blk.data_a + idx);

					// Clamp to 1.0 and quantize to 8-bit with round-to-nearest
					vint data_ri = float_to_int_rtn(min(data_r, 1.0f) * 255.0f);
					vint data_gi = float_to_int_rtn(min(data_g, 1.0f) * 255.0f);
					vint data_bi = float_to_int_rtn(min(data_b, 1.0f) * 255.0f);
					vint data_ai = float_to_int_rtn(min(data_a, 1.0f) * 255.0f);

					if (needs_swz)
					{
						// Table indexed by swizzle selector; slots for the
						// literal 0/255 selectors and (optionally) Z below
						vint swizzle_table[7];
						swizzle_table[ASTCENC_SWZ_0] = vint(0);
						swizzle_table[ASTCENC_SWZ_1] = vint(255);
						swizzle_table[ASTCENC_SWZ_R] = data_ri;
						swizzle_table[ASTCENC_SWZ_G] = data_gi;
						swizzle_table[ASTCENC_SWZ_B] = data_bi;
						swizzle_table[ASTCENC_SWZ_A] = data_ai;

						if (needs_z)
						{
							// Reconstruct unit-vector Z from X in the R
							// channel and Y in the A channel, remapping
							// [0,1] -> [-1,1] and back
							vfloat data_x = (data_r * vfloat(2.0f)) - vfloat(1.0f);
							vfloat data_y = (data_a * vfloat(2.0f)) - vfloat(1.0f);
							vfloat data_z = vfloat(1.0f) - (data_x * data_x) - (data_y * data_y);
							data_z = max(data_z, 0.0f);
							data_z = (sqrt(data_z) * vfloat(0.5f)) + vfloat(0.5f);

							swizzle_table[ASTCENC_SWZ_Z] = float_to_int_rtn(min(data_z, 1.0f) * 255.0f);
						}

						data_ri = swizzle_table[swz.r];
						data_gi = swizzle_table[swz.g];
						data_bi = swizzle_table[swz.b];
						data_ai = swizzle_table[swz.a];
					}

					// Errors are NaN encoded - convert to magenta error color
					// Branch is OK here - it is almost never true so predicts well
					vmask nan_mask = data_r != data_r;
					if (any(nan_mask))
					{
						data_ri = select(data_ri, vint(0xFF), nan_mask);
						data_gi = select(data_gi, vint(0x00), nan_mask);
						data_bi = select(data_bi, vint(0xFF), nan_mask);
						data_ai = select(data_ai, vint(0xFF), nan_mask);
					}

					// Pack to interleaved RGBA8 and store only the lanes
					// covering real texels in this row fragment
					vint data_rgbai = interleave_rgba8(data_ri, data_gi, data_bi, data_ai);
					vmask store_mask = vint::lane_id() < vint(used_texels);
					store_lanes_masked(reinterpret_cast<int*>(data8_row), data_rgbai, store_mask);

					data8_row += ASTCENC_SIMD_WIDTH * 4;
					idx += used_texels;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
	else if (img.data_type == ASTCENC_TYPE_F16)
	{
		// 16-bit float output path: scalar, one texel at a time
		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start);

				for (unsigned int x = 0; x < x_count; x++)
				{
					vint4 color;

					// NaNs are handled inline - no need to special case
					if (needs_swz)
					{
						// Scalar swizzle lookup table; indices 0-3 are
						// R/G/B/A, plus slots for literal 0/1 and Z
						float data[7];
						data[ASTCENC_SWZ_0] = 0.0f;
						data[ASTCENC_SWZ_1] = 1.0f;
						data[ASTCENC_SWZ_R] = blk.data_r[idx];
						data[ASTCENC_SWZ_G] = blk.data_g[idx];
						data[ASTCENC_SWZ_B] = blk.data_b[idx];
						data[ASTCENC_SWZ_A] = blk.data_a[idx];

						if (needs_z)
						{
							// Reconstruct Z from X in data[0] (red) and Y in
							// data[3] (alpha), clamping the radicand at zero
							float xN = (data[0] * 2.0f) - 1.0f;
							float yN = (data[3] * 2.0f) - 1.0f;
							float zN = 1.0f - xN * xN - yN * yN;
							if (zN < 0.0f)
							{
								zN = 0.0f;
							}
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
						}

						vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
						color = float_to_float16(colorf);
					}
					else
					{
						vfloat4 colorf = blk.texel(idx);
						color = float_to_float16(colorf);
					}

					// TODO: Vectorize with store N shorts?
					data16_row[0] = static_cast<uint16_t>(color.lane<0>());
					data16_row[1] = static_cast<uint16_t>(color.lane<1>());
					data16_row[2] = static_cast<uint16_t>(color.lane<2>());
					data16_row[3] = static_cast<uint16_t>(color.lane<3>());
					data16_row += 4;
					idx++;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
	else // if (img.data_type == ASTCENC_TYPE_F32)
	{
		assert(img.data_type == ASTCENC_TYPE_F32);

		// 32-bit float output path: scalar, one texel at a time
		for (unsigned int z = z_start; z < z_end; z++)
		{
			// Fetch the image plane
			float* data32 = static_cast<float*>(img.data[z]);

			for (unsigned int y = y_start; y < y_end; y++)
			{
				float* data32_row = data32 + (4 * x_size * y) + (4 * x_start);

				for (unsigned int x = 0; x < x_count; x++)
				{
					vfloat4 color = blk.texel(idx);

					// NaNs are handled inline - no need to special case
					if (needs_swz)
					{
						// Scalar swizzle lookup table; indices 0-3 are
						// R/G/B/A, plus slots for literal 0/1 and Z
						float data[7];
						data[ASTCENC_SWZ_0] = 0.0f;
						data[ASTCENC_SWZ_1] = 1.0f;
						data[ASTCENC_SWZ_R] = color.lane<0>();
						data[ASTCENC_SWZ_G] = color.lane<1>();
						data[ASTCENC_SWZ_B] = color.lane<2>();
						data[ASTCENC_SWZ_A] = color.lane<3>();

						if (needs_z)
						{
							// Reconstruct Z from X in data[0] (red) and Y in
							// data[3] (alpha), clamping the radicand at zero
							float xN = (data[0] * 2.0f) - 1.0f;
							float yN = (data[3] * 2.0f) - 1.0f;
							float zN = 1.0f - xN * xN - yN * yN;
							if (zN < 0.0f)
							{
								zN = 0.0f;
							}
							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
						}

						color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
					}

					store(color, data32_row);
					data32_row += 4;
					idx++;
				}
				idx += x_nudge;
			}
			idx += y_nudge;
		}
	}
}
559 | |