astcenc_mathlib_softfloat.cpp source code [Godot/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp]

1	// SPDX-License-Identifier: Apache-2.0
2	// ----------------------------------------------------------------------------
3	// Copyright 2011-2021 Arm Limited
4	//
5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6	// use this file except in compliance with the License. You may obtain a copy
7	// of the License at:
8	//
9	// http://www.apache.org/licenses/LICENSE-2.0
10	//
11	// Unless required by applicable law or agreed to in writing, software
12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14	// License for the specific language governing permissions and limitations
15	// under the License.
16	// ----------------------------------------------------------------------------
17
18	/**
19	* @brief Soft-float library for IEEE-754.
20	*/
21	#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
22
23	#include "astcenc_mathlib.h"
24
25	/ sized soft-float types. These are mapped to the sized integer*
26	types of C99, instead of C's floating-point types; this is because
27	the library needs to maintain exact, bit-level control on all
28	operations on these data types. /*
29	typedef uint16_t sf16;
30	typedef uint32_t sf32;
31
32	/******************************************
33	helper functions and their lookup tables
34	******************************************/
35	/ count leading zeros functions. Only used when the input is nonzero. /
36
37	#if defined(__GNUC__) && (defined(__i386) \|\| defined(__amd64))
38	#elif defined(__arm__) && defined(__ARMCC_VERSION)
39	#elif defined(__arm__) && defined(__GNUC__)
40	#else
41	/ table used for the slow default versions. /
42	static const uint8_t clz_table[`256`] =
43	{
44	`8`, `7`, `6`, `6`, `5`, `5`, `5`, `5`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
45	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`,
46	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
47	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`,
48	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
49	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
50	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
51	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`,
52	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
53	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
54	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
55	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
56	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
57	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
58	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
59	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`
60	};
61	#endif
62
63	/*
64	32-bit count-leading-zeros function: use the Assembly instruction whenever possible. /*
65	static uint32_t clz32(uint32_t inp)
66	{
67	#if defined(__GNUC__) && (defined(__i386) \|\| defined(__amd64))
68	uint32_t bsr;
69	__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp \| `1`));
70	return `31` - bsr;
71	#else
72	#if defined(__arm__) && defined(__ARMCC_VERSION)
73	return __clz(inp); / armcc builtin /
74	#else
75	#if defined(__arm__) && defined(__GNUC__)
76	uint32_t lz;
77	__asm__("clz %0, %1": "=r"(lz):"r"(inp));
78	return lz;
79	#else
80	/ slow default version /
81	uint32_t summa = `24`;
82	if (inp >= UINT32_C(`0x10000`))
83	{
84	inp >>= `16`;
85	summa -= `16`;
86	}
87	if (inp >= UINT32_C(`0x100`))
88	{
89	inp >>= `8`;
90	summa -= `8`;
91	}
92	return summa + clz_table[inp];
93	#endif
94	#endif
95	#endif
96	}
97
98	/ the five rounding modes that IEEE-754r defines /
99	typedef enum
100	{
101	SF_UP = `0`, / round towards positive infinity /
102	SF_DOWN = `1`, / round towards negative infinity /
103	SF_TOZERO = `2`, / round towards zero /
104	SF_NEARESTEVEN = `3`, / round toward nearest value; if mid-between, round to even value /
105	SF_NEARESTAWAY = `4` / round toward nearest value; if mid-between, round away from zero /
106	} roundmode;
107
108
109	static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
110	{
111	uint32_t vl1 = UINT32_C(`1`) << shamt;
112	uint32_t inp2 = inp + (vl1 >> `1`); / added 0.5 ULP /
113	uint32_t msk = (inp \| UINT32_C(`1`)) & vl1; / nonzero if odd. '\| 1' forces it to 1 if the shamt is 0. /
114	msk--; / negative if even, nonnegative if odd. /
115	inp2 -= (msk >> `31`); / subtract epsilon before shift if even. /
116	inp2 >>= shamt;
117	return inp2;
118	}
119
120	static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
121	{
122	uint32_t vl1 = (UINT32_C(`1`) << shamt) >> `1`;
123	inp += vl1;
124	inp >>= shamt;
125	return inp;
126	}
127
128	static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
129	{
130	uint32_t vl1 = UINT32_C(`1`) << shamt;
131	inp += vl1;
132	inp--;
133	inp >>= shamt;
134	return inp;
135	}
136
137	/ convert from FP16 to FP32. /
138	static sf32 sf16_to_sf32(sf16 inp)
139	{
140	uint32_t inpx = inp;
141
142	/*
143	This table contains, for every FP16 sign/exponent value combination,
144	the difference between the input FP16 value and the value obtained
145	by shifting the correct FP32 result right by 13 bits.
146	This table allows us to handle every case except denormals and NaN
147	with just 1 table lookup, 2 shifts and 1 add.
148	*/
149
150	#define WITH_MSB(a) (UINT32_C(a) \| (1u << 31))
151	static const uint32_t tbl[`64`] =
152	{
153	WITH_MSB(`0x00000`), `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`,
154	`0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`,
155	`0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`,
156	`0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, `0x1C000`, WITH_MSB(`0x38000`),
157	WITH_MSB(`0x38000`), `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`,
158	`0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`,
159	`0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`,
160	`0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, `0x54000`, WITH_MSB(`0x70000`)
161	};
162
163	uint32_t res = tbl[inpx >> `10`];
164	res += inpx;
165
166	/ Normal cases: MSB of 'res' not set. /
167	if ((res & WITH_MSB(`0`)) == `0`)
168	{
169	return res << `13`;
170	}
171
172	/ Infinity and Zero: 10 LSB of 'res' not set. /
173	if ((res & `0x3FF`) == `0`)
174	{
175	return res << `13`;
176	}
177
178	/ NaN: the exponent field of 'inp' is non-zero. /
179	if ((inpx & `0x7C00`) != `0`)
180	{
181	/ All NaNs are quietened. /
182	return (res << `13`) \| `0x400000`;
183	}
184
185	/ Denormal cases /
186	uint32_t sign = (inpx & `0x8000`) << `16`;
187	uint32_t mskval = inpx & `0x7FFF`;
188	uint32_t leadingzeroes = clz32(mskval);
189	mskval <<= leadingzeroes;
190	return (mskval >> `8`) + ((`0x85` - leadingzeroes) << `23`) + sign;
191	}
192
193	/ Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. /
194	static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
195	{
196	/ for each possible sign/exponent combination, store a case index. This gives a 512-byte table /
197	static const uint8_t tab[`512`] {
198	`0`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
199	`10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
200	`10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
201	`10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
202	`10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
203	`10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`, `10`,
204	`10`, `10`, `10`, `10`, `10`, `20`, `20`, `20`, `20`, `20`, `20`, `20`, `20`, `20`, `20`, `20`,
205	`20`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`,
206	`30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `30`, `40`,
207	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
208	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
209	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
210	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
211	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
212	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
213	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `50`,
214
215	`5`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
216	`15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
217	`15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
218	`15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
219	`15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
220	`15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`, `15`,
221	`15`, `15`, `15`, `15`, `15`, `25`, `25`, `25`, `25`, `25`, `25`, `25`, `25`, `25`, `25`, `25`,
222	`25`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`,
223	`35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `35`, `45`,
224	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
225	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
226	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
227	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
228	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
229	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`,
230	`45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `45`, `55`,
231	};
232
233	/ many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code*
234	size. /*
235	static const uint32_t tabx[`60`] {
236	UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0x8000`), UINT32_C(`0x80000000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`),
237	UINT32_C(`1`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0x8000`), UINT32_C(`0x8001`), UINT32_C(`0x8000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`),
238	UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0`), UINT32_C(`0x8000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`), UINT32_C(`0x8000`),
239	UINT32_C(`0xC8001FFF`), UINT32_C(`0xC8000000`), UINT32_C(`0xC8000000`), UINT32_C(`0xC8000FFF`), UINT32_C(`0xC8001000`),
240	UINT32_C(`0x58000000`), UINT32_C(`0x38001FFF`), UINT32_C(`0x58000000`), UINT32_C(`0x58000FFF`), UINT32_C(`0x58001000`),
241	UINT32_C(`0x7C00`), UINT32_C(`0x7BFF`), UINT32_C(`0x7BFF`), UINT32_C(`0x7C00`), UINT32_C(`0x7C00`),
242	UINT32_C(`0xFBFF`), UINT32_C(`0xFC00`), UINT32_C(`0xFBFF`), UINT32_C(`0xFC00`), UINT32_C(`0xFC00`),
243	UINT32_C(`0x90000000`), UINT32_C(`0x90000000`), UINT32_C(`0x90000000`), UINT32_C(`0x90000000`), UINT32_C(`0x90000000`),
244	UINT32_C(`0x20000000`), UINT32_C(`0x20000000`), UINT32_C(`0x20000000`), UINT32_C(`0x20000000`), UINT32_C(`0x20000000`)
245	};
246
247	uint32_t p;
248	uint32_t idx = rmode + tab[inp >> `23`];
249	uint32_t vlx = tabx[idx];
250	switch (idx)
251	{
252	/*
253	Positive number which may be Infinity or NaN.
254	We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
255	(If we don't do this quieting, then a NaN that is distinguished only by having
256	its low-order bits set, would be turned into an INF. /*
257	case `50`:
258	case `51`:
259	case `52`:
260	case `53`:
261	case `54`:
262	case `55`:
263	case `56`:
264	case `57`:
265	case `58`:
266	case `59`:
267	/*
268	the input value is 0x7F800000 or 0xFF800000 if it is INF.
269	By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
270	For NaNs, however, this operation will keep bit 23 with the value 1.
271	We can then extract bit 23, and logical-OR bit 9 of the result with this
272	bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
273	of the mantissa is set.)
274	*/
275	p = (inp - `1`) & UINT32_C(`0x800000`); / zero if INF, nonzero if NaN. /
276	return static_cast<sf16>(((inp + vlx) >> `13`) \| (p >> `14`));
277	/*
278	positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
279	If it is, then return 0, else return 1 (the smallest representable nonzero number)
280	*/
281	case `0`:
282	/*
283	-inp will set the MSB if the input number is nonzero.
284	Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
285	*/
286	return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> `31`);
287
288	/*
289	negative, exponent = , round-mode == DOWN, need to check whether number is
290	actually 0. If it is, return 0x8000 ( float -0.0 )
291	Else return the smallest negative number ( 0x8001 ) /*
292	case `6`:
293	/*
294	in this case 'vlx' is 0x80000000. By subtracting the input value from it,
295	we obtain a value that is 0 if the input value is in fact zero and has
296	the MSB set if it isn't. We then right-shift the value by 31 places to
297	get a value that is 0 if the input is -0.0 and 1 otherwise.
298	*/
299	return static_cast<sf16>(((vlx - inp) >> `31`) + UINT32_C(`0x8000`));
300
301	/*
302	for all other cases involving underflow/overflow, we don't need to
303	do actual tests; we just return 'vlx'.
304	*/
305	case `1`:
306	case `2`:
307	case `3`:
308	case `4`:
309	case `5`:
310	case `7`:
311	case `8`:
312	case `9`:
313	case `10`:
314	case `11`:
315	case `12`:
316	case `13`:
317	case `14`:
318	case `15`:
319	case `16`:
320	case `17`:
321	case `18`:
322	case `19`:
323	case `40`:
324	case `41`:
325	case `42`:
326	case `43`:
327	case `44`:
328	case `45`:
329	case `46`:
330	case `47`:
331	case `48`:
332	case `49`:
333	return static_cast<sf16>(vlx);
334
335	/*
336	for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
337	FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
338	baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
339	from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
340	for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
341	except for odd input numbers. For odd input numbers, we need to add 1 to the constant. /*
342
343	/ normal number, all rounding modes except round-to-nearest-even: /
344	case `30`:
345	case `31`:
346	case `32`:
347	case `34`:
348	case `35`:
349	case `36`:
350	case `37`:
351	case `39`:
352	return static_cast<sf16>((inp + vlx) >> `13`);
353
354	/ normal number, round-to-nearest-even. /
355	case `33`:
356	case `38`:
357	p = inp + vlx;
358	p += (inp >> `13`) & `1`;
359	return static_cast<sf16>(p >> `13`);
360
361	/*
362	the various denormal cases. These are not expected to be common, so their performance is a bit
363	less important. For each of these cases, we need to extract an exponent and a mantissa
364	(including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
365	depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
366	sign of the resulting denormal number.
367	*/
368	case `21`:
369	case `22`:
370	case `25`:
371	case `27`:
372	/ denormal, round towards zero. /
373	p = `126` - ((inp >> `23`) & `0xFF`);
374	return static_cast<sf16>((((inp & UINT32_C(`0x7FFFFF`)) + UINT32_C(`0x800000`)) >> p) \| vlx);
375	case `20`:
376	case `26`:
377	/ denormal, round away from zero. /
378	p = `126` - ((inp >> `23`) & `0xFF`);
379	return static_cast<sf16>(rtup_shift32((inp & UINT32_C(`0x7FFFFF`)) + UINT32_C(`0x800000`), p) \| vlx);
380	case `24`:
381	case `29`:
382	/ denormal, round to nearest-away /
383	p = `126` - ((inp >> `23`) & `0xFF`);
384	return static_cast<sf16>(rtna_shift32((inp & UINT32_C(`0x7FFFFF`)) + UINT32_C(`0x800000`), p) \| vlx);
385	case `23`:
386	case `28`:
387	/ denormal, round to nearest-even. /
388	p = `126` - ((inp >> `23`) & `0xFF`);
389	return static_cast<sf16>(rtne_shift32((inp & UINT32_C(`0x7FFFFF`)) + UINT32_C(`0x800000`), p) \| vlx);
390	}
391
392	return `0`;
393	}
394
395	/ convert from soft-float to native-float /
396	float sf16_to_float(uint16_t p)
397	{
398	if32 i;
399	i.u = sf16_to_sf32(p);
400	return i.f;
401	}
402
403	/ convert from native-float to soft-float /
404	uint16_t float_to_sf16(float p)
405	{
406	if32 i;
407	i.f = p;
408	return sf32_to_sf16(i.u, SF_NEARESTEVEN);
409	}
410
411	#endif
412

Browse the source code of Godot/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp