floattypes.cpp source code [LOVE/common/floattypes.cpp]

1	/**
2	* Copyright (c) 2006-2023 LOVE Development Team
3	*
4	* This software is provided 'as-is', without any express or implied
5	* warranty. In no event will the authors be held liable for any damages
6	* arising from the use of this software.
7	*
8	* Permission is granted to anyone to use this software for any purpose,
9	* including commercial applications, and to alter it and redistribute it
10	* freely, subject to the following restrictions:
11	*
12	* 1. The origin of this software must not be misrepresented; you must not
13	* claim that you wrote the original software. If you use this software
14	* in a product, an acknowledgment in the product documentation would be
15	* appreciated but is not required.
16	* 2. Altered source versions must be plainly marked as such, and must not be
17	* misrepresented as being the original software.
18	* 3. This notice may not be removed or altered from any source distribution.
19	**/
20
#include "floattypes.h"

#include <cmath>
#include <cstring>
#include <limits>
25
26	namespace love
27	{
28
29	// Code from ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf
30
31	static bool halfInitialized = false;
32
33	// tables for half -> float conversions
34	static uint32 mantissatable[`2048`];
35	static uint16 offsettable[`64`];
36	static uint32 exponenttable[`64`];
37
38	// tables for float -> half conversions
39	static uint16 basetable[`512`];
40	static uint8 shifttable[`512`];
41
42
43	static uint32 convertMantissa(uint32 i)
44	{
45	uint32 m = i << `13`; // Zero pad mantissa bits
46	uint32 e = `0`; // Zero exponent
47
48	while (!(m & `0x00800000`)) // While not normalized
49	{
50	e -= `0x00800000`; // Decrement exponent (1<<23)
51	m <<= `1`; // Shift mantissa
52	}
53
54	m &= ~(`0x00800000`); // Clear leading 1 bit
55	e += `0x38800000`; // Adjust bias ((127-14)<<23)
56
57	return m \| e; // Return combined number
58	}
59
60	void float16Init()
61	{
62	if (halfInitialized)
63	return;
64
65	halfInitialized = true;
66
67
68	// tables for float16 -> float32 conversions.
69
70	mantissatable[`0`] = `0`;
71
72	for (uint32 i = `1`; i < `1024`; i++)
73	mantissatable[i] = convertMantissa(i);
74
75	for (uint32 i = `1024`; i < `2048`; i++)
76	mantissatable[i] = `0x38000000` + ((i - `1024`) << `13`);
77
78	exponenttable[`0`] = `0`;
79	exponenttable[`32`] = `0x80000000`;
80
81	for (uint32 i = `0`; i < `31`; i++)
82	exponenttable[i] = i << `23`;
83
84	for (uint32 i = `33`; i < `63`; i++)
85	exponenttable[i] = `0x80000000` + ((i - `32`) << `23`);
86
87	exponenttable[`31`] = `0x47800000`;
88	exponenttable[`63`] = `0xC7800000`;
89
90	for (int i = `0`; i < `64`; i++)
91	{
92	if (i == `0` \|\| i == `32`)
93	offsettable[i] = `0`;
94	else
95	offsettable[i] = `1024`;
96	}
97
98
99	// tables for float32 -> float16 conversions.
100
101	for (uint32 i = `0`; i < `256`; i++)
102	{
103	int e = (int) i - `127`;
104
105	if (e < -`24`) // Very small numbers map to zero
106	{
107	basetable[i \| `0x000`] = `0x0000`;
108	basetable[i \| `0x100`] = `0x8000`;
109	shifttable[i \| `0x000`] = `24`;
110	shifttable[i \| `0x100`] = `24`;
111	}
112	else if (e < -`14`) // Small numbers map to denorms
113	{
114	basetable[i \| `0x000`] = (`0x0400` >> (-e - `14`));
115	basetable[i \| `0x100`] = (`0x0400` >> (-e - `14`)) \| `0x8000`;
116	shifttable[i \| `0x000`] = -e - `1`;
117	shifttable[i \| `0x100`] = -e - `1`;
118	}
119	else if (e <= `15`) // Normal numbers just lose precision
120	{
121	basetable[i \| `0x000`] = ((e + `15`) << `10`);
122	basetable[i \| `0x100`] = ((e + `15`) << `10`) \| `0x8000`;
123	shifttable[i \| `0x000`] = `13`;
124	shifttable[i \| `0x100`] = `13`;
125	}
126	else if (e < `128`) // Large numbers map to Infinity
127	{
128	basetable[i \| `0x000`] = `0x7C00`;
129	basetable[i \| `0x100`] = `0xFC00`;
130	shifttable[i \| `0x000`] = `24`;
131	shifttable[i \| `0x100`] = `24`;
132	}
133	else // Infinity and NaN's stay Infinity and NaN's
134	{
135	basetable[i \| `0x000`] = `0x7C00`;
136	basetable[i \| `0x100`] = `0xFC00`;
137	shifttable[i \| `0x000`] = `13`;
138	shifttable[i \| `0x100`] = `13`;
139	}
140	}
141	}
142
143	static inline uint32 asuint32(float f)
144	{
145	union { float f; uint32 u; } conv;
146	conv.f = f;
147	return conv.u;
148	}
149
150	static inline float asfloat32(uint32 u)
151	{
152	union { float f; uint32 u; } conv;
153	conv.u = u;
154	return conv.f;
155	}
156
157	float float16to32(float16 f)
158	{
159	return asfloat32(mantissatable[offsettable[f >> `10`] + (f & `0x3FF`)] + exponenttable[f >> `10`]);
160	}
161
162	float16 float32to16(float f)
163	{
164	uint32 u = asuint32(f);
165	return basetable[(u >> `23`) & `0x1FF`] + ((u & `0x007FFFFF`) >> shifttable[(u >> `23`) & `0x1FF`]);
166	}
167
168	// Adapted from https://stackoverflow.com/questions/41532085/how-to-pack-unpack-11-and-10-bit-floats-in-javascript-for-webgl2
169
170	float float11to32(float11 f)
171	{
172	uint16 exponent = f >> `6`;
173	uint16 mantissa = f & `0x3F`;
174
175	if (exponent == `0`)
176	return mantissa == `0` ? `0` : powf(`2.0f`, -`14.0f`) * (mantissa / `64.0f`);
177
178	if (exponent < `31`)
179	return powf(`2.0f`, exponent - `15`) * (`1.0f` + mantissa / `64.0f`);
180
181	return mantissa == `0` ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
182	}
183
184	float11 float32to11(float f)
185	{
186	const uint16 EXPONENT_BITS = `0x1F`;
187	const uint16 EXPONENT_SHIFT = `6`;
188	const uint16 EXPONENT_BIAS = `15`;
189	const uint16 MANTISSA_BITS = `0x3F`;
190	const uint16 MANTISSA_SHIFT = (`23` - EXPONENT_SHIFT);
191	const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
192
193	uint32 u = asuint32(f);
194
195	if (u & `0x80000000`)
196	return `0`; // Negative values go to 0.
197
198	// Map exponent to the range [-127,128]
199	int32 exponent = (int32)((u >> `23`) & `0xFF`) - `127`;
200	uint32 mantissa = u & `0x007FFFFF`;
201
202	if (exponent > `15`) // Infinity or NaN
203	return MAX_EXPONENT \| (exponent == `128` ? (mantissa & MANTISSA_BITS) : `0`);
204	else if (exponent <= -`15`)
205	return `0`;
206
207	exponent += EXPONENT_BIAS;
208
209	return ((uint16)exponent << EXPONENT_SHIFT) \| (mantissa >> MANTISSA_SHIFT);
210	}
211
212	float float10to32(float10 f)
213	{
214	uint16 exponent = f >> `5`;
215	uint16 mantissa = f & `0x1F`;
216
217	if (exponent == `0`)
218	return mantissa == `0` ? `0` : powf(`2.0f`, -`14.0f`) * (mantissa / `32.0f`);
219
220	if (exponent < `31`)
221	return powf(`2.0f`, exponent - `15`) * (`1.0f` + mantissa / `32.0f`);
222
223	return mantissa == `0` ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
224	}
225
226	float10 float32to10(float f)
227	{
228	const uint16 EXPONENT_BITS = `0x1F`;
229	const uint16 EXPONENT_SHIFT = `5`;
230	const uint16 EXPONENT_BIAS = `15`;
231	const uint16 MANTISSA_BITS = `0x1F`;
232	const uint16 MANTISSA_SHIFT = (`23` - EXPONENT_SHIFT);
233	const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
234
235	uint32 u = asuint32(f);
236
237	if (u & `0x80000000`)
238	return `0`; // Negative values go to 0.
239
240	// Map exponent to the range [-127,128]
241	int32 exponent = (int32)((u >> `23`) & `0xFF`) - `127`;
242	uint32 mantissa = u & `0x007FFFFF`;
243
244	if (exponent > `15`) // Infinity or NaN
245	return MAX_EXPONENT \| (exponent == `128` ? (mantissa & MANTISSA_BITS) : `0`);
246	else if (exponent <= -`15`)
247	return `0`;
248
249	exponent += EXPONENT_BIAS;
250
251	return ((uint16)exponent << EXPONENT_SHIFT) \| (mantissa >> MANTISSA_SHIFT);
252	}
253
254	} // love
255

Browse the source code of LOVE/common/floattypes.cpp