1 | /** |
2 | * Copyright (c) 2006-2023 LOVE Development Team |
3 | * |
4 | * This software is provided 'as-is', without any express or implied |
5 | * warranty. In no event will the authors be held liable for any damages |
6 | * arising from the use of this software. |
7 | * |
8 | * Permission is granted to anyone to use this software for any purpose, |
9 | * including commercial applications, and to alter it and redistribute it |
10 | * freely, subject to the following restrictions: |
11 | * |
12 | * 1. The origin of this software must not be misrepresented; you must not |
13 | * claim that you wrote the original software. If you use this software |
14 | * in a product, an acknowledgment in the product documentation would be |
15 | * appreciated but is not required. |
16 | * 2. Altered source versions must be plainly marked as such, and must not be |
17 | * misrepresented as being the original software. |
18 | * 3. This notice may not be removed or altered from any source distribution. |
19 | **/ |
20 | |
#include "floattypes.h"

#include <cmath>
#include <cstring>
#include <limits>
25 | |
26 | namespace love |
27 | { |
28 | |
29 | // Code from ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf |
30 | |
// Set to true by float16Init() the first time it runs, so that later calls
// return immediately instead of rebuilding the tables below.
static bool halfInitialized = false;

// tables for half -> float conversions
// mantissatable is indexed by offsettable[signexp] + (low 10 bits of the half),
// offsettable and exponenttable are indexed by the half's top 6 bits
// (sign bit + 5 exponent bits).
static uint32 mantissatable[2048];
static uint16 offsettable[64];
static uint32 exponenttable[64];

// tables for float -> half conversions
// Both are indexed by the float's top 9 bits (sign bit + 8 exponent bits):
// basetable holds the half's sign/exponent bits, shifttable holds how far the
// 23-bit float mantissa is shifted right before being added to the base.
static uint16 basetable[512];
static uint8 shifttable[512];
41 | |
42 | |
43 | static uint32 convertMantissa(uint32 i) |
44 | { |
45 | uint32 m = i << 13; // Zero pad mantissa bits |
46 | uint32 e = 0; // Zero exponent |
47 | |
48 | while (!(m & 0x00800000)) // While not normalized |
49 | { |
50 | e -= 0x00800000; // Decrement exponent (1<<23) |
51 | m <<= 1; // Shift mantissa |
52 | } |
53 | |
54 | m &= ~(0x00800000); // Clear leading 1 bit |
55 | e += 0x38800000; // Adjust bias ((127-14)<<23) |
56 | |
57 | return m | e; // Return combined number |
58 | } |
59 | |
60 | void float16Init() |
61 | { |
62 | if (halfInitialized) |
63 | return; |
64 | |
65 | halfInitialized = true; |
66 | |
67 | |
68 | // tables for float16 -> float32 conversions. |
69 | |
70 | mantissatable[0] = 0; |
71 | |
72 | for (uint32 i = 1; i < 1024; i++) |
73 | mantissatable[i] = convertMantissa(i); |
74 | |
75 | for (uint32 i = 1024; i < 2048; i++) |
76 | mantissatable[i] = 0x38000000 + ((i - 1024) << 13); |
77 | |
78 | exponenttable[0] = 0; |
79 | exponenttable[32] = 0x80000000; |
80 | |
81 | for (uint32 i = 0; i < 31; i++) |
82 | exponenttable[i] = i << 23; |
83 | |
84 | for (uint32 i = 33; i < 63; i++) |
85 | exponenttable[i] = 0x80000000 + ((i - 32) << 23); |
86 | |
87 | exponenttable[31] = 0x47800000; |
88 | exponenttable[63] = 0xC7800000; |
89 | |
90 | for (int i = 0; i < 64; i++) |
91 | { |
92 | if (i == 0 || i == 32) |
93 | offsettable[i] = 0; |
94 | else |
95 | offsettable[i] = 1024; |
96 | } |
97 | |
98 | |
99 | // tables for float32 -> float16 conversions. |
100 | |
101 | for (uint32 i = 0; i < 256; i++) |
102 | { |
103 | int e = (int) i - 127; |
104 | |
105 | if (e < -24) // Very small numbers map to zero |
106 | { |
107 | basetable[i | 0x000] = 0x0000; |
108 | basetable[i | 0x100] = 0x8000; |
109 | shifttable[i | 0x000] = 24; |
110 | shifttable[i | 0x100] = 24; |
111 | } |
112 | else if (e < -14) // Small numbers map to denorms |
113 | { |
114 | basetable[i | 0x000] = (0x0400 >> (-e - 14)); |
115 | basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000; |
116 | shifttable[i | 0x000] = -e - 1; |
117 | shifttable[i | 0x100] = -e - 1; |
118 | } |
119 | else if (e <= 15) // Normal numbers just lose precision |
120 | { |
121 | basetable[i | 0x000] = ((e + 15) << 10); |
122 | basetable[i | 0x100] = ((e + 15) << 10) | 0x8000; |
123 | shifttable[i | 0x000] = 13; |
124 | shifttable[i | 0x100] = 13; |
125 | } |
126 | else if (e < 128) // Large numbers map to Infinity |
127 | { |
128 | basetable[i | 0x000] = 0x7C00; |
129 | basetable[i | 0x100] = 0xFC00; |
130 | shifttable[i | 0x000] = 24; |
131 | shifttable[i | 0x100] = 24; |
132 | } |
133 | else // Infinity and NaN's stay Infinity and NaN's |
134 | { |
135 | basetable[i | 0x000] = 0x7C00; |
136 | basetable[i | 0x100] = 0xFC00; |
137 | shifttable[i | 0x000] = 13; |
138 | shifttable[i | 0x100] = 13; |
139 | } |
140 | } |
141 | } |
142 | |
143 | static inline uint32 asuint32(float f) |
144 | { |
145 | union { float f; uint32 u; } conv; |
146 | conv.f = f; |
147 | return conv.u; |
148 | } |
149 | |
150 | static inline float asfloat32(uint32 u) |
151 | { |
152 | union { float f; uint32 u; } conv; |
153 | conv.u = u; |
154 | return conv.f; |
155 | } |
156 | |
157 | float float16to32(float16 f) |
158 | { |
159 | return asfloat32(mantissatable[offsettable[f >> 10] + (f & 0x3FF)] + exponenttable[f >> 10]); |
160 | } |
161 | |
162 | float16 float32to16(float f) |
163 | { |
164 | uint32 u = asuint32(f); |
165 | return basetable[(u >> 23) & 0x1FF] + ((u & 0x007FFFFF) >> shifttable[(u >> 23) & 0x1FF]); |
166 | } |
167 | |
168 | // Adapted from https://stackoverflow.com/questions/41532085/how-to-pack-unpack-11-and-10-bit-floats-in-javascript-for-webgl2 |
169 | |
170 | float float11to32(float11 f) |
171 | { |
172 | uint16 exponent = f >> 6; |
173 | uint16 mantissa = f & 0x3F; |
174 | |
175 | if (exponent == 0) |
176 | return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 64.0f); |
177 | |
178 | if (exponent < 31) |
179 | return powf(2.0f, exponent - 15) * (1.0f + mantissa / 64.0f); |
180 | |
181 | return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN(); |
182 | } |
183 | |
184 | float11 float32to11(float f) |
185 | { |
186 | const uint16 EXPONENT_BITS = 0x1F; |
187 | const uint16 EXPONENT_SHIFT = 6; |
188 | const uint16 EXPONENT_BIAS = 15; |
189 | const uint16 MANTISSA_BITS = 0x3F; |
190 | const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT); |
191 | const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT); |
192 | |
193 | uint32 u = asuint32(f); |
194 | |
195 | if (u & 0x80000000) |
196 | return 0; // Negative values go to 0. |
197 | |
198 | // Map exponent to the range [-127,128] |
199 | int32 exponent = (int32)((u >> 23) & 0xFF) - 127; |
200 | uint32 mantissa = u & 0x007FFFFF; |
201 | |
202 | if (exponent > 15) // Infinity or NaN |
203 | return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0); |
204 | else if (exponent <= -15) |
205 | return 0; |
206 | |
207 | exponent += EXPONENT_BIAS; |
208 | |
209 | return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT); |
210 | } |
211 | |
212 | float float10to32(float10 f) |
213 | { |
214 | uint16 exponent = f >> 5; |
215 | uint16 mantissa = f & 0x1F; |
216 | |
217 | if (exponent == 0) |
218 | return mantissa == 0 ? 0 : powf(2.0f, -14.0f) * (mantissa / 32.0f); |
219 | |
220 | if (exponent < 31) |
221 | return powf(2.0f, exponent - 15) * (1.0f + mantissa / 32.0f); |
222 | |
223 | return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN(); |
224 | } |
225 | |
226 | float10 float32to10(float f) |
227 | { |
228 | const uint16 EXPONENT_BITS = 0x1F; |
229 | const uint16 EXPONENT_SHIFT = 5; |
230 | const uint16 EXPONENT_BIAS = 15; |
231 | const uint16 MANTISSA_BITS = 0x1F; |
232 | const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT); |
233 | const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT); |
234 | |
235 | uint32 u = asuint32(f); |
236 | |
237 | if (u & 0x80000000) |
238 | return 0; // Negative values go to 0. |
239 | |
240 | // Map exponent to the range [-127,128] |
241 | int32 exponent = (int32)((u >> 23) & 0xFF) - 127; |
242 | uint32 mantissa = u & 0x007FFFFF; |
243 | |
244 | if (exponent > 15) // Infinity or NaN |
245 | return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0); |
246 | else if (exponent <= -15) |
247 | return 0; |
248 | |
249 | exponent += EXPONENT_BIAS; |
250 | |
251 | return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT); |
252 | } |
253 | |
254 | } // love |
255 | |