1/**
2 * Copyright (c) 2006-2023 LOVE Development Team
3 *
4 * This software is provided 'as-is', without any express or implied
5 * warranty. In no event will the authors be held liable for any damages
6 * arising from the use of this software.
7 *
8 * Permission is granted to anyone to use this software for any purpose,
9 * including commercial applications, and to alter it and redistribute it
10 * freely, subject to the following restrictions:
11 *
12 * 1. The origin of this software must not be misrepresented; you must not
13 * claim that you wrote the original software. If you use this software
14 * in a product, an acknowledgment in the product documentation would be
15 * appreciated but is not required.
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 * 3. This notice may not be removed or altered from any source distribution.
19 **/
20
#include "floattypes.h"

#include <cmath>
#include <cstring>
#include <limits>
25
26namespace love
27{
28
29// Code from ftp://www.fox-toolkit.org/pub/fasthalffloatconversion.pdf
30
31static bool halfInitialized = false;
32
33// tables for half -> float conversions
34static uint32 mantissatable[2048];
35static uint16 offsettable[64];
36static uint32 exponenttable[64];
37
38// tables for float -> half conversions
39static uint16 basetable[512];
40static uint8 shifttable[512];
41
42
/**
 * Converts the mantissa of a denormalized float16 into the exponent and
 * mantissa bits of the equivalent normalized float32.
 *
 * @param i The float16 mantissa. float16Init() calls this with values in
 *          the range [1, 1023].
 * @return The float32 exponent and mantissa bits, or 0 when i contains no bit
 *         that can be normalized (for example i == 0).
 **/
static uint32 convertMantissa(uint32 i)
{
	uint32 m = i << 13; // Zero pad mantissa bits

	// The loop below shifts left until bit 23 is set. If no bit at or below
	// position 23 is set, that can never happen and the loop would spin
	// forever, so bail out instead.
	if ((m & 0x00FFFFFF) == 0)
		return 0;

	uint32 e = 0; // Zero exponent

	while ((m & 0x00800000) == 0) // While not normalized
	{
		e -= 0x00800000; // Decrement exponent (1<<23); unsigned wrap-around is intended
		m <<= 1;         // Shift mantissa
	}

	m &= ~0x00800000u; // Clear the (now implicit) leading 1 bit
	e += 0x38800000;   // Adjust bias ((127-14)<<23)

	return m | e; // Return combined number
}
59
60void float16Init()
61{
62 if (halfInitialized)
63 return;
64
65 halfInitialized = true;
66
67
68 // tables for float16 -> float32 conversions.
69
70 mantissatable[0] = 0;
71
72 for (uint32 i = 1; i < 1024; i++)
73 mantissatable[i] = convertMantissa(i);
74
75 for (uint32 i = 1024; i < 2048; i++)
76 mantissatable[i] = 0x38000000 + ((i - 1024) << 13);
77
78 exponenttable[0] = 0;
79 exponenttable[32] = 0x80000000;
80
81 for (uint32 i = 0; i < 31; i++)
82 exponenttable[i] = i << 23;
83
84 for (uint32 i = 33; i < 63; i++)
85 exponenttable[i] = 0x80000000 + ((i - 32) << 23);
86
87 exponenttable[31] = 0x47800000;
88 exponenttable[63] = 0xC7800000;
89
90 for (int i = 0; i < 64; i++)
91 {
92 if (i == 0 || i == 32)
93 offsettable[i] = 0;
94 else
95 offsettable[i] = 1024;
96 }
97
98
99 // tables for float32 -> float16 conversions.
100
101 for (uint32 i = 0; i < 256; i++)
102 {
103 int e = (int) i - 127;
104
105 if (e < -24) // Very small numbers map to zero
106 {
107 basetable[i | 0x000] = 0x0000;
108 basetable[i | 0x100] = 0x8000;
109 shifttable[i | 0x000] = 24;
110 shifttable[i | 0x100] = 24;
111 }
112 else if (e < -14) // Small numbers map to denorms
113 {
114 basetable[i | 0x000] = (0x0400 >> (-e - 14));
115 basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
116 shifttable[i | 0x000] = -e - 1;
117 shifttable[i | 0x100] = -e - 1;
118 }
119 else if (e <= 15) // Normal numbers just lose precision
120 {
121 basetable[i | 0x000] = ((e + 15) << 10);
122 basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
123 shifttable[i | 0x000] = 13;
124 shifttable[i | 0x100] = 13;
125 }
126 else if (e < 128) // Large numbers map to Infinity
127 {
128 basetable[i | 0x000] = 0x7C00;
129 basetable[i | 0x100] = 0xFC00;
130 shifttable[i | 0x000] = 24;
131 shifttable[i | 0x100] = 24;
132 }
133 else // Infinity and NaN's stay Infinity and NaN's
134 {
135 basetable[i | 0x000] = 0x7C00;
136 basetable[i | 0x100] = 0xFC00;
137 shifttable[i | 0x000] = 13;
138 shifttable[i | 0x100] = 13;
139 }
140 }
141}
142
/**
 * Returns the raw bit pattern of a float as a 32-bit unsigned integer.
 *
 * @param f The float to reinterpret.
 * @return The bits of f, unchanged.
 **/
static inline uint32 asuint32(float f)
{
	// Copy the bytes instead of reading the inactive member of a union,
	// which is undefined behaviour in C++.
	uint32 u = 0;
	std::memcpy(&u, &f, sizeof(u));
	return u;
}
149
/**
 * Builds a float from a raw 32-bit pattern.
 *
 * @param u The bits to reinterpret.
 * @return The float whose bit pattern is u.
 **/
static inline float asfloat32(uint32 u)
{
	// Copy the bytes instead of reading the inactive member of a union,
	// which is undefined behaviour in C++.
	float f = 0.0f;
	std::memcpy(&f, &u, sizeof(f));
	return f;
}
156
157float float16to32(float16 f)
158{
159 return asfloat32(mantissatable[offsettable[f >> 10] + (f & 0x3FF)] + exponenttable[f >> 10]);
160}
161
162float16 float32to16(float f)
163{
164 uint32 u = asuint32(f);
165 return basetable[(u >> 23) & 0x1FF] + ((u & 0x007FFFFF) >> shifttable[(u >> 23) & 0x1FF]);
166}
167
168// Adapted from https://stackoverflow.com/questions/41532085/how-to-pack-unpack-11-and-10-bit-floats-in-javascript-for-webgl2
169
/**
 * Decodes an unsigned 11-bit float (5 exponent bits, 6 mantissa bits, no sign
 * bit) into a float32.
 *
 * @param f The encoded value. Only the low 11 bits are used.
 * @return The decoded value. An all-ones exponent decodes to infinity when
 *         the mantissa is zero and to NaN otherwise.
 **/
float float11to32(float11 f)
{
	// Mask the exponent so stray bits above the 11-bit field cannot turn a
	// finite value into Infinity/NaN.
	const int exponent = (f >> 6) & 0x1F;
	const int mantissa = f & 0x3F;

	// Zero or denormal: (mantissa / 64) * 2^-14.
	if (exponent == 0)
		return std::ldexp(static_cast<float>(mantissa), -20);

	// Normal: (1 + mantissa / 64) * 2^(exponent - 15), scaled exactly by a
	// power of two rather than going through powf.
	if (exponent < 31)
		return std::ldexp(static_cast<float>(64 + mantissa), exponent - 21);

	return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
}
183
184float11 float32to11(float f)
185{
186 const uint16 EXPONENT_BITS = 0x1F;
187 const uint16 EXPONENT_SHIFT = 6;
188 const uint16 EXPONENT_BIAS = 15;
189 const uint16 MANTISSA_BITS = 0x3F;
190 const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
191 const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
192
193 uint32 u = asuint32(f);
194
195 if (u & 0x80000000)
196 return 0; // Negative values go to 0.
197
198 // Map exponent to the range [-127,128]
199 int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
200 uint32 mantissa = u & 0x007FFFFF;
201
202 if (exponent > 15) // Infinity or NaN
203 return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
204 else if (exponent <= -15)
205 return 0;
206
207 exponent += EXPONENT_BIAS;
208
209 return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
210}
211
/**
 * Decodes an unsigned 10-bit float (5 exponent bits, 5 mantissa bits, no sign
 * bit) into a float32.
 *
 * @param f The encoded value. Only the low 10 bits are used.
 * @return The decoded value. An all-ones exponent decodes to infinity when
 *         the mantissa is zero and to NaN otherwise.
 **/
float float10to32(float10 f)
{
	// Mask the exponent so stray bits above the 10-bit field cannot turn a
	// finite value into Infinity/NaN.
	const int exponent = (f >> 5) & 0x1F;
	const int mantissa = f & 0x1F;

	// Zero or denormal: (mantissa / 32) * 2^-14.
	if (exponent == 0)
		return std::ldexp(static_cast<float>(mantissa), -19);

	// Normal: (1 + mantissa / 32) * 2^(exponent - 15), scaled exactly by a
	// power of two rather than going through powf.
	if (exponent < 31)
		return std::ldexp(static_cast<float>(32 + mantissa), exponent - 20);

	return mantissa == 0 ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
}
225
226float10 float32to10(float f)
227{
228 const uint16 EXPONENT_BITS = 0x1F;
229 const uint16 EXPONENT_SHIFT = 5;
230 const uint16 EXPONENT_BIAS = 15;
231 const uint16 MANTISSA_BITS = 0x1F;
232 const uint16 MANTISSA_SHIFT = (23 - EXPONENT_SHIFT);
233 const uint16 MAX_EXPONENT = (EXPONENT_BITS << EXPONENT_SHIFT);
234
235 uint32 u = asuint32(f);
236
237 if (u & 0x80000000)
238 return 0; // Negative values go to 0.
239
240 // Map exponent to the range [-127,128]
241 int32 exponent = (int32)((u >> 23) & 0xFF) - 127;
242 uint32 mantissa = u & 0x007FFFFF;
243
244 if (exponent > 15) // Infinity or NaN
245 return MAX_EXPONENT | (exponent == 128 ? (mantissa & MANTISSA_BITS) : 0);
246 else if (exponent <= -15)
247 return 0;
248
249 exponent += EXPONENT_BIAS;
250
251 return ((uint16)exponent << EXPONENT_SHIFT) | (mantissa >> MANTISSA_SHIFT);
252}
253
254} // love
255