Half.hpp source code [engine/third_party/swiftshader/src/System/Half.hpp]

1	// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#ifndef sw_Half_hpp
16	#define sw_Half_hpp
17
#include "Math.hpp"

#include <algorithm>
#include <cmath>
#include <cstring>
22
23	namespace sw
24	{
// 16-bit floating-point value held as a raw 16-bit pattern.
//
// Only the declarations are visible here; the float conversion and
// assignment members are defined out of line.
class half
{
    // Raw 16-bit storage. Left uninitialized by the defaulted constructor.
    unsigned short fp16i;

public:
    // Default construction performs no initialization of the stored bits.
    half() = default;

    // Builds a half from a float. Explicit, so a float never silently
    // converts to half at a call site.
    explicit half(float f);

    // Assignment from another half, or directly from a float.
    half &operator=(half h);
    half &operator=(float f);

    // Implicit conversion back to float.
    operator float() const;
};
39
40	inline half shortAsHalf(short s)
41	{
42	union
43	{
44	half h;
45	short s;
46	} hs;
47
48	hs.s = s;
49
50	return hs.h;
51	}
52
53	class RGB9E5
54	{
55	unsigned int R : `9`;
56	unsigned int G : `9`;
57	unsigned int B : `9`;
58	unsigned int E : `5`;
59
60	public:
61	RGB9E5(float rgb[`3`]) : RGB9E5 (rgb[`0`], rgb[`1`], rgb[`2`])
62	{
63	}
64
65	RGB9E5(float r, float g, float b)
66	{
67	// Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
68
69	// B is the exponent bias (15)
70	constexpr int g_sharedexp_bias = `15`;
71
72	// N is the number of mantissa bits per component (9)
73	constexpr int g_sharedexp_mantissabits = `9`;
74
75	// Emax is the maximum allowed biased exponent value (31)
76	constexpr int g_sharedexp_maxexponent = `31`;
77
78	constexpr float g_sharedexp_max =
79	((static_cast<float>(`1` << g_sharedexp_mantissabits) - `1`) /
80	static_cast<float>(`1` << g_sharedexp_mantissabits)) *
81	static_cast<float>(`1` << (g_sharedexp_maxexponent - g_sharedexp_bias));
82
83	// Clamp components to valid range. NaN becomes 0.
84	const float red_c = std::min(!(r > `0`) ? `0` : r, g_sharedexp_max);
85	const float green_c = std::min(!(g > `0`) ? `0` : g, g_sharedexp_max);
86	const float blue_c = std::min(!(b > `0`) ? `0` : b, g_sharedexp_max);
87
88	// We're reducing the mantissa to 9 bits, so we must round up if the next
89	// bit is 1. In other words add 0.5 to the new mantissa's position and
90	// allow overflow into the exponent so we can scale correctly.
91	constexpr int half = `1` << (`23` - g_sharedexp_mantissabits);
92	const float red_r = bit_cast<float>(bit_cast<int>(red_c) + half);
93	const float green_r = bit_cast<float>(bit_cast<int>(green_c) + half);
94	const float blue_r = bit_cast<float>(bit_cast<int>(blue_c) + half);
95
96	// The largest component determines the shared exponent. It can't be lower
97	// than 0 (after bias subtraction) so also limit to the mimimum representable.
98	constexpr float min_s = `0.5f` / (`1` << g_sharedexp_bias);
99	float max_s = std::max(std::max(red_r, green_r), std::max(blue_r, min_s));
100
101	// Obtain the reciprocal of the shared exponent by inverting the bits,
102	// and scale by the new mantissa's size. Note that the IEEE-754 single-precision
103	// format has an implicit leading 1, but this shared component format does not.
104	float scale = bit_cast<float>((bit_cast<int>(max_s) & `0x7F800000`) ^ `0x7F800000`) * (`1` << (g_sharedexp_mantissabits - `2`));
105
106	R = static_cast<unsigned int>(round(red_c * scale));
107	G = static_cast<unsigned int>(round(green_c * scale));
108	B = static_cast<unsigned int>(round(blue_c * scale));
109	E = (bit_cast<unsigned int>(max_s) >> `23`) - `127` + `15` + `1`;
110	}
111
112	operator unsigned int() const
113	{
114	return *reinterpret_cast<const unsigned int>(this*);
115	}
116
117	void toRGB16F(half rgb[`3`]) const
118	{
119	constexpr int offset = `24`; // Exponent bias (15) + number of mantissa bits per component (9) = 24
120
121	const float factor = (`1u` << E) * (`1.0f` / (`1` << offset));
122	rgb[`0`] = half (R * factor);
123	rgb[`1`] = half (G * factor);
124	rgb[`2`] = half (B * factor);
125	}
126	};
127
128	class R11G11B10F
129	{
130	unsigned int R : `11`;
131	unsigned int G : `11`;
132	unsigned int B : `10`;
133
134	static inline half float11ToFloat16(unsigned short fp11)
135	{
136	return shortAsHalf(fp11 << `4`); // Sign bit 0
137	}
138
139	static inline half float10ToFloat16(unsigned short fp10)
140	{
141	return shortAsHalf(fp10 << `5`); // Sign bit 0
142	}
143
144	inline unsigned short float32ToFloat11(float fp32)
145	{
146	const unsigned int float32MantissaMask = `0x7FFFFF`;
147	const unsigned int float32ExponentMask = `0x7F800000`;
148	const unsigned int float32SignMask = `0x80000000`;
149	const unsigned int float32ValueMask = ~float32SignMask;
150	const unsigned int float32ExponentFirstBit = `23`;
151	const unsigned int float32ExponentBias = `127`;
152
153	const unsigned short float11Max = `0x7BF`;
154	const unsigned short float11MantissaMask = `0x3F`;
155	const unsigned short float11ExponentMask = `0x7C0`;
156	const unsigned short float11BitMask = `0x7FF`;
157	const unsigned int float11ExponentBias = `14`;
158
159	const unsigned int float32Maxfloat11 = `0x477E0000`;
160	const unsigned int float32Minfloat11 = `0x38800000`;
161
162	const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32);
163	const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
164
165	unsigned int float32Val = float32Bits & float32ValueMask;
166
167	if((float32Val & float32ExponentMask) == float32ExponentMask)
168	{
169	// INF or NAN
170	if((float32Val & float32MantissaMask) != `0`)
171	{
172	return float11ExponentMask \|
173	(((float32Val >> `17`) \| (float32Val >> `11`) \| (float32Val >> `6`) \| (float32Val)) &
174	float11MantissaMask);
175	}
176	else if(float32Sign)
177	{
178	// -INF is clamped to 0 since float11 is positive only
179	return `0`;
180	}
181	else
182	{
183	return float11ExponentMask;
184	}
185	}
186	else if(float32Sign)
187	{
188	// float11 is positive only, so clamp to zero
189	return `0`;
190	}
191	else if(float32Val > float32Maxfloat11)
192	{
193	// The number is too large to be represented as a float11, set to max
194	return float11Max;
195	}
196	else
197	{
198	if(float32Val < float32Minfloat11)
199	{
200	// The number is too small to be represented as a normalized float11
201	// Convert it to a denormalized value.
202	const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
203	(float32Val >> float32ExponentFirstBit);
204	float32Val =
205	((`1` << float32ExponentFirstBit) \| (float32Val & float32MantissaMask)) >> shift;
206	}
207	else
208	{
209	// Rebias the exponent to represent the value as a normalized float11
210	float32Val += `0xC8000000`;
211	}
212
213	return ((float32Val + `0xFFFF` + ((float32Val >> `17`) & `1`)) >> `17`) & float11BitMask;
214	}
215	}
216
217	inline unsigned short float32ToFloat10(float fp32)
218	{
219	const unsigned int float32MantissaMask = `0x7FFFFF`;
220	const unsigned int float32ExponentMask = `0x7F800000`;
221	const unsigned int float32SignMask = `0x80000000`;
222	const unsigned int float32ValueMask = ~float32SignMask;
223	const unsigned int float32ExponentFirstBit = `23`;
224	const unsigned int float32ExponentBias = `127`;
225
226	const unsigned short float10Max = `0x3DF`;
227	const unsigned short float10MantissaMask = `0x1F`;
228	const unsigned short float10ExponentMask = `0x3E0`;
229	const unsigned short float10BitMask = `0x3FF`;
230	const unsigned int float10ExponentBias = `14`;
231
232	const unsigned int float32Maxfloat10 = `0x477C0000`;
233	const unsigned int float32Minfloat10 = `0x38800000`;
234
235	const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32);
236	const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
237
238	unsigned int float32Val = float32Bits & float32ValueMask;
239
240	if((float32Val & float32ExponentMask) == float32ExponentMask)
241	{
242	// INF or NAN
243	if((float32Val & float32MantissaMask) != `0`)
244	{
245	return float10ExponentMask \|
246	(((float32Val >> `18`) \| (float32Val >> `13`) \| (float32Val >> `3`) \| (float32Val)) &
247	float10MantissaMask);
248	}
249	else if(float32Sign)
250	{
251	// -INF is clamped to 0 since float11 is positive only
252	return `0`;
253	}
254	else
255	{
256	return float10ExponentMask;
257	}
258	}
259	else if(float32Sign)
260	{
261	// float10 is positive only, so clamp to zero
262	return `0`;
263	}
264	else if(float32Val > float32Maxfloat10)
265	{
266	// The number is too large to be represented as a float11, set to max
267	return float10Max;
268	}
269	else
270	{
271	if(float32Val < float32Minfloat10)
272	{
273	// The number is too small to be represented as a normalized float11
274	// Convert it to a denormalized value.
275	const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
276	(float32Val >> float32ExponentFirstBit);
277	float32Val =
278	((`1` << float32ExponentFirstBit) \| (float32Val & float32MantissaMask)) >> shift;
279	}
280	else
281	{
282	// Rebias the exponent to represent the value as a normalized float11
283	float32Val += `0xC8000000`;
284	}
285
286	return ((float32Val + `0x1FFFF` + ((float32Val >> `18`) & `1`)) >> `18`) & float10BitMask;
287	}
288	}
289
290	public:
291	R11G11B10F(float rgb[`3`])
292	{
293	R = float32ToFloat11(rgb[`0`]);
294	G = float32ToFloat11(rgb[`1`]);
295	B = float32ToFloat10(rgb[`2`]);
296	}
297
298	operator unsigned int() const
299	{
300	return *reinterpret_cast<const unsigned int>(this*);
301	}
302
303	void toRGB16F(half rgb[`3`]) const
304	{
305	rgb[`0`] = float11ToFloat16(R);
306	rgb[`1`] = float11ToFloat16(G);
307	rgb[`2`] = float10ToFloat16(B);
308	}
309	};
310	}
311
312	#endif // sw_Half_hpp
313

Browse the source code of engine/third_party/swiftshader/src/System/Half.hpp