Half.h source code [NanoGUI/ext/eigen/Eigen/src/Core/arch/CUDA/Half.h]

1	// This file is part of Eigen, a lightweight C++ template library
2	// for linear algebra.
3	//
4	// This Source Code Form is subject to the terms of the Mozilla
5	// Public License v. 2.0. If a copy of the MPL was not distributed
6	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
7	//
8	// The conversion routines are Copyright (c) Fabian Giesen, 2016.
9	// The original license follows:
10	//
11	// Copyright (c) Fabian Giesen, 2016
12	// All rights reserved.
13	// Redistribution and use in source and binary forms, with or without
14	// modification, are permitted.
15	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19	// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// type Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in float32 for CPUs, except for simple parameter conversions, I/O
// to disk and the like), but fast on GPUs.
34
35
36	#ifndef EIGEN_HALF_CUDA_H
37	#define EIGEN_HALF_CUDA_H
38
// EIGEN_EXPLICIT_CAST(tgt_type) expands to the declarator of a conversion
// operator to tgt_type. When the compiler reports a language level newer than
// C++98 (__cplusplus > 199711L) the operator is declared explicit; otherwise
// the keyword is left out, so the conversion is implicit on such compilers.
#if __cplusplus > 199711L
#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
#else
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
#endif
44
45
46	namespace Eigen {
47
48	struct half;
49
50	namespace half_impl {
51
52	#if !defined(EIGEN_HAS_CUDA_FP16)
53	// Make our own __half_raw definition that is similar to CUDA's.
54	struct __half_raw {
55	EIGEN_DEVICE_FUNC __half_raw() : x(`0`) {}
56	explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
57	unsigned short x;
58	};
59	#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
60	// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
61	typedef __half __half_raw;
62	#endif
63
64	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
65	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
66	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
67
68	struct half_base : public __half_raw {
69	EIGEN_DEVICE_FUNC half_base() {}
70	EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw (h) {}
71	EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw (h) {}
72	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
73	EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw((__half_raw)&h) {}
74	#endif
75	};
76
77	} // namespace half_impl
78
79	// Class definition.
80	struct half : public half_impl::half_base {
81	#if !defined(EIGEN_HAS_CUDA_FP16) \|\| (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
82	typedef half_impl::__half_raw __half_raw;
83	#endif
84
85	EIGEN_DEVICE_FUNC half() {}
86
87	EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base (h) {}
88	EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base (h) {}
89	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
90	EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
91	#endif
92
93	explicit EIGEN_DEVICE_FUNC half(bool b)
94	: half_impl::half_base (half_impl::raw_uint16_to_half(b ? `0x3c00` : `0`)) {}
95	template<class T>
96	explicit EIGEN_DEVICE_FUNC half(const T& val)
97	: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
98	explicit EIGEN_DEVICE_FUNC half(float f)
99	: half_impl::half_base (half_impl::float_to_half_rtne(f)) {}
100
101	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
102	// +0.0 and -0.0 become false, everything else becomes true.
103	return (x & `0x7fff`) != `0`;
104	}
105	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
106	return static_cast<signed char>(half_impl::half_to_float(*this));
107	}
108	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
109	return static_cast<unsigned char>(half_impl::half_to_float(*this));
110	}
111	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
112	return static_cast<short>(half_impl::half_to_float(*this));
113	}
114	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
115	return static_cast<unsigned short>(half_impl::half_to_float(*this));
116	}
117	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
118	return static_cast<int>(half_impl::half_to_float(*this));
119	}
120	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
121	return static_cast<unsigned int>(half_impl::half_to_float(*this));
122	}
123	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
124	return static_cast<long>(half_impl::half_to_float(*this));
125	}
126	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
127	return static_cast<unsigned long>(half_impl::half_to_float(*this));
128	}
129	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
130	return static_cast<long long>(half_impl::half_to_float(*this));
131	}
132	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
133	return static_cast<unsigned long long>(half_to_float(*this));
134	}
135	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
136	return half_impl::half_to_float(*this);
137	}
138	EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
139	return static_cast<double>(half_impl::half_to_float(*this));
140	}
141
142	EIGEN_DEVICE_FUNC half& operator=(const half& other) {
143	x = other.x;
144	return *this;
145	}
146	};
147
148	} // end namespace Eigen
149
150	namespace std {
151	template<>
152	struct numeric_limits<Eigen::half> {
153	static const bool is_specialized = true;
154	static const bool is_signed = true;
155	static const bool is_integer = false;
156	static const bool is_exact = false;
157	static const bool has_infinity = true;
158	static const bool has_quiet_NaN = true;
159	static const bool has_signaling_NaN = true;
160	static const float_denorm_style has_denorm = denorm_present;
161	static const bool has_denorm_loss = false;
162	static const std::float_round_style round_style = std::round_to_nearest;
163	static const bool is_iec559 = false;
164	static const bool is_bounded = false;
165	static const bool is_modulo = false;
166	static const int digits = `11`;
167	static const int digits10 = `3`; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
168	static const int max_digits10 = `5`; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
169	static const int radix = `2`;
170	static const int min_exponent = -`13`;
171	static const int min_exponent10 = -`4`;
172	static const int max_exponent = `16`;
173	static const int max_exponent10 = `4`;
174	static const bool traps = true;
175	static const bool tinyness_before = false;
176
177	static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(`0x400`); }
178	static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(`0xfbff`); }
179	static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(`0x7bff`); }
180	static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(`0x0800`); }
181	static Eigen::half round_error() { return Eigen::half (`0.5`); }
182	static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(`0x7c00`); }
183	static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(`0x7e00`); }
184	static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(`0x7e00`); }
185	static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(`0x1`); }
186	};
187
188	// If std::numeric_limits<T> is specialized, should also specialize
189	// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
190	// std::numeric_limits<const volatile T>
191	// https://stackoverflow.com/a/16519653/
192	template<>
193	struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
194	template<>
195	struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
196	template<>
197	struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
198	} // end namespace std
199
200	namespace Eigen {
201
202	namespace half_impl {
203
204	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
205
// Intrinsics for native fp16 support. Note that on current hardware,
// these are no faster than float32 arithmetic (you need to use the half2
// versions to get the ALU speed increased), but you do save the
// conversion steps back and forth.
210
211	EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
212	return __hadd(a, b);
213	}
214	EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
215	return __hmul(a, b);
216	}
217	EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
218	return __hsub(a, b);
219	}
220	EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
221	float num = __half2float(a);
222	float denom = __half2float(b);
223	return __float2half(num / denom);
224	}
225	EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
226	return __hneg(a);
227	}
228	EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
229	a = a + b;
230	return a;
231	}
232	EIGEN_STRONG_INLINE __device__ half& operator = (half& a, const* half& b) {
233	a = a * b;
234	return a;
235	}
236	EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
237	a = a - b;
238	return a;
239	}
240	EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
241	a = a / b;
242	return a;
243	}
244	EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
245	return __heq(a, b);
246	}
247	EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
248	return __hne(a, b);
249	}
250	EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
251	return __hlt(a, b);
252	}
253	EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
254	return __hle(a, b);
255	}
256	EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
257	return __hgt(a, b);
258	}
259	EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
260	return __hge(a, b);
261	}
262
263	#else // Emulate support for half floats
264
// Definitions for CPUs and older CUDA, mostly working through conversion
// to/from float.
267
268	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
269	return half (float(a) + float(b));
270	}
271	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
272	return half (float(a) * float(b));
273	}
274	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
275	return half (float(a) - float(b));
276	}
277	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
278	return half (float(a) / float(b));
279	}
280	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
281	half result;
282	result.x = a.x ^ `0x8000`;
283	return result;
284	}
285	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
286	a = half (float(a) + float(b));
287	return a;
288	}
289	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator = (half& a, const* half& b) {
290	a = half (float(a) * float(b));
291	return a;
292	}
293	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
294	a = half (float(a) - float(b));
295	return a;
296	}
297	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
298	a = half (float(a) / float(b));
299	return a;
300	}
301	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
302	return numext::equal_strict(float(a),float(b));
303	}
304	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
305	return numext::not_equal_strict(float(a), float(b));
306	}
307	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
308	return float(a) < float(b);
309	}
310	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
311	return float(a) <= float(b);
312	}
313	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
314	return float(a) > float(b);
315	}
316	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
317	return float(a) >= float(b);
318	}
319
320	#endif // Emulate support for half floats
321
322	// Division by an index. Do it in full float precision to avoid accuracy
323	// issues in converting the denominator to half.
324	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
325	return half (static_cast<float>(a) / static_cast<float>(b));
326	}
327
328	// Conversion routines, including fallbacks for the host or older CUDA.
329	// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
330	// these in hardware. If we need more performance on older/other CPUs, they are
331	// also possible to vectorize directly.
332
333	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
334	__half_raw h;
335	h.x = x;
336	return h;
337	}
338
// Gives the software conversion routines access to the bit pattern of a float:
// u and f share the same storage.
union float32_bits {
  unsigned int u;  // the value viewed as an unsigned integer bit pattern
  float f;         // the value viewed as a float
};
343
344	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
345	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
346	__half tmp_ff = __float2half(ff);
347	return (__half_raw)&tmp_ff;
348
349	#elif defined(EIGEN_HAS_FP16_C)
350	__half_raw h;
351	h.x = _cvtss_sh(ff, `0`);
352	return h;
353
354	#else
355	float32_bits f; f.f = ff;
356
357	const float32_bits f32infty = { `255` << `23` };
358	const float32_bits f16max = { (`127` + `16`) << `23` };
359	const float32_bits denorm_magic = { ((`127` - `15`) + (`23` - `10`) + `1`) << `23` };
360	unsigned int sign_mask = `0x80000000u`;
361	__half_raw o;
362	o.x = static_cast<unsigned short>(`0x0u`);
363
364	unsigned int sign = f.u & sign_mask;
365	f.u ^= sign;
366
367	// NOTE all the integer compares in this function can be safely
368	// compiled into signed compares since all operands are below
369	// 0x80000000. Important if you want fast straight SSE2 code
370	// (since there's no unsigned PCMPGTD).
371
372	if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
373	o.x = (f.u > f32infty.u) ? `0x7e00` : `0x7c00`; // NaN->qNaN and Inf->Inf
374	} else { // (De)normalized number or zero
375	if (f.u < (`113` << `23`)) { // resulting FP16 is subnormal or zero
376	// use a magic value to align our 10 mantissa bits at the bottom of
377	// the float. as long as FP addition is round-to-nearest-even this
378	// just works.
379	f.f += denorm_magic.f;
380
381	// and one integer subtract of the bias later, we have our final float!
382	o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
383	} else {
384	unsigned int mant_odd = (f.u >> `13`) & `1`; // resulting mantissa is odd
385
386	// update exponent, rounding bias part 1
387	f.u += ((unsigned int)(`15` - `127`) << `23`) + `0xfff`;
388	// rounding bias part 2
389	f.u += mant_odd;
390	// take the bits!
391	o.x = static_cast<unsigned short>(f.u >> `13`);
392	}
393	}
394
395	o.x \|= static_cast<unsigned short>(sign >> `16`);
396	return o;
397	#endif
398	}
399
400	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
401	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
402	return __half2float(h);
403
404	#elif defined(EIGEN_HAS_FP16_C)
405	return _cvtsh_ss(h.x);
406
407	#else
408	const float32_bits magic = { `113` << `23` };
409	const unsigned int shifted_exp = `0x7c00` << `13`; // exponent mask after shift
410	float32_bits o;
411
412	o.u = (h.x & `0x7fff`) << `13`; // exponent/mantissa bits
413	unsigned int exp = shifted_exp & o.u; // just the exponent
414	o.u += (`127` - `15`) << `23`; // exponent adjust
415
416	// handle exponent special cases
417	if (exp == shifted_exp) { // Inf/NaN?
418	o.u += (`128` - `16`) << `23`; // extra exp adjust
419	} else if (exp == `0`) { // Zero/Denormal?
420	o.u += `1` << `23`; // extra exp adjust
421	o.f -= magic.f; // renormalize
422	}
423
424	o.u \|= (h.x & `0x8000`) << `16`; // sign bit
425	return o.f;
426	#endif
427	}
428
429	// --- standard functions ---
430
431	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
432	return (a.x & `0x7fff`) == `0x7c00`;
433	}
434	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
435	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
436	return __hisnan(a);
437	#else
438	return (a.x & `0x7fff`) > `0x7c00`;
439	#endif
440	}
441	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
442	return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
443	}
444
445	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
446	half result;
447	result.x = a.x & `0x7FFF`;
448	return result;
449	}
450	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
451	#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
452	return half(hexp(a));
453	#else
454	return half (::expf(float(a)));
455	#endif
456	}
457	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
458	#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
459	return half(::hlog(a));
460	#else
461	return half (::logf(float(a)));
462	#endif
463	}
464	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
465	return half (numext::log1p(float(a)));
466	}
467	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
468	return half (::log10f(float(a)));
469	}
470	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
471	#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
472	return half(hsqrt(a));
473	#else
474	return half (::sqrtf(float(a)));
475	#endif
476	}
477	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
478	return half (::powf(float(a), float(b)));
479	}
480	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
481	return half (::sinf(float(a)));
482	}
483	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
484	return half (::cosf(float(a)));
485	}
486	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
487	return half (::tanf(float(a)));
488	}
489	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
490	return half (::tanhf(float(a)));
491	}
492	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
493	#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
494	return half(hfloor(a));
495	#else
496	return half (::floorf(float(a)));
497	#endif
498	}
499	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
500	#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
501	return half(hceil(a));
502	#else
503	return half (::ceilf(float(a)));
504	#endif
505	}
506
507	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
508	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
509	return __hlt(b, a) ? b : a;
510	#else
511	const float f1 = static_cast<float>(a);
512	const float f2 = static_cast<float>(b);
513	return f2 < f1 ? b : a;
514	#endif
515	}
516	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
517	#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
518	return __hlt(a, b) ? b : a;
519	#else
520	const float f1 = static_cast<float>(a);
521	const float f2 = static_cast<float>(b);
522	return f1 < f2 ? b : a;
523	#endif
524	}
525
526	EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
527	os << static_cast<float>(v);
528	return os;
529	}
530
531	} // end namespace half_impl
532
533	// import Eigen::half_impl::half into Eigen namespace
534	// using half_impl::half;
535
536	namespace internal {
537
538	template<>
539	struct random_default_impl<half, false, false>
540	{
541	static inline half run(const half& x, const half& y)
542	{
543	return x + (y -x) * half (float(std::rand()) / float(RAND_MAX));
544	}
545	static inline half run()
546	{
547	return run(half (-`1.f`), half (`1.f`));
548	}
549	};
550
551	template<> struct is_arithmetic<half> { enum { value = true }; };
552
553	} // end namespace internal
554
555	template<> struct NumTraits<Eigen::half>
556	: GenericNumTraits<Eigen::half>
557	{
558	enum {
559	IsSigned = true,
560	IsInteger = false,
561	IsComplex = false,
562	RequireInitialization = false
563	};
564
565	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
566	return half_impl::raw_uint16_to_half(`0x0800`);
567	}
568	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half (`1e-2f`); }
569	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
570	return half_impl::raw_uint16_to_half(`0x7bff`);
571	}
572	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
573	return half_impl::raw_uint16_to_half(`0xfbff`);
574	}
575	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
576	return half_impl::raw_uint16_to_half(`0x7c00`);
577	}
578	EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
579	return half_impl::raw_uint16_to_half(`0x7c01`);
580	}
581	};
582
583	} // end namespace Eigen
584
585	// C-like standard mathematical functions and trancendentals.
586	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
587	Eigen::half result;
588	result.x = a.x & `0x7FFF`;
589	return result;
590	}
591	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
592	return Eigen::half (::expf(float(a)));
593	}
594	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
595	#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
596	return Eigen::half(::hlog(a));
597	#else
598	return Eigen::half (::logf(float(a)));
599	#endif
600	}
601	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
602	return Eigen::half (::sqrtf(float(a)));
603	}
604	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
605	return Eigen::half (::powf(float(a), float(b)));
606	}
607	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
608	return Eigen::half (::floorf(float(a)));
609	}
610	EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
611	return Eigen::half (::ceilf(float(a)));
612	}
613
614	namespace std {
615
616	#if __cplusplus > 199711L
617	template <>
618	struct hash<Eigen::half> {
619	EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
620	return static_cast<std::size_t>(a.x);
621	}
622	};
623	#endif
624
625	} // end namespace std
626
627
// Add the missing shfl_xor intrinsic
//
// Overload for Eigen::half that converts the value to float, performs the
// float shuffle, and converts the result back to half. For
// EIGEN_CUDACC_VER >= 90000 the __shfl_xor_sync form is used with the mask
// 0xFFFFFFFF.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
  #if EIGEN_CUDACC_VER < 90000
  return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
  #else
  return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
  #endif
}
#endif
638
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
//
// The pointer is reinterpreted as a pointer to unsigned short, the raw bits
// are loaded through the unsigned short overload of __ldg, and the bits are
// wrapped back into a half without conversion.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
  return Eigen::half_impl::raw_uint16_to_half(
           __ldg(reinterpret_cast<const unsigned short*>(ptr)));
}
#endif
646
647
// When compiling CUDA device code, specialize the numext classification
// functions for Eigen::half so that they forward to the half_impl versions.
#if defined(EIGEN_CUDA_ARCH)
namespace Eigen {
namespace numext {

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isnan)(const Eigen::half& h) {
  return (half_impl::isnan)(h);
}

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isinf)(const Eigen::half& h) {
  return (half_impl::isinf)(h);
}

template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool (isfinite)(const Eigen::half& h) {
  return (half_impl::isfinite)(h);
}

} // namespace numext
} // namespace Eigen
#endif
673
674	#endif // EIGEN_HALF_CUDA_H
675

Browse the source code of NanoGUI/ext/eigen/Eigen/src/Core/arch/CUDA/Half.h