math.h source code [Godot/thirdparty/embree/common/math/math.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "../sys/platform.h"
7	#include "../sys/intrinsics.h"
8	#include "constants.h"
9	#include <cmath>
10
11	#if defined(__ARM_NEON)
12	#include "../simd/arm/emulation.h"
13	#else
14	#include <emmintrin.h>
15	#include <xmmintrin.h>
16	#include <immintrin.h>
17	#endif
18
#if defined(__WIN32__)
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
/* Compilers up to _MSC_VER 1700 are given replacements for the C++11
 * floating-point classification functions, built on the CRT helpers
 * _finite() and _isnan(). */
namespace std
{
  /*! true only for +/-infinity; NaN is also "not finite", so it has to be ruled out explicitly */
  __forceinline bool isinf ( const float x ) { return _finite(x) == 0 && _isnan(x) == 0; }

  /*! true if x is a NaN */
  __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }

  /*! true if x is neither infinite nor NaN */
  __forceinline bool isfinite (const float x) { return _finite(x) != 0; }
}
#endif
#endif
29
30	namespace embree
31	{
32	__forceinline bool isvalid ( const float& v ) {
33	return (v > -FLT_LARGE) & (v < +FLT_LARGE);
34	}
35
36	__forceinline int cast_f2i(float f) {
37	union { float f; int i; } v; v.f = f; return v.i;
38	}
39
40	__forceinline float cast_i2f(int i) {
41	union { float f; int i; } v; v.i = i; return v.f;
42	}
43
44	__forceinline int toInt (const float& a) { return int(a); }
45	__forceinline float toFloat(const int& a) { return float(a); }
46
#if defined(__WIN32__)
/*! Windows-only finite(), built on the CRT helper _finite(): true when it reports a non-zero result for x */
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
50
51	__forceinline float sign ( const float x ) { return x<`0`?-`1.0f`:`1.0f`; }
52	__forceinline float sqr ( const float x ) { return x*x; }
53
54	__forceinline float rcp ( const float x )
55	{
56	#if defined(__aarch64__)
57	// Move scalar to vector register and do rcp.
58	__m128 a;
59	a[`0`] = x;
60	float32x4_t reciprocal = vrecpeq_f32(a);
61	reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
62	reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
63	return reciprocal[`0`];
64	#else
65
66	const __m128 a = _mm_set_ss(x);
67
68	#if defined(__AVX512VL__)
69	const __m128 r = _mm_rcp14_ss(_mm_set_ss(`0.0f`),a);
70	#else
71	const __m128 r = _mm_rcp_ss(a);
72	#endif
73
74	#if defined(__AVX2__)
75	return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(`2.0f`))));
76	#else
77	return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(`2.0f`), _mm_mul_ss(r, a))));
78	#endif
79
80	#endif //defined(__aarch64__)
81	}
82
83	__forceinline float signmsk ( const float x ) {
84	#if defined(__aarch64__)
85	// FP and Neon shares same vector register in arm64
86	__m128 a;
87	__m128i b;
88	a[`0`] = x;
89	b[`0`] = `0x80000000`;
90	a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
91	return a[`0`];
92	#else
93	return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(`0x80000000`))));
94	#endif
95	}
96	__forceinline float xorf( const float x, const float y ) {
97	#if defined(__aarch64__)
98	// FP and Neon shares same vector register in arm64
99	__m128 a;
100	__m128 b;
101	a[`0`] = x;
102	b[`0`] = y;
103	a = _mm_xor_ps(a, b);
104	return a[`0`];
105	#else
106	return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
107	#endif
108	}
109	__forceinline float andf( const float x, const unsigned y ) {
110	#if defined(__aarch64__)
111	// FP and Neon shares same vector register in arm64
112	__m128 a;
113	__m128i b;
114	a[`0`] = x;
115	b[`0`] = y;
116	a = _mm_and_ps(a, vreinterpretq_f32_s32(b));
117	return a[`0`];
118	#else
119	return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
120	#endif
121	}
122	__forceinline float rsqrt( const float x )
123	{
124	#if defined(__aarch64__)
125	// FP and Neon shares same vector register in arm64
126	__m128 a;
127	a[`0`] = x;
128	__m128 value = _mm_rsqrt_ps(a);
129	value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
130	value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value));
131	return value[`0`];
132	#else
133
134	const __m128 a = _mm_set_ss(x);
135	#if defined(__AVX512VL__)
136	__m128 r = _mm_rsqrt14_ss(_mm_set_ss(`0.0f`),a);
137	#else
138	__m128 r = _mm_rsqrt_ss(a);
139	#endif
140	const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(`1.5f`), r),
141	_mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-`0.5f`)), r), _mm_mul_ss(r, r)));
142	return _mm_cvtss_f32(c);
143	#endif
144	}
145
146	#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
147	__forceinline float nextafter(float x, float y) { if ((x<y) == (x>`0`)) return x(`1.1f`+float(ulp)); else* return x(`0.9f`-float*(ulp)); }
148	__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
149	__forceinline int roundf(float f) { return (int)(f + `0.5f`); }
150	#else
151	__forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
152	__forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
153	#endif
154
155	__forceinline float abs ( const float x ) { return ::fabsf(x); }
156	__forceinline float acos ( const float x ) { return ::acosf (x); }
157	__forceinline float asin ( const float x ) { return ::asinf (x); }
158	__forceinline float atan ( const float x ) { return ::atanf (x); }
159	__forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); }
160	__forceinline float cos ( const float x ) { return ::cosf (x); }
161	__forceinline float cosh ( const float x ) { return ::coshf (x); }
162	__forceinline float exp ( const float x ) { return ::expf (x); }
163	__forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); }
164	__forceinline float log ( const float x ) { return ::logf (x); }
165	__forceinline float log10( const float x ) { return ::log10f(x); }
166	__forceinline float pow ( const float x, const float y ) { return ::powf (x, y); }
167	__forceinline float sin ( const float x ) { return ::sinf (x); }
168	__forceinline float sinh ( const float x ) { return ::sinhf (x); }
169	__forceinline float sqrt ( const float x ) { return ::sqrtf (x); }
170	__forceinline float tan ( const float x ) { return ::tanf (x); }
171	__forceinline float tanh ( const float x ) { return ::tanhf (x); }
172	__forceinline float floor( const float x ) { return ::floorf (x); }
173	__forceinline float ceil ( const float x ) { return ::ceilf (x); }
174	__forceinline float frac ( const float x ) { return x-floor(x); }
175
176	__forceinline double abs ( const double x ) { return ::fabs(x); }
177	__forceinline double sign ( const double x ) { return x<`0`?-`1.0`:`1.0`; }
178	__forceinline double acos ( const double x ) { return ::acos (x); }
179	__forceinline double asin ( const double x ) { return ::asin (x); }
180	__forceinline double atan ( const double x ) { return ::atan (x); }
181	__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
182	__forceinline double cos ( const double x ) { return ::cos (x); }
183	__forceinline double cosh ( const double x ) { return ::cosh (x); }
184	__forceinline double exp ( const double x ) { return ::exp (x); }
185	__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
186	__forceinline double log ( const double x ) { return ::log (x); }
187	__forceinline double log10( const double x ) { return ::log10(x); }
188	__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
189	__forceinline double rcp ( const double x ) { return `1.0`/x; }
190	__forceinline double rsqrt( const double x ) { return `1.0`/::sqrt(x); }
191	__forceinline double sin ( const double x ) { return ::sin (x); }
192	__forceinline double sinh ( const double x ) { return ::sinh (x); }
193	__forceinline double sqr ( const double x ) { return x*x; }
194	__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
195	__forceinline double tan ( const double x ) { return ::tan (x); }
196	__forceinline double tanh ( const double x ) { return ::tanh (x); }
197	__forceinline double floor( const double x ) { return ::floor (x); }
198	__forceinline double ceil ( const double x ) { return ::ceil (x); }
199
/*! Minimum of two floats. Only defined on aarch64 and on x86 with SSE4.1.
 *  The aarch64 path uses a floating-point minimum (_mm_min_ps). The SSE4.1
 *  path compares the raw bit patterns as signed 32-bit integers
 *  (_mm_min_epi32).
 *  NOTE(review): integer ordering of the bit patterns agrees with float
 *  ordering only when both inputs are non-negative - confirm callers
 *  respect this. */
#if defined(__aarch64__)
__forceinline float mini(float a, float b) {
  // FP and Neon shares same vector register in arm64
  __m128 x;
  __m128 y;
  x[0] = a;
  y[0] = b;
  x = _mm_min_ps(x, y);
  return x[0];
}
#elif defined(__SSE4_1__)
__forceinline float mini(float a, float b) {
  const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
  const __m128i ci = _mm_min_epi32(ai,bi);
  return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
218
/*! Maximum of two floats. Only defined on aarch64 and on x86 with SSE4.1.
 *  The aarch64 path uses a floating-point maximum (_mm_max_ps). The SSE4.1
 *  path compares the raw bit patterns as signed 32-bit integers
 *  (_mm_max_epi32).
 *  NOTE(review): integer ordering of the bit patterns agrees with float
 *  ordering only when both inputs are non-negative - confirm callers
 *  respect this. */
#if defined(__aarch64__)
__forceinline float maxi(float a, float b) {
  // FP and Neon shares same vector register in arm64
  __m128 x;
  __m128 y;
  x[0] = a;
  y[0] = b;
  x = _mm_max_ps(x, y);
  return x[0];
}
#elif defined(__SSE4_1__)
__forceinline float maxi(float a, float b) {
  const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
  const __m128i ci = _mm_max_epi32(ai,bi);
  return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
237
238	template<typename T>
239	__forceinline T twice(const T& a) { return a+a; }
240
241	__forceinline int min(int a, int b) { return a<b ? a:b; }
242	__forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; }
243	__forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; }
244	__forceinline float min(float a, float b) { return a<b ? a:b; }
245	__forceinline double min(double a, double b) { return a<b ? a:b; }
246	#if defined(__64BIT__) \|\| defined(__EMSCRIPTEN__)
247	__forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; }
248	#endif
249	#if defined(__EMSCRIPTEN__)
250	__forceinline long min(long a, long b) { return a<b ? a:b; }
251	#endif
252
253	template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
254	template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
255	template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
256
257	template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
258	template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
259	template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
260
261	__forceinline int max(int a, int b) { return a<b ? b:a; }
262	__forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; }
263	__forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; }
264	__forceinline float max(float a, float b) { return a<b ? b:a; }
265	__forceinline double max(double a, double b) { return a<b ? b:a; }
266	#if defined(__64BIT__) \|\| defined(__EMSCRIPTEN__)
267	__forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; }
268	#endif
269	#if defined(__EMSCRIPTEN__)
270	__forceinline long max(long a, long b) { return a<b ? b:a; }
271	#endif
272
273	template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
274	template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
275	template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
276
277	template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
278	template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
279	template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
280
#if defined(__MACOSX__)
/* min/max overloads for ssize_t, provided on macOS only */
__forceinline ssize_t min(ssize_t a, ssize_t b)
{
  return a<b ? a:b;
}
__forceinline ssize_t max(ssize_t a, ssize_t b)
{
  return a<b ? b:a;
}
#endif
285
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
/*! sincosf for macOS: forwards x and both output pointers to __sincosf */
__forceinline void sincosf(float x, float *sin, float *cos) {
  __sincosf(x,sin,cos);
}
#endif
291
#if defined(__WIN32__) || defined(__FreeBSD__)
/*! sincosf for Windows and FreeBSD: writes sinf(x) to *s and cosf(x) to *c
 *  using two separate library calls */
__forceinline void sincosf(float x, float *s, float *c) {
  *s = sinf(x); *c = cosf(x);
}
#endif
297
298	template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
299	template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
300
301	template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(`1.74532925199432957692e-2f`); }
302	template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(`5.72957795130823208768e1f`); }
303	template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
304	template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
305
306	#if defined(__AVX2__)
307	__forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
308	__forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
309	__forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
310	__forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); }
311
312	#elif defined (__aarch64__) && defined(__clang__)
313	#pragma clang fp contract(fast)
314	__forceinline float madd ( const float a, const float b, const float c) { return a*b + c; }
315	__forceinline float msub ( const float a, const float b, const float c) { return a*b - c; }
316	__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; }
317	__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); }
318	#pragma clang fp contract(on)
319
320	#else
321	__forceinline float madd ( const float a, const float b, const float c) { return a*b+c; }
322	__forceinline float msub ( const float a, const float b, const float c) { return a*b-c; }
323	__forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;}
324	__forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; }
325	#endif
326
327	/! random functions /
328	template<typename T> T random() { return T(`0`); }
329	#if defined(_WIN32)
330	template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << `8`) ^ (int(rand()) << `16`); }
331	template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << `8`) ^ (uint32_t(rand()) << `16`); }
332	#else
333	template<> __forceinline int random() { return int(rand()); }
334	template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << `16`); }
335	#endif
336	template<> __forceinline float random() { return rand()/float(RAND_MAX); }
337	template<> __forceinline double random() { return rand()/double(RAND_MAX); }
338
#if defined(_WIN32)
/*! Windows stand-in for POSIX drand48(), built on rand().
 *  The result lies in the half-open interval [0,1). */
__forceinline double drand48() {
  // divide by RAND_MAX+1 so that rand()==RAND_MAX cannot yield exactly 1.0,
  // which drand48() never returns
  return double(rand())/(double(RAND_MAX) + 1.0);
}

/*! Windows stand-in for POSIX srand48(); forwards the seed to srand() */
__forceinline void srand48(long seed) {
  srand(static_cast<unsigned int>(seed));
}
#endif
348
349	/! selects /
350	__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
351	__forceinline int select(bool s, int t, int f) { return s ? t : f; }
352	__forceinline float select(bool s, float t, float f) { return s ? t : f; }
353
354	__forceinline bool all(bool s) { return s; }
355
356	__forceinline float lerp(const float v0, const float v1, const float t) {
357	return madd(`1.0f`-t,v0,t*v1);
358	}
359
360	template<typename T>
361	__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
362	return madd((`1.0f`-u),madd((`1.0f`-v),T(x0),vT(x2)),umadd((`1.0f`-v),T(x1),v*T(x3)));
363	}
364
365	/! exchange /
366	template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
367
368	/ load/store /
369	template<typename Ty> struct mem;
370
371	template<> struct mem<float> {
372	static __forceinline float load (bool mask, const void* ptr) { return mask ? (float**)ptr : `0.0f`; }
373	static __forceinline float loadu(bool mask, const void* ptr) { return mask ? (float**)ptr : `0.0f`; }
374
375	static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) (float**)ptr = v; }
376	static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) (float**)ptr = v; }
377	};
378
379	/! bit reverse operation /
380	template<class T>
381	__forceinline T bitReverse(const T& vin)
382	{
383	T v = vin;
384	v = ((v >> `1`) & `0x55555555`) \| ((v & `0x55555555`) << `1`);
385	v = ((v >> `2`) & `0x33333333`) \| ((v & `0x33333333`) << `2`);
386	v = ((v >> `4`) & `0x0F0F0F0F`) \| ((v & `0x0F0F0F0F`) << `4`);
387	v = ((v >> `8`) & `0x00FF00FF`) \| ((v & `0x00FF00FF`) << `8`);
388	v = ( v >> `16` ) \| ( v << `16`);
389	return v;
390	}
391
392	/! bit interleave operation /
393	template<class T>
394	__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
395	{
396	T x = xin, y = yin, z = zin;
397	x = (x \| (x << `16`)) & `0x030000FF`;
398	x = (x \| (x << `8`)) & `0x0300F00F`;
399	x = (x \| (x << `4`)) & `0x030C30C3`;
400	x = (x \| (x << `2`)) & `0x09249249`;
401
402	y = (y \| (y << `16`)) & `0x030000FF`;
403	y = (y \| (y << `8`)) & `0x0300F00F`;
404	y = (y \| (y << `4`)) & `0x030C30C3`;
405	y = (y \| (y << `2`)) & `0x09249249`;
406
407	z = (z \| (z << `16`)) & `0x030000FF`;
408	z = (z \| (z << `8`)) & `0x0300F00F`;
409	z = (z \| (z << `4`)) & `0x030C30C3`;
410	z = (z \| (z << `2`)) & `0x09249249`;
411
412	return x \| (y << `1`) \| (z << `2`);
413	}
414
#if defined(__AVX2__) && !defined(__aarch64__)

/*! Specialization for unsigned int used in AVX2 (non-aarch64) builds: each
 *  input is passed to pdep together with a mask that selects every third bit
 *  (x from bit 0, y from bit 1, z from bit 2), and the three results are
 *  ORed together. */
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
  const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
  const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
  const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
  return xx | yy | zz;
}

#endif
427
428	/! bit interleave operation for 64bit data types/
429	template<class T>
430	__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
431	T x = xin & `0x1fffff`;
432	T y = yin & `0x1fffff`;
433	T z = zin & `0x1fffff`;
434
435	x = (x \| x << `32`) & `0x1f00000000ffff`;
436	x = (x \| x << `16`) & `0x1f0000ff0000ff`;
437	x = (x \| x << `8`) & `0x100f00f00f00f00f`;
438	x = (x \| x << `4`) & `0x10c30c30c30c30c3`;
439	x = (x \| x << `2`) & `0x1249249249249249`;
440
441	y = (y \| y << `32`) & `0x1f00000000ffff`;
442	y = (y \| y << `16`) & `0x1f0000ff0000ff`;
443	y = (y \| y << `8`) & `0x100f00f00f00f00f`;
444	y = (y \| y << `4`) & `0x10c30c30c30c30c3`;
445	y = (y \| y << `2`) & `0x1249249249249249`;
446
447	z = (z \| z << `32`) & `0x1f00000000ffff`;
448	z = (z \| z << `16`) & `0x1f0000ff0000ff`;
449	z = (z \| z << `8`) & `0x100f00f00f00f00f`;
450	z = (z \| z << `4`) & `0x10c30c30c30c30c3`;
451	z = (z \| z << `2`) & `0x1249249249249249`;
452
453	return x \| (y << `1`) \| (z << `2`);
454	}
455	}
456

Browse the source code of Godot/thirdparty/embree/common/math/math.h