1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../sys/platform.h" |
7 | #include "../sys/intrinsics.h" |
8 | #include "constants.h" |
9 | #include <cmath> |
10 | |
11 | #if defined(__ARM_NEON) |
12 | #include "../simd/arm/emulation.h" |
13 | #else |
14 | #include <emmintrin.h> |
15 | #include <xmmintrin.h> |
16 | #include <immintrin.h> |
17 | #endif |
18 | |
19 | #if defined(__WIN32__) |
20 | #if defined(_MSC_VER) && (_MSC_VER <= 1700) |
21 | namespace std |
22 | { |
23 | __forceinline bool isinf ( const float x ) { return _finite(x) == 0; } |
24 | __forceinline bool isnan ( const float x ) { return _isnan(x) != 0; } |
25 | __forceinline bool isfinite (const float x) { return _finite(x) != 0; } |
26 | } |
27 | #endif |
28 | #endif |
29 | |
30 | namespace embree |
31 | { |
32 | __forceinline bool isvalid ( const float& v ) { |
33 | return (v > -FLT_LARGE) & (v < +FLT_LARGE); |
34 | } |
35 | |
36 | __forceinline int cast_f2i(float f) { |
37 | union { float f; int i; } v; v.f = f; return v.i; |
38 | } |
39 | |
40 | __forceinline float cast_i2f(int i) { |
41 | union { float f; int i; } v; v.i = i; return v.f; |
42 | } |
43 | |
44 | __forceinline int toInt (const float& a) { return int(a); } |
45 | __forceinline float toFloat(const int& a) { return float(a); } |
46 | |
47 | #if defined(__WIN32__) |
48 | __forceinline bool finite ( const float x ) { return _finite(x) != 0; } |
49 | #endif |
50 | |
51 | __forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; } |
52 | __forceinline float sqr ( const float x ) { return x*x; } |
53 | |
54 | __forceinline float rcp ( const float x ) |
55 | { |
56 | #if defined(__aarch64__) |
57 | // Move scalar to vector register and do rcp. |
58 | __m128 a; |
59 | a[0] = x; |
60 | float32x4_t reciprocal = vrecpeq_f32(a); |
61 | reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); |
62 | reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); |
63 | return reciprocal[0]; |
64 | #else |
65 | |
66 | const __m128 a = _mm_set_ss(x); |
67 | |
68 | #if defined(__AVX512VL__) |
69 | const __m128 r = _mm_rcp14_ss(_mm_set_ss(0.0f),a); |
70 | #else |
71 | const __m128 r = _mm_rcp_ss(a); |
72 | #endif |
73 | |
74 | #if defined(__AVX2__) |
75 | return _mm_cvtss_f32(_mm_mul_ss(r,_mm_fnmadd_ss(r, a, _mm_set_ss(2.0f)))); |
76 | #else |
77 | return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a)))); |
78 | #endif |
79 | |
80 | #endif //defined(__aarch64__) |
81 | } |
82 | |
83 | __forceinline float signmsk ( const float x ) { |
84 | #if defined(__aarch64__) |
85 | // FP and Neon shares same vector register in arm64 |
86 | __m128 a; |
87 | __m128i b; |
88 | a[0] = x; |
89 | b[0] = 0x80000000; |
90 | a = _mm_and_ps(a, vreinterpretq_f32_s32(b)); |
91 | return a[0]; |
92 | #else |
93 | return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); |
94 | #endif |
95 | } |
96 | __forceinline float xorf( const float x, const float y ) { |
97 | #if defined(__aarch64__) |
98 | // FP and Neon shares same vector register in arm64 |
99 | __m128 a; |
100 | __m128 b; |
101 | a[0] = x; |
102 | b[0] = y; |
103 | a = _mm_xor_ps(a, b); |
104 | return a[0]; |
105 | #else |
106 | return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y))); |
107 | #endif |
108 | } |
109 | __forceinline float andf( const float x, const unsigned y ) { |
110 | #if defined(__aarch64__) |
111 | // FP and Neon shares same vector register in arm64 |
112 | __m128 a; |
113 | __m128i b; |
114 | a[0] = x; |
115 | b[0] = y; |
116 | a = _mm_and_ps(a, vreinterpretq_f32_s32(b)); |
117 | return a[0]; |
118 | #else |
119 | return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y)))); |
120 | #endif |
121 | } |
122 | __forceinline float rsqrt( const float x ) |
123 | { |
124 | #if defined(__aarch64__) |
125 | // FP and Neon shares same vector register in arm64 |
126 | __m128 a; |
127 | a[0] = x; |
128 | __m128 value = _mm_rsqrt_ps(a); |
129 | value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value)); |
130 | value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value)); |
131 | return value[0]; |
132 | #else |
133 | |
134 | const __m128 a = _mm_set_ss(x); |
135 | #if defined(__AVX512VL__) |
136 | __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a); |
137 | #else |
138 | __m128 r = _mm_rsqrt_ss(a); |
139 | #endif |
140 | const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), |
141 | _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r))); |
142 | return _mm_cvtss_f32(c); |
143 | #endif |
144 | } |
145 | |
146 | #if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700) |
147 | __forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); } |
148 | __forceinline double nextafter(double x, double y) { return _nextafter(x, y); } |
149 | __forceinline int roundf(float f) { return (int)(f + 0.5f); } |
150 | #else |
151 | __forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); } |
152 | __forceinline double nextafter(double x, double y) { return ::nextafter(x, y); } |
153 | #endif |
154 | |
155 | __forceinline float abs ( const float x ) { return ::fabsf(x); } |
156 | __forceinline float acos ( const float x ) { return ::acosf (x); } |
157 | __forceinline float asin ( const float x ) { return ::asinf (x); } |
158 | __forceinline float atan ( const float x ) { return ::atanf (x); } |
159 | __forceinline float atan2( const float y, const float x ) { return ::atan2f(y, x); } |
160 | __forceinline float cos ( const float x ) { return ::cosf (x); } |
161 | __forceinline float cosh ( const float x ) { return ::coshf (x); } |
162 | __forceinline float exp ( const float x ) { return ::expf (x); } |
163 | __forceinline float fmod ( const float x, const float y ) { return ::fmodf (x, y); } |
164 | __forceinline float log ( const float x ) { return ::logf (x); } |
165 | __forceinline float log10( const float x ) { return ::log10f(x); } |
166 | __forceinline float pow ( const float x, const float y ) { return ::powf (x, y); } |
167 | __forceinline float sin ( const float x ) { return ::sinf (x); } |
168 | __forceinline float sinh ( const float x ) { return ::sinhf (x); } |
169 | __forceinline float sqrt ( const float x ) { return ::sqrtf (x); } |
170 | __forceinline float tan ( const float x ) { return ::tanf (x); } |
171 | __forceinline float tanh ( const float x ) { return ::tanhf (x); } |
172 | __forceinline float floor( const float x ) { return ::floorf (x); } |
173 | __forceinline float ceil ( const float x ) { return ::ceilf (x); } |
174 | __forceinline float frac ( const float x ) { return x-floor(x); } |
175 | |
176 | __forceinline double abs ( const double x ) { return ::fabs(x); } |
177 | __forceinline double sign ( const double x ) { return x<0?-1.0:1.0; } |
178 | __forceinline double acos ( const double x ) { return ::acos (x); } |
179 | __forceinline double asin ( const double x ) { return ::asin (x); } |
180 | __forceinline double atan ( const double x ) { return ::atan (x); } |
181 | __forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); } |
182 | __forceinline double cos ( const double x ) { return ::cos (x); } |
183 | __forceinline double cosh ( const double x ) { return ::cosh (x); } |
184 | __forceinline double exp ( const double x ) { return ::exp (x); } |
185 | __forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); } |
186 | __forceinline double log ( const double x ) { return ::log (x); } |
187 | __forceinline double log10( const double x ) { return ::log10(x); } |
188 | __forceinline double pow ( const double x, const double y ) { return ::pow (x, y); } |
189 | __forceinline double rcp ( const double x ) { return 1.0/x; } |
190 | __forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); } |
191 | __forceinline double sin ( const double x ) { return ::sin (x); } |
192 | __forceinline double sinh ( const double x ) { return ::sinh (x); } |
193 | __forceinline double sqr ( const double x ) { return x*x; } |
194 | __forceinline double sqrt ( const double x ) { return ::sqrt (x); } |
195 | __forceinline double tan ( const double x ) { return ::tan (x); } |
196 | __forceinline double tanh ( const double x ) { return ::tanh (x); } |
197 | __forceinline double floor( const double x ) { return ::floor (x); } |
198 | __forceinline double ceil ( const double x ) { return ::ceil (x); } |
199 | |
200 | #if defined(__aarch64__) |
201 | __forceinline float mini(float a, float b) { |
202 | // FP and Neon shares same vector register in arm64 |
203 | __m128 x; |
204 | __m128 y; |
205 | x[0] = a; |
206 | y[0] = b; |
207 | x = _mm_min_ps(x, y); |
208 | return x[0]; |
209 | } |
210 | #elif defined(__SSE4_1__) |
211 | __forceinline float mini(float a, float b) { |
212 | const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); |
213 | const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); |
214 | const __m128i ci = _mm_min_epi32(ai,bi); |
215 | return _mm_cvtss_f32(_mm_castsi128_ps(ci)); |
216 | } |
217 | #endif |
218 | |
219 | #if defined(__aarch64__) |
220 | __forceinline float maxi(float a, float b) { |
221 | // FP and Neon shares same vector register in arm64 |
222 | __m128 x; |
223 | __m128 y; |
224 | x[0] = a; |
225 | y[0] = b; |
226 | x = _mm_max_ps(x, y); |
227 | return x[0]; |
228 | } |
229 | #elif defined(__SSE4_1__) |
230 | __forceinline float maxi(float a, float b) { |
231 | const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); |
232 | const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); |
233 | const __m128i ci = _mm_max_epi32(ai,bi); |
234 | return _mm_cvtss_f32(_mm_castsi128_ps(ci)); |
235 | } |
236 | #endif |
237 | |
238 | template<typename T> |
239 | __forceinline T twice(const T& a) { return a+a; } |
240 | |
241 | __forceinline int min(int a, int b) { return a<b ? a:b; } |
242 | __forceinline unsigned min(unsigned a, unsigned b) { return a<b ? a:b; } |
243 | __forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; } |
244 | __forceinline float min(float a, float b) { return a<b ? a:b; } |
245 | __forceinline double min(double a, double b) { return a<b ? a:b; } |
246 | #if defined(__64BIT__) || defined(__EMSCRIPTEN__) |
247 | __forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; } |
248 | #endif |
249 | #if defined(__EMSCRIPTEN__) |
250 | __forceinline long min(long a, long b) { return a<b ? a:b; } |
251 | #endif |
252 | |
253 | template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); } |
254 | template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); } |
255 | template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); } |
256 | |
257 | template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); } |
258 | template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); } |
259 | template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); } |
260 | |
261 | __forceinline int max(int a, int b) { return a<b ? b:a; } |
262 | __forceinline unsigned max(unsigned a, unsigned b) { return a<b ? b:a; } |
263 | __forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; } |
264 | __forceinline float max(float a, float b) { return a<b ? b:a; } |
265 | __forceinline double max(double a, double b) { return a<b ? b:a; } |
266 | #if defined(__64BIT__) || defined(__EMSCRIPTEN__) |
267 | __forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; } |
268 | #endif |
269 | #if defined(__EMSCRIPTEN__) |
270 | __forceinline long max(long a, long b) { return a<b ? b:a; } |
271 | #endif |
272 | |
273 | template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); } |
274 | template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); } |
275 | template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); } |
276 | |
277 | template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); } |
278 | template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); } |
279 | template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); } |
280 | |
281 | #if defined(__MACOSX__) |
282 | __forceinline ssize_t min(ssize_t a, ssize_t b) { return a<b ? a:b; } |
283 | __forceinline ssize_t max(ssize_t a, ssize_t b) { return a<b ? b:a; } |
284 | #endif |
285 | |
286 | #if defined(__MACOSX__) && !defined(__INTEL_COMPILER) |
287 | __forceinline void sincosf(float x, float *sin, float *cos) { |
288 | __sincosf(x,sin,cos); |
289 | } |
290 | #endif |
291 | |
292 | #if defined(__WIN32__) || defined(__FreeBSD__) |
293 | __forceinline void sincosf(float x, float *s, float *c) { |
294 | *s = sinf(x); *c = cosf(x); |
295 | } |
296 | #endif |
297 | |
298 | template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); } |
299 | template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); } |
300 | |
301 | template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); } |
302 | template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); } |
303 | template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); } |
304 | template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); } |
305 | |
306 | #if defined(__AVX2__) |
307 | __forceinline float madd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } |
308 | __forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } |
309 | __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } |
310 | __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } |
311 | |
312 | #elif defined (__aarch64__) && defined(__clang__) |
313 | #pragma clang fp contract(fast) |
314 | __forceinline float madd ( const float a, const float b, const float c) { return a*b + c; } |
315 | __forceinline float msub ( const float a, const float b, const float c) { return a*b - c; } |
316 | __forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; } |
317 | __forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); } |
318 | #pragma clang fp contract(on) |
319 | |
320 | #else |
321 | __forceinline float madd ( const float a, const float b, const float c) { return a*b+c; } |
322 | __forceinline float msub ( const float a, const float b, const float c) { return a*b-c; } |
323 | __forceinline float nmadd ( const float a, const float b, const float c) { return -a*b+c;} |
324 | __forceinline float nmsub ( const float a, const float b, const float c) { return -a*b-c; } |
325 | #endif |
326 | |
327 | /*! random functions */ |
328 | template<typename T> T random() { return T(0); } |
329 | #if defined(_WIN32) |
330 | template<> __forceinline int random() { return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16); } |
331 | template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16); } |
332 | #else |
333 | template<> __forceinline int random() { return int(rand()); } |
334 | template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); } |
335 | #endif |
336 | template<> __forceinline float random() { return rand()/float(RAND_MAX); } |
337 | template<> __forceinline double random() { return rand()/double(RAND_MAX); } |
338 | |
339 | #if _WIN32 |
340 | __forceinline double drand48() { |
341 | return double(rand())/double(RAND_MAX); |
342 | } |
343 | |
344 | __forceinline void srand48(long seed) { |
345 | return srand(seed); |
346 | } |
347 | #endif |
348 | |
349 | /*! selects */ |
350 | __forceinline bool select(bool s, bool t , bool f) { return s ? t : f; } |
351 | __forceinline int select(bool s, int t, int f) { return s ? t : f; } |
352 | __forceinline float select(bool s, float t, float f) { return s ? t : f; } |
353 | |
354 | __forceinline bool all(bool s) { return s; } |
355 | |
356 | __forceinline float lerp(const float v0, const float v1, const float t) { |
357 | return madd(1.0f-t,v0,t*v1); |
358 | } |
359 | |
360 | template<typename T> |
361 | __forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) { |
362 | return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3))); |
363 | } |
364 | |
365 | /*! exchange */ |
366 | template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; } |
367 | |
368 | /* load/store */ |
369 | template<typename Ty> struct mem; |
370 | |
371 | template<> struct mem<float> { |
372 | static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } |
373 | static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } |
374 | |
375 | static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } |
376 | static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } |
377 | }; |
378 | |
379 | /*! bit reverse operation */ |
380 | template<class T> |
381 | __forceinline T bitReverse(const T& vin) |
382 | { |
383 | T v = vin; |
384 | v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); |
385 | v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); |
386 | v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); |
387 | v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); |
388 | v = ( v >> 16 ) | ( v << 16); |
389 | return v; |
390 | } |
391 | |
392 | /*! bit interleave operation */ |
393 | template<class T> |
394 | __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin) |
395 | { |
396 | T x = xin, y = yin, z = zin; |
397 | x = (x | (x << 16)) & 0x030000FF; |
398 | x = (x | (x << 8)) & 0x0300F00F; |
399 | x = (x | (x << 4)) & 0x030C30C3; |
400 | x = (x | (x << 2)) & 0x09249249; |
401 | |
402 | y = (y | (y << 16)) & 0x030000FF; |
403 | y = (y | (y << 8)) & 0x0300F00F; |
404 | y = (y | (y << 4)) & 0x030C30C3; |
405 | y = (y | (y << 2)) & 0x09249249; |
406 | |
407 | z = (z | (z << 16)) & 0x030000FF; |
408 | z = (z | (z << 8)) & 0x0300F00F; |
409 | z = (z | (z << 4)) & 0x030C30C3; |
410 | z = (z | (z << 2)) & 0x09249249; |
411 | |
412 | return x | (y << 1) | (z << 2); |
413 | } |
414 | |
415 | #if defined(__AVX2__) && !defined(__aarch64__) |
416 | |
417 | template<> |
418 | __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi) |
419 | { |
420 | const unsigned int xx = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ ); |
421 | const unsigned int yy = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */); |
422 | const unsigned int zz = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */); |
423 | return xx | yy | zz; |
424 | } |
425 | |
426 | #endif |
427 | |
428 | /*! bit interleave operation for 64bit data types*/ |
429 | template<class T> |
430 | __forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){ |
431 | T x = xin & 0x1fffff; |
432 | T y = yin & 0x1fffff; |
433 | T z = zin & 0x1fffff; |
434 | |
435 | x = (x | x << 32) & 0x1f00000000ffff; |
436 | x = (x | x << 16) & 0x1f0000ff0000ff; |
437 | x = (x | x << 8) & 0x100f00f00f00f00f; |
438 | x = (x | x << 4) & 0x10c30c30c30c30c3; |
439 | x = (x | x << 2) & 0x1249249249249249; |
440 | |
441 | y = (y | y << 32) & 0x1f00000000ffff; |
442 | y = (y | y << 16) & 0x1f0000ff0000ff; |
443 | y = (y | y << 8) & 0x100f00f00f00f00f; |
444 | y = (y | y << 4) & 0x10c30c30c30c30c3; |
445 | y = (y | y << 2) & 0x1249249249249249; |
446 | |
447 | z = (z | z << 32) & 0x1f00000000ffff; |
448 | z = (z | z << 16) & 0x1f0000ff0000ff; |
449 | z = (z | z << 8) & 0x100f00f00f00f00f; |
450 | z = (z | z << 4) & 0x10c30c30c30c30c3; |
451 | z = (z | z << 2) & 0x1249249249249249; |
452 | |
453 | return x | (y << 1) | (z << 2); |
454 | } |
455 | } |
456 | |