1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "default.h" |
7 | |
8 | namespace embree |
9 | { |
10 | /*! An item on the stack holds the node ID and distance of that node. */ |
11 | template<typename T> |
12 | struct __aligned(16) StackItemT |
13 | { |
14 | /*! assert that the xchg function works */ |
15 | static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed" ); |
16 | |
17 | __forceinline StackItemT() {} |
18 | |
19 | __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {} |
20 | |
21 | /*! use SSE instructions to swap stack items */ |
22 | __forceinline static void xchg(StackItemT& a, StackItemT& b) |
23 | { |
24 | const vfloat4 sse_a = vfloat4::load((float*)&a); |
25 | const vfloat4 sse_b = vfloat4::load((float*)&b); |
26 | vfloat4::store(&a,sse_b); |
27 | vfloat4::store(&b,sse_a); |
28 | } |
29 | |
30 | /*! Sort 2 stack items. */ |
31 | __forceinline friend void sort(StackItemT& s1, StackItemT& s2) { |
32 | if (s2.dist < s1.dist) xchg(s2,s1); |
33 | } |
34 | |
35 | /*! Sort 3 stack items. */ |
36 | __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3) |
37 | { |
38 | if (s2.dist < s1.dist) xchg(s2,s1); |
39 | if (s3.dist < s2.dist) xchg(s3,s2); |
40 | if (s2.dist < s1.dist) xchg(s2,s1); |
41 | } |
42 | |
43 | /*! Sort 4 stack items. */ |
44 | __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4) |
45 | { |
46 | if (s2.dist < s1.dist) xchg(s2,s1); |
47 | if (s4.dist < s3.dist) xchg(s4,s3); |
48 | if (s3.dist < s1.dist) xchg(s3,s1); |
49 | if (s4.dist < s2.dist) xchg(s4,s2); |
50 | if (s3.dist < s2.dist) xchg(s3,s2); |
51 | } |
52 | |
53 | /*! use SSE instructions to swap stack items */ |
54 | __forceinline static void cmp_xchg(vint4& a, vint4& b) |
55 | { |
56 | #if defined(__AVX512VL__) |
57 | const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a)); |
58 | #else |
59 | const vboolf4 mask0(b < a); |
60 | const vboolf4 mask(shuffle<2,2,2,2>(mask0)); |
61 | #endif |
62 | const vint4 c = select(mask,b,a); |
63 | const vint4 d = select(mask,a,b); |
64 | a = c; |
65 | b = d; |
66 | } |
67 | |
68 | /*! Sort 3 stack items. */ |
69 | __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3) |
70 | { |
71 | cmp_xchg(s2,s1); |
72 | cmp_xchg(s3,s2); |
73 | cmp_xchg(s2,s1); |
74 | } |
75 | |
76 | /*! Sort 4 stack items. */ |
77 | __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4) |
78 | { |
79 | cmp_xchg(s2,s1); |
80 | cmp_xchg(s4,s3); |
81 | cmp_xchg(s3,s1); |
82 | cmp_xchg(s4,s2); |
83 | cmp_xchg(s3,s2); |
84 | } |
85 | |
86 | |
87 | /*! Sort N stack items. */ |
88 | __forceinline friend void sort(StackItemT* begin, StackItemT* end) |
89 | { |
90 | for (StackItemT* i = begin+1; i != end; ++i) |
91 | { |
92 | const vfloat4 item = vfloat4::load((float*)i); |
93 | const unsigned dist = i->dist; |
94 | StackItemT* j = i; |
95 | |
96 | while ((j != begin) && ((j-1)->dist < dist)) |
97 | { |
98 | vfloat4::store(j, vfloat4::load((float*)(j-1))); |
99 | --j; |
100 | } |
101 | |
102 | vfloat4::store(j, item); |
103 | } |
104 | } |
105 | |
106 | public: |
107 | T ptr; |
108 | unsigned dist; |
109 | }; |
110 | |
/*! An item on the stack holds the node ID and active ray mask.
 *  The struct is declared with 8-byte alignment. */
template<typename T>
struct __aligned(8) StackItemMaskT
{
  T ptr;        //!< node ID
  size_t mask;  //!< active ray mask, stored as a size_t
};
118 | |
/*! Stack item made of three size_t fields (mask, parent, child), declared
 *  with 8-byte alignment.
 *  NOTE(review): judging by the names this is presumably the stack entry of a
 *  coherent traversal, with mask being a ray mask and parent/child referring
 *  to nodes - the users are not visible here, confirm against them. */
struct __aligned(8) StackItemMaskCoherent
{
  size_t mask;    //!< presumably the active ray mask - TODO confirm
  size_t parent;  //!< presumably identifies the parent node - TODO confirm
  size_t child;   //!< presumably identifies the child node - TODO confirm
};
125 | } |
126 | |