1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "default.h"
7
8namespace embree
9{
10 /*! An item on the stack holds the node ID and distance of that node. */
11 template<typename T>
12 struct __aligned(16) StackItemT
13 {
14 /*! assert that the xchg function works */
15 static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
16
17 __forceinline StackItemT() {}
18
19 __forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
20
21 /*! use SSE instructions to swap stack items */
22 __forceinline static void xchg(StackItemT& a, StackItemT& b)
23 {
24 const vfloat4 sse_a = vfloat4::load((float*)&a);
25 const vfloat4 sse_b = vfloat4::load((float*)&b);
26 vfloat4::store(&a,sse_b);
27 vfloat4::store(&b,sse_a);
28 }
29
30 /*! Sort 2 stack items. */
31 __forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
32 if (s2.dist < s1.dist) xchg(s2,s1);
33 }
34
35 /*! Sort 3 stack items. */
36 __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
37 {
38 if (s2.dist < s1.dist) xchg(s2,s1);
39 if (s3.dist < s2.dist) xchg(s3,s2);
40 if (s2.dist < s1.dist) xchg(s2,s1);
41 }
42
43 /*! Sort 4 stack items. */
44 __forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
45 {
46 if (s2.dist < s1.dist) xchg(s2,s1);
47 if (s4.dist < s3.dist) xchg(s4,s3);
48 if (s3.dist < s1.dist) xchg(s3,s1);
49 if (s4.dist < s2.dist) xchg(s4,s2);
50 if (s3.dist < s2.dist) xchg(s3,s2);
51 }
52
53 /*! use SSE instructions to swap stack items */
54 __forceinline static void cmp_xchg(vint4& a, vint4& b)
55 {
56#if defined(__AVX512VL__)
57 const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
58#else
59 const vboolf4 mask0(b < a);
60 const vboolf4 mask(shuffle<2,2,2,2>(mask0));
61#endif
62 const vint4 c = select(mask,b,a);
63 const vint4 d = select(mask,a,b);
64 a = c;
65 b = d;
66 }
67
68 /*! Sort 3 stack items. */
69 __forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
70 {
71 cmp_xchg(s2,s1);
72 cmp_xchg(s3,s2);
73 cmp_xchg(s2,s1);
74 }
75
76 /*! Sort 4 stack items. */
77 __forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
78 {
79 cmp_xchg(s2,s1);
80 cmp_xchg(s4,s3);
81 cmp_xchg(s3,s1);
82 cmp_xchg(s4,s2);
83 cmp_xchg(s3,s2);
84 }
85
86
87 /*! Sort N stack items. */
88 __forceinline friend void sort(StackItemT* begin, StackItemT* end)
89 {
90 for (StackItemT* i = begin+1; i != end; ++i)
91 {
92 const vfloat4 item = vfloat4::load((float*)i);
93 const unsigned dist = i->dist;
94 StackItemT* j = i;
95
96 while ((j != begin) && ((j-1)->dist < dist))
97 {
98 vfloat4::store(j, vfloat4::load((float*)(j-1)));
99 --j;
100 }
101
102 vfloat4::store(j, item);
103 }
104 }
105
106 public:
107 T ptr;
108 unsigned dist;
109 };
110
111 /*! An item on the stack holds the node ID and active ray mask. */
112 template<typename T>
113 struct __aligned(8) StackItemMaskT
114 {
115 T ptr;
116 size_t mask;
117 };
118
119 struct __aligned(8) StackItemMaskCoherent
120 {
121 size_t mask;
122 size_t parent;
123 size_t child;
124 };
125}
126