1 | /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_DETAIL_NULL_SHUFFLE_H |
9 | #define LIBSIMDPP_DETAIL_NULL_SHUFFLE_H |
10 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC |
11 | |
12 | #ifndef LIBSIMDPP_SIMD_H |
13 | #error "This file must be included through simd.h" |
14 | #endif |
15 | |
16 | #include <simdpp/types.h> |
17 | #include <simdpp/core/bit_and.h> |
18 | #include <simdpp/core/bit_andnot.h> |
19 | #include <simdpp/core/bit_or.h> |
20 | |
21 | namespace simdpp { |
22 | namespace SIMDPP_ARCH_NAMESPACE { |
23 | namespace detail { |
24 | namespace null { |
25 | |
/** Moves the elements of @a a towards index 0 by @a count positions.

    Element i of the result is a.el(i + count) for the first
    V::length - count positions; the remaining @a count positions at the
    end are set to zero. The unsigned bound V::length - count assumes
    count <= V::length.
*/
template<unsigned count, class V> SIMDPP_INL
V move_n_l(const V& a)
{
    V result;
    const unsigned kept = V::length - count;
    unsigned dst = 0;
    // Phase 1: elements that survive the move.
    for (; dst < kept; ++dst) {
        result.el(dst) = a.el(dst + count);
    }
    // Phase 2: vacated tail positions.
    for (; dst < V::length; ++dst) {
        result.el(dst) = 0;
    }
    return result;
}
38 | |
/** Moves the elements of @a a away from index 0 by @a count positions.

    The first @a count elements of the result are set to zero; element
    i + count of the result is a.el(i) for i in [0, V::length - count).
    The unsigned bound V::length - count assumes count <= V::length.
*/
template<unsigned count, class V> SIMDPP_INL
V move_n_r(const V& a)
{
    V result;
    unsigned dst = 0;
    // Leading positions have no source element and become zero.
    while (dst < count) {
        result.el(dst) = 0;
        ++dst;
    }
    // dst now equals count and stays count ahead of src.
    for (unsigned src = 0; src < V::length - count; ++src, ++dst) {
        result.el(dst) = a.el(src);
    }
    return result;
}
51 | |
/** Interleaves the lower halves of @a a and @a b.

    For each i in [0, V::length/2) the result holds a.el(i) at index 2*i
    and b.el(i) at index 2*i + 1.
*/
template<class V> SIMDPP_INL
V zip_lo_impl(const V& a, const V& b)
{
    V result;
    const unsigned pairs = V::length / 2;
    for (unsigned src = 0, out = 0; src < pairs; ++src, out += 2) {
        result.el(out) = a.el(src);
        result.el(out + 1) = b.el(src);
    }
    return result;
}
62 | |
/** Interleaves the upper halves of @a a and @a b.

    With half = V::length/2, for each i in [0, half) the result holds
    a.el(half + i) at index 2*i and b.el(half + i) at index 2*i + 1.
*/
template<class V> SIMDPP_INL
V zip_hi_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length / 2;
    // src walks the upper half of the inputs, out walks the result in pairs.
    for (unsigned src = half, out = 0; src < 2 * half; ++src, out += 2) {
        result.el(out) = a.el(src);
        result.el(out + 1) = b.el(src);
    }
    return result;
}
74 | |
75 | |
/** Gathers the even-indexed elements of @a a and @a b.

    With half = V::length/2, the lower half of the result receives
    a.el(0), a.el(2), ... and the upper half receives b.el(0), b.el(2), ...
*/
template<class V> SIMDPP_INL
V unzip_lo_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length / 2;
    for (unsigned dst = 0, src = 0; dst < half; ++dst, src += 2) {
        result.el(dst) = a.el(src);
        result.el(half + dst) = b.el(src);
    }
    return result;
}
87 | |
/** Gathers the odd-indexed elements of @a a and @a b.

    With half = V::length/2, the lower half of the result receives
    a.el(1), a.el(3), ... and the upper half receives b.el(1), b.el(3), ...
*/
template<class V> SIMDPP_INL
V unzip_hi_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length / 2;
    // src starts at 1 so that only odd source positions are visited.
    for (unsigned dst = 0, src = 1; dst < half; ++dst, src += 2) {
        result.el(dst) = a.el(src);
        result.el(half + dst) = b.el(src);
    }
    return result;
}
99 | |
// Named entry points for zipping. Every variant forwards its arguments
// unchanged to the generic zip_lo_impl / zip_hi_impl, so the numeric suffix
// does not alter behaviour here (presumably it names the element count of
// the vector type the caller works with -- TODO confirm against callers).

template<class V> SIMDPP_INL
V zip16_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip8_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip4_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip2_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip16_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip8_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip4_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip2_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}
108 | |
// Named entry points for unzipping. Every variant forwards its arguments
// unchanged to the generic unzip_lo_impl / unzip_hi_impl, so the numeric
// suffix does not alter behaviour here (presumably it names the element
// count of the vector type the caller works with -- TODO confirm against
// callers).

template<class V> SIMDPP_INL
V unzip16_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip8_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip4_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip2_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip16_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip8_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip4_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip2_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}
117 | |
/** Broadcasts one element of @a v to every position of the result.

    Every element of the returned vector equals v.el(pos).
*/
template<unsigned pos, class V> SIMDPP_INL
V splat(const V& v)
{
    V result;
    unsigned dst = 0;
    while (dst < V::length) {
        result.el(dst) = v.el(pos);
        ++dst;
    }
    return result;
}
127 | |
/** Extracts V::length consecutive elements from the concatenation of
    @a lo and @a hi, starting @a shift elements into @a lo.

    The first V::length - shift elements of the result are
    lo.el(shift), lo.el(shift + 1), ...; the remaining @a shift elements
    are hi.el(0), hi.el(1), ...

    The element count is taken from V::length, like the other helpers in
    this file, instead of a fixed 16. For 16-element vectors the result is
    unchanged; for any other element count the fixed bound would index
    outside the vectors. The unsigned bound V::length - shift assumes
    shift <= V::length.
*/
template<unsigned shift, class V> SIMDPP_INL
V align(const V& lo, const V& hi)
{
    V r;
    const unsigned len = V::length;
    const unsigned from_lo = len - shift; // number of elements taken from lo
    for (unsigned i = 0; i < from_lo; i++) {
        r.el(i) = lo.el(i + shift);
    }
    for (unsigned i = from_lo; i < len; i++) {
        // Positions past from_lo continue with the start of hi.
        r.el(i) = hi.el(i - from_lo);
    }
    return r;
}
140 | |
141 | template<class V> SIMDPP_INL |
142 | V blend(const V& on, const V& off, const V& mask) |
143 | { |
144 | V r; |
145 | using E = typename V::element_type; |
146 | using U = typename V::uint_element_type; |
147 | for (unsigned i = 0; i < V::length; i++) { |
148 | U on1 = bit_cast<U, E>(on.el(i)); |
149 | U off1 = bit_cast<U, E>(off.el(i)); |
150 | U mask1 = bit_cast<U, E>(mask.el(i)); |
151 | r.el(i) = bit_cast<E, U>((on1 & mask1) | (off1 & ~mask1)); |
152 | } |
153 | return r; |
154 | } |
155 | |
/** Element-wise select driven by a mask: position i of the result is
    on.el(i) when mask.el(i) is true and off.el(i) otherwise.

    The primary template processes the first L elements with a loop. The
    specializations for L equal to 1, 2 and 4 perform the same selection
    written out element by element.
*/
template<unsigned L> struct blend_mask_impl {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        for (unsigned idx = 0; idx != L; ++idx) {
            if (mask.el(idx)) {
                result.el(idx) = on.el(idx);
            } else {
                result.el(idx) = off.el(idx);
            }
        }
        return result;
    }
};

// Single-element case.
template<> struct blend_mask_impl<1> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = mask.el(0) ? on.el(0) : off.el(0);
        return result;
    }
};

// Two-element case.
template<> struct blend_mask_impl<2> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = mask.el(0) ? on.el(0) : off.el(0);
        result.el(1) = mask.el(1) ? on.el(1) : off.el(1);
        return result;
    }
};

// Four-element case.
template<> struct blend_mask_impl<4> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = mask.el(0) ? on.el(0) : off.el(0);
        result.el(1) = mask.el(1) ? on.el(1) : off.el(1);
        result.el(2) = mask.el(2) ? on.el(2) : off.el(2);
        result.el(3) = mask.el(3) ? on.el(3) : off.el(3);
        return result;
    }
};
199 | |
200 | template<class V, class M> SIMDPP_INL |
201 | V blend_mask(const V& on, const V& off, const M& mask) |
202 | { |
203 | return blend_mask_impl<V::length>::run(on, off, mask); |
204 | } |
205 | |
/** Permutes elements within each consecutive group of two.

    For every group starting at an even index g, the result holds
    a.el(g + s0) at g and a.el(g + s1) at g + 1.
*/
template<unsigned s0, unsigned s1, class V> SIMDPP_INL
V permute(const V& a)
{
    const unsigned sel[2] = { s0, s1 };
    V result;
    for (unsigned group = 0; group < V::length; group += 2) {
        for (unsigned k = 0; k < 2; ++k) {
            result.el(group + k) = a.el(group + sel[k]);
        }
    }
    return result;
}
216 | |
/** Permutes elements within each consecutive group of four.

    For every group starting at index g (a multiple of four), the result
    holds a.el(g + s0), a.el(g + s1), a.el(g + s2) and a.el(g + s3) at
    positions g, g + 1, g + 2 and g + 3 respectively.
*/
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, class V> SIMDPP_INL
V permute(const V& a)
{
    const unsigned sel[4] = { s0, s1, s2, s3 };
    V result;
    for (unsigned group = 0; group < V::length; group += 4) {
        for (unsigned k = 0; k < 4; ++k) {
            result.el(group + k) = a.el(group + sel[k]);
        }
    }
    return result;
}
229 | |
/** Builds each group of two elements from one element of @a a and one
    element of @a b.

    For every group starting at an even index g, the result holds
    a.el(g + s0) at g and b.el(g + s1) at g + 1.
*/
template<unsigned s0, unsigned s1, class V> SIMDPP_INL
V shuffle1(const V& a, const V& b)
{
    V result;
    unsigned group = 0;
    while (group < V::length) {
        result.el(group) = a.el(group + s0);     // first slot comes from a
        result.el(group + 1) = b.el(group + s1); // second slot comes from b
        group += 2;
    }
    return result;
}
240 | |
/** Builds each group of four elements from two elements of @a a followed
    by two elements of @a b.

    For every group starting at index g (a multiple of four), the result
    holds a.el(g + s0) and a.el(g + s1) at g and g + 1, then b.el(g + s2)
    and b.el(g + s3) at g + 2 and g + 3.
*/
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, class V> SIMDPP_INL
V shuffle2(const V& a, const V& b)
{
    const unsigned sel[4] = { s0, s1, s2, s3 };
    V result;
    for (unsigned group = 0; group < V::length; group += 4) {
        // Slots 0 and 1 read from a.
        for (unsigned k = 0; k < 2; ++k) {
            result.el(group + k) = a.el(group + sel[k]);
        }
        // Slots 2 and 3 read from b.
        for (unsigned k = 2; k < 4; ++k) {
            result.el(group + k) = b.el(group + sel[k]);
        }
    }
    return result;
}
253 | |
254 | } // namespace null |
255 | } // namespace detail |
256 | } // namespace SIMDPP_ARCH_NAMESPACE |
257 | } // namespace simdpp |
258 | |
259 | #endif |
260 | #endif |
261 | |