1/* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_DETAIL_NULL_SHUFFLE_H
9#define LIBSIMDPP_DETAIL_NULL_SHUFFLE_H
10#if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
11
12#ifndef LIBSIMDPP_SIMD_H
13 #error "This file must be included through simd.h"
14#endif
15
16#include <simdpp/types.h>
17#include <simdpp/core/bit_and.h>
18#include <simdpp/core/bit_andnot.h>
19#include <simdpp/core/bit_or.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace null {
25
/** Moves the elements of @a a towards index zero by @a count positions.

    Result element @c i receives @c a.el(i+count) for the first
    @c V::length-count positions; the remaining @a count positions at the end
    are set to zero.

    @a count is expected not to exceed @c V::length (the loop bounds are
    computed with unsigned arithmetic).
*/
template<unsigned count, class V> SIMDPP_INL
V move_n_l(const V& a)
{
    V result;
    unsigned dst = 0;
    // surviving elements slide down by `count`
    while (dst < V::length - count) {
        result.el(dst) = a.el(dst + count);
        ++dst;
    }
    // vacated tail is zero-filled
    while (dst < V::length) {
        result.el(dst) = 0;
        ++dst;
    }
    return result;
}
38
/** Moves the elements of @a a away from index zero by @a count positions.

    The first @a count result elements are set to zero; result element
    @c i+count receives @c a.el(i) for the remaining positions.

    @a count is expected not to exceed @c V::length (the loop bounds are
    computed with unsigned arithmetic).
*/
template<unsigned count, class V> SIMDPP_INL
V move_n_r(const V& a)
{
    V result;
    unsigned dst = 0;
    // vacated head is zero-filled
    while (dst < count) {
        result.el(dst) = 0;
        ++dst;
    }
    // remaining slots take the leading elements of the source
    for (unsigned src = 0; src < V::length - count; ++src, ++dst) {
        result.el(dst) = a.el(src);
    }
    return result;
}
51
/** Interleaves the lower halves of @a a and @a b.

    For each @c i in the first @c V::length/2 positions, @c a.el(i) is written
    to result element @c 2*i and @c b.el(i) to result element @c 2*i+1.
*/
template<class V> SIMDPP_INL
V zip_lo_impl(const V& a, const V& b)
{
    V result;
    unsigned dst = 0;
    for (unsigned src = 0; src < V::length/2; ++src) {
        result.el(dst)     = a.el(src);
        result.el(dst + 1) = b.el(src);
        dst += 2;
    }
    return result;
}
62
/** Interleaves the upper halves of @a a and @a b.

    With @c half = V::length/2, for each @c i below @c half, @c a.el(half+i)
    is written to result element @c 2*i and @c b.el(half+i) to result element
    @c 2*i+1.
*/
template<class V> SIMDPP_INL
V zip_hi_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length/2;
    unsigned src = half;
    for (unsigned dst = 0; dst < 2*half; dst += 2, ++src) {
        result.el(dst)     = a.el(src);
        result.el(dst + 1) = b.el(src);
    }
    return result;
}
74
75
/** De-interleaves the even-indexed elements of @a a and @a b.

    With @c half = V::length/2, the lower half of the result holds
    @c a.el(0), a.el(2), ... and the upper half holds @c b.el(0), b.el(2), ...
*/
template<class V> SIMDPP_INL
V unzip_lo_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length/2;
    // lower half of the result: even elements of a
    for (unsigned k = 0; k < half; ++k) {
        result.el(k) = a.el(2*k);
    }
    // upper half of the result: even elements of b
    for (unsigned k = 0; k < half; ++k) {
        result.el(half + k) = b.el(2*k);
    }
    return result;
}
87
/** De-interleaves the odd-indexed elements of @a a and @a b.

    With @c half = V::length/2, the lower half of the result holds
    @c a.el(1), a.el(3), ... and the upper half holds @c b.el(1), b.el(3), ...
*/
template<class V> SIMDPP_INL
V unzip_hi_impl(const V& a, const V& b)
{
    V result;
    const unsigned half = V::length/2;
    // lower half of the result: odd elements of a
    for (unsigned k = 0; k < half; ++k) {
        result.el(k) = a.el(2*k + 1);
    }
    // upper half of the result: odd elements of b
    for (unsigned k = 0; k < half; ++k) {
        result.el(half + k) = b.el(2*k + 1);
    }
    return result;
}
99
// Named zip entry points. Every function below forwards its arguments
// unchanged to zip_lo_impl() or zip_hi_impl(); the numeric part of the name is
// not used by the code here (presumably it denotes the element count of the
// vector the caller passes -- confirm against callers).

template<class V> SIMDPP_INL
V zip16_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip8_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip4_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip2_lo(const V& a, const V& b)
{
    return zip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V zip16_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip8_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip4_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V zip2_hi(const V& a, const V& b)
{
    return zip_hi_impl(a, b);
}
108
// Named unzip entry points. Every function below forwards its arguments
// unchanged to unzip_lo_impl() or unzip_hi_impl(); the numeric part of the
// name is not used by the code here (presumably it denotes the element count
// of the vector the caller passes -- confirm against callers).

template<class V> SIMDPP_INL
V unzip16_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip8_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip4_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip2_lo(const V& a, const V& b)
{
    return unzip_lo_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip16_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip8_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip4_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}

template<class V> SIMDPP_INL
V unzip2_hi(const V& a, const V& b)
{
    return unzip_hi_impl(a, b);
}
117
/** Broadcasts one element of @a v to every position of the result.

    Each of the @c V::length result elements is assigned @c v.el(pos).
*/
template<unsigned pos, class V> SIMDPP_INL
V splat(const V& v)
{
    V result;
    // fill order is irrelevant: every slot receives the same source element
    for (unsigned remaining = V::length; remaining > 0; --remaining) {
        result.el(remaining - 1) = v.el(pos);
    }
    return result;
}
127
/** Extracts a vector-sized window from the concatenation of @a lo and @a hi.

    Treating @a lo followed by @a hi as one sequence of @c 2*V::length
    elements, the result holds the @c V::length elements starting at index
    @a shift: the first @c V::length-shift elements come from the end of @a lo
    and the remaining @a shift elements come from the start of @a hi.

    The element count is taken from @c V::length rather than being fixed at
    16, so the function indexes correctly for vectors of any length.

    @a shift is expected not to exceed @c V::length (the loop bounds are
    computed with unsigned arithmetic).
*/
template<unsigned shift, class V> SIMDPP_INL
V align(const V& lo, const V& hi)
{
    V r;
    // tail of `lo` goes to the front of the result
    for (unsigned i = 0; i < V::length - shift; i++) {
        r.el(i) = lo.el(i + shift);
    }
    // head of `hi` fills the remaining `shift` positions
    for (unsigned i = V::length - shift; i < V::length; i++) {
        r.el(i) = hi.el(i - (V::length - shift));
    }
    return r;
}
140
141template<class V> SIMDPP_INL
142V blend(const V& on, const V& off, const V& mask)
143{
144 V r;
145 using E = typename V::element_type;
146 using U = typename V::uint_element_type;
147 for (unsigned i = 0; i < V::length; i++) {
148 U on1 = bit_cast<U, E>(on.el(i));
149 U off1 = bit_cast<U, E>(off.el(i));
150 U mask1 = bit_cast<U, E>(mask.el(i));
151 r.el(i) = bit_cast<E, U>((on1 & mask1) | (off1 & ~mask1));
152 }
153 return r;
154}
155
/** Element-wise select driven by a per-element mask.

    run() writes the first @c L elements of the result: element @c i is copied
    from @a on when @c mask.el(i) evaluates to true and from @a off otherwise.
    The primary template loops over the elements; the specializations for
    L = 1, 2 and 4 spell the same selection out without a loop.
*/
template<unsigned L> struct blend_mask_impl {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        unsigned idx = 0;
        while (idx < L) {
            if (mask.el(idx)) {
                result.el(idx) = on.el(idx);
            } else {
                result.el(idx) = off.el(idx);
            }
            ++idx;
        }
        return result;
    }
};

// Single-element case.
template<> struct blend_mask_impl<1> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = (mask.el(0) ? on : off).el(0);
        return result;
    }
};

// Two-element case.
template<> struct blend_mask_impl<2> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = (mask.el(0) ? on : off).el(0);
        result.el(1) = (mask.el(1) ? on : off).el(1);
        return result;
    }
};

// Four-element case.
template<> struct blend_mask_impl<4> {
    template<class V, class M> SIMDPP_INL
    static V run(const V& on, const V& off, const M& mask)
    {
        V result;
        result.el(0) = (mask.el(0) ? on : off).el(0);
        result.el(1) = (mask.el(1) ? on : off).el(1);
        result.el(2) = (mask.el(2) ? on : off).el(2);
        result.el(3) = (mask.el(3) ? on : off).el(3);
        return result;
    }
};
199
200template<class V, class M> SIMDPP_INL
201V blend_mask(const V& on, const V& off, const M& mask)
202{
203 return blend_mask_impl<V::length>::run(on, off, mask);
204}
205
/** Rearranges elements within each consecutive pair of @a a.

    The vector is processed in groups of two elements. Within the group
    starting at index @c base, result element @c base takes @c a.el(base+s0)
    and result element @c base+1 takes @c a.el(base+s1). The selectors are
    offsets from the start of the group (presumably expected to be 0 or 1).
*/
template<unsigned s0, unsigned s1, class V> SIMDPP_INL
V permute(const V& a)
{
    V result;
    unsigned base = 0;
    while (base < V::length) {
        result.el(base)     = a.el(base + s0);
        result.el(base + 1) = a.el(base + s1);
        base += 2;
    }
    return result;
}
216
/** Rearranges elements within each consecutive group of four elements of @a a.

    Within the group starting at index @c base, result elements @c base,
    @c base+1, @c base+2 and @c base+3 take @c a.el(base+s0), @c a.el(base+s1),
    @c a.el(base+s2) and @c a.el(base+s3) respectively. The selectors are
    offsets from the start of the group (presumably expected to be in [0,3]).
*/
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, class V> SIMDPP_INL
V permute(const V& a)
{
    V result;
    unsigned base = 0;
    while (base < V::length) {
        result.el(base)     = a.el(base + s0);
        result.el(base + 1) = a.el(base + s1);
        result.el(base + 2) = a.el(base + s2);
        result.el(base + 3) = a.el(base + s3);
        base += 4;
    }
    return result;
}
229
/** Builds each pair of result elements from one element of @a a and one of
    @a b.

    The vectors are processed in groups of two elements. Within the group
    starting at index @c base, result element @c base takes @c a.el(base+s0)
    and result element @c base+1 takes @c b.el(base+s1). The selectors are
    offsets from the start of the group (presumably expected to be 0 or 1).
*/
template<unsigned s0, unsigned s1, class V> SIMDPP_INL
V shuffle1(const V& a, const V& b)
{
    V result;
    unsigned base = 0;
    while (base < V::length) {
        result.el(base)     = a.el(base + s0);
        result.el(base + 1) = b.el(base + s1);
        base += 2;
    }
    return result;
}
240
/** Builds each group of four result elements from two elements of @a a
    followed by two elements of @a b.

    Within the group starting at index @c base, result elements @c base and
    @c base+1 take @c a.el(base+s0) and @c a.el(base+s1); result elements
    @c base+2 and @c base+3 take @c b.el(base+s2) and @c b.el(base+s3). The
    selectors are offsets from the start of the group (presumably expected to
    be in [0,3]).
*/
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3, class V> SIMDPP_INL
V shuffle2(const V& a, const V& b)
{
    V result;
    unsigned base = 0;
    while (base < V::length) {
        // first two slots of the group come from a
        result.el(base)     = a.el(base + s0);
        result.el(base + 1) = a.el(base + s1);
        // last two slots of the group come from b
        result.el(base + 2) = b.el(base + s2);
        result.el(base + 3) = b.el(base + s3);
        base += 4;
    }
    return result;
}
253
254} // namespace null
255} // namespace detail
256} // namespace SIMDPP_ARCH_NAMESPACE
257} // namespace simdpp
258
259#endif
260#endif
261