1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/to_mask.h> |
17 | #include <simdpp/detail/null/bitwise.h> |
18 | #include <simdpp/detail/vector_array_macros.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace insn { |
24 | |
25 | // ----------------------------------------------------------------------------- |
26 | // uint8, uint8 |
// Bitwise OR of two 128-bit vectors of 8-bit elements.
// Dispatches to the native instruction of whichever SIMD ISA is enabled;
// exactly one branch is compiled in any given configuration.
static SIMDPP_INL
uint8<16> i_bit_or(const uint8<16>& a, const uint8<16>& b)
{
#if SIMDPP_USE_NULL
    // Scalar reference backend: element-wise OR in plain C++.
    return detail::null::bit_or(uint8x16(a), uint8x16(b));
#elif SIMDPP_USE_SSE2
    return _mm_or_si128(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vorrq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_or_v(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
// 256-bit variant; only available when AVX2 integer instructions exist.
static SIMDPP_INL
uint8<32> i_bit_or(const uint8<32>& a, const uint8<32>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
// 512-bit variant; requires AVX-512BW for byte-granularity vectors.
SIMDPP_INL uint8<64> i_bit_or(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif
57 | |
58 | // ----------------------------------------------------------------------------- |
59 | // mask_int8, mask_int8 |
// Bitwise OR of two 8-bit element masks.
// On AVX-512 the native mask is a bitmask register, so plain operator| works;
// otherwise the mask is widened to a full vector, OR'ed, and converted back.
static SIMDPP_INL
mask_int8<16> i_bit_or(const mask_int8<16>& a, const mask_int8<16>& b)
{
#if SIMDPP_USE_NULL
    // Scalar reference backend operating directly on mask representation.
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    // Native __mmask16 registers: a plain integer OR.
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8<32> i_bit_or(const mask_int8<32>& a, const mask_int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    // No mask registers: fall back to vector OR on the underlying bytes.
    return to_mask(i_bit_or(uint8<32>(a), uint8<32>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
// AVX-512BW masks for 64 byte lanes are always native bitmask registers.
SIMDPP_INL mask_int8<64> i_bit_or(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() | b.native();
}
#endif
90 | |
91 | // ----------------------------------------------------------------------------- |
92 | // uint16, uint16 |
// Bitwise OR of two 128-bit vectors of 16-bit elements.
// Bitwise OR is element-width agnostic, so the 128-bit case simply
// reinterprets to bytes and reuses the uint8<16> implementation.
static SIMDPP_INL
uint16<8> i_bit_or(const uint16<8>& a, const uint16<8>& b)
{
    return uint16<8>(i_bit_or(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_or(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_or(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif
113 | |
114 | // ----------------------------------------------------------------------------- |
115 | // mask_int16, mask_int16 |
// Bitwise OR of two 16-bit element masks.
// Same strategy as mask_int8: native mask-register OR when AVX-512VL/BW
// is available, otherwise OR the full-width vector form and re-mask.
static SIMDPP_INL
mask_int16<8> i_bit_or(const mask_int16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint16<8>(a), uint16<8>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16<16> i_bit_or(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint16<16>(a), uint16<16>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_bit_or(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() | b.native();
}
#endif
146 | |
147 | // ----------------------------------------------------------------------------- |
148 | // uint32, uint32 |
// Bitwise OR of two 128-bit vectors of 32-bit elements.
// Delegates to the byte-wise implementation (OR ignores element width).
static SIMDPP_INL
uint32<4> i_bit_or(const uint32<4>& a, const uint32<4>& b)
{
    return uint32<4>(i_bit_or(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_or(const uint32<8>& a, const uint32<8>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
// 32-bit lanes only need AVX-512F (no BW required), hence the _epi32 form.
static SIMDPP_INL
uint32<16> i_bit_or(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_or_epi32(a.native(), b.native());
}
#endif
170 | |
171 | // ----------------------------------------------------------------------------- |
172 | // mask_int32, mask_int32 |
// Bitwise OR of two 32-bit element masks.
static SIMDPP_INL
mask_int32<4> i_bit_or(const mask_int32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint32<4>(a), uint32<4>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32<8> i_bit_or(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint32<8>(a), uint32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_or(const mask_int32<16>& a, const mask_int32<16>& b)
{
    // AVX-512F masks live in k-registers; kor ORs two mask registers.
    return _mm512_kor(a.native(), b.native());
}
#endif
204 | |
205 | // ----------------------------------------------------------------------------- |
206 | // uint64, uint64 |
// Bitwise OR of two 128-bit vectors of 64-bit elements.
static SIMDPP_INL
uint64<2> i_bit_or(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    // Pre-VSX 2.07 AltiVec takes the scalar path alongside the NULL backend
    // (the other 64-bit uint operations in this configuration are scalar too).
    return detail::null::bit_or(a, b);
#else
    // Otherwise reinterpret as bytes and reuse the uint8<16> implementation.
    return uint64<2>(i_bit_or(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_or(const uint64<4>& a, const uint64<4>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_or(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_or_epi64(a.native(), b.native());
}
#endif
232 | |
233 | // ----------------------------------------------------------------------------- |
234 | // mask_int64, mask_int64 |
// Bitwise OR of two 64-bit element masks.
static SIMDPP_INL
mask_int64<2> i_bit_or(const mask_int64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    // Scalar path mirrors the uint64<2> implementation's fallback condition.
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint64<2>(a), uint64<2>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64<4> i_bit_or(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint64<4>(a), uint64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_bit_or(const mask_int64<8>& a, const mask_int64<8>& b)
{
    // OR of two k-registers (mask has 8 significant bits here).
    return _mm512_kor(a.native(), b.native());
}
#endif
266 | |
267 | // ----------------------------------------------------------------------------- |
268 | // float32, float32 |
// Bitwise OR of two float32 vectors (operates on the raw bit patterns;
// used e.g. for sign/mask manipulation of floating-point data).
static SIMDPP_INL
float32<4> i_bit_or(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_or(a, b);
#elif SIMDPP_USE_SSE2
    // SSE provides a float-typed OR directly.
    return _mm_or_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    // NEON has no float bitwise ops: reinterpret to u32, OR, reinterpret back.
    return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.native()),
                                           vreinterpretq_u32_f32(b.native())));
#elif SIMDPP_USE_ALTIVEC
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    // MSA: route through the byte-wise integer implementation.
    return (float32<4>) i_bit_or(uint8<16>(a), uint8<16>(b));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32<8> i_bit_or(const float32<8>& a, const float32<8>& b)
{
    return _mm256_or_ps(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_or(const float32<16>& a, const float32<16>& b)
{
#if SIMDPP_USE_AVX512DQ
    // 512-bit float-typed OR is an AVX-512DQ instruction.
    return _mm512_or_ps(a.native(), b.native());
#else
    // Plain AVX-512F: use the integer OR on reinterpreted data.
    return (float32<16>) i_bit_or(uint32<16>(a), uint32<16>(b));
#endif
}
#endif
305 | |
306 | // ----------------------------------------------------------------------------- |
307 | // mask_float32, mask_float32 |
// Bitwise OR of two float32 masks.
static SIMDPP_INL
mask_float32<4> i_bit_or(const mask_float32<4>& a, const mask_float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float32<4>(a), float32<4>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_bit_or(const mask_float32<8>& a, const mask_float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float32<8>(a), float32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_or(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif
339 | |
340 | // ----------------------------------------------------------------------------- |
341 | // float64, float64 |
// Bitwise OR of two float64 vectors (raw bit-pattern OR).
static SIMDPP_INL
float64<2> i_bit_or(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_SSE2
    return _mm_or_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    // AArch64 NEON: reinterpret to u64 for the OR, then back to f64.
    return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.native()),
                                           vreinterpretq_u64_f64(b.native())));
#elif SIMDPP_USE_VSX_206
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (float64<2>) i_bit_or(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    // No native f64 vectors (32-bit NEON, pre-VSX AltiVec): scalar fallback.
    return detail::null::bit_or(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_bit_or(const float64<4>& a, const float64<4>& b)
{
    return _mm256_or_pd(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_or(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    // 512-bit double-typed OR requires AVX-512DQ.
    return _mm512_or_pd(a.native(), b.native());
#else
    // Plain AVX-512F: integer OR on reinterpreted bits.
    return (float64<8>) i_bit_or(uint64<8>(a), uint64<8>(b));
#endif
}
#endif
378 | |
379 | // ----------------------------------------------------------------------------- |
380 | // mask_float64, mask_float64 |
// Bitwise OR of two float64 masks.
static SIMDPP_INL
mask_float64<2> i_bit_or(const mask_float64<2>& a, const mask_float64<2>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    // Configurations without native f64 vectors use the scalar mask OR.
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float64<2>(a), float64<2>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_bit_or(const mask_float64<4>& a, const mask_float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float64<4>(a), float64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_or(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif
412 | |
413 | // ----------------------------------------------------------------------------- |
414 | |
// Generic fallback for wider-than-native vectors: the macro expands to a
// loop that applies i_bit_or to each native-width sub-vector of a and b
// and assembles the results into a V.
template<class V, class VM> SIMDPP_INL
V i_bit_or(const V& a, const VM& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_or, a, b)
}
420 | |
421 | } // namespace insn |
422 | } // namespace detail |
423 | } // namespace SIMDPP_ARCH_NAMESPACE |
424 | } // namespace simdpp |
425 | |
426 | #endif |
427 | |
428 | |
429 | |