/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

// -----------------------------------------------------------------------------
// uint8, uint8
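// Each overload below maps directly to the native bitwise-OR intrinsic of the
// instruction set selected at compile time; the SIMDPP_USE_NULL path falls back
// to the scalar reference implementation from detail/null/bitwise.h.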
static SIMDPP_INL
uint8<16> i_bit_or(const uint8<16>& a, const uint8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or(uint8x16(a), uint8x16(b));
#elif SIMDPP_USE_SSE2
    return _mm_or_si128(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vorrq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_or_v(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8<32> i_bit_or(const uint8<32>& a, const uint8<32>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_or(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int8, mask_int8
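// Mask overloads combine the native mask registers directly where AVX-512
// provides them; otherwise the masks are OR-ed as full vectors and converted
// back with to_mask().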
static SIMDPP_INL
mask_int8<16> i_bit_or(const mask_int8<16>& a, const mask_int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8<32> i_bit_or(const mask_int8<32>& a, const mask_int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint8<32>(a), uint8<32>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_or(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() | b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint16, uint16
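// Bitwise OR does not depend on element width, so the 128-bit overloads for
// wider integer elements simply reuse the uint8<16> implementation.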
static SIMDPP_INL
uint16<8> i_bit_or(const uint16<8>& a, const uint16<8>& b)
{
    return uint16<8>(i_bit_or(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_or(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_or(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int16, mask_int16
static SIMDPP_INL
mask_int16<8> i_bit_or(const mask_int16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint16<8>(a), uint16<8>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16<16> i_bit_or(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint16<16>(a), uint16<16>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_bit_or(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() | b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint32, uint32
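// The 512-bit overload needs only AVX-512F (_mm512_or_epi32); the byte and
// word element widths above require AVX-512BW for their 512-bit forms.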
static SIMDPP_INL
uint32<4> i_bit_or(const uint32<4>& a, const uint32<4>& b)
{
    return uint32<4>(i_bit_or(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_or(const uint32<8>& a, const uint32<8>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_or(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_or_epi32(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int32, mask_int32
static SIMDPP_INL
mask_int32<4> i_bit_or(const mask_int32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint32<4>(a), uint32<4>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32<8> i_bit_or(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint32<8>(a), uint32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_or(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint64, uint64
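// Altivec without VSX 2.07 lacks native 64-bit integer vector operations, so
// that configuration takes the scalar reference path.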
static SIMDPP_INL
uint64<2> i_bit_or(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_or(a, b);
#else
    return uint64<2>(i_bit_or(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_or(const uint64<4>& a, const uint64<4>& b)
{
    return _mm256_or_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_or(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_or_epi64(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int64, mask_int64
static SIMDPP_INL
mask_int64<2> i_bit_or(const mask_int64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint64<2>(a), uint64<2>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64<4> i_bit_or(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(uint64<4>(a), uint64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_bit_or(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float32, float32
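// For floating-point types the OR is applied to the raw bit pattern: SSE and
// Altivec provide it directly, NEON reinterprets the registers as 32-bit
// integers, and MSA reuses the byte-wise integer implementation.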
static SIMDPP_INL
float32<4> i_bit_or(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_or(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_or_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.native()),
                                           vreinterpretq_u32_f32(b.native())));
#elif SIMDPP_USE_ALTIVEC
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (float32<4>) i_bit_or(uint8<16>(a), uint8<16>(b));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32<8> i_bit_or(const float32<8>& a, const float32<8>& b)
{
    return _mm256_or_ps(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_or(const float32<16>& a, const float32<16>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_or_ps(a.native(), b.native());
#else
    return (float32<16>) i_bit_or(uint32<16>(a), uint32<16>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float32, mask_float32
static SIMDPP_INL
mask_float32<4> i_bit_or(const mask_float32<4>& a, const mask_float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float32<4>(a), float32<4>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_bit_or(const mask_float32<8>& a, const mask_float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float32<8>(a), float32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_or(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float64, float64
static SIMDPP_INL
float64<2> i_bit_or(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_SSE2
    return _mm_or_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.native()),
                                           vreinterpretq_u64_f64(b.native())));
#elif SIMDPP_USE_VSX_206
    return vec_or(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (float64<2>) i_bit_or(uint8<16>(a), uint8<16>(b));
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    return detail::null::bit_or(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_bit_or(const float64<4>& a, const float64<4>& b)
{
    return _mm256_or_pd(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_or(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_or_pd(a.native(), b.native());
#else
    return (float64<8>) i_bit_or(uint64<8>(a), uint64<8>(b));
#endif
}
#endif

// -----------------------------------------------------------------------------
// mask_float64, mask_float64
static SIMDPP_INL
mask_float64<2> i_bit_or(const mask_float64<2>& a, const mask_float64<2>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_or_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float64<2>(a), float64<2>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_bit_or(const mask_float64<4>& a, const mask_float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    return to_mask(i_bit_or(float64<4>(a), float64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_or(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kor(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

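// Generic fallback: vectors wider than the native width are processed as an
// array of native-width sub-vectors, applying i_bit_or to each pair via
// SIMDPP_VEC_ARRAY_IMPL2.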
template<class V, class VM> SIMDPP_INL
V i_bit_or(const V& a, const VM& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_or, a, b)
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif
