/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_AND_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_AND_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/to_mask.h>
#include <simdpp/detail/null/bitwise.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {
// -----------------------------------------------------------------------------
// uint8, uint8
static SIMDPP_INL
uint8<16> i_bit_and(const uint8<16>& a, const uint8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_and_si128(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vandq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_and(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_and_v(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8<32> i_bit_and(const uint8<32>& a, const uint8<32>& b)
{
    return _mm256_and_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_and(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_and_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint8, mask_int8
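//
// On AVX512, masks are opmask registers holding one bit per element, so
// v & m becomes a zero-masking move: _mm_maskz_mov_epi8 keeps a byte of a
// where the mask bit is set and zeroes it otherwise, which is exactly AND
// with the mask expanded to all-ones/all-zeros elements. On other ISAs the
// mask already is such a full-width vector and is ANDed directly.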
static SIMDPP_INL
uint8<16> i_bit_and(const uint8<16>& a, const mask_int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_epi8(b.native(), a.native());
#else
    return i_bit_and(a, uint8<16>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8<32> i_bit_and(const uint8<32>& a, const mask_int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_epi8(b.native(), a.native());
#else
    return i_bit_and(a, uint8<32>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_bit_and(const uint8<64>& a, const mask_int8<64>& b)
{
    return _mm512_maskz_mov_epi8(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int8, mask_int8
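//
// With AVX512 opmasks, mask & mask is a plain integer AND of the mask
// registers. Elsewhere the masks are reinterpreted as ordinary vectors,
// ANDed, and re-tagged as a mask via to_mask.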
static SIMDPP_INL
mask_int8<16> i_bit_and(const mask_int8<16>& a, const mask_int8<16>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int8<32> i_bit_and(const mask_int8<32>& a, const mask_int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(uint8<32>(a), uint8<32>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int8<64> i_bit_and(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() & b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint16, uint16
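//
// Bitwise AND is independent of element size, so the 128-bit overloads for
// 16-bit and wider elements simply reinterpret their operands as bytes and
// reuse the uint8<16> implementation above.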
static SIMDPP_INL
uint16<8> i_bit_and(const uint16<8>& a, const uint16<8>& b)
{
    return uint16<8>(i_bit_and(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_and(const uint16<16>& a, const uint16<16>& b)
{
    return _mm256_and_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_and(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_and_si512(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint16, mask_int16
static SIMDPP_INL
uint16<8> i_bit_and(const uint16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_epi16(b.native(), a.native());
#else
    return i_bit_and(a, uint16<8>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16<16> i_bit_and(const uint16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_epi16(b.native(), a.native());
#else
    return i_bit_and(a, uint16<16>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_bit_and(const uint16<32>& a, const mask_int16<32>& b)
{
    return _mm512_maskz_mov_epi16(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int16, mask_int16
static SIMDPP_INL
mask_int16<8> i_bit_and(const mask_int16<8>& a, const mask_int16<8>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint16<8>) i_bit_and(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int16<16> i_bit_and(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint16<16>) i_bit_and(uint16<16>(a), uint16<16>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL mask_int16<32> i_bit_and(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() & b.native();
}
#endif

// -----------------------------------------------------------------------------
// uint32, uint32
static SIMDPP_INL
uint32<4> i_bit_and(const uint32<4>& a, const uint32<4>& b)
{
    return uint32<4>(i_bit_and(uint8<16>(a), uint8<16>(b)));
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_and(const uint32<8>& a, const uint32<8>& b)
{
    return _mm256_and_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_and(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_and_epi32(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint32, mask_int32
static SIMDPP_INL
uint32<4> i_bit_and(const uint32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_epi32(b.native(), a.native());
#else
    return i_bit_and(a, uint32<4>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32<8> i_bit_and(const uint32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_epi32(b.native(), a.native());
#else
    return i_bit_and(a, uint32<8>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_bit_and(const uint32<16>& a, const mask_int32<16>& b)
{
    return _mm512_maskz_mov_epi32(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int32, mask_int32
static SIMDPP_INL
mask_int32<4> i_bit_and(const mask_int32<4>& a, const mask_int32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint32<4>) i_bit_and(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int32<8> i_bit_and(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint32<8>) i_bit_and(uint32<8>(a), uint32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int32<16> i_bit_and(const mask_int32<16>& a, const mask_int32<16>& b)
{
    return _mm512_kand(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint64, uint64
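//
// Same byte-wise delegation, except on pre-2.07 AltiVec, where 64-bit
// integer lanes are not natively supported and the scalar reference
// implementation is used instead.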
static SIMDPP_INL
uint64<2> i_bit_and(const uint64<2>& a, const uint64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_and(a, b);
#else
    return uint64<2>(i_bit_and(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_and(const uint64<4>& a, const uint64<4>& b)
{
    return _mm256_and_si256(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_and(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_and_epi64(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// uint64, mask_int64
static SIMDPP_INL
uint64<2> i_bit_and(const uint64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_epi64(b.native(), a.native());
#else
    return i_bit_and(a, uint64<2>(b));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<4> i_bit_and(const uint64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_epi64(b.native(), a.native());
#else
    return i_bit_and(a, uint64<4>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_bit_and(const uint64<8>& a, const mask_int64<8>& b)
{
    return _mm512_maskz_mov_epi64(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_int64, mask_int64
static SIMDPP_INL
mask_int64<2> i_bit_and(const mask_int64<2>& a, const mask_int64<2>& b)
{
#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint64<2>) i_bit_and(uint8<16>(a), uint8<16>(b)));
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
mask_int64<4> i_bit_and(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask((uint64<4>) i_bit_and(uint64<4>(a), uint64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_int64<8> i_bit_and(const mask_int64<8>& a, const mask_int64<8>& b)
{
    return _mm512_kand(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float32, float32
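//
// Floating-point AND: SSE provides a float-domain AND (_mm_and_ps), which
// can avoid domain-crossing penalties on some microarchitectures. NEON has
// no bitwise ops on float vectors, so the operands are reinterpreted as
// u32, ANDed, and reinterpreted back.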
static SIMDPP_INL
float32<4> i_bit_and(const float32<4>& a, const float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_and(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_and_ps(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.native()),
                                           vreinterpretq_u32_f32(b.native())));
#elif SIMDPP_USE_ALTIVEC
    return vec_and(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v4f32) __msa_and_v((v16u8) a.native(), (v16u8) b.native());
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32<8> i_bit_and(const float32<8>& a, const float32<8>& b)
{
    return _mm256_and_ps(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_and(const float32<16>& a, const float32<16>& b)
{
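// 512-bit bitwise ops on float vectors require AVX512DQ; without it,
// reinterpret as uint32 and use the AVX512F integer AND.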
#if SIMDPP_USE_AVX512DQ
    return _mm512_and_ps(a.native(), b.native());
#else
    return float32<16>(i_bit_and(uint32<16>(a), uint32<16>(b)));
#endif
}
#endif

// -----------------------------------------------------------------------------
// float32, mask_float32
static SIMDPP_INL
float32<4> i_bit_and(const float32<4>& a, const mask_float32<4>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_ps(b.native(), a.native());
#else
    return i_bit_and(a, float32<4>(b));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float32<8> i_bit_and(const float32<8>& a, const mask_float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_ps(b.native(), a.native());
#else
    return i_bit_and(a, float32<8>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float32<16> i_bit_and(const float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_maskz_mov_ps(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_float32, mask_float32
static SIMDPP_INL
mask_float32<4> i_bit_and(const mask_float32<4>& a, const mask_float32<4>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(float32<4>(a), float32<4>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float32<8> i_bit_and(const mask_float32<8>& a, const mask_float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(float32<8>(a), float32<8>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float32<16> i_bit_and(const mask_float32<16>& a, const mask_float32<16>& b)
{
    return _mm512_kand(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------
// float64, float64
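//
// 32-bit NEON and pre-VSX 2.06 AltiVec have no double-precision vectors,
// so those configurations fall back to the scalar reference backend.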
static SIMDPP_INL
float64<2> i_bit_and(const float64<2>& a, const float64<2>& b)
{
#if SIMDPP_USE_SSE2
    return _mm_and_pd(a.native(), b.native());
#elif SIMDPP_USE_NEON64
    return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.native()),
                                           vreinterpretq_u64_f64(b.native())));
#elif SIMDPP_USE_VSX_206
    return vec_and(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return (v2f64) __msa_and_v((v16u8) a.native(), (v16u8) b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
    return detail::null::bit_and(a, b);
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_bit_and(const float64<4>& a, const float64<4>& b)
{
    return _mm256_and_pd(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_and(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    return _mm512_and_pd(a.native(), b.native());
#else
    return float64<8>(i_bit_and(uint64<8>(a), uint64<8>(b)));
#endif
}
#endif

// -----------------------------------------------------------------------------
// float64, mask_float64
static SIMDPP_INL
float64<2> i_bit_and(const float64<2>& a, const mask_float64<2>& b)
{
#if SIMDPP_USE_NULL
    return detail::null::bit_and_vm(a, b);
#elif SIMDPP_USE_AVX512VL
    return _mm_maskz_mov_pd(b.native(), a.native());
#else
    return i_bit_and(a, float64<2>(b));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
float64<4> i_bit_and(const float64<4>& a, const mask_float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_maskz_mov_pd(b.native(), a.native());
#else
    return i_bit_and(a, float64<4>(b));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
float64<8> i_bit_and(const float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_maskz_mov_pd(b.native(), a.native());
}
#endif

// -----------------------------------------------------------------------------
// mask_float64, mask_float64
static SIMDPP_INL
mask_float64<2> i_bit_and(const mask_float64<2>& a, const mask_float64<2>& b)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
    return detail::null::bit_and_mm(a, b);
#elif SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(float64<2>(a), float64<2>(b)));
#endif
}

#if SIMDPP_USE_AVX
static SIMDPP_INL
mask_float64<4> i_bit_and(const mask_float64<4>& a, const mask_float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() & b.native();
#else
    return to_mask(i_bit_and(float64<4>(a), float64<4>(b)));
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
mask_float64<8> i_bit_and(const mask_float64<8>& a, const mask_float64<8>& b)
{
    return _mm512_kand(a.native(), b.native());
}
#endif

// -----------------------------------------------------------------------------

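// Generic case: vectors wider than the widest native register are stored as
// arrays of native sub-vectors. SIMDPP_VEC_ARRAY_IMPL2 applies i_bit_and to
// each pair of corresponding sub-vectors and assembles the result.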
template<class V, class VM> SIMDPP_INL
V i_bit_and(const V& a, const VM& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_and, a, b)
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif