1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_AND_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_AND_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/to_mask.h> |
17 | #include <simdpp/detail/null/bitwise.h> |
18 | #include <simdpp/detail/vector_array_macros.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace insn { |
24 | |
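// Implementations of bitwise AND for every natively supported vector width.
// Each overload selects a backend at preprocessor time via the SIMDPP_USE_*
// macros; the generic template at the end of the file handles vector types
// wider than a native register by processing one sub-vector at a time.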
25 | // ----------------------------------------------------------------------------- |
26 | // uint8, uint8 |
27 | static SIMDPP_INL |
28 | uint8<16> i_bit_and(const uint8<16>& a, const uint8<16>& b) |
29 | { |
30 | #if SIMDPP_USE_NULL |
31 | return detail::null::bit_and(a, b); |
32 | #elif SIMDPP_USE_SSE2 |
33 | return _mm_and_si128(a.native(), b.native()); |
34 | #elif SIMDPP_USE_NEON |
35 | return vandq_u8(a.native(), b.native()); |
36 | #elif SIMDPP_USE_ALTIVEC |
37 | return vec_and(a.native(), b.native()); |
38 | #elif SIMDPP_USE_MSA |
39 | return __msa_and_v(a.native(), b.native()); |
40 | #endif |
41 | } |
42 | |
43 | #if SIMDPP_USE_AVX2 |
44 | static SIMDPP_INL |
45 | uint8<32> i_bit_and(const uint8<32>& a, const uint8<32>& b) |
46 | { |
47 | return _mm256_and_si256(a.native(), b.native()); |
48 | } |
49 | #endif |
50 | |
51 | #if SIMDPP_USE_AVX512BW |
52 | SIMDPP_INL uint8<64> i_bit_and(const uint8<64>& a, const uint8<64>& b) |
53 | { |
54 | return _mm512_and_si512(a.native(), b.native()); |
55 | } |
56 | #endif |
57 | |
58 | // ----------------------------------------------------------------------------- |
59 | // uint8, mask_int8 |
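// ANDing a vector with a mask: on AVX-512 targets the mask lives in a
// k-register, so a masked zeroing move (maskz_mov) clears the lanes whose mask
// bit is zero. On other targets a mask is an ordinary vector whose lanes are
// all ones or all zeros, so a plain bitwise AND produces the same result.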
60 | static SIMDPP_INL |
61 | uint8<16> i_bit_and(const uint8<16>& a, const mask_int8<16>& b) |
62 | { |
63 | #if SIMDPP_USE_NULL |
64 | return detail::null::bit_and_vm(a, b); |
65 | #elif SIMDPP_USE_AVX512VL |
66 | return _mm_maskz_mov_epi8(b.native(), a.native()); |
67 | #else |
68 | return i_bit_and(a, uint8<16>(b)); |
69 | #endif |
70 | } |
71 | |
72 | #if SIMDPP_USE_AVX2 |
73 | static SIMDPP_INL |
74 | uint8<32> i_bit_and(const uint8<32>& a, const mask_int8<32>& b) |
75 | { |
76 | #if SIMDPP_USE_AVX512VL |
77 | return _mm256_maskz_mov_epi8(b.native(), a.native()); |
78 | #else |
79 | return i_bit_and(a, uint8<32>(b)); |
80 | #endif |
81 | } |
82 | #endif |
83 | |
84 | #if SIMDPP_USE_AVX512BW |
85 | SIMDPP_INL uint8<64> i_bit_and(const uint8<64>& a, const mask_int8<64>& b) |
86 | { |
87 | return _mm512_maskz_mov_epi8(b.native(), a.native()); |
88 | } |
89 | #endif |
90 | |
91 | // ----------------------------------------------------------------------------- |
92 | // mask_int8, mask_int8 |
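// ANDing two masks: AVX-512 k-registers can be combined with operator&
// directly; otherwise the masks are reinterpreted as vectors, ANDed, and
// converted back to a mask with to_mask.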
93 | static SIMDPP_INL |
94 | mask_int8<16> i_bit_and(const mask_int8<16>& a, const mask_int8<16>& b) |
95 | { |
96 | #if SIMDPP_USE_NULL |
97 | return detail::null::bit_and_mm(a, b); |
98 | #elif SIMDPP_USE_AVX512VL |
99 | return a.native() & b.native(); |
100 | #else |
101 | return to_mask(i_bit_and(uint8<16>(a), uint8<16>(b))); |
102 | #endif |
103 | } |
104 | |
105 | #if SIMDPP_USE_AVX2 |
106 | static SIMDPP_INL |
107 | mask_int8<32> i_bit_and(const mask_int8<32>& a, const mask_int8<32>& b) |
108 | { |
109 | #if SIMDPP_USE_AVX512VL |
110 | return a.native() & b.native(); |
111 | #else |
112 | return to_mask(i_bit_and(uint8<32>(a), uint8<32>(b))); |
113 | #endif |
114 | } |
115 | #endif |
116 | |
117 | #if SIMDPP_USE_AVX512BW |
118 | SIMDPP_INL mask_int8<64> i_bit_and(const mask_int8<64>& a, const mask_int8<64>& b) |
119 | { |
120 | return a.native() & b.native(); |
121 | } |
122 | #endif |
123 | |
124 | // ----------------------------------------------------------------------------- |
125 | // uint16, uint16 |
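// Bitwise AND does not depend on the element width, so on 128-bit targets the
// wider-element integer variants simply reuse the uint8<16> overload.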
126 | static SIMDPP_INL |
127 | uint16<8> i_bit_and(const uint16<8>& a, const uint16<8>& b) |
128 | { |
129 | return uint16<8>(i_bit_and(uint8<16>(a), uint8<16>(b))); |
130 | } |
131 | |
132 | #if SIMDPP_USE_AVX2 |
133 | static SIMDPP_INL |
134 | uint16<16> i_bit_and(const uint16<16>& a, const uint16<16>& b) |
135 | { |
136 | return _mm256_and_si256(a.native(), b.native()); |
137 | } |
138 | #endif |
139 | |
140 | #if SIMDPP_USE_AVX512BW |
141 | SIMDPP_INL uint16<32> i_bit_and(const uint16<32>& a, const uint16<32>& b) |
142 | { |
143 | return _mm512_and_si512(a.native(), b.native()); |
144 | } |
145 | #endif |
146 | |
147 | // ----------------------------------------------------------------------------- |
148 | // uint16, mask_int16 |
149 | static SIMDPP_INL |
150 | uint16<8> i_bit_and(const uint16<8>& a, const mask_int16<8>& b) |
151 | { |
152 | #if SIMDPP_USE_NULL |
153 | return detail::null::bit_and_vm(a, b); |
154 | #elif SIMDPP_USE_AVX512VL |
155 | return _mm_maskz_mov_epi16(b.native(), a.native()); |
156 | #else |
157 | return i_bit_and(a, uint16<8>(b)); |
158 | #endif |
159 | } |
160 | |
161 | #if SIMDPP_USE_AVX2 |
162 | static SIMDPP_INL |
163 | uint16<16> i_bit_and(const uint16<16>& a, const mask_int16<16>& b) |
164 | { |
165 | #if SIMDPP_USE_AVX512VL |
166 | return _mm256_maskz_mov_epi16(b.native(), a.native()); |
167 | #else |
168 | return i_bit_and(a, uint16<16>(b)); |
169 | #endif |
170 | } |
171 | #endif |
172 | |
173 | #if SIMDPP_USE_AVX512BW |
174 | SIMDPP_INL uint16<32> i_bit_and(const uint16<32>& a, const mask_int16<32>& b) |
175 | { |
176 | return _mm512_maskz_mov_epi16(b.native(), a.native()); |
177 | } |
178 | #endif |
179 | |
180 | // ----------------------------------------------------------------------------- |
181 | // mask_int16, mask_int16 |
182 | static SIMDPP_INL |
183 | mask_int16<8> i_bit_and(const mask_int16<8>& a, const mask_int16<8>& b) |
184 | { |
185 | #if SIMDPP_USE_NULL |
186 | return detail::null::bit_and_mm(a, b); |
187 | #elif SIMDPP_USE_AVX512VL |
188 | return a.native() & b.native(); |
189 | #else |
190 | return to_mask((uint16<8>) i_bit_and(uint8<16>(a), uint8<16>(b))); |
191 | #endif |
192 | } |
193 | |
194 | #if SIMDPP_USE_AVX2 |
195 | static SIMDPP_INL |
196 | mask_int16<16> i_bit_and(const mask_int16<16>& a, const mask_int16<16>& b) |
197 | { |
198 | #if SIMDPP_USE_AVX512VL |
199 | return a.native() & b.native(); |
200 | #else |
201 | return to_mask((uint16<16>) i_bit_and(uint16<16>(a), uint16<16>(b))); |
202 | #endif |
203 | } |
204 | #endif |
205 | |
206 | #if SIMDPP_USE_AVX512BW |
207 | SIMDPP_INL mask_int16<32> i_bit_and(const mask_int16<32>& a, const mask_int16<32>& b) |
208 | { |
209 | return a.native() & b.native(); |
210 | } |
211 | #endif |
212 | |
213 | // ----------------------------------------------------------------------------- |
214 | // uint32, uint32 |
215 | static SIMDPP_INL |
216 | uint32<4> i_bit_and(const uint32<4>& a, const uint32<4>& b) |
217 | { |
218 | return uint32<4>(i_bit_and(uint8<16>(a), uint8<16>(b))); |
219 | } |
220 | |
221 | #if SIMDPP_USE_AVX2 |
222 | static SIMDPP_INL |
223 | uint32<8> i_bit_and(const uint32<8>& a, const uint32<8>& b) |
224 | { |
225 | return _mm256_and_si256(a.native(), b.native()); |
226 | } |
227 | #endif |
228 | |
229 | #if SIMDPP_USE_AVX512F |
230 | static SIMDPP_INL |
231 | uint32<16> i_bit_and(const uint32<16>& a, const uint32<16>& b) |
232 | { |
233 | return _mm512_and_epi32(a.native(), b.native()); |
234 | } |
235 | #endif |
236 | |
237 | // ----------------------------------------------------------------------------- |
238 | // uint32, mask_int32 |
239 | static SIMDPP_INL |
240 | uint32<4> i_bit_and(const uint32<4>& a, const mask_int32<4>& b) |
241 | { |
242 | #if SIMDPP_USE_NULL |
243 | return detail::null::bit_and_vm(a, b); |
244 | #elif SIMDPP_USE_AVX512VL |
245 | return _mm_maskz_mov_epi32(b.native(), a.native()); |
246 | #else |
247 | return i_bit_and(a, uint32<4>(b)); |
248 | #endif |
249 | } |
250 | |
251 | #if SIMDPP_USE_AVX2 |
252 | static SIMDPP_INL |
253 | uint32<8> i_bit_and(const uint32<8>& a, const mask_int32<8>& b) |
254 | { |
255 | #if SIMDPP_USE_AVX512VL |
256 | return _mm256_maskz_mov_epi32(b.native(), a.native()); |
257 | #else |
258 | return i_bit_and(a, uint32<8>(b)); |
259 | #endif |
260 | } |
261 | #endif |
262 | |
263 | #if SIMDPP_USE_AVX512F |
264 | static SIMDPP_INL |
265 | uint32<16> i_bit_and(const uint32<16>& a, const mask_int32<16>& b) |
266 | { |
267 | return _mm512_maskz_mov_epi32(b.native(), a.native()); |
268 | } |
269 | #endif |
270 | |
271 | // ----------------------------------------------------------------------------- |
272 | // mask_int32, mask_int32 |
273 | static SIMDPP_INL |
274 | mask_int32<4> i_bit_and(const mask_int32<4>& a, const mask_int32<4>& b) |
275 | { |
276 | #if SIMDPP_USE_NULL |
277 | return detail::null::bit_and_mm(a, b); |
278 | #elif SIMDPP_USE_AVX512VL |
279 | return _mm512_kand(a.native(), b.native()); |
280 | #else |
281 | return to_mask((uint32<4>) i_bit_and(uint8<16>(a), uint8<16>(b))); |
282 | #endif |
283 | } |
284 | |
285 | #if SIMDPP_USE_AVX2 |
286 | static SIMDPP_INL |
287 | mask_int32<8> i_bit_and(const mask_int32<8>& a, const mask_int32<8>& b) |
288 | { |
289 | #if SIMDPP_USE_AVX512VL |
290 | return _mm512_kand(a.native(), b.native()); |
291 | #else |
292 | return to_mask((uint32<8>) i_bit_and(uint32<8>(a), uint32<8>(b))); |
293 | #endif |
294 | } |
295 | #endif |
296 | |
297 | #if SIMDPP_USE_AVX512F |
298 | static SIMDPP_INL |
299 | mask_int32<16> i_bit_and(const mask_int32<16>& a, const mask_int32<16>& b) |
300 | { |
301 | return _mm512_kand(a.native(), b.native()); |
302 | } |
303 | #endif |
304 | |
305 | // ----------------------------------------------------------------------------- |
306 | // uint64, uint64 |
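// On Altivec without VSX 2.07 the 64-bit integer vectors are not native, so
// the emulated (null) implementation is used instead of reinterpreting the
// operands as bytes.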
307 | static SIMDPP_INL |
308 | uint64<2> i_bit_and(const uint64<2>& a, const uint64<2>& b) |
309 | { |
310 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
311 | return detail::null::bit_and(a, b); |
312 | #else |
313 | return uint64<2>(i_bit_and(uint8<16>(a), uint8<16>(b))); |
314 | #endif |
315 | } |
316 | |
317 | #if SIMDPP_USE_AVX2 |
318 | static SIMDPP_INL |
319 | uint64<4> i_bit_and(const uint64<4>& a, const uint64<4>& b) |
320 | { |
321 | return _mm256_and_si256(a.native(), b.native()); |
322 | } |
323 | #endif |
324 | |
325 | #if SIMDPP_USE_AVX512F |
326 | static SIMDPP_INL |
327 | uint64<8> i_bit_and(const uint64<8>& a, const uint64<8>& b) |
328 | { |
329 | return _mm512_and_epi64(a.native(), b.native()); |
330 | } |
331 | #endif |
332 | |
333 | // ----------------------------------------------------------------------------- |
334 | // uint64, mask_int64 |
335 | static SIMDPP_INL |
336 | uint64<2> i_bit_and(const uint64<2>& a, const mask_int64<2>& b) |
337 | { |
338 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
339 | return detail::null::bit_and_vm(a, b); |
340 | #elif SIMDPP_USE_AVX512VL |
341 | return _mm_maskz_mov_epi64(b.native(), a.native()); |
342 | #else |
343 | return i_bit_and(a, uint64<2>(b)); |
344 | #endif |
345 | } |
346 | |
347 | #if SIMDPP_USE_AVX2 |
348 | static SIMDPP_INL |
349 | uint64<4> i_bit_and(const uint64<4>& a, const mask_int64<4>& b) |
350 | { |
351 | #if SIMDPP_USE_AVX512VL |
352 | return _mm256_maskz_mov_epi64(b.native(), a.native()); |
353 | #else |
354 | return i_bit_and(a, uint64<4>(b)); |
355 | #endif |
356 | } |
357 | #endif |
358 | |
359 | #if SIMDPP_USE_AVX512F |
360 | static SIMDPP_INL |
361 | uint64<8> i_bit_and(const uint64<8>& a, const mask_int64<8>& b) |
362 | { |
363 | return _mm512_maskz_mov_epi64(b.native(), a.native()); |
364 | } |
365 | #endif |
366 | |
367 | // ----------------------------------------------------------------------------- |
368 | // mask_int64, mask_int64 |
369 | static SIMDPP_INL |
370 | mask_int64<2> i_bit_and(const mask_int64<2>& a, const mask_int64<2>& b) |
371 | { |
372 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
373 | return detail::null::bit_and_mm(a, b); |
374 | #elif SIMDPP_USE_AVX512VL |
375 | return _mm512_kand(a.native(), b.native()); |
376 | #else |
377 | return to_mask((uint64<2>) i_bit_and(uint8<16>(a), uint8<16>(b))); |
378 | #endif |
379 | } |
380 | |
381 | #if SIMDPP_USE_AVX2 |
382 | static SIMDPP_INL |
383 | mask_int64<4> i_bit_and(const mask_int64<4>& a, const mask_int64<4>& b) |
384 | { |
385 | #if SIMDPP_USE_AVX512VL |
386 | return _mm512_kand(a.native(), b.native()); |
387 | #else |
388 | return to_mask((uint64<4>) i_bit_and(uint64<4>(a), uint64<4>(b))); |
389 | #endif |
390 | } |
391 | #endif |
392 | |
393 | #if SIMDPP_USE_AVX512F |
394 | static SIMDPP_INL |
395 | mask_int64<8> i_bit_and(const mask_int64<8>& a, const mask_int64<8>& b) |
396 | { |
397 | return _mm512_kand(a.native(), b.native()); |
398 | } |
399 | #endif |
400 | |
401 | // ----------------------------------------------------------------------------- |
402 | // float32, float32 |
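// Floating-point AND: where no bitwise instruction operates on float vectors
// (NEON, MSA), the operands are reinterpreted as integer vectors, ANDed, and
// reinterpreted back; x86 and Altivec provide float AND instructions directly.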
403 | static SIMDPP_INL |
404 | float32<4> i_bit_and(const float32<4>& a, const float32<4>& b) |
405 | { |
406 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
407 | return detail::null::bit_and(a, b); |
408 | #elif SIMDPP_USE_SSE2 |
409 | return _mm_and_ps(a.native(), b.native()); |
410 | #elif SIMDPP_USE_NEON |
411 | return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.native()), |
412 | vreinterpretq_u32_f32(b.native()))); |
413 | #elif SIMDPP_USE_ALTIVEC |
414 | return vec_and(a.native(), b.native()); |
415 | #elif SIMDPP_USE_MSA |
416 | return (v4f32) __msa_and_v((v16u8) a.native(), (v16u8) b.native()); |
417 | #endif |
418 | } |
419 | |
420 | #if SIMDPP_USE_AVX |
421 | static SIMDPP_INL |
422 | float32<8> i_bit_and(const float32<8>& a, const float32<8>& b) |
423 | { |
424 | return _mm256_and_ps(a.native(), b.native()); |
425 | } |
426 | #endif |
427 | |
428 | #if SIMDPP_USE_AVX512F |
429 | static SIMDPP_INL |
430 | float32<16> i_bit_and(const float32<16>& a, const float32<16>& b) |
431 | { |
432 | #if SIMDPP_USE_AVX512DQ |
433 | return _mm512_and_ps(a.native(), b.native()); |
434 | #else |
435 | return float32<16>(i_bit_and(uint32<16>(a), uint32<16>(b))); |
436 | #endif |
437 | } |
438 | #endif |
439 | |
440 | // ----------------------------------------------------------------------------- |
441 | // float32, mask_float32 |
442 | static SIMDPP_INL |
443 | float32<4> i_bit_and(const float32<4>& a, const mask_float32<4>& b) |
444 | { |
445 | #if SIMDPP_USE_NULL |
446 | return detail::null::bit_and_vm(a, b); |
447 | #elif SIMDPP_USE_AVX512VL |
448 | return _mm_maskz_mov_ps(b.native(), a.native()); |
449 | #else |
450 | return i_bit_and(a, float32<4>(b)); |
451 | #endif |
452 | } |
453 | |
454 | #if SIMDPP_USE_AVX |
455 | static SIMDPP_INL |
456 | float32<8> i_bit_and(const float32<8>& a, const mask_float32<8>& b) |
457 | { |
458 | #if SIMDPP_USE_AVX512VL |
459 | return _mm256_maskz_mov_ps(b.native(), a.native()); |
460 | #else |
461 | return i_bit_and(a, float32<8>(b)); |
462 | #endif |
463 | } |
464 | #endif |
465 | |
466 | #if SIMDPP_USE_AVX512F |
467 | static SIMDPP_INL |
468 | float32<16> i_bit_and(const float32<16>& a, const mask_float32<16>& b) |
469 | { |
470 | return _mm512_maskz_mov_ps(b.native(), a.native()); |
471 | } |
472 | #endif |
473 | |
474 | // ----------------------------------------------------------------------------- |
475 | // mask_float32, mask_float32 |
476 | static SIMDPP_INL |
477 | mask_float32<4> i_bit_and(const mask_float32<4>& a, const mask_float32<4>& b) |
478 | { |
479 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
480 | return detail::null::bit_and_mm(a, b); |
481 | #elif SIMDPP_USE_AVX512VL |
482 | return _mm512_kand(a.native(), b.native()); |
483 | #else |
484 | return to_mask(i_bit_and(float32<4>(a), float32<4>(b))); |
485 | #endif |
486 | } |
487 | |
488 | #if SIMDPP_USE_AVX |
489 | static SIMDPP_INL |
490 | mask_float32<8> i_bit_and(const mask_float32<8>& a, const mask_float32<8>& b) |
491 | { |
492 | #if SIMDPP_USE_AVX512VL |
493 | return _mm512_kand(a.native(), b.native()); |
494 | #else |
495 | return to_mask(i_bit_and(float32<8>(a), float32<8>(b))); |
496 | #endif |
497 | } |
498 | #endif |
499 | |
500 | #if SIMDPP_USE_AVX512F |
501 | static SIMDPP_INL |
502 | mask_float32<16> i_bit_and(const mask_float32<16>& a, const mask_float32<16>& b) |
503 | { |
504 | return _mm512_kand(a.native(), b.native()); |
505 | } |
506 | #endif |
507 | |
508 | // ----------------------------------------------------------------------------- |
509 | // float64, float64 |
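// Native float64 vectors are only available on SSE2, NEON64 (AArch64),
// VSX 2.06 and MSA; the remaining targets fall back to the emulated (null)
// implementation.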
510 | static SIMDPP_INL |
511 | float64<2> i_bit_and(const float64<2>& a, const float64<2>& b) |
512 | { |
513 | #if SIMDPP_USE_SSE2 |
514 | return _mm_and_pd(a.native(), b.native()); |
515 | #elif SIMDPP_USE_NEON64 |
516 | return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.native()), |
517 | vreinterpretq_u64_f64(b.native()))); |
518 | #elif SIMDPP_USE_VSX_206 |
519 | return vec_and(a.native(), b.native()); |
520 | #elif SIMDPP_USE_MSA |
521 | return (v2f64) __msa_and_v((v16u8) a.native(), (v16u8) b.native()); |
522 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
523 | return detail::null::bit_and(a, b); |
524 | #endif |
525 | } |
526 | |
527 | #if SIMDPP_USE_AVX |
528 | static SIMDPP_INL |
529 | float64<4> i_bit_and(const float64<4>& a, const float64<4>& b) |
530 | { |
531 | return _mm256_and_pd(a.native(), b.native()); |
532 | } |
533 | #endif |
534 | |
535 | #if SIMDPP_USE_AVX512F |
536 | static SIMDPP_INL |
537 | float64<8> i_bit_and(const float64<8>& a, const float64<8>& b) |
538 | { |
539 | #if SIMDPP_USE_AVX512DQ |
540 | return _mm512_and_pd(a.native(), b.native()); |
541 | #else |
542 | return float64<8>(i_bit_and(uint64<8>(a), uint64<8>(b))); |
543 | #endif |
544 | } |
545 | #endif |
546 | |
547 | // ----------------------------------------------------------------------------- |
548 | // float64, mask_float64 |
549 | static SIMDPP_INL |
550 | float64<2> i_bit_and(const float64<2>& a, const mask_float64<2>& b) |
551 | { |
552 | #if SIMDPP_USE_NULL |
553 | return detail::null::bit_and_vm(a, b); |
554 | #elif SIMDPP_USE_AVX512VL |
555 | return _mm_maskz_mov_pd(b.native(), a.native()); |
556 | #else |
557 | return i_bit_and(a, float64<2>(b)); |
558 | #endif |
559 | } |
560 | |
561 | #if SIMDPP_USE_AVX |
562 | static SIMDPP_INL |
563 | float64<4> i_bit_and(const float64<4>& a, const mask_float64<4>& b) |
564 | { |
565 | #if SIMDPP_USE_AVX512VL |
566 | return _mm256_maskz_mov_pd(b.native(), a.native()); |
567 | #else |
568 | return i_bit_and(a, float64<4>(b)); |
569 | #endif |
570 | } |
571 | #endif |
572 | |
573 | #if SIMDPP_USE_AVX512F |
574 | static SIMDPP_INL |
575 | float64<8> i_bit_and(const float64<8>& a, const mask_float64<8>& b) |
576 | { |
577 | return _mm512_maskz_mov_pd(b.native(), a.native()); |
578 | } |
579 | #endif |
580 | |
581 | // ----------------------------------------------------------------------------- |
582 | // mask_float64, mask_float64 |
583 | static SIMDPP_INL |
584 | mask_float64<2> i_bit_and(const mask_float64<2>& a, const mask_float64<2>& b) |
585 | { |
586 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206) |
587 | return detail::null::bit_and_mm(a, b); |
588 | #elif SIMDPP_USE_AVX512VL |
589 | return _mm512_kand(a.native(), b.native()); |
590 | #else |
591 | return to_mask(i_bit_and(float64<2>(a), float64<2>(b))); |
592 | #endif |
593 | } |
594 | |
595 | #if SIMDPP_USE_AVX |
596 | static SIMDPP_INL |
597 | mask_float64<4> i_bit_and(const mask_float64<4>& a, const mask_float64<4>& b) |
598 | { |
599 | #if SIMDPP_USE_AVX512VL |
600 | return _mm512_kand(a.native(), b.native()); |
601 | #else |
602 | return to_mask(i_bit_and(float64<4>(a), float64<4>(b))); |
603 | #endif |
604 | } |
605 | #endif |
606 | |
607 | #if SIMDPP_USE_AVX512F |
608 | static SIMDPP_INL |
609 | mask_float64<8> i_bit_and(const mask_float64<8>& a, const mask_float64<8>& b) |
610 | { |
611 | return _mm512_kand(a.native(), b.native()); |
612 | } |
613 | #endif |
614 | |
615 | // ----------------------------------------------------------------------------- |
616 | |
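// Generic case: vector types longer than the native width (including mixed
// vector/mask argument combinations) are stored as arrays of native-size
// sub-vectors. SIMDPP_VEC_ARRAY_IMPL2 applies i_bit_and to each corresponding
// pair of sub-vectors and assembles the result.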
617 | template<class V, class VM> SIMDPP_INL |
618 | V i_bit_and(const V& a, const VM& b) |
619 | { |
620 | SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_and, a, b) |
621 | } |
622 | |
623 | } // namespace insn |
624 | } // namespace detail |
625 | } // namespace SIMDPP_ARCH_NAMESPACE |
626 | } // namespace simdpp |
627 | |
628 | #endif |
629 | |
630 | |