blend.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/blend.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BLEND_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BLEND_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/bit_and.h>
17	#include <simdpp/core/bit_andnot.h>
18	#include <simdpp/core/bit_or.h>
19	#include <simdpp/core/to_mask.h>
20	#include <simdpp/detail/null/shuffle.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27	// -----------------------------------------------------------------------------
28	// uint8, uint8, uint8
29
30	static SIMDPP_INL
31	uint8<`16`> i_blend(const uint8<`16`>& con, const uint8<`16`>& coff, const uint8<`16`>& mask)
32	{
33	uint8<`16`> on = con, off = coff;
34	#if SIMDPP_USE_NULL
35	return detail::null::blend(on, off, mask);
36	#elif SIMDPP_USE_AVX2
37	return _mm_blendv_epi8(off.native(), on.native(), mask.native());
38	#elif SIMDPP_USE_XOP
39	return _mm_cmov_si128(on.native(), off.native(), mask.native());
40	#elif SIMDPP_USE_SSE2
41	// _mm_blendv_epi8 needs xmm0 and occupies the shuffle ports, yet saves
42	// only one uop
43	uint8<`16`> r;
44	on = bit_and(on, mask);
45	off = bit_andnot(off, mask);
46	r = bit_or(on, off);
47	return r;
48	#elif SIMDPP_USE_NEON
49	return vbslq_u8(mask.native(), on.native(), off.native());
50	#elif SIMDPP_USE_ALTIVEC
51	return vec_sel(off.native(), on.native(), mask.native());
52	#elif SIMDPP_USE_MSA
53	return __msa_bsel_v(mask.native(), off.native(), on.native());
54	#endif
55	}
56
57	#if SIMDPP_USE_AVX2
58	static SIMDPP_INL
59	uint8<`32`> i_blend(const uint8<`32`>& on, const uint8<`32`>& off, const uint8<`32`>& mask)
60	{
61	return _mm256_blendv_epi8(off.native(), on.native(), mask.native());
62	}
63	#endif
64
65	#if SIMDPP_USE_AVX512BW
66	SIMDPP_INL uint8<`64`> i_blend(const uint8<`64`>& on, const uint8<`64`>& off, const uint8<`64`>& mask)
67	{
68	return _mm512_ternarylogic_epi32(on.native(), off.native(), mask.native(), `0xe4`);
69	}
70	#endif
71
72	// -----------------------------------------------------------------------------
73	// uint8, uint8, mask_int8
74
75	static SIMDPP_INL
76	uint8<`16`> i_blend(const uint8<`16`>& on, const uint8<`16`>& off, const mask_int8<`16`>& mask)
77	{
78	#if SIMDPP_USE_NULL
79	return detail::null::blend_mask(on, off, mask);
80	#else
81	return i_blend(on, off, uint8<`16`>(mask));
82	#endif
83	}
84
85	#if SIMDPP_USE_AVX2
86	static SIMDPP_INL
87	uint8<`32`> i_blend(const uint8<`32`>& on, const uint8<`32`>& off, const mask_int8<`32`>& mask)
88	{
89	return i_blend(on, off, uint8<`32`>(mask));
90	}
91	#endif
92
93	#if SIMDPP_USE_AVX512BW
94	SIMDPP_INL uint8<`64`> i_blend(const uint8<`64`>& on, const uint8<`64`>& off, const mask_int8<`64`>& mask)
95	{
96	return _mm512_mask_blend_epi8(mask.native(), off.native(), on.native());
97	}
98	#endif
99
100	// -----------------------------------------------------------------------------
101	// mask_int8, mask_int8, mask_int8
102
103	static SIMDPP_INL
104	mask_int8<`16`> i_blend(const mask_int8<`16`>& on, const mask_int8<`16`>& off, const mask_int8<`16`>& mask)
105	{
106	#if SIMDPP_USE_NULL
107	return detail::null::blend_mask(on, off, mask);
108	#elif SIMDPP_USE_AVX512VL
109	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
110	#else
111	return mask_int8<`16`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
112	#endif
113	}
114
115	#if SIMDPP_USE_AVX2
116	static SIMDPP_INL
117	mask_int8<`32`> i_blend(const mask_int8<`32`>& on, const mask_int8<`32`>& off, const mask_int8<`32`>& mask)
118	{
119	#if SIMDPP_USE_AVX512VL
120	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
121	#else
122	return mask_int8<`32`>(i_blend(uint8<`32`>(on), uint8<`32`>(off), uint8<`32`>(mask)));
123	#endif
124	}
125	#endif
126
127	#if SIMDPP_USE_AVX512BW
128	SIMDPP_INL mask_int8<`64`> i_blend(const mask_int8<`64`>& on, const mask_int8<`64`>& off, const mask_int8<`64`>& mask)
129	{
130	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
131	}
132	#endif
133
134	// -----------------------------------------------------------------------------
135	// uint16, uint16, uint16
136
137	static SIMDPP_INL
138	uint16<`8`> i_blend(const uint16<`8`>& on, const uint16<`8`>& off, const uint16<`8`>& mask)
139	{
140	return uint16<`8`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
141	}
142
143	#if SIMDPP_USE_AVX2
144	static SIMDPP_INL
145	uint16<`16`> i_blend(const uint16<`16`>& on, const uint16<`16`>& off, const uint16<`16`>& mask)
146	{
147	return _mm256_blendv_epi8(off.native(), on.native(), mask.native());
148	}
149	#endif
150
151	#if SIMDPP_USE_AVX512BW
152	SIMDPP_INL uint16<`32`> i_blend(const uint16<`32`>& on, const uint16<`32`>& off, const uint16<`32`>& mask)
153	{
154	return _mm512_ternarylogic_epi32(on.native(), off.native(), mask.native(), `0xe4`);
155	}
156	#endif
157
158	// -----------------------------------------------------------------------------
159	// uint16, uint16, mask_int16
160
161	static SIMDPP_INL
162	uint16<`8`> i_blend(const uint16<`8`>& on, const uint16<`8`>& off, const mask_int16<`8`>& mask)
163	{
164	#if SIMDPP_USE_NULL
165	return detail::null::blend_mask(on, off, mask);
166	#else
167	return uint16<`8`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
168	#endif
169	}
170
171	#if SIMDPP_USE_AVX2
172	static SIMDPP_INL
173	uint16<`16`> i_blend(const uint16<`16`>& on, const uint16<`16`>& off, const mask_int16<`16`>& mask)
174	{
175	return i_blend(on, off, uint16<`16`>(mask));
176	}
177	#endif
178
179	#if SIMDPP_USE_AVX512BW
180	SIMDPP_INL uint16<`32`> i_blend(const uint16<`32`>& on, const uint16<`32`>& off, const mask_int16<`32`>& mask)
181	{
182	return _mm512_mask_blend_epi16(mask.native(), off.native(), on.native());
183	}
184	#endif
185
186	// -----------------------------------------------------------------------------
187	// mask_int16, mask_int16, mask_int16
188
189	static SIMDPP_INL
190	mask_int16<`8`> i_blend(const mask_int16<`8`>& on, const mask_int16<`8`>& off, const mask_int16<`8`>& mask)
191	{
192	#if SIMDPP_USE_NULL
193	return detail::null::blend_mask(on, off, mask);
194	#elif SIMDPP_USE_AVX512VL
195	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
196	#else
197	return to_mask(i_blend(uint16<`8`>(on), uint16<`8`>(off), uint16<`8`>(mask)));
198	#endif
199	}
200
201	#if SIMDPP_USE_AVX2
202	static SIMDPP_INL
203	mask_int16<`16`> i_blend(const mask_int16<`16`>& on, const mask_int16<`16`>& off, const mask_int16<`16`>& mask)
204	{
205	#if SIMDPP_USE_AVX512VL
206	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
207	#else
208	return to_mask(i_blend(uint16<`16`>(on), uint16<`16`>(off), uint16<`16`>(mask)));
209	#endif
210	}
211	#endif
212
213	#if SIMDPP_USE_AVX512BW
214	SIMDPP_INL mask_int16<`32`> i_blend(const mask_int16<`32`>& on, const mask_int16<`32`>& off, const mask_int16<`32`>& mask)
215	{
216	return (on.native() & mask.native()) \| (off.native() & ~mask.native());
217	}
218	#endif
219
220	// -----------------------------------------------------------------------------
221	// uint32, uint32, uint32
222
223	static SIMDPP_INL
224	uint32<`4`> i_blend(const uint32<`4`>& on, const uint32<`4`>& off, const uint32<`4`>& mask)
225	{
226	return uint32<`4`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
227	}
228
229	#if SIMDPP_USE_AVX2
230	static SIMDPP_INL
231	uint32<`8`> i_blend(const uint32<`8`>& on, const uint32<`8`>& off, const uint32<`8`>& mask)
232	{
233	return _mm256_blendv_epi8(off.native(), on.native(), mask.native());
234	}
235	#endif
236
237	#if SIMDPP_USE_AVX512F
238	static SIMDPP_INL
239	uint32<`16`> i_blend(const uint32<`16`>& on, const uint32<`16`>& off, const uint32<`16`>& mask)
240	{
241	return _mm512_ternarylogic_epi32(on.native(), off.native(), mask.native(), `0xe4`);
242	}
243	#endif
244
245	// -----------------------------------------------------------------------------
246	// uint32, uint32, mask_int32
247
248	static SIMDPP_INL
249	uint32<`4`> i_blend(const uint32<`4`>& on, const uint32<`4`>& off, const mask_int32<`4`>& mask)
250	{
251	#if SIMDPP_USE_NULL
252	return detail::null::blend_mask(on, off, mask);
253	#else
254	return uint32<`4`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
255	#endif
256	}
257
258	#if SIMDPP_USE_AVX2
259	static SIMDPP_INL
260	uint32<`8`> i_blend(const uint32<`8`>& on, const uint32<`8`>& off, const mask_int32<`8`>& mask)
261	{
262	return i_blend(on, off, uint32<`8`>(mask));
263	}
264	#endif
265
266	#if SIMDPP_USE_AVX512F
267	static SIMDPP_INL
268	uint32<`16`> i_blend(const uint32<`16`>& on, const uint32<`16`>& off, const mask_int32<`16`>& mask)
269	{
270	return _mm512_mask_blend_epi32(mask.native(), off.native(), on.native());
271	}
272	#endif
273
274	// -----------------------------------------------------------------------------
275	// mask_int32, mask_int32, mask_int32
276
277	static SIMDPP_INL
278	mask_int32<`4`> i_blend(const mask_int32<`4`>& on, const mask_int32<`4`>& off, const mask_int32<`4`>& mask)
279	{
280	#if SIMDPP_USE_NULL
281	return detail::null::blend_mask(on, off, mask);
282	#elif SIMDPP_USE_AVX512VL
283	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
284	#else
285	return to_mask(i_blend(uint32<`4`>(on), uint32<`4`>(off), uint32<`4`>(mask)));
286	#endif
287	}
288
289	#if SIMDPP_USE_AVX2
290	static SIMDPP_INL
291	mask_int32<`8`> i_blend(const mask_int32<`8`>& on, const mask_int32<`8`>& off, const mask_int32<`8`>& mask)
292	{
293	#if SIMDPP_USE_AVX512VL
294	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
295	#else
296	return to_mask(i_blend(uint32<`8`>(on), uint32<`8`>(off), uint32<`8`>(mask)));
297	#endif
298	}
299	#endif
300
301	#if SIMDPP_USE_AVX512F
302	static SIMDPP_INL
303	mask_int32<`16`> i_blend(const mask_int32<`16`>& on, const mask_int32<`16`>& off, const mask_int32<`16`>& mask)
304	{
305	return _mm512_kor(_mm512_kand(on.native(), mask.native()),
306	_mm512_kandn(mask.native(), off.native()));
307	}
308	#endif
309
310	// -----------------------------------------------------------------------------
311	// float32, float32, float32
312
313	static SIMDPP_INL
314	float32<`4`> i_blend(const float32<`4`>& con, const float32<`4`>& coff, const float32<`4`>& mask)
315	{
316	float32<`4`> on = con, off = coff;
317	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
318	return detail::null::blend(on, off, mask);
319	#elif SIMDPP_USE_AVX
320	return _mm_blendv_ps(off.native(), on.native(), mask.native());
321	#elif SIMDPP_USE_SSE2
322	float32x4 r;
323	on = bit_and(on, mask);
324	off = bit_andnot(off, mask);
325	r = bit_or(on, off);
326	return r;
327	#elif SIMDPP_USE_NEON
328	return vbslq_f32(uint32x4(mask).native(), on.native(), off.native());
329	#elif SIMDPP_USE_ALTIVEC
330	return vec_sel(off.native(), on.native(), mask.native());
331	#elif SIMDPP_USE_MSA
332	return (v4f32) __msa_bsel_v((v16u8) mask.native(),
333	(v16u8) off.native(), (v16u8) on.native());
334	#endif
335	}
336
337	#if SIMDPP_USE_AVX
338	static SIMDPP_INL
339	float32<`8`> i_blend(const float32<`8`>& on, const float32<`8`>& off, const float32<`8`>& mask)
340	{
341	return _mm256_blendv_ps(off.native(), on.native(), mask.native());
342	}
343	#endif
344
345	#if SIMDPP_USE_AVX512F
346	static SIMDPP_INL
347	float32<`16`> i_blend(const float32<`16`>& on, const float32<`16`>& off, const float32<`16`>& mask)
348	{
349	// maybe cmp_eq + blend has lower latency?
350	return (float32<`16`>) i_blend(uint32<`16`>(on), uint32<`16`>(off), uint32<`16`>(mask));
351	}
352	#endif
353
354	// -----------------------------------------------------------------------------
355	// float32, float32, mask_float32
356
357	static SIMDPP_INL
358	float32<`4`> i_blend(const float32<`4`>& on, const float32<`4`>& off, const mask_float32<`4`>& mask)
359	{
360	#if SIMDPP_USE_NULL
361	return detail::null::blend_mask(on, off, mask);
362	#else
363	return i_blend(on, off, float32<`4`>(mask));
364	#endif
365	}
366
367	#if SIMDPP_USE_AVX
368	static SIMDPP_INL
369	float32<`8`> i_blend(const float32<`8`>& on, const float32<`8`>& off, const mask_float32<`8`>& mask)
370	{
371	return i_blend(on, off, float32<`8`>(mask));
372	}
373	#endif
374
375	#if SIMDPP_USE_AVX512F
376	static SIMDPP_INL
377	float32<`16`> i_blend(const float32<`16`>& on, const float32<`16`>& off, const mask_float32<`16`>& mask)
378	{
379	return _mm512_mask_blend_ps(mask.native(), off.native(), on.native());
380	}
381	#endif
382
383	// -----------------------------------------------------------------------------
384	// mask_float32, mask_float32, mask_float32
385
386	static SIMDPP_INL
387	mask_float32<`4`> i_blend(const mask_float32<`4`>& on, const mask_float32<`4`>& off, const mask_float32<`4`>& mask)
388	{
389	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
390	return detail::null::blend_mask(on, off, mask);
391	#elif SIMDPP_USE_AVX512VL
392	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
393	#else
394	return to_mask(i_blend(float32<`4`>(on), float32<`4`>(off), float32<`4`>(mask)));
395	#endif
396	}
397
398	#if SIMDPP_USE_AVX
399	static SIMDPP_INL
400	mask_float32<`8`> i_blend(const mask_float32<`8`>& on, const mask_float32<`8`>& off,const mask_float32<`8`>& mask)
401	{
402	#if SIMDPP_USE_AVX512VL
403	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
404	#else
405	return to_mask(i_blend(float32<`8`>(on), float32<`8`>(off), float32<`8`>(mask)));
406	#endif
407	}
408	#endif
409
410	#if SIMDPP_USE_AVX512F
411	static SIMDPP_INL
412	mask_float32<`16`> i_blend(const mask_float32<`16`>& on, const mask_float32<`16`>& off, const mask_float32<`16`>& mask)
413	{
414	return _mm512_kor(_mm512_kand(on.native(), mask.native()),
415	_mm512_kandn(mask.native(), off.native()));
416	}
417	#endif
418
419	// -----------------------------------------------------------------------------
420	// uint64, uint64, uint64
421
422	static SIMDPP_INL
423	uint64<`2`> i_blend(const uint64<`2`>& on, const uint64<`2`>& off, const uint64<`2`>& mask)
424	{
425	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
426	return detail::null::blend(on, off, mask);
427	#else
428	return uint64<`2`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
429	#endif
430	}
431
432	#if SIMDPP_USE_AVX2
433	static SIMDPP_INL
434	uint64<`4`> i_blend(const uint64<`4`>& on, const uint64<`4`>& off, const uint64<`4`>& mask)
435	{
436	return _mm256_blendv_epi8(off.native(), on.native(), mask.native());
437	}
438	#endif
439
440	#if SIMDPP_USE_AVX512F
441	static SIMDPP_INL
442	uint64<`8`> i_blend(const uint64<`8`>& on, const uint64<`8`>& off, const uint64<`8`>& mask)
443	{
444	return _mm512_ternarylogic_epi64(on.native(), off.native(), mask.native(), `0xe4`);
445	}
446	#endif
447
448	// -----------------------------------------------------------------------------
449	// uint64, uint64, mask_int64
450
451	static SIMDPP_INL
452	uint64<`2`> i_blend(const uint64<`2`>& on, const uint64<`2`>& off, const mask_int64<`2`>& mask)
453	{
454	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
455	return detail::null::blend_mask(on, off, mask);
456	#else
457	return uint64<`2`>(i_blend(uint8<`16`>(on), uint8<`16`>(off), uint8<`16`>(mask)));
458	#endif
459	}
460
461	#if SIMDPP_USE_AVX2
462	static SIMDPP_INL
463	uint64<`4`> i_blend(const uint64<`4`>& on, const uint64<`4`>& off, const mask_int64<`4`>& mask)
464	{
465	return i_blend(on, off, uint64<`4`>(mask));
466	}
467	#endif
468
469	#if SIMDPP_USE_AVX512F
470	static SIMDPP_INL
471	uint64<`8`> i_blend(const uint64<`8`>& on, const uint64<`8`>& off, const mask_int64<`8`>& mask)
472	{
473	return _mm512_mask_blend_epi64(mask.native(), off.native(), on.native());
474	}
475	#endif
476
477	// -----------------------------------------------------------------------------
478	// mask_int64, mask_int64, mask_int64
479
480	static SIMDPP_INL
481	mask_int64<`2`> i_blend(const mask_int64<`2`>& on, const mask_int64<`2`>& off, const mask_int64<`2`>& mask)
482	{
483	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
484	return detail::null::blend_mask(on, off, mask);
485	#elif SIMDPP_USE_AVX512VL
486	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
487	#else
488	return to_mask(i_blend(uint64<`2`>(on), uint64<`2`>(off), uint64<`2`>(mask)));
489	#endif
490	}
491
492	#if SIMDPP_USE_AVX2
493	static SIMDPP_INL
494	mask_int64<`4`> i_blend(const mask_int64<`4`>& on, const mask_int64<`4`>& off, const mask_int64<`4`>& mask)
495	{
496	#if SIMDPP_USE_AVX512VL
497	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
498	#else
499	return to_mask(i_blend(uint64<`4`>(on), uint64<`4`>(off), uint64<`4`>(mask)));
500	#endif
501	}
502	#endif
503
504	#if SIMDPP_USE_AVX512F
505	static SIMDPP_INL
506	mask_int64<`8`> i_blend(const mask_int64<`8`>& on, const mask_int64<`8`>& off, const mask_int64<`8`>& mask)
507	{
508	return _mm512_kor(_mm512_kand(on.native(), mask.native()),
509	_mm512_kandn(mask.native(), off.native()));
510	}
511	#endif
512
513	// -----------------------------------------------------------------------------
514	// float64, float64, float64
515
516	static SIMDPP_INL
517	float64<`2`> i_blend(const float64<`2`>& con, const float64<`2`>& coff, const float64<`2`>& mask)
518	{
519	float64<`2`> on = con, off = coff;
520	#if SIMDPP_USE_AVX
521	return _mm_blendv_pd(off.native(), on.native(), mask.native());
522	#elif SIMDPP_USE_SSE2
523	float64x2 r;
524	on = bit_and(on, mask);
525	off = bit_andnot(off, mask);
526	r = bit_or(on, off);
527	return r;
528	#elif SIMDPP_USE_NEON64
529	return vbslq_f64(vreinterpretq_u64_f64(mask.native()), on.native(), off.native());
530	#elif SIMDPP_USE_VSX_206
531	return vec_sel(off.native(), on.native(), mask.native());
532	#elif SIMDPP_USE_MSA
533	return (v2f64) __msa_bsel_v((v16u8) mask.native(),
534	(v16u8) off.native(), (v16u8) on.native());
535	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
536	return detail::null::blend(on, off, mask);
537	#endif
538	}
539
540	#if SIMDPP_USE_AVX
541	static SIMDPP_INL
542	float64<`4`> i_blend(const float64<`4`>& on, const float64<`4`>& off, const float64<`4`>& mask)
543	{
544	return _mm256_blendv_pd(off.native(), on.native(), mask.native());
545	}
546	#endif
547
548	#if SIMDPP_USE_AVX512F
549	static SIMDPP_INL
550	float64<`8`> i_blend(const float64<`8`>& on, const float64<`8`>& off, const float64<`8`>& mask)
551	{
552	return (float64<`8`>) i_blend(uint64<`8`>(on), uint64<`8`>(off), uint64<`8`>(mask));
553	}
554	#endif
555
556	// -----------------------------------------------------------------------------
557	// float64, float64, mask_float64
558
559	static SIMDPP_INL
560	float64<`2`> i_blend(const float64<`2`>& on, const float64<`2`>& off, const mask_float64<`2`>& mask)
561	{
562	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
563	return detail::null::blend_mask(on, off, mask);
564	#else
565	return i_blend(on, off, float64<`2`>(mask));
566	#endif
567	}
568
569	#if SIMDPP_USE_AVX
570	static SIMDPP_INL
571	float64<`4`> i_blend(const float64<`4`>& on, const float64<`4`>& off, const mask_float64<`4`>& mask)
572	{
573	return i_blend(on, off, float64<`4`>(mask));
574	}
575	#endif
576
577	#if SIMDPP_USE_AVX512F
578	static SIMDPP_INL
579	float64<`8`> i_blend(const float64<`8`>& on, const float64<`8`>& off, const mask_float64<`8`>& mask)
580	{
581	return _mm512_mask_blend_pd(mask.native(), off.native(), on.native());
582	}
583	#endif
584
585	// -----------------------------------------------------------------------------
586	// mask_float64, mask_float64, mask_float64
587
588	static SIMDPP_INL
589	mask_float64<`2`> i_blend(const mask_float64<`2`>& on, const mask_float64<`2`>& off, const mask_float64<`2`>& mask)
590	{
591	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
592	return detail::null::blend_mask(on, off, mask);
593	#elif SIMDPP_USE_AVX512VL
594	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
595	#else
596	return to_mask(i_blend(float64<`2`>(on), float64<`2`>(off), float64<`2`>(mask)));
597	#endif
598	}
599
600	#if SIMDPP_USE_AVX
601	static SIMDPP_INL
602	mask_float64<`4`> i_blend(const mask_float64<`4`>& on, const mask_float64<`4`>& off, const mask_float64<`4`>& mask)
603	{
604	#if SIMDPP_USE_AVX512VL
605	return (on.native() & mask.native()) \| (off.native() & ~(mask.native()));
606	#else
607	return to_mask(i_blend(float64<`4`>(on), float64<`4`>(off), float64<`4`>(mask)));
608	#endif
609	}
610	#endif
611
612	#if SIMDPP_USE_AVX512F
613	static SIMDPP_INL
614	mask_float64<`8`> i_blend(const mask_float64<`8`>& on, const mask_float64<`8`>& off, const mask_float64<`8`>& mask)
615	{
616	return _mm512_kor(_mm512_kand(on.native(), mask.native()),
617	_mm512_kandn(mask.native(), off.native()));
618	}
619	#endif
620
621	// -----------------------------------------------------------------------------
622
623	template<class V1, class V2, class V3> SIMDPP_INL
624	V1 i_blend(const V1& on, const V2& off, const V3& mask)
625	{
626	SIMDPP_VEC_ARRAY_IMPL3(V1, i_blend, on, off, mask)
627	}
628
629
630	} // namespace insn
631	} // namespace detail
632	} // namespace SIMDPP_ARCH_NAMESPACE
633	} // namespace simdpp
634
635	#endif
636
637

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/blend.h