/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_SHUFFLE_ZBYTES16_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_SHUFFLE_ZBYTES16_H

#ifndef LIBSIMDPP_SIMD_H
#error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_and.h>
#include <simdpp/core/bit_andnot.h>
#include <simdpp/core/bit_or.h>
#include <simdpp/core/blend.h>
#include <simdpp/core/cmp_lt.h>
#include <simdpp/core/i_add.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/core/make_uint.h>
#include <simdpp/core/shuffle_bytes16.h>
#include <simdpp/detail/not_implemented.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

#if _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4800)
#endif

// Selects bytes from a and b according to mask. For each output byte, bits
// 0..3 of the corresponding mask byte give the element index, bit 0x10 picks
// the element from b instead of a, and bit 0x80 zeroes the output byte.
static SIMDPP_INL
uint8x16 i_shuffle_zbytes16(const uint8x16& a, const uint8x16& b, const uint8x16& mask)
{
#if SIMDPP_USE_NULL
    uint8x16 r;

    for (unsigned i = 0; i < 16; i++) {
        unsigned j = mask.el(i) & 0x0f;  // element index within the chosen source
        bool which = mask.el(i) < 0x10;  // selectors below 0x10 pick a, the rest pick b
        bool zero = mask.el(i) & 0x80;   // bit 0x80 forces the output byte to zero
        r.el(i) = zero ? 0 : (which ? a.el(j) : b.el(j));
    }
    return r;
#elif SIMDPP_USE_XOP
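    // XOP's vpperm performs both the two-source byte selection and the
    // zeroing of elements in a single instruction.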
    return _mm_perm_epi8(a.native(), b.native(), mask.native());
#elif SIMDPP_USE_SSE4_1
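    // pshufb already zeroes output bytes whose control byte has bit 0x80 set,
    // so only the a/b selection is left to do. Shifting each selector left by
    // 3 moves the 0x10 "take from b" bit into the sign position that pblendvb
    // tests.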
    uint8<16> sel = _mm_slli_epi16(mask.native(), 3);

    uint8<16> ai = _mm_shuffle_epi8(a.native(), mask.native());
    uint8<16> bi = _mm_shuffle_epi8(b.native(), mask.native());
    return _mm_blendv_epi8(ai.native(), bi.native(), sel.native());
#elif SIMDPP_USE_SSSE3
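    // No pblendvb before SSE4.1, so build an explicit selection mask instead:
    // selectors whose low five bits are below 0x10 take the result shuffled
    // from a. The zeroing is still handled by pshufb via bit 0x80.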
    mask_int8<16> select_a = cmp_lt((int8<16>) bit_and(mask, 0x1f), 0x10);

    uint8<16> ai = _mm_shuffle_epi8(a.native(), mask.native());
    uint8<16> bi = _mm_shuffle_epi8(b.native(), mask.native());
    return blend(ai, bi, select_a);
#elif SIMDPP_USE_NEON
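    // The table lookups behind shuffle_bytes16 return zero for out-of-range
    // indices, and any selector with bit 0x80 set is out of range for the
    // 32-byte table, so the plain shuffle already performs the zeroing.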
    return shuffle_bytes16(a, b, mask);
#elif SIMDPP_USE_ALTIVEC
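    // vperm ignores the high bits of each selector byte, so the zeroing has to
    // be done separately: an arithmetic right shift by 7 spreads bit 0x80
    // across the whole byte, and the andnot then clears those elements.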
    int8x16 a0 = a, b0 = b;
    int8x16 zero_mask = mask;
    zero_mask = shift_r<7>(zero_mask); // shift in the sign bit
    a0 = i_shuffle_bytes16(a0, b0, mask);
    a0 = bit_andnot(a0, zero_mask);
    return a0;
#elif SIMDPP_USE_MSA
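    // vshf.b zeroes destination bytes whose control byte has its upper bits
    // set, which covers selectors with bit 0x80; with this operand order,
    // selectors 0..15 take bytes from a and 16..31 from b.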
    return (v16u8) __msa_vshf_b((v16i8) mask.native(),
                                (v16i8) b.native(),
                                (v16i8) a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED3(a, b, mask);
#endif
}

#if _MSC_VER
#pragma warning(pop)
#endif

#if SIMDPP_USE_AVX2
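// Same approach as the SSE4.1 path, applied independently to each 128-bit
// lane: vpshufb zeroes on bit 0x80 and vpblendvb keys off the shifted 0x10 bit.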
static SIMDPP_INL
uint8x32 i_shuffle_zbytes16(const uint8x32& a, const uint8x32& b, const uint8x32& mask)
{
    int8x32 sel, ai, bi, r;
    sel = mask;
    sel = _mm256_slli_epi16(sel.native(), 3);

    ai = _mm256_shuffle_epi8(a.native(), mask.native());
    bi = _mm256_shuffle_epi8(b.native(), mask.native());
    r = _mm256_blendv_epi8(ai.native(), bi.native(), sel.native());
    return r;
}
#endif

#if SIMDPP_USE_AVX512BW
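// AVX-512BW has no byte blend-by-vector; vptestmb turns the 0x10 bit of each
// selector into a predicate bit for the masked blend, while vpshufb again
// handles the zeroing via bit 0x80.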
SIMDPP_INL uint8<64> i_shuffle_zbytes16(const uint8<64>& a, const uint8<64>& b, const uint8<64>& mask)
{
    uint8<64> sel_mask, ai, bi, r;
    sel_mask = make_uint(0x10);
    __mmask64 sel = _mm512_test_epi8_mask(mask.native(), sel_mask.native());

    ai = _mm512_shuffle_epi8(a.native(), mask.native());
    bi = _mm512_shuffle_epi8(b.native(), mask.native());
    r = _mm512_mask_blend_epi8(sel, ai.native(), bi.native());
    return r;
}
#endif

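// The remaining overloads dispatch wider vectors to the native-width versions
// above. Element types wider than one byte are simply bit-cast, since the
// shuffle always operates at byte granularity.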
template<unsigned N> SIMDPP_INL
uint8<N> i_shuffle_zbytes16(const uint8<N>& a, const uint8<N>& b, const uint8<N>& mask)
{
    SIMDPP_VEC_ARRAY_IMPL3(uint8<N>, i_shuffle_zbytes16, a, b, mask);
}

template<unsigned N> SIMDPP_INL
uint16<N> i_shuffle_zbytes16(const uint16<N>& a, const uint16<N>& b, const uint16<N>& mask)
{
    return (uint16<N>) i_shuffle_zbytes16(uint8<N*2>(a), uint8<N*2>(b), uint8<N*2>(mask));
}
template<unsigned N> SIMDPP_INL
uint32<N> i_shuffle_zbytes16(const uint32<N>& a, const uint32<N>& b, const uint32<N>& mask)
{
    return (uint32<N>) i_shuffle_zbytes16(uint8<N*4>(a), uint8<N*4>(b), uint8<N*4>(mask));
}
template<unsigned N> SIMDPP_INL
uint64<N> i_shuffle_zbytes16(const uint64<N>& a, const uint64<N>& b, const uint64<N>& mask)
{
    return (uint64<N>) i_shuffle_zbytes16(uint8<N*8>(a), uint8<N*8>(b), uint8<N*8>(mask));
}
template<unsigned N> SIMDPP_INL
float32<N> i_shuffle_zbytes16(const float32<N>& a, const float32<N>& b, const uint32<N>& mask)
{
    return float32<N>(i_shuffle_zbytes16(uint32<N>(a), uint32<N>(b), mask));
}
template<unsigned N> SIMDPP_INL
float64<N> i_shuffle_zbytes16(const float64<N>& a, const float64<N>& b, const uint64<N>& mask)
{
    return float64<N>(i_shuffle_zbytes16(uint64<N>(a), uint64<N>(b), mask));
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif