/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE_ZBYTES16_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE_ZBYTES16_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/core/permute_bytes16.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

#if _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4800)
#endif

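// Within each 16-byte lane, selects bytes of "a" according to "mask": for
// each output byte, if bit 7 of the corresponding mask byte is set the result
// is zero, otherwise the result is a[mask & 0x0f] (this is what the
// SIMDPP_USE_NULL reference loop below computes). For example, mask bytes
// { 0x00, 0x0f, 0x80 } select { a[0], a[15], 0 }.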
static SIMDPP_INL
uint8x16 i_permute_zbytes16(const uint8x16& a, const uint8x16& mask)
{
#if SIMDPP_USE_NULL
    uint8x16 r;

    for (unsigned i = 0; i < 16; i++) {
        unsigned j = mask.el(i) & 0x0f;
        bool zero = mask.el(i) & 0x80;
        r.el(i) = zero ? 0 : a.el(j);
    }
    return r;
#elif SIMDPP_USE_SSSE3 || SIMDPP_USE_NEON
    return permute_bytes16(a, mask);
#elif SIMDPP_USE_ALTIVEC
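    // The plain byte permute does not zero elements on its own, so the
    // zeroing is emulated: an arithmetic right shift by 7 broadcasts each
    // mask byte's sign bit, giving 0xff for bytes that request zero, and the
    // final andnot clears those bytes of the permuted result.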
    int8x16 a0 = a;
    int8x16 zero_mask = mask;
    zero_mask = shift_r<7>(zero_mask); // shift in the sign bit
    a0 = i_permute_bytes16(a0, mask);
    a0 = bit_andnot(a0, zero_mask);
    return a0;
#elif SIMDPP_USE_MSA
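    // MSA vshf.b zeroes output bytes whose control byte has bit 6 or bit 7
    // set, so no additional masking is needed here.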
    return (v16u8) __msa_vshf_b((v16i8) mask.native(),
                                (v16i8) a.native(),
                                (v16i8) a.native());
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, mask);
#endif
}

#if _MSC_VER
#pragma warning(pop)
#endif

#if SIMDPP_USE_AVX2
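// VPSHUFB shuffles within each 128-bit lane and zeroes bytes whose mask byte
// has the high bit set, so it implements the per-lane semantics directly.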
static SIMDPP_INL
uint8x32 i_permute_zbytes16(const uint8x32& a, const uint8x32& mask)
{
    return _mm256_shuffle_epi8(a.native(), mask.native());
}
#endif

#if SIMDPP_USE_AVX512BW
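// Same per-128-bit-lane semantics as above, extended to four lanes.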
SIMDPP_INL uint8<64> i_permute_zbytes16(const uint8<64>& a, const uint8<64>& mask)
{
    return _mm512_shuffle_epi8(a.native(), mask.native());
}
#endif

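// Vectors wider than the native vector length are processed one native-width
// sub-vector at a time.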
template<unsigned N> SIMDPP_INL
uint8<N> i_permute_zbytes16(const uint8<N>& a, const uint8<N>& mask)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_permute_zbytes16, a, mask);
}
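// The shuffle is byte-granular, so wider integer and floating-point element
// types are simply reinterpreted as byte vectors.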
template<unsigned N> SIMDPP_INL
uint16<N> i_permute_zbytes16(const uint16<N>& a, const uint16<N>& mask)
{
    return (uint16<N>) i_permute_zbytes16(uint8<N*2>(a), uint8<N*2>(mask));
}
template<unsigned N> SIMDPP_INL
uint32<N> i_permute_zbytes16(const uint32<N>& a, const uint32<N>& mask)
{
    return (uint32<N>) i_permute_zbytes16(uint8<N*4>(a), uint8<N*4>(mask));
}
template<unsigned N> SIMDPP_INL
uint64<N> i_permute_zbytes16(const uint64<N>& a, const uint64<N>& mask)
{
    return (uint64<N>) i_permute_zbytes16(uint8<N*8>(a), uint8<N*8>(mask));
}
template<unsigned N> SIMDPP_INL
float32<N> i_permute_zbytes16(const float32<N>& a, const uint32<N>& mask)
{
    return float32<N>(i_permute_zbytes16(uint32<N>(a), mask));
}
template<unsigned N> SIMDPP_INL
float64<N> i_permute_zbytes16(const float64<N>& a, const uint64<N>& mask)
{
    return float64<N>(i_permute_zbytes16(uint64<N>(a), mask));
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif