permute_bytes16.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/permute_bytes16.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE_BYTES16_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE_BYTES16_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/not_implemented.h>
17	#include <simdpp/detail/vector_array_macros.h>
18
19	namespace simdpp {
20	namespace SIMDPP_ARCH_NAMESPACE {
21	namespace detail {
22	namespace insn {
23
24	static SIMDPP_INL
25	uint8x16 i_permute_bytes16(const uint8x16& a, const uint8x16& mask)
26	{
27	#if SIMDPP_USE_NULL
28	uint8x16 r;
29
30	for (unsigned i = `0`; i < `16`; i++) {
31	unsigned j = mask.el(i) & `0x0f`;
32	r.el(i) = a.el(j);
33	}
34	return r;
35	#elif SIMDPP_USE_SSSE3
36	return _mm_shuffle_epi8(a.native(), mask.native());
37	#elif SIMDPP_USE_NEON32
38	uint8x8x2_t table = {{vget_low_u8(a.native()), vget_high_u8(a.native())}};
39	uint8x8_t lo = vtbl2_u8(table, vget_low_u8(mask.native()));
40	uint8x8_t hi = vtbl2_u8(table, vget_high_u8(mask.native()));
41	return vcombine_u8(lo, hi);
42	#elif SIMDPP_USE_NEON64
43	return vqtbl1q_u8(a.native(), mask.native());
44	#elif SIMDPP_USE_ALTIVEC
45	return vec_perm(a.native(), a.native(), mask.native());
46	#elif SIMDPP_USE_MSA
47	return (v16u8) __msa_vshf_b((v16i8)mask.native(),
48	(v16i8)a.native(),
49	(v16i8)a.native());
50	#else
51	return SIMDPP_NOT_IMPLEMENTED2(a, mask);
52	#endif
53	}
54
#if SIMDPP_USE_AVX2
// 32-byte overload, available only when AVX2 is enabled. Hands the native
// registers of a and mask to _mm256_shuffle_epi8 and returns its result.
// NOTE(review): per the Intel intrinsics guide this shuffle works within each
// 128-bit lane independently -- confirm if cross-lane behaviour matters.
static SIMDPP_INL
uint8x32 i_permute_bytes16(const uint8x32& a, const uint8x32& mask)
{
    uint8x32 shuffled = _mm256_shuffle_epi8(a.native(), mask.native());
    return shuffled;
}
#endif
62
#if SIMDPP_USE_AVX512BW
// 64-byte overload, available only when AVX512BW is enabled. Hands the native
// registers of a and mask to _mm512_shuffle_epi8 and returns its result.
SIMDPP_INL uint8<64> i_permute_bytes16(const uint8<64>& a, const uint8<64>& mask)
{
    return _mm512_shuffle_epi8(a.native(), mask.native());
}
#endif
69
// Generic overload for uint8 vectors of arbitrary length N.
// The whole body is produced by SIMDPP_VEC_ARRAY_IMPL2, which receives the
// result type, the name of this function and both operands.
// NOTE(review): presumably the macro applies i_permute_bytes16 to each native
// sub-vector of a and mask and returns the assembled result -- confirm against
// the macro definition in vector_array_macros.h.
70	template<unsigned N> SIMDPP_INL
71	uint8<N> i_permute_bytes16(const uint8<N>& a, const uint8<N>& mask)
72	{
73	SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_permute_bytes16, a, mask)
74	}
75	template<unsigned N> SIMDPP_INL
76	uint16<N> i_permute_bytes16(const uint16<N>& a, const uint16<N>& mask)
77	{
78	return (uint16<N>) i_permute_bytes16(uint8<N`2`>(a), uint8<N`2`>(mask));
79	}
80	template<unsigned N> SIMDPP_INL
81	uint32<N> i_permute_bytes16(const uint32<N>& a, const uint32<N>& mask)
82	{
83	return (uint32<N>) i_permute_bytes16(uint8<N`4`>(a), uint8<N`4`>(mask));
84	}
85	template<unsigned N> SIMDPP_INL
86	uint64<N> i_permute_bytes16(const uint64<N>& a, const uint64<N>& mask)
87	{
88	return (uint64<N>) i_permute_bytes16(uint8<N`8`>(a), uint8<N`8`>(mask));
89	}
90	template<unsigned N> SIMDPP_INL
91	float32<N> i_permute_bytes16(const float32<N>& a, const uint32<N>& mask)
92	{
93	return float32<N>(i_permute_bytes16(uint32<N>(a), mask));
94	}
95	template<unsigned N> SIMDPP_INL
96	float64<N> i_permute_bytes16(const float64<N>& a, const uint64<N>& mask)
97	{
98	return float64<N>(i_permute_bytes16(uint64<N>(a), mask));
99	}
100
101
102	} // namespace insn
103	} // namespace detail
104	} // namespace SIMDPP_ARCH_NAMESPACE
105	} // namespace simdpp
106
107	#endif
108
109

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/permute_bytes16.h