neon_int64x2.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/shuffle/neon_int64x2.h]

1	/* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_DETAIL_SHUFFLE_NEON_INT64x2_H
9	#define LIBSIMDPP_DETAIL_SHUFFLE_NEON_INT64x2_H
10	#if SIMDPP_USE_NEON
11
12	#include <type_traits>
13
14	namespace simdpp {
15	namespace SIMDPP_ARCH_NAMESPACE {
16	namespace detail {
17	namespace neon_shuffle_int64x2 {
18
19	#if SIMDPP_USE_NEON32
20
21	/*
22	The code below implements generalized permutations of elements within
23	int64x2 vectors using half-vector move instructions available on NEON.
24	*/
25	using T = uint64x2; // full vector
26	using H = uint64x1_t; // half vector
27
28
29	/// Returns the lower/higher part of a vector. Cost: 0
30	SIMDPP_INL H lo(T a) { return vget_low_u64(a.native()); }
31	SIMDPP_INL H hi(T a) { return vget_high_u64(a.native()); }
32
33	/// Combines two half vectors. Cost: 0
34	SIMDPP_INL T co(H lo, H hi) { return vcombine_u64(lo, hi); }
35
36	// 2-element permutation
37	template<unsigned s0, unsigned s1> SIMDPP_INL
38	T permute2(T a)
39	{
40	const unsigned sel = s0*`2` + s1;
41	switch (sel) {
42	default:
43	case `0`: /00/ return co(lo(a), lo(a));
44	case `1`: /01/ return a;
45	case `2`: /10/ return co(hi(a), lo(a));
46	case `3`: /11/ return co(hi(a), hi(a));
47	}
48	}
49
50	// 2-element shuffle: the first element must come from a, the second - from b
51	template<unsigned s0, unsigned s1> SIMDPP_INL
52	T shuffle1(T a, T b)
53	{
54	const unsigned sel = s0*`2` + s1;
55	switch (sel) {
56	default:
57	case `0`: /00/ return co(lo(a), lo(b));
58	case `1`: /01/ return co(lo(a), hi(b));
59	case `2`: /10/ return co(hi(a), lo(b));
60	case `3`: /11/ return co(hi(a), hi(b));
61	}
62	}
63
64	template<unsigned s0, unsigned s1> SIMDPP_INL
65	T shuffle2x2(const T& a, const T& b)
66	{
67	const unsigned sel = s0*`4` + s1;
68	switch (sel) {
69	default:
70	case `0`: /00/ return co(lo(a), lo(a));
71	case `1`: /01/ return a;
72	case `2`: /02/ return co(lo(a), lo(b));
73	case `3`: /03/ return co(lo(a), hi(b));
74	case `4`: /10/ return co(hi(a), lo(a));
75	case `5`: /11/ return co(hi(a), hi(a));
76	case `6`: /12/ return co(hi(a), lo(b));
77	case `7`: /13/ return co(hi(a), hi(b));
78	case `8`: /20/ return co(lo(b), lo(a));
79	case `9`: /21/ return co(lo(b), hi(a));
80	case `10`: /22/ return co(lo(b), lo(b));
81	case `11`: /23/ return b;
82	case `12`: /30/ return co(hi(b), lo(a));
83	case `13`: /31/ return co(hi(b), hi(a));
84	case `14`: /32/ return co(hi(b), lo(b));
85	case `15`: /33/ return co(hi(b), hi(b));
86	}
87	}
88
89	#else // SIMDPP_USE_NEON64
90
91	using T = uint64x2; // full vector
92
93	// Moves the high half of b onto high half of a
94	SIMDPP_INL T move_hi(const T& a, const T& b)
95	{
96	T mask = make_uint(`0xffffffffffffffff`, `0x0`);
97	return vbslq_u64(mask.native(), a.native(), b.native());
98	}
99
100	// 2-element permutation
101	template<unsigned s0, unsigned s1> SIMDPP_INL
102	T permute2(const T& a)
103	{
104	const unsigned sel = s0*`2` + s1;
105	switch (sel) {
106	default:
107	case `0`: /00/ return vzip1q_u64(a.native(), a.native());
108	case `1`: /01/ return a;
109	case `2`: /10/ return vextq_u64(a.native(), a.native(), `1`);
110	case `3`: /11/ return vzip2q_u64(a.native(), a.native());
111	}
112	}
113
114	// 2-element shuffle: the first element must come from a, the second - from b
115	template<unsigned s0, unsigned s1> SIMDPP_INL
116	T shuffle1(const T& a, const T& b)
117	{
118	const unsigned sel = s0*`2` + s1;
119	switch (sel) {
120	default:
121	case `0`: /00/ return vzip1q_u64(a.native(), b.native());
122	case `1`: /01/ return move_hi(a, b);
123	case `2`: /10/ return vextq_u64(a.native(), b.native(), `1`);
124	case `3`: /11/ return vzip2q_u64(a.native(), b.native());
125	}
126	}
127
128	template<unsigned s0, unsigned s1> SIMDPP_INL
129	T shuffle2x2(const T& a, const T& b)
130	{
131	const unsigned sel = s0*`4` + s1;
132	switch (sel) {
133	default:
134	case `0`: /00/ return vzip1q_u64(a.native(), a.native());
135	case `1`: /01/ return a;
136	case `2`: /02/ return vzip1q_u64(a.native(), b.native());
137	case `3`: /03/ return move_hi(a, b);
138	case `4`: /10/ return vextq_u64(a.native(), a.native(), `1`);
139	case `5`: /11/ return vzip2q_u64(a.native(), a.native());
140	case `6`: /12/ return vextq_u64(a.native(), b.native(), `1`);
141	case `7`: /13/ return vzip2q_u64(a.native(), b.native());
142	case `8`: /20/ return vzip1q_u64(b.native(), a.native());
143	case `9`: /21/ return move_hi(b, a);
144	case `10`: /22/ return vzip1q_u64(b.native(), b.native());
145	case `11`: /23/ return b;
146	case `12`: /30/ return vextq_u64(b.native(), a.native(), `1`);
147	case `13`: /31/ return vzip2q_u64(b.native(), a.native());
148	case `14`: /32/ return vextq_u64(b.native(), b.native(), `1`);
149	case `15`: /33/ return vzip2q_u64(b.native(), b.native());
150	}
151	}
152
153	#endif
154
155	} // namespace neon_shuffle_int64x2
156	} // namespace detail
157	} // namespace SIMDPP_ARCH_NAMESPACE
158	} // namespace simdpp
159
160	#endif
161	#endif
162
163

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/shuffle/neon_int64x2.h