align.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/core/align.h]

/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_CORE_ALIGN_H
9	#define LIBSIMDPP_SIMDPP_CORE_ALIGN_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/insn/align.h>
17	#include <simdpp/detail/get_expr.h>
18
19	namespace simdpp {
20	namespace SIMDPP_ARCH_NAMESPACE {
21
22
23	/* Extracts a int8x16 vector from two concatenated int8x16 vectors*
24
25	@code
26	shift: pos:\| 0 1 . 14 15 \|
27	0 r = [ l0 l1 . l14 l15 ]
28	1 r = [ l1 l2 . l15 u0 ]
29	2 r = [ l2 l3 . u0 l1 ]
30	... .. .. .. ... .. ..
31	15 r = [ l15 u0 . u13 u14 ]
32	16 r = [ u0 u1 . u14 u15 ]
33	@endcode
34
35	@par 128-bit version:
36	@icost{SSE2-SSE3, 3}
37
38	@par 256-bit version:
39	The lower and higher 128-bit halves are processed as if 128-bit instruction
40	was applied to each of them separately.
41
42	@icost{SSE2-SSE3, 6}
43	@icost{SSSE3-AVX, NEON, ALTIVEC, 2}
44	*/
45	template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
46	typename detail::get_expr2_nomask<V1, V2>::empty
47	align16(const any_vec8<N,V1>& lower,
48	const any_vec8<N,V2>& upper)
49	{
50	static_assert(shift <= `16`, "Shift out of bounds");
51	if (shift == `0`) return lower.wrapped().eval();
52	if (shift == `16`) return upper.wrapped().eval();
53
54	typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
55	qlower = lower.wrapped().eval();
56	qupper = upper.wrapped().eval();
57	return detail::insn::i_align16<shift>(qlower, qupper);
58	}
59
60	/* Extracts a int16x8 vector from two concatenated int16x8 vectors*
61
62	@code
63	shift: pos:\| 0 1 . 6 7 \|
64	0 r = [ l0 l1 . l6 l7 ]
65	1 r = [ l1 l2 . l7 u0 ]
66	2 r = [ l2 l3 . u0 l1 ]
67	... .. .. .. ... .. ..
68	7 r = [ l3 u0 . u5 u6 ]
69	8 r = [ u0 u1 . u6 u7 ]
70	@endcode
71
72	@par 128-bit version:
73	@icost{SSE2-SSE3, 3}
74
75	@par 256-bit version:
76	@icost{SSE2-SSE3, 6}
77	@icost{SSSE3-AVX, NEON, ALTIVEC, 2}
78
79	The all 128-bit sub-vectors are processed as if 128-bit instruction
80	was applied to each of them separately.
81	*/
82	template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
83	typename detail::get_expr2_nomask<V1, V2>::empty
84	align8(const any_vec16<N,V1>& lower,
85	const any_vec16<N,V2>& upper)
86	{
87	static_assert(shift <= `8`, "Shift out of bounds");
88	if (shift == `0`) return lower.wrapped().eval();
89	if (shift == `8`) return upper.wrapped().eval();
90
91	typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
92	qlower = lower.wrapped().eval();
93	qupper = upper.wrapped().eval();
94	return detail::insn::i_align8<shift>(qlower, qupper);
95	}
96
97	/* Extracts a int32x4 vector from two concatenated int32x4 vectors*
98
99	@code
100	shift: pos:\| 0 1 2 3 \|
101	0 r = [ l0 l1 l2 l3 ]
102	1 r = [ l1 l2 l3 u0 ]
103	2 r = [ l2 l3 u0 u1 ]
104	3 r = [ l3 u0 u1 u2 ]
105	4 r = [ u0 u1 u2 u3 ]
106	@endcode
107
108	@par int32
109
110	@par 128-bit version:
111	@icost{SSE2-SSE3, 3}
112
113	@par 256-bit version:
114	The lower and higher 128-bit halves are processed as if 128-bit instruction
115	was applied to each of them separately.
116
117	@icost{SSE2-SSE3, 6}
118	@icost{SSSE3-AVX, NEON, ALTIVEC, 2}
119
120	@par float32
121
122	@par 128-bit version:
123	@icost{SSE2-SSE3, 3}
124
125	@par 256-bit version:
126	The lower and higher 128-bit halves are processed as if 128-bit instruction
127	was applied to each of them separately.
128
129	@icost{SSE2-SSE3, 6}
130	@icost{SSSE3-SSE4.1 NEON, ALTIVEC, 2}
131	*/
132	template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
133	typename detail::get_expr2_nomask<V1, V2>::empty
134	align4(const any_vec32<N,V1>& lower,
135	const any_vec32<N,V2>& upper)
136	{
137	static_assert(shift <= `4`, "Shift out of bounds");
138	if (shift == `0`) return lower.wrapped().eval();
139	if (shift == `4`) return upper.wrapped().eval();
140
141	typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
142	qlower = lower.wrapped().eval();
143	qupper = upper.wrapped().eval();
144	return detail::insn::i_align4<shift>(qlower, qupper);
145	}
146
147
148	/* Extracts a int64x2 vector from two concatenated int64x2 vectors*
149
150	@code
151	shift: pos:\| 0 1 \|
152	0 r = [ l0 l1 ]
153	1 r = [ l1 u0 ]
154	2 r = [ u0 u1 ]
155	@endcode
156
157	@par int64
158
159	@par 128-bit version:
160	@icost{SSE2-SSE3, 3}
161
162	@par 256-bit version:
163	The lower and higher 128-bit halves are processed as if 128-bit instruction
164	was applied to each of them separately.
165
166	@icost{SSE2-SSE3, 6}
167	@icost{SSSE3-AVX, NEON, ALTIVEC, 2}
168
169	@par float64
170
171	@par 128-bit version:
172	@icost{SSE2-SSE3, 3}
173
174	@par 256-bit version:
175	The lower and higher 128-bit halves are processed as if 128-bit instruction
176	was applied to each of them separately.
177
178	@icost{SSE2-SSE3, 6}
179	@icost{SSSE3-SSE4.1 NEON, ALTIVEC, 2}
180	*/
181	template<unsigned shift, unsigned N, class V1, class V2> SIMDPP_INL
182	typename detail::get_expr2_nomask<V1, V2>::empty
183	align2(const any_vec64<N,V1>& lower,
184	const any_vec64<N,V2>& upper)
185	{
186	static_assert(shift <= `2`, "Shift out of bounds");
187	if (shift == `0`) return lower.wrapped().eval();
188	if (shift == `2`) return upper.wrapped().eval();
189
190	typename detail::get_expr2_nomask_nosign<V1, V2>::type qlower, qupper;
191	qlower = lower.wrapped().eval();
192	qupper = upper.wrapped().eval();
193	return detail::insn::i_align2<shift>(qlower, qupper);
194	}
195
196	} // namespace SIMDPP_ARCH_NAMESPACE
197	} // namespace simdpp
198
199	#endif
200
201

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/core/align.h