| 1 | /* Copyright (C) 2011-2017 Povilas Kanapickas <povilas@radix.lt> |
| 2 | |
| 3 | Distributed under the Boost Software License, Version 1.0. |
| 4 | (See accompanying file LICENSE_1_0.txt or copy at |
| 5 | http://www.boost.org/LICENSE_1_0.txt) |
| 6 | */ |
| 7 | |
| 8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_EXTRACT_H |
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_EXTRACT_H
| 10 | |
| 11 | #ifndef LIBSIMDPP_SIMD_H |
| 12 | #error "This file must be included through simd.h" |
| 13 | #endif |
| 14 | |
| 15 | #include <simdpp/types.h> |
| 16 | #include <simdpp/core/cast.h> |
| 17 | #include <simdpp/core/move_l.h> |
| 18 | #include <simdpp/core/i_shift_l.h> |
| 19 | #include <simdpp/core/i_sub.h> |
| 20 | #include <simdpp/core/make_int.h> |
| 21 | #include <simdpp/detail/insn/split.h> |
| 22 | #include <simdpp/detail/mem_block.h> |
| 23 | |
| 24 | namespace simdpp { |
| 25 | namespace SIMDPP_ARCH_NAMESPACE { |
| 26 | namespace detail { |
| 27 | namespace insn { |
| 28 | |
| 29 | template<unsigned id> SIMDPP_INL |
| 30 | uint8_t (const uint8<16>& a) |
| 31 | { |
| 32 | #if SIMDPP_USE_NULL |
| 33 | return a.el(id); |
| 34 | #elif SIMDPP_USE_SSE4_1 |
| 35 | // Explicit cast is needed due to bug in Clang headers (intrinsic |
| 36 | // implemented as a macro with no appropriate casts) and a bug in Clang |
| 37 | // (thinks explicit conversion operators have the same rank as the regular |
| 38 | // ones) |
| 39 | return _mm_extract_epi8(a.native(), id); |
| 40 | #elif SIMDPP_USE_SSE2 |
| 41 | unsigned shift = (id % 2 == 1) ? 8 : 0; |
| 42 | return _mm_extract_epi16(a.native(), id/2) >> shift; |
| 43 | #elif SIMDPP_USE_NEON |
| 44 | return vgetq_lane_u8(a.native(), id); |
| 45 | #elif SIMDPP_USE_ALTIVEC |
| 46 | detail::mem_block<uint8x16> ax(a); |
| 47 | vec_ste(a.native(), 0, &ax[id]); |
| 48 | return ax[id]; |
| 49 | #elif SIMDPP_USE_MSA |
| 50 | return __msa_copy_u_b((v16i8) a.native(), id); |
| 51 | #endif |
| 52 | } |
| 53 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the 128-bit lane containing the element, then extract
// the byte within that lane.
template<unsigned id> SIMDPP_INL
uint8_t i_extract(const uint8<32>& a)
{
    return _mm_extract_epi8(_mm256_extracti128_si256(a.native(), id / 16),
                            id % 16);
}
#endif
| 62 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW: pick the 128-bit sub-vector holding the element, then extract
// the byte within it.
template<unsigned id> SIMDPP_INL
uint8_t i_extract(const uint8<64>& a)
{
    return _mm_extract_epi8(_mm512_extracti32x4_epi32(a.native(), id / 16),
                            id % 16);
}
#endif
| 71 | |
| 72 | // ----------------------------------------------------------------------------- |
| 73 | |
| 74 | template<unsigned id> SIMDPP_INL |
| 75 | int8_t (const int8<16>& a) |
| 76 | { |
| 77 | #if SIMDPP_USE_MSA |
| 78 | return __msa_copy_s_b(a.native(), id); |
| 79 | #else |
| 80 | return i_extract<id>(uint8x16(a)); |
| 81 | #endif |
| 82 | } |
| 83 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the containing 128-bit lane, then extract the byte.
template<unsigned id> SIMDPP_INL
int8_t i_extract(const int8<32>& a)
{
    return _mm_extract_epi8(_mm256_extracti128_si256(a.native(), id / 16),
                            id % 16);
}
#endif
| 92 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW: pick the containing 128-bit sub-vector, then extract the byte.
template<unsigned id> SIMDPP_INL
int8_t i_extract(const int8<64>& a)
{
    return _mm_extract_epi8(_mm512_extracti32x4_epi32(a.native(), id / 16),
                            id % 16);
}
#endif
| 101 | |
| 102 | // ----------------------------------------------------------------------------- |
| 103 | |
| 104 | template<unsigned id> SIMDPP_INL |
| 105 | uint16_t (const uint16<8>& a) |
| 106 | { |
| 107 | #if SIMDPP_USE_NULL |
| 108 | return a.el(id); |
| 109 | #elif SIMDPP_USE_SSE2 |
| 110 | return _mm_extract_epi16(a.native(), id); |
| 111 | #elif SIMDPP_USE_NEON |
| 112 | return vgetq_lane_u16(a.native(), id); |
| 113 | #elif SIMDPP_USE_ALTIVEC |
| 114 | detail::mem_block<uint16x8> ax(a); |
| 115 | vec_ste(a.native(), 0, &ax[id]); |
| 116 | return ax[id]; |
| 117 | #elif SIMDPP_USE_MSA |
| 118 | return __msa_copy_u_h((v8i16) a.native(), id); |
| 119 | #endif |
| 120 | } |
| 121 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the containing 128-bit lane, then extract the 16-bit element.
template<unsigned id> SIMDPP_INL
uint16_t i_extract(const uint16<16>& a)
{
    return _mm_extract_epi16(_mm256_extracti128_si256(a.native(), id / 8),
                             id % 8);
}
#endif
| 130 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW: pick the containing 128-bit sub-vector, then extract the
// 16-bit element.
template<unsigned id> SIMDPP_INL
uint16_t i_extract(const uint16<32>& a)
{
    return _mm_extract_epi16(_mm512_extracti32x4_epi32(a.native(), id / 8),
                             id % 8);
}
#endif
| 139 | |
| 140 | // ----------------------------------------------------------------------------- |
| 141 | |
| 142 | template<unsigned id> SIMDPP_INL |
| 143 | int16_t (const int16<8>& a) |
| 144 | { |
| 145 | #if SIMDPP_USE_MSA |
| 146 | return __msa_copy_s_h(a.native(), id); |
| 147 | #else |
| 148 | return i_extract<id>(uint16x8(a)); |
| 149 | #endif |
| 150 | } |
| 151 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the containing 128-bit lane, then extract the 16-bit element.
template<unsigned id> SIMDPP_INL
int16_t i_extract(const int16<16>& a)
{
    return _mm_extract_epi16(_mm256_extracti128_si256(a.native(), id / 8),
                             id % 8);
}
#endif
| 160 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW: pick the containing 128-bit sub-vector, then extract the
// 16-bit element.
template<unsigned id> SIMDPP_INL
int16_t i_extract(const int16<32>& a)
{
    return _mm_extract_epi16(_mm512_extracti32x4_epi32(a.native(), id / 8),
                             id % 8);
}
#endif
| 169 | |
| 170 | // ----------------------------------------------------------------------------- |
| 171 | |
| 172 | template<unsigned id> SIMDPP_INL |
| 173 | uint32_t (const uint32<4>& a) |
| 174 | { |
| 175 | #if SIMDPP_USE_NULL |
| 176 | return a.el(id); |
| 177 | #elif SIMDPP_USE_SSE4_1 |
| 178 | return _mm_extract_epi32(a.native(), id); |
| 179 | #elif SIMDPP_USE_SSE2 |
| 180 | // when id==0, move_l is template-specialized and does nothing |
| 181 | return _mm_cvtsi128_si32(move4_l<id>(a).eval().native()); |
| 182 | #elif SIMDPP_USE_NEON |
| 183 | return vgetq_lane_u32(a.native(), id); |
| 184 | #elif SIMDPP_USE_ALTIVEC |
| 185 | detail::mem_block<uint32x4> ax(a); |
| 186 | vec_ste(a.native(), 0, &ax[id]); |
| 187 | return ax[id]; |
| 188 | #elif SIMDPP_USE_MSA |
| 189 | return __msa_copy_u_w((v4i32) a.native(), id); |
| 190 | #endif |
| 191 | } |
| 192 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the containing 128-bit lane, then extract the 32-bit element.
template<unsigned id> SIMDPP_INL
uint32_t i_extract(const uint32<8>& a)
{
    return _mm_extract_epi32(_mm256_extracti128_si256(a.native(), id / 4),
                             id % 4);
}
#endif
| 201 | |
#if SIMDPP_USE_AVX512F
// AVX512F: pick the containing 128-bit sub-vector, then extract the
// 32-bit element.
template<unsigned id> SIMDPP_INL
uint32_t i_extract(const uint32<16>& a)
{
    return _mm_extract_epi32(_mm512_extracti32x4_epi32(a.native(), id / 4),
                             id % 4);
}
#endif
| 210 | |
| 211 | // ----------------------------------------------------------------------------- |
| 212 | |
| 213 | template<unsigned id> SIMDPP_INL |
| 214 | int32_t (const int32<4>& a) |
| 215 | { |
| 216 | #if SIMDPP_USE_MSA |
| 217 | return __msa_copy_s_w(a.native(), id); |
| 218 | #else |
| 219 | return i_extract<id>(uint32x4(a)); |
| 220 | #endif |
| 221 | } |
| 222 | |
#if SIMDPP_USE_AVX2
// AVX2: narrow to the containing 128-bit lane, then extract the 32-bit element.
template<unsigned id> SIMDPP_INL
int32_t i_extract(const int32<8>& a)
{
    return _mm_extract_epi32(_mm256_extracti128_si256(a.native(), id / 4),
                             id % 4);
}
#endif
| 231 | |
#if SIMDPP_USE_AVX512F
// AVX512F: pick the containing 128-bit sub-vector, then extract the
// 32-bit element.
template<unsigned id> SIMDPP_INL
int32_t i_extract(const int32<16>& a)
{
    return _mm_extract_epi32(_mm512_extracti32x4_epi32(a.native(), id / 4),
                             id % 4);
}
#endif
| 240 | |
| 241 | // ----------------------------------------------------------------------------- |
| 242 | |
| 243 | template<unsigned id> SIMDPP_INL |
| 244 | uint64_t (const uint64<2>& a) |
| 245 | { |
| 246 | #if SIMDPP_USE_NULL |
| 247 | return a.el(id); |
| 248 | #elif SIMDPP_USE_SSE4_1 |
| 249 | #if SIMDPP_32_BITS |
| 250 | uint32x4 t = uint32x4(a); |
| 251 | uint64_t r = i_extract<id*2>(t); |
| 252 | r |= uint64_t(i_extract<id*2+1>(t)) << 32; |
| 253 | return r; |
| 254 | #else |
| 255 | return _mm_extract_epi64(a.native(), id); |
| 256 | #endif |
| 257 | #elif SIMDPP_USE_SSE2 |
| 258 | #if SIMDPP_32_BITS |
| 259 | uint32x4 t = uint32x4(a); |
| 260 | uint64_t r = 0; |
| 261 | t = move4_l<id*2>(t); // when id==0, move_l is template-specialized and does nothing |
| 262 | r = i_extract<0>(t); |
| 263 | t = move4_l<1>(t); |
| 264 | r |= uint64_t(i_extract<0>(t)) << 32; |
| 265 | return r; |
| 266 | #else |
| 267 | uint64x2 t = a; |
| 268 | if (id != 0) { |
| 269 | t = move2_l<id>(t); |
| 270 | } |
| 271 | return _mm_cvtsi128_si64(t.native()); |
| 272 | #endif |
| 273 | #elif SIMDPP_USE_NEON |
| 274 | return vgetq_lane_u64(a.native(), id); |
| 275 | #elif SIMDPP_USE_ALTIVEC |
| 276 | detail::mem_block<uint64x2> ax(a); |
| 277 | return ax[id]; |
| 278 | #elif SIMDPP_USE_MSA |
| 279 | #if SIMDPP_64_BITS |
| 280 | return __msa_copy_u_d((v2i64) a.native(), id); |
| 281 | #else |
| 282 | v4i32 a32 = (v4i32) a.native(); |
| 283 | uint64_t lo = __msa_copy_u_w(a32, id*2); |
| 284 | uint64_t hi = __msa_copy_u_w(a32, id*2+1); |
| 285 | return lo | (hi << 32); |
| 286 | #endif |
| 287 | #endif |
| 288 | } |
| 289 | |
#if SIMDPP_USE_AVX2
// AVX2: split off the containing 128-bit lane and defer to the 2-element
// extractor.
template<unsigned id> SIMDPP_INL
uint64_t i_extract(const uint64<4>& a)
{
    const uint64<2> lane = _mm256_extracti128_si256(a.native(), id / 2);
    return i_extract<id % 2>(lane);
}
#endif
| 298 | |
#if SIMDPP_USE_AVX512F
// AVX512F: split off the containing 128-bit sub-vector and defer to the
// 2-element extractor.
template<unsigned id> SIMDPP_INL
uint64_t i_extract(const uint64<8>& a)
{
    const uint64<2> lane = _mm512_extracti32x4_epi32(a.native(), id / 2);
    return i_extract<id % 2>(lane);
}
#endif
| 307 | |
| 308 | // ----------------------------------------------------------------------------- |
| 309 | |
| 310 | template<unsigned id> SIMDPP_INL |
| 311 | int64_t (const int64<2>& a) |
| 312 | { |
| 313 | #if SIMDPP_USE_MSA |
| 314 | #if SIMDPP_64_BITS |
| 315 | return __msa_copy_s_d(a, id); |
| 316 | #else |
| 317 | v4i32 a32 = (v4i32) a.native(); |
| 318 | int64_t lo = __msa_copy_s_w(a32, id*2); |
| 319 | int64_t hi = __msa_copy_s_w(a32, id*2+1); |
| 320 | return lo | (hi << 32); |
| 321 | #endif |
| 322 | #else |
| 323 | return i_extract<id>(uint64x2(a)); |
| 324 | #endif |
| 325 | } |
| 326 | |
#if SIMDPP_USE_AVX2
// AVX2: split off the containing 128-bit lane and defer to the 2-element
// unsigned extractor; the bit pattern is identical for the signed result.
template<unsigned id> SIMDPP_INL
int64_t i_extract(const int64<4>& a)
{
    const uint64<2> lane = _mm256_extracti128_si256(a.native(), id / 2);
    return i_extract<id % 2>(lane);
}
#endif
| 335 | |
#if SIMDPP_USE_AVX512F
// AVX512F: split off the containing 128-bit sub-vector and defer to the
// 2-element unsigned extractor; the bit pattern is identical for the
// signed result.
template<unsigned id> SIMDPP_INL
int64_t i_extract(const int64<8>& a)
{
    const uint64<2> lane = _mm512_extracti32x4_epi32(a.native(), id / 2);
    return i_extract<id % 2>(lane);
}
#endif
| 344 | |
| 345 | // ----------------------------------------------------------------------------- |
| 346 | |
| 347 | template<unsigned id> SIMDPP_INL |
| 348 | float (const float32<4>& a) |
| 349 | { |
| 350 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
| 351 | return a.el(id); |
| 352 | #elif SIMDPP_USE_SSE2 |
| 353 | return bit_cast<float>(i_extract<id>(int32x4(a))); |
| 354 | #elif SIMDPP_USE_NEON |
| 355 | return vgetq_lane_f32(a.native(), id); |
| 356 | #elif SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
| 357 | detail::mem_block<float32x4> ax(a); |
| 358 | return ax[id]; |
| 359 | #endif |
| 360 | } |
| 361 | |
#if SIMDPP_USE_AVX
// AVX: reduce to the containing 128-bit lane, extract through the integer
// domain and reinterpret the bits back to float.
template<unsigned id> SIMDPP_INL
float i_extract(const float32<8>& a)
{
    const __m128 lane = _mm256_extractf128_ps(a.native(), id / 4);
    return bit_cast<float>(_mm_extract_epi32(_mm_castps_si128(lane), id % 4));
}
#endif
| 370 | |
#if SIMDPP_USE_AVX512F
// AVX512F: reduce to the containing 128-bit sub-vector, extract through the
// integer domain and reinterpret the bits back to float.
template<unsigned id> SIMDPP_INL
float i_extract(const float32<16>& a)
{
    const __m128 lane = _mm512_extractf32x4_ps(a.native(), id / 4);
    return bit_cast<float>(_mm_extract_epi32(_mm_castps_si128(lane), id % 4));
}
#endif
| 379 | |
| 380 | // ----------------------------------------------------------------------------- |
| 381 | |
| 382 | template<unsigned id> SIMDPP_INL |
| 383 | double (const float64<2>& a) |
| 384 | { |
| 385 | #if SIMDPP_USE_NULL |
| 386 | return a.el(id); |
| 387 | #elif SIMDPP_USE_SSE2 |
| 388 | return bit_cast<double>(i_extract<id>(int64x2(a))); |
| 389 | #elif SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
| 390 | detail::mem_block<float64x2> ax(a); |
| 391 | return ax[id]; |
| 392 | #elif SIMDPP_USE_NEON64 |
| 393 | return vgetq_lane_f64(a.native(), id); |
| 394 | #endif |
| 395 | } |
| 396 | |
#if SIMDPP_USE_AVX
// AVX: take the containing 128-bit lane, extract through the integer domain
// and reinterpret the bits back to double.
template<unsigned id> SIMDPP_INL
double i_extract(const float64<4>& a)
{
    const __m128d lane = _mm256_extractf128_pd(a.native(), id / 2);
    return bit_cast<double>(i_extract<id % 2>((uint64<2>)_mm_castpd_si128(lane)));
}
#endif
| 405 | |
#if SIMDPP_USE_AVX512F
// AVX512F: there is no direct f64 sub-vector extract here, so view the
// register as f32, take the containing 128-bit sub-vector, extract through
// the integer domain and reinterpret the bits back to double.
template<unsigned id> SIMDPP_INL
double i_extract(const float64<8>& a)
{
    const __m128 lane = _mm512_extractf32x4_ps(_mm512_castpd_ps(a.native()), id / 2);
    return bit_cast<double>(i_extract<id % 2>((uint64<2>)_mm_castps_si128(lane)));
}
#endif
| 414 | |
| 415 | // ----------------------------------------------------------------------------- |
| 416 | |
| 417 | template<unsigned id, class V> SIMDPP_INL |
| 418 | typename V::element_type (const V& a) |
| 419 | { |
| 420 | typename V::base_vector_type base = a.vec(id / V::base_length); |
| 421 | return i_extract<id % V::base_length>(base); |
| 422 | } |
| 423 | |
| 424 | } // namespace insn |
| 425 | } // namespace detail |
| 426 | } // namespace SIMDPP_ARCH_NAMESPACE |
| 427 | } // namespace simdpp |
| 428 | |
| 429 | #endif |
| 430 | |