conv_extend_to_int64.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_extend_to_int64.h]

/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT64_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT64_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/i_shift_r.h>
17	#include <simdpp/core/move_l.h>
18	#include <simdpp/core/zip_lo.h>
19	#include <simdpp/core/combine.h>
20	#include <simdpp/detail/mem_block.h>
21	#include <simdpp/detail/vector_array_conv_macros.h>
22
23	namespace simdpp {
24	namespace SIMDPP_ARCH_NAMESPACE {
25	namespace detail {
26	namespace insn {
27
28	// -----------------------------------------------------------------------------
29
30	static SIMDPP_INL
31	uint64<`4`> i_to_uint64(const uint32<`4`>& a)
32	{
33	#if SIMDPP_USE_NULL
34	uint64x4 r;
35	r.vec(`0`).el(`0`) = uint64_t(a.el(`0`));
36	r.vec(`0`).el(`1`) = uint64_t(a.el(`1`));
37	r.vec(`1`).el(`0`) = uint64_t(a.el(`2`));
38	r.vec(`1`).el(`1`) = uint64_t(a.el(`3`));
39	return r;
40	#elif SIMDPP_USE_AVX2
41	return _mm256_cvtepu32_epi64(a.native());
42	#elif SIMDPP_USE_SSE4_1
43	uint64x2 r1, r2;
44	r1 = _mm_cvtepu32_epi64(a.native());
45	r2 = _mm_cvtepu32_epi64(move4_l<`2`>(a).eval().native());
46	return combine(r1, r2);
47	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
48	return (uint64x4) combine(zip4_lo(a, (uint32x4) make_zero()),
49	zip4_hi(a, (uint32x4) make_zero()));
50	#elif SIMDPP_USE_NEON
51	uint64x2 r1, r2;
52	r1 = vmovl_u32(vget_low_u32(a.native()));
53	r2 = vmovl_u32(vget_high_u32(a.native()));
54	return combine(r1, r2);
55	#elif SIMDPP_USE_ALTIVEC
56	uint64x4 r;
57	mem_block<uint32x4> b = a;
58	r.vec(`0`).el(`0`) = b[`0`];
59	r.vec(`0`).el(`1`) = b[`1`];
60	r.vec(`1`).el(`0`) = b[`2`];
61	r.vec(`1`).el(`1`) = b[`3`];
62	return r;
63	#endif
64	}
65
#if SIMDPP_USE_AVX2
// Zero-extends eight unsigned 32-bit elements to 64 bits. The input is split
// into two four-element halves and each half is widened separately with
// _mm256_cvtepu32_epi64.
static SIMDPP_INL
uint64<8> i_to_uint64(const uint32<8>& a)
{
    uint32<4> lo_half, hi_half;
    split(a, lo_half, hi_half);

    uint64<4> lo_wide = _mm256_cvtepu32_epi64(lo_half.native());
    uint64<4> hi_wide = _mm256_cvtepu32_epi64(hi_half.native());
    return combine(lo_wide, hi_wide);
}
#endif
78
#if SIMDPP_USE_AVX512F
// Zero-extends sixteen unsigned 32-bit elements to 64 bits. The input is split
// into two eight-element halves and each half is widened separately with
// _mm512_cvtepu32_epi64.
static SIMDPP_INL
uint64<16> i_to_uint64(const uint32<16>& a)
{
    uint32<8> lo_half, hi_half;
    split(a, lo_half, hi_half);

    uint64<8> lo_wide = _mm512_cvtepu32_epi64(lo_half.native());
    uint64<8> hi_wide = _mm512_cvtepu32_epi64(hi_half.native());
    return combine(lo_wide, hi_wide);
}
#endif
91
92	template<unsigned N> SIMDPP_INL
93	uint64<N> i_to_uint64(const uint32<N>& a)
94	{
95	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
96	}
97
98	// -----------------------------------------------------------------------------
99
100	static SIMDPP_INL
101	uint64<`8`> i_to_uint64(const uint16<`8`>& a)
102	{
103	#if SIMDPP_USE_NULL
104	uint64<`8`> r;
105	for (unsigned i = `0`; i < `8`; i++) {
106	r.vec(i/`2`).el(i%`2`) = uint64_t(a.vec(`0`).el(i));
107	}
108	return r;
109	#elif SIMDPP_USE_AVX512F
110	return _mm512_cvtepu16_epi64(a.native());
111	#elif SIMDPP_USE_AVX2
112	uint64<`8`> r;
113	r.vec(`0`) = _mm256_cvtepu16_epi64(a.native());
114	r.vec(`1`) = _mm256_cvtepu16_epi64(move8_l<`4`>(a).eval().native());
115	return r;
116	#elif SIMDPP_USE_SSE4_1
117	uint64<`8`> r;
118	r.vec(`0`) = _mm_cvtepu16_epi64(a.native());
119	r.vec(`1`) = _mm_cvtepu16_epi64(move8_l<`2`>(a).eval().native());
120	r.vec(`2`) = _mm_cvtepu16_epi64(move8_l<`4`>(a).eval().native());
121	r.vec(`3`) = _mm_cvtepu16_epi64(move8_l<`6`>(a).eval().native());
122	return r;
123	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
124	return i_to_uint64(i_to_uint32(a));
125	#elif SIMDPP_USE_ALTIVEC
126	uint64<`8`> r;
127	mem_block<uint16<`8`>> b = a;
128	for (unsigned i = `0`; i < `8`; i++) {
129	r.vec(i/`2`).el(i%`2`) = uint64_t(b[i]);
130	}
131	return r;
132	#endif
133	}
134
#if SIMDPP_USE_AVX2
// Zero-extends sixteen unsigned 16-bit elements to 64 bits. The input is split
// into two eight-element halves which are then widened with the widest
// available conversion instruction.
static SIMDPP_INL
uint64<16> i_to_uint64(const uint16<16>& a)
{
    uint16<8> lo_half, hi_half;
    split(a, lo_half, hi_half);

    uint64<16> out;
#if SIMDPP_USE_AVX512F
    // One 512-bit conversion handles a whole eight-element half.
    out.vec(0) = _mm512_cvtepu16_epi64(lo_half.native());
    out.vec(1) = _mm512_cvtepu16_epi64(hi_half.native());
#else
    // A 256-bit conversion widens four elements, so each half needs two calls:
    // one on the half itself and one on the half moved down by four elements.
    out.vec(0) = _mm256_cvtepu16_epi64(lo_half.native());
    out.vec(1) = _mm256_cvtepu16_epi64(move8_l<4>(lo_half).eval().native());
    out.vec(2) = _mm256_cvtepu16_epi64(hi_half.native());
    out.vec(3) = _mm256_cvtepu16_epi64(move8_l<4>(hi_half).eval().native());
#endif
    return out;
}
#endif
158
#if SIMDPP_USE_AVX512F
// Zero-extends thirty-two unsigned 16-bit elements to 64 bits. The input is
// reduced to four eight-element quarters, each widened by one
// _mm512_cvtepu16_epi64 call.
static SIMDPP_INL
uint64<32> i_to_uint64(const uint16<32>& a)
{
    // First level: obtain the two sixteen-element halves.
    uint16<16> lo16, hi16;
#if SIMDPP_USE_AVX512BW
    split(a, lo16, hi16);
#else
    // Without AVX512BW the halves are read directly as the two sub-vectors.
    lo16 = a.vec(0);
    hi16 = a.vec(1);
#endif

    // Second level: split each half into eight-element quarters.
    uint16<8> q0, q1, q2, q3;
    split(lo16, q0, q1);
    split(hi16, q2, q3);

    uint64<32> out;
    out.vec(0) = _mm512_cvtepu16_epi64(q0.native());
    out.vec(1) = _mm512_cvtepu16_epi64(q1.native());
    out.vec(2) = _mm512_cvtepu16_epi64(q2.native());
    out.vec(3) = _mm512_cvtepu16_epi64(q3.native());
    return out;
}
#endif
182
183	template<unsigned N> SIMDPP_INL
184	uint64<N> i_to_uint64(const uint16<N>& a)
185	{
186	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
187	}
188
189	// -----------------------------------------------------------------------------
190
191	static SIMDPP_INL
192	uint64<`16`> i_to_uint64(const uint8<`16`>& a)
193	{
194	#if SIMDPP_USE_NULL
195	uint64<`16`> r;
196	for (unsigned i = `0`; i < `16`; i++) {
197	r.vec(i/`2`).el(i%`2`) = uint64_t(a.vec(`0`).el(i));
198	}
199	return r;
200	#elif SIMDPP_USE_AVX512F
201	uint64<`16`> r;
202	r.vec(`0`) = _mm512_cvtepu8_epi64(a.native());
203	r.vec(`1`) = _mm512_cvtepu8_epi64(move16_l<`8`>(a).eval().native());
204	return r;
205	#elif SIMDPP_USE_AVX2
206	uint64<`16`> r;
207	r.vec(`0`) = _mm256_cvtepu8_epi64(a.native());
208	r.vec(`1`) = _mm256_cvtepu8_epi64(move16_l<`4`>(a).eval().native());
209	r.vec(`2`) = _mm256_cvtepu8_epi64(move16_l<`8`>(a).eval().native());
210	r.vec(`3`) = _mm256_cvtepu8_epi64(move16_l<`12`>(a).eval().native());
211	return r;
212	#elif SIMDPP_USE_SSE4_1
213	uint64<`16`> r;
214	r.vec(`0`) = _mm_cvtepu8_epi64(a.native());
215	r.vec(`1`) = _mm_cvtepu8_epi64(move16_l<`2`>(a).eval().native());
216	r.vec(`2`) = _mm_cvtepu8_epi64(move16_l<`4`>(a).eval().native());
217	r.vec(`3`) = _mm_cvtepu8_epi64(move16_l<`6`>(a).eval().native());
218	r.vec(`4`) = _mm_cvtepu8_epi64(move16_l<`8`>(a).eval().native());
219	r.vec(`5`) = _mm_cvtepu8_epi64(move16_l<`10`>(a).eval().native());
220	r.vec(`6`) = _mm_cvtepu8_epi64(move16_l<`12`>(a).eval().native());
221	r.vec(`7`) = _mm_cvtepu8_epi64(move16_l<`14`>(a).eval().native());
222	return r;
223	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
224	return i_to_uint64(i_to_uint32(a));
225	#elif SIMDPP_USE_ALTIVEC
226	uint64<`16`> r;
227	mem_block<uint8<`16`>> b = a;
228	for (unsigned i = `0`; i < `16`; i++) {
229	r.vec(i/`2`).el(i%`2`) = uint64_t(b[i]);
230	}
231	return r;
232	#endif
233	}
234
#if SIMDPP_USE_AVX2
// Zero-extends thirty-two unsigned 8-bit elements to 64 bits. The input is
// split into two sixteen-element halves which are then widened with the widest
// available conversion instruction.
static SIMDPP_INL
uint64<32> i_to_uint64(const uint8<32>& a)
{
    uint8<16> lo_half, hi_half;
    split(a, lo_half, hi_half);

    uint64<32> out;
#if SIMDPP_USE_AVX512F
    // Eight elements per conversion: each half, then each half moved down by 8.
    out.vec(0) = _mm512_cvtepu8_epi64(lo_half.native());
    out.vec(1) = _mm512_cvtepu8_epi64(move16_l<8>(lo_half).eval().native());
    out.vec(2) = _mm512_cvtepu8_epi64(hi_half.native());
    out.vec(3) = _mm512_cvtepu8_epi64(move16_l<8>(hi_half).eval().native());
#else
    // Four elements per conversion: each half moved down by 0, 4, 8 and 12.
    out.vec(0) = _mm256_cvtepu8_epi64(lo_half.native());
    out.vec(1) = _mm256_cvtepu8_epi64(move16_l<4>(lo_half).eval().native());
    out.vec(2) = _mm256_cvtepu8_epi64(move16_l<8>(lo_half).eval().native());
    out.vec(3) = _mm256_cvtepu8_epi64(move16_l<12>(lo_half).eval().native());
    out.vec(4) = _mm256_cvtepu8_epi64(hi_half.native());
    out.vec(5) = _mm256_cvtepu8_epi64(move16_l<4>(hi_half).eval().native());
    out.vec(6) = _mm256_cvtepu8_epi64(move16_l<8>(hi_half).eval().native());
    out.vec(7) = _mm256_cvtepu8_epi64(move16_l<12>(hi_half).eval().native());
#endif
    return out;
}
#endif
264
#if SIMDPP_USE_AVX512F
// Zero-extends sixty-four unsigned 8-bit elements to 64 bits. The input is
// reduced to four sixteen-element quarters; each quarter takes two
// _mm512_cvtepu8_epi64 calls (eight elements per call).
static SIMDPP_INL
uint64<64> i_to_uint64(const uint8<64>& a)
{
    // First level: obtain the two thirty-two-element halves.
    uint8<32> lo32, hi32;
#if SIMDPP_USE_AVX512BW
    split(a, lo32, hi32);
#else
    // Without AVX512BW the halves are read directly as the two sub-vectors.
    lo32 = a.vec(0);
    hi32 = a.vec(1);
#endif

    // Second level: split each half into sixteen-element quarters.
    uint8<16> q0, q1, q2, q3;
    split(lo32, q0, q1);
    split(hi32, q2, q3);

    // Even result vectors take the low eight elements of a quarter, odd ones
    // take the quarter moved down by eight elements.
    uint64<64> out;
    out.vec(0) = _mm512_cvtepu8_epi64(q0.native());
    out.vec(1) = _mm512_cvtepu8_epi64(move16_l<8>(q0).eval().native());
    out.vec(2) = _mm512_cvtepu8_epi64(q1.native());
    out.vec(3) = _mm512_cvtepu8_epi64(move16_l<8>(q1).eval().native());
    out.vec(4) = _mm512_cvtepu8_epi64(q2.native());
    out.vec(5) = _mm512_cvtepu8_epi64(move16_l<8>(q2).eval().native());
    out.vec(6) = _mm512_cvtepu8_epi64(q3.native());
    out.vec(7) = _mm512_cvtepu8_epi64(move16_l<8>(q3).eval().native());
    return out;
}
#endif
292
293	template<unsigned N> SIMDPP_INL
294	uint64<N> i_to_uint64(const uint8<N>& a)
295	{
296	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
297	}
298
299	// -----------------------------------------------------------------------------
300
301	static SIMDPP_INL
302	int64<`4`> i_to_int64(const int32<`4`>& a)
303	{
304	#if SIMDPP_USE_NULL
305	int64<`4`> r;
306	r.vec(`0`).el(`0`) = int64_t(a.el(`0`));
307	r.vec(`0`).el(`1`) = int64_t(a.el(`1`));
308	r.vec(`1`).el(`0`) = int64_t(a.el(`2`));
309	r.vec(`1`).el(`1`) = int64_t(a.el(`3`));
310	return r;
311	#elif SIMDPP_USE_AVX2
312	return _mm256_cvtepi32_epi64(a.native());
313	#elif SIMDPP_USE_SSE4_1
314	uint64x2 r1, r2;
315	r1 = _mm_cvtepi32_epi64(a.native());
316	r2 = _mm_cvtepi32_epi64(move4_l<`2`>(a).eval().native());
317	return combine(r1, r2);
318	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
319	int32x4 sign = shift_r<`31`>(a);
320	int64x2 lo, hi;
321	lo = zip4_lo(a, sign);
322	hi = zip4_hi(a, sign);
323	return combine(lo, hi);
324	#elif SIMDPP_USE_NEON
325	int64x2 r1, r2;
326	r1 = vmovl_s32(vget_low_s32(a.native()));
327	r2 = vmovl_s32(vget_high_s32(a.native()));
328	return combine(r1, r2);
329	#elif SIMDPP_USE_ALTIVEC
330	int64x4 r;
331	mem_block<int32x4> b = a;
332	r.vec(`0`).el(`0`) = b[`0`];
333	r.vec(`0`).el(`1`) = b[`1`];
334	r.vec(`1`).el(`0`) = b[`2`];
335	r.vec(`1`).el(`1`) = b[`3`];
336	return r;
337	#endif
338	}
339
#if SIMDPP_USE_AVX2
// Sign-extends eight signed 32-bit elements to 64 bits. The input is split
// into two four-element halves and each half is widened separately with
// _mm256_cvtepi32_epi64.
static SIMDPP_INL
int64<8> i_to_int64(const int32<8>& a)
{
    int32<4> lo_half, hi_half;
    split(a, lo_half, hi_half);

    int64<4> lo_wide = _mm256_cvtepi32_epi64(lo_half.native());
    int64<4> hi_wide = _mm256_cvtepi32_epi64(hi_half.native());
    return combine(lo_wide, hi_wide);
}
#endif
352
#if SIMDPP_USE_AVX512F
// Sign-extends sixteen signed 32-bit elements to 64 bits. The two 256-bit
// halves of the native register are widened separately with
// _mm512_cvtepi32_epi64.
static SIMDPP_INL
int64<16> i_to_int64(const int32<16>& a)
{
    // Lower half through a cast, upper half through an explicit extract.
    const __m256i lo_half = _mm512_castsi512_si256(a.native());
    const __m256i hi_half = _mm512_extracti64x4_epi64(a.native(), 1);

    int64<8> lo_wide = _mm512_cvtepi32_epi64(lo_half);
    int64<8> hi_wide = _mm512_cvtepi32_epi64(hi_half);
    return combine(lo_wide, hi_wide);
}
#endif
363
364	template<unsigned N> SIMDPP_INL
365	int64<N> i_to_int64(const int32<N>& a)
366	{
367	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
368	}
369
370	// -----------------------------------------------------------------------------
371
372	static SIMDPP_INL
373	int64<`8`> i_to_int64(const int16<`8`>& a)
374	{
375	#if SIMDPP_USE_NULL
376	int64<`8`> r;
377	for (unsigned i = `0`; i < `8`; i++) {
378	r.vec(i/`2`).el(i%`2`) = int64_t(a.vec(`0`).el(i));
379	}
380	return r;
381	#elif SIMDPP_USE_AVX512F
382	return _mm512_cvtepi16_epi64(a.native());
383	#elif SIMDPP_USE_AVX2
384	int64<`8`> r;
385	r.vec(`0`) = _mm256_cvtepi16_epi64(a.native());
386	r.vec(`1`) = _mm256_cvtepi16_epi64(move8_l<`4`>(a).eval().native());
387	return r;
388	#elif SIMDPP_USE_SSE4_1
389	int64<`8`> r;
390	r.vec(`0`) = _mm_cvtepi16_epi64(a.native());
391	r.vec(`1`) = _mm_cvtepi16_epi64(move8_l<`2`>(a).eval().native());
392	r.vec(`2`) = _mm_cvtepi16_epi64(move8_l<`4`>(a).eval().native());
393	r.vec(`3`) = _mm_cvtepi16_epi64(move8_l<`6`>(a).eval().native());
394	return r;
395	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
396	return i_to_int64(i_to_int32(a));
397	#elif SIMDPP_USE_ALTIVEC
398	int64<`8`> r;
399	mem_block<int16<`8`>> b = a;
400	for (unsigned i = `0`; i < `8`; i++) {
401	r.vec(i/`2`).el(i%`2`) = int64_t(b[i]);
402	}
403	return r;
404	#endif
405	}
406
#if SIMDPP_USE_AVX2
// Sign-extends sixteen signed 16-bit elements to 64 bits. The input is split
// into two eight-element halves which are then widened with the widest
// available conversion instruction.
static SIMDPP_INL
int64<16> i_to_int64(const int16<16>& a)
{
    int16<8> lo_half, hi_half;
    split(a, lo_half, hi_half);

    int64<16> out;
#if SIMDPP_USE_AVX512F
    // One 512-bit conversion handles a whole eight-element half.
    out.vec(0) = _mm512_cvtepi16_epi64(lo_half.native());
    out.vec(1) = _mm512_cvtepi16_epi64(hi_half.native());
#else
    // A 256-bit conversion widens four elements, so each half needs two calls:
    // one on the half itself and one on the half moved down by four elements.
    out.vec(0) = _mm256_cvtepi16_epi64(lo_half.native());
    out.vec(1) = _mm256_cvtepi16_epi64(move8_l<4>(lo_half).eval().native());
    out.vec(2) = _mm256_cvtepi16_epi64(hi_half.native());
    out.vec(3) = _mm256_cvtepi16_epi64(move8_l<4>(hi_half).eval().native());
#endif
    return out;
}
#endif
430
#if SIMDPP_USE_AVX512F
// Sign-extends thirty-two signed 16-bit elements to 64 bits. The input is
// reduced to four eight-element quarters, each widened by one
// _mm512_cvtepi16_epi64 call.
static SIMDPP_INL
int64<32> i_to_int64(const int16<32>& a)
{
    // First level: obtain the two sixteen-element halves.
    int16<16> lo16, hi16;
#if SIMDPP_USE_AVX512BW
    split(a, lo16, hi16);
#else
    // Without AVX512BW the halves are read directly as the two sub-vectors.
    lo16 = a.vec(0);
    hi16 = a.vec(1);
#endif

    // Second level: split each half into eight-element quarters.
    int16<8> q0, q1, q2, q3;
    split(lo16, q0, q1);
    split(hi16, q2, q3);

    int64<32> out;
    out.vec(0) = _mm512_cvtepi16_epi64(q0.native());
    out.vec(1) = _mm512_cvtepi16_epi64(q1.native());
    out.vec(2) = _mm512_cvtepi16_epi64(q2.native());
    out.vec(3) = _mm512_cvtepi16_epi64(q3.native());
    return out;
}
#endif
454
455	template<unsigned N> SIMDPP_INL
456	int64<N> i_to_int64(const int16<N>& a)
457	{
458	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
459	}
460
461	// -----------------------------------------------------------------------------
462
463	static SIMDPP_INL
464	int64<`16`> i_to_int64(const int8<`16`>& a)
465	{
466	#if SIMDPP_USE_NULL
467	int64<`16`> r;
468	for (unsigned i = `0`; i < `16`; i++) {
469	r.vec(i/`2`).el(i%`2`) = int64_t(a.vec(`0`).el(i));
470	}
471	return r;
472	#elif SIMDPP_USE_AVX512F
473	int64<`16`> r;
474	r.vec(`0`) = _mm512_cvtepi8_epi64(a.native());
475	r.vec(`1`) = _mm512_cvtepi8_epi64(move16_l<`8`>(a).eval().native());
476	return r;
477	#elif SIMDPP_USE_AVX2
478	int64<`16`> r;
479	r.vec(`0`) = _mm256_cvtepi8_epi64(a.native());
480	r.vec(`1`) = _mm256_cvtepi8_epi64(move16_l<`4`>(a).eval().native());
481	r.vec(`2`) = _mm256_cvtepi8_epi64(move16_l<`8`>(a).eval().native());
482	r.vec(`3`) = _mm256_cvtepi8_epi64(move16_l<`12`>(a).eval().native());
483	return r;
484	#elif SIMDPP_USE_SSE4_1
485	int64<`16`> r;
486	r.vec(`0`) = _mm_cvtepi8_epi64(a.native());
487	r.vec(`1`) = _mm_cvtepi8_epi64(move16_l<`2`>(a).eval().native());
488	r.vec(`2`) = _mm_cvtepi8_epi64(move16_l<`4`>(a).eval().native());
489	r.vec(`3`) = _mm_cvtepi8_epi64(move16_l<`6`>(a).eval().native());
490	r.vec(`4`) = _mm_cvtepi8_epi64(move16_l<`8`>(a).eval().native());
491	r.vec(`5`) = _mm_cvtepi8_epi64(move16_l<`10`>(a).eval().native());
492	r.vec(`6`) = _mm_cvtepi8_epi64(move16_l<`12`>(a).eval().native());
493	r.vec(`7`) = _mm_cvtepi8_epi64(move16_l<`14`>(a).eval().native());
494	return r;
495	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_MSA \|\| SIMDPP_USE_VSX_207
496	return i_to_int64(i_to_int32(a));
497	#elif SIMDPP_USE_ALTIVEC
498	int64<`16`> r;
499	mem_block<int8<`16`>> b = a;
500	for (unsigned i = `0`; i < `16`; i++) {
501	r.vec(i/`2`).el(i%`2`) = int64_t(b[i]);
502	}
503	return r;
504	#endif
505	}
506
#if SIMDPP_USE_AVX2
// Sign-extends thirty-two signed 8-bit elements to 64 bits. The input is split
// into two sixteen-element halves which are then widened with the widest
// available conversion instruction.
static SIMDPP_INL
int64<32> i_to_int64(const int8<32>& a)
{
    int8<16> lo_half, hi_half;
    split(a, lo_half, hi_half);

    int64<32> out;
#if SIMDPP_USE_AVX512F
    // Eight elements per conversion: each half, then each half moved down by 8.
    out.vec(0) = _mm512_cvtepi8_epi64(lo_half.native());
    out.vec(1) = _mm512_cvtepi8_epi64(move16_l<8>(lo_half).eval().native());
    out.vec(2) = _mm512_cvtepi8_epi64(hi_half.native());
    out.vec(3) = _mm512_cvtepi8_epi64(move16_l<8>(hi_half).eval().native());
#else
    // Four elements per conversion: each half moved down by 0, 4, 8 and 12.
    out.vec(0) = _mm256_cvtepi8_epi64(lo_half.native());
    out.vec(1) = _mm256_cvtepi8_epi64(move16_l<4>(lo_half).eval().native());
    out.vec(2) = _mm256_cvtepi8_epi64(move16_l<8>(lo_half).eval().native());
    out.vec(3) = _mm256_cvtepi8_epi64(move16_l<12>(lo_half).eval().native());
    out.vec(4) = _mm256_cvtepi8_epi64(hi_half.native());
    out.vec(5) = _mm256_cvtepi8_epi64(move16_l<4>(hi_half).eval().native());
    out.vec(6) = _mm256_cvtepi8_epi64(move16_l<8>(hi_half).eval().native());
    out.vec(7) = _mm256_cvtepi8_epi64(move16_l<12>(hi_half).eval().native());
#endif
    return out;
}
#endif
536
#if SIMDPP_USE_AVX512F
// Sign-extends sixty-four signed 8-bit elements to 64 bits. The input is
// reduced to four sixteen-element quarters; each quarter takes two
// _mm512_cvtepi8_epi64 calls (eight elements per call).
static SIMDPP_INL
int64<64> i_to_int64(const int8<64>& a)
{
    // First level: obtain the two thirty-two-element halves.
    int8<32> lo32, hi32;
#if SIMDPP_USE_AVX512BW
    split(a, lo32, hi32);
#else
    // Without AVX512BW the halves are read directly as the two sub-vectors.
    lo32 = a.vec(0);
    hi32 = a.vec(1);
#endif

    // Second level: split each half into sixteen-element quarters.
    int8<16> q0, q1, q2, q3;
    split(lo32, q0, q1);
    split(hi32, q2, q3);

    // Even result vectors take the low eight elements of a quarter, odd ones
    // take the quarter moved down by eight elements.
    int64<64> out;
    out.vec(0) = _mm512_cvtepi8_epi64(q0.native());
    out.vec(1) = _mm512_cvtepi8_epi64(move16_l<8>(q0).eval().native());
    out.vec(2) = _mm512_cvtepi8_epi64(q1.native());
    out.vec(3) = _mm512_cvtepi8_epi64(move16_l<8>(q1).eval().native());
    out.vec(4) = _mm512_cvtepi8_epi64(q2.native());
    out.vec(5) = _mm512_cvtepi8_epi64(move16_l<8>(q2).eval().native());
    out.vec(6) = _mm512_cvtepi8_epi64(q3.native());
    out.vec(7) = _mm512_cvtepi8_epi64(move16_l<8>(q3).eval().native());
    return out;
}
#endif
564
565	template<unsigned N> SIMDPP_INL
566	int64<N> i_to_int64(const int8<N>& a)
567	{
568	SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
569	}
570
571	// -----------------------------------------------------------------------------
572
573	} // namespace insn
574	} // namespace detail
575	} // namespace SIMDPP_ARCH_NAMESPACE
576	} // namespace simdpp
577
578	#endif
579
580
581

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/conv_extend_to_int64.h