/*  Copyright (C) 2013-2017  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
    (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT64_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_CONV_EXTEND_TO_INT64_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/core/move_l.h>
#include <simdpp/core/zip_lo.h>
#include <simdpp/core/combine.h>
#include <simdpp/detail/mem_block.h>
#include <simdpp/detail/vector_array_conv_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

// -----------------------------------------------------------------------------

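// Zero-extends 32-bit elements to 64-bit. AVX2 and SSE4.1 use the cvtepu32
// instructions; SSE2, MSA and VSX 2.07 interleave the input with a zero
// vector; NEON uses vmovl_u32; the NULL and ALTIVEC paths widen each element
// in scalar code.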
static SIMDPP_INL
uint64<4> i_to_uint64(const uint32<4>& a)
{
#if SIMDPP_USE_NULL
    uint64x4 r;
    r.vec(0).el(0) = uint64_t(a.el(0));
    r.vec(0).el(1) = uint64_t(a.el(1));
    r.vec(1).el(0) = uint64_t(a.el(2));
    r.vec(1).el(1) = uint64_t(a.el(3));
    return r;
#elif SIMDPP_USE_AVX2
    return _mm256_cvtepu32_epi64(a.native());
#elif SIMDPP_USE_SSE4_1
    uint64x2 r1, r2;
    r1 = _mm_cvtepu32_epi64(a.native());
    r2 = _mm_cvtepu32_epi64(move4_l<2>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    return (uint64x4) combine(zip4_lo(a, (uint32x4) make_zero()),
                              zip4_hi(a, (uint32x4) make_zero()));
#elif SIMDPP_USE_NEON
    uint64x2 r1, r2;
    r1 = vmovl_u32(vget_low_u32(a.native()));
    r2 = vmovl_u32(vget_high_u32(a.native()));
    return combine(r1, r2);
#elif SIMDPP_USE_ALTIVEC
    uint64x4 r;
    mem_block<uint32x4> b = a;
    r.vec(0).el(0) = b[0];
    r.vec(0).el(1) = b[1];
    r.vec(1).el(0) = b[2];
    r.vec(1).el(1) = b[3];
    return r;
#endif
}

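// Overloads for wider inputs split the argument into native-width halves,
// convert each half and combine the results.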
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<8> i_to_uint64(const uint32<8>& a)
{
    uint32<4> a1, a2;
    uint64<4> r1, r2;
    split(a, a1, a2);
    r1 = _mm256_cvtepu32_epi64(a1.native());
    r2 = _mm256_cvtepu32_epi64(a2.native());
    return combine(r1, r2);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<16> i_to_uint64(const uint32<16>& a)
{
    uint32<8> a1, a2;
    uint64<8> r1, r2;
    split(a, a1, a2);
    r1 = _mm512_cvtepu32_epi64(a1.native());
    r2 = _mm512_cvtepu32_epi64(a2.native());
    return combine(r1, r2);
}
#endif

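// Generic case: SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT converts the argument one
// native source vector at a time and inserts each partial result into the
// wider return vector.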
template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const uint32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}

// -----------------------------------------------------------------------------

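// Zero-extends 16-bit elements to 64-bit. AVX-512F converts the whole vector
// in one instruction; AVX2 and SSE4.1 convert 4 or 2 elements per instruction,
// using move8_l to bring the next group into position; other SIMD targets
// widen to 32 bits first and reuse the 32->64 conversion above; ALTIVEC and
// NULL fall back to scalar code.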
static SIMDPP_INL
uint64<8> i_to_uint64(const uint16<8>& a)
{
#if SIMDPP_USE_NULL
    uint64<8> r;
    for (unsigned i = 0; i < 8; i++) {
        r.vec(i/2).el(i%2) = uint64_t(a.vec(0).el(i));
    }
    return r;
#elif SIMDPP_USE_AVX512F
    return _mm512_cvtepu16_epi64(a.native());
#elif SIMDPP_USE_AVX2
    uint64<8> r;
    r.vec(0) = _mm256_cvtepu16_epi64(a.native());
    r.vec(1) = _mm256_cvtepu16_epi64(move8_l<4>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE4_1
    uint64<8> r;
    r.vec(0) = _mm_cvtepu16_epi64(a.native());
    r.vec(1) = _mm_cvtepu16_epi64(move8_l<2>(a).eval().native());
    r.vec(2) = _mm_cvtepu16_epi64(move8_l<4>(a).eval().native());
    r.vec(3) = _mm_cvtepu16_epi64(move8_l<6>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    return i_to_uint64(i_to_uint32(a));
#elif SIMDPP_USE_ALTIVEC
    uint64<8> r;
    mem_block<uint16<8>> b = a;
    for (unsigned i = 0; i < 8; i++) {
        r.vec(i/2).el(i%2) = uint64_t(b[i]);
    }
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<16> i_to_uint64(const uint16<16>& a)
{
#if SIMDPP_USE_AVX512F
    uint64<16> r;
    uint16<8> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm512_cvtepu16_epi64(a0.native());
    r.vec(1) = _mm512_cvtepu16_epi64(a1.native());
    return r;
#else
    uint64<16> r;
    uint16<8> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm256_cvtepu16_epi64(a0.native());
    r.vec(1) = _mm256_cvtepu16_epi64(move8_l<4>(a0).eval().native());
    r.vec(2) = _mm256_cvtepu16_epi64(a1.native());
    r.vec(3) = _mm256_cvtepu16_epi64(move8_l<4>(a1).eval().native());
    return r;
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<32> i_to_uint64(const uint16<32>& a)
{
    uint64<32> r;
    uint16<16> a01, a23;
    uint16<8> a0, a1, a2, a3;
#if SIMDPP_USE_AVX512BW
    split(a, a01, a23);
#else
    a01 = a.vec(0);
    a23 = a.vec(1);
#endif
    split(a01, a0, a1);
    split(a23, a2, a3);

    r.vec(0) = _mm512_cvtepu16_epi64(a0.native());
    r.vec(1) = _mm512_cvtepu16_epi64(a1.native());
    r.vec(2) = _mm512_cvtepu16_epi64(a2.native());
    r.vec(3) = _mm512_cvtepu16_epi64(a3.native());
    return r;
}
#endif

template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const uint16<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}

// -----------------------------------------------------------------------------

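// Zero-extends 8-bit elements to 64-bit. The AVX-512F, AVX2 and SSE4.1 paths
// convert 8, 4 or 2 bytes per instruction, using move16_l to bring the next
// group of bytes into position; other SIMD targets widen to 32 bits first;
// ALTIVEC and NULL use scalar code.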
static SIMDPP_INL
uint64<16> i_to_uint64(const uint8<16>& a)
{
#if SIMDPP_USE_NULL
    uint64<16> r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/2).el(i%2) = uint64_t(a.vec(0).el(i));
    }
    return r;
#elif SIMDPP_USE_AVX512F
    uint64<16> r;
    r.vec(0) = _mm512_cvtepu8_epi64(a.native());
    r.vec(1) = _mm512_cvtepu8_epi64(move16_l<8>(a).eval().native());
    return r;
#elif SIMDPP_USE_AVX2
    uint64<16> r;
    r.vec(0) = _mm256_cvtepu8_epi64(a.native());
    r.vec(1) = _mm256_cvtepu8_epi64(move16_l<4>(a).eval().native());
    r.vec(2) = _mm256_cvtepu8_epi64(move16_l<8>(a).eval().native());
    r.vec(3) = _mm256_cvtepu8_epi64(move16_l<12>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE4_1
    uint64<16> r;
    r.vec(0) = _mm_cvtepu8_epi64(a.native());
    r.vec(1) = _mm_cvtepu8_epi64(move16_l<2>(a).eval().native());
    r.vec(2) = _mm_cvtepu8_epi64(move16_l<4>(a).eval().native());
    r.vec(3) = _mm_cvtepu8_epi64(move16_l<6>(a).eval().native());
    r.vec(4) = _mm_cvtepu8_epi64(move16_l<8>(a).eval().native());
    r.vec(5) = _mm_cvtepu8_epi64(move16_l<10>(a).eval().native());
    r.vec(6) = _mm_cvtepu8_epi64(move16_l<12>(a).eval().native());
    r.vec(7) = _mm_cvtepu8_epi64(move16_l<14>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    return i_to_uint64(i_to_uint32(a));
#elif SIMDPP_USE_ALTIVEC
    uint64<16> r;
    mem_block<uint8<16>> b = a;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/2).el(i%2) = uint64_t(b[i]);
    }
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64<32> i_to_uint64(const uint8<32>& a)
{
#if SIMDPP_USE_AVX512F
    uint64<32> r;
    uint8<16> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm512_cvtepu8_epi64(a0.native());
    r.vec(1) = _mm512_cvtepu8_epi64(move16_l<8>(a0).eval().native());
    r.vec(2) = _mm512_cvtepu8_epi64(a1.native());
    r.vec(3) = _mm512_cvtepu8_epi64(move16_l<8>(a1).eval().native());
    return r;
#else
    uint64<32> r;
    uint8<16> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm256_cvtepu8_epi64(a0.native());
    r.vec(1) = _mm256_cvtepu8_epi64(move16_l<4>(a0).eval().native());
    r.vec(2) = _mm256_cvtepu8_epi64(move16_l<8>(a0).eval().native());
    r.vec(3) = _mm256_cvtepu8_epi64(move16_l<12>(a0).eval().native());
    r.vec(4) = _mm256_cvtepu8_epi64(a1.native());
    r.vec(5) = _mm256_cvtepu8_epi64(move16_l<4>(a1).eval().native());
    r.vec(6) = _mm256_cvtepu8_epi64(move16_l<8>(a1).eval().native());
    r.vec(7) = _mm256_cvtepu8_epi64(move16_l<12>(a1).eval().native());
    return r;
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<64> i_to_uint64(const uint8<64>& a)
{
    uint64<64> r;
    uint8<32> a01, a23;
    uint8<16> a0, a1, a2, a3;
#if SIMDPP_USE_AVX512BW
    split(a, a01, a23);
#else
    a01 = a.vec(0);
    a23 = a.vec(1);
#endif
    split(a01, a0, a1);
    split(a23, a2, a3);

    r.vec(0) = _mm512_cvtepu8_epi64(a0.native());
    r.vec(1) = _mm512_cvtepu8_epi64(move16_l<8>(a0).eval().native());
    r.vec(2) = _mm512_cvtepu8_epi64(a1.native());
    r.vec(3) = _mm512_cvtepu8_epi64(move16_l<8>(a1).eval().native());
    r.vec(4) = _mm512_cvtepu8_epi64(a2.native());
    r.vec(5) = _mm512_cvtepu8_epi64(move16_l<8>(a2).eval().native());
    r.vec(6) = _mm512_cvtepu8_epi64(a3.native());
    r.vec(7) = _mm512_cvtepu8_epi64(move16_l<8>(a3).eval().native());
    return r;
}
#endif

template<unsigned N> SIMDPP_INL
uint64<N> i_to_uint64(const uint8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(uint64<N>, i_to_uint64, a)
}

// -----------------------------------------------------------------------------

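// Sign-extends 32-bit elements to 64-bit. AVX2 and SSE4.1 use the cvtepi32
// instructions; on SSE2, MSA and VSX 2.07 the high words are produced with an
// arithmetic right shift by 31 and interleaved with the input; NEON uses
// vmovl_s32; ALTIVEC and NULL fall back to scalar code.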
static SIMDPP_INL
int64<4> i_to_int64(const int32<4>& a)
{
#if SIMDPP_USE_NULL
    int64<4> r;
    r.vec(0).el(0) = int64_t(a.el(0));
    r.vec(0).el(1) = int64_t(a.el(1));
    r.vec(1).el(0) = int64_t(a.el(2));
    r.vec(1).el(1) = int64_t(a.el(3));
    return r;
#elif SIMDPP_USE_AVX2
    return _mm256_cvtepi32_epi64(a.native());
#elif SIMDPP_USE_SSE4_1
    int64x2 r1, r2;
    r1 = _mm_cvtepi32_epi64(a.native());
    r2 = _mm_cvtepi32_epi64(move4_l<2>(a).eval().native());
    return combine(r1, r2);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    int32x4 sign = shift_r<31>(a);
    int64x2 lo, hi;
    lo = zip4_lo(a, sign);
    hi = zip4_hi(a, sign);
    return combine(lo, hi);
#elif SIMDPP_USE_NEON
    int64x2 r1, r2;
    r1 = vmovl_s32(vget_low_s32(a.native()));
    r2 = vmovl_s32(vget_high_s32(a.native()));
    return combine(r1, r2);
#elif SIMDPP_USE_ALTIVEC
    int64x4 r;
    mem_block<int32x4> b = a;
    r.vec(0).el(0) = b[0];
    r.vec(0).el(1) = b[1];
    r.vec(1).el(0) = b[2];
    r.vec(1).el(1) = b[3];
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64<8> i_to_int64(const int32<8>& a)
{
    int32<4> a1, a2;
    int64<4> r1, r2;
    split(a, a1, a2);
    r1 = _mm256_cvtepi32_epi64(a1.native());
    r2 = _mm256_cvtepi32_epi64(a2.native());
    return combine(r1, r2);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<16> i_to_int64(const int32<16>& a)
{
    int64<8> r1, r2;
    r1 = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(a.native()));
    r2 = _mm512_cvtepi32_epi64(_mm512_extracti64x4_epi64(a.native(), 1));
    return combine(r1, r2);
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const int32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}

// -----------------------------------------------------------------------------

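// Sign-extends 16-bit elements to 64-bit, mirroring the unsigned 16->64
// conversion above but with the cvtepi16 instructions.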
static SIMDPP_INL
int64<8> i_to_int64(const int16<8>& a)
{
#if SIMDPP_USE_NULL
    int64<8> r;
    for (unsigned i = 0; i < 8; i++) {
        r.vec(i/2).el(i%2) = int64_t(a.vec(0).el(i));
    }
    return r;
#elif SIMDPP_USE_AVX512F
    return _mm512_cvtepi16_epi64(a.native());
#elif SIMDPP_USE_AVX2
    int64<8> r;
    r.vec(0) = _mm256_cvtepi16_epi64(a.native());
    r.vec(1) = _mm256_cvtepi16_epi64(move8_l<4>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE4_1
    int64<8> r;
    r.vec(0) = _mm_cvtepi16_epi64(a.native());
    r.vec(1) = _mm_cvtepi16_epi64(move8_l<2>(a).eval().native());
    r.vec(2) = _mm_cvtepi16_epi64(move8_l<4>(a).eval().native());
    r.vec(3) = _mm_cvtepi16_epi64(move8_l<6>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    return i_to_int64(i_to_int32(a));
#elif SIMDPP_USE_ALTIVEC
    int64<8> r;
    mem_block<int16<8>> b = a;
    for (unsigned i = 0; i < 8; i++) {
        r.vec(i/2).el(i%2) = int64_t(b[i]);
    }
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64<16> i_to_int64(const int16<16>& a)
{
#if SIMDPP_USE_AVX512F
    int64<16> r;
    int16<8> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm512_cvtepi16_epi64(a0.native());
    r.vec(1) = _mm512_cvtepi16_epi64(a1.native());
    return r;
#else
    int64<16> r;
    int16<8> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm256_cvtepi16_epi64(a0.native());
    r.vec(1) = _mm256_cvtepi16_epi64(move8_l<4>(a0).eval().native());
    r.vec(2) = _mm256_cvtepi16_epi64(a1.native());
    r.vec(3) = _mm256_cvtepi16_epi64(move8_l<4>(a1).eval().native());
    return r;
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<32> i_to_int64(const int16<32>& a)
{
    int64<32> r;
    int16<16> a01, a23;
    int16<8> a0, a1, a2, a3;
#if SIMDPP_USE_AVX512BW
    split(a, a01, a23);
#else
    a01 = a.vec(0);
    a23 = a.vec(1);
#endif
    split(a01, a0, a1);
    split(a23, a2, a3);

    r.vec(0) = _mm512_cvtepi16_epi64(a0.native());
    r.vec(1) = _mm512_cvtepi16_epi64(a1.native());
    r.vec(2) = _mm512_cvtepi16_epi64(a2.native());
    r.vec(3) = _mm512_cvtepi16_epi64(a3.native());
    return r;
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const int16<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}

// -----------------------------------------------------------------------------

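// Sign-extends 8-bit elements to 64-bit, mirroring the unsigned 8->64
// conversion above but with the cvtepi8 instructions.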
static SIMDPP_INL
int64<16> i_to_int64(const int8<16>& a)
{
#if SIMDPP_USE_NULL
    int64<16> r;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/2).el(i%2) = int64_t(a.vec(0).el(i));
    }
    return r;
#elif SIMDPP_USE_AVX512F
    int64<16> r;
    r.vec(0) = _mm512_cvtepi8_epi64(a.native());
    r.vec(1) = _mm512_cvtepi8_epi64(move16_l<8>(a).eval().native());
    return r;
#elif SIMDPP_USE_AVX2
    int64<16> r;
    r.vec(0) = _mm256_cvtepi8_epi64(a.native());
    r.vec(1) = _mm256_cvtepi8_epi64(move16_l<4>(a).eval().native());
    r.vec(2) = _mm256_cvtepi8_epi64(move16_l<8>(a).eval().native());
    r.vec(3) = _mm256_cvtepi8_epi64(move16_l<12>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE4_1
    int64<16> r;
    r.vec(0) = _mm_cvtepi8_epi64(a.native());
    r.vec(1) = _mm_cvtepi8_epi64(move16_l<2>(a).eval().native());
    r.vec(2) = _mm_cvtepi8_epi64(move16_l<4>(a).eval().native());
    r.vec(3) = _mm_cvtepi8_epi64(move16_l<6>(a).eval().native());
    r.vec(4) = _mm_cvtepi8_epi64(move16_l<8>(a).eval().native());
    r.vec(5) = _mm_cvtepi8_epi64(move16_l<10>(a).eval().native());
    r.vec(6) = _mm_cvtepi8_epi64(move16_l<12>(a).eval().native());
    r.vec(7) = _mm_cvtepi8_epi64(move16_l<14>(a).eval().native());
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_MSA || SIMDPP_USE_VSX_207
    return i_to_int64(i_to_int32(a));
#elif SIMDPP_USE_ALTIVEC
    int64<16> r;
    mem_block<int8<16>> b = a;
    for (unsigned i = 0; i < 16; i++) {
        r.vec(i/2).el(i%2) = int64_t(b[i]);
    }
    return r;
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64<32> i_to_int64(const int8<32>& a)
{
#if SIMDPP_USE_AVX512F
    int64<32> r;
    int8<16> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm512_cvtepi8_epi64(a0.native());
    r.vec(1) = _mm512_cvtepi8_epi64(move16_l<8>(a0).eval().native());
    r.vec(2) = _mm512_cvtepi8_epi64(a1.native());
    r.vec(3) = _mm512_cvtepi8_epi64(move16_l<8>(a1).eval().native());
    return r;
#else
    int64<32> r;
    int8<16> a0, a1;
    split(a, a0, a1);
    r.vec(0) = _mm256_cvtepi8_epi64(a0.native());
    r.vec(1) = _mm256_cvtepi8_epi64(move16_l<4>(a0).eval().native());
    r.vec(2) = _mm256_cvtepi8_epi64(move16_l<8>(a0).eval().native());
    r.vec(3) = _mm256_cvtepi8_epi64(move16_l<12>(a0).eval().native());
    r.vec(4) = _mm256_cvtepi8_epi64(a1.native());
    r.vec(5) = _mm256_cvtepi8_epi64(move16_l<4>(a1).eval().native());
    r.vec(6) = _mm256_cvtepi8_epi64(move16_l<8>(a1).eval().native());
    r.vec(7) = _mm256_cvtepi8_epi64(move16_l<12>(a1).eval().native());
    return r;
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<64> i_to_int64(const int8<64>& a)
{
    int64<64> r;
    int8<32> a01, a23;
    int8<16> a0, a1, a2, a3;
#if SIMDPP_USE_AVX512BW
    split(a, a01, a23);
#else
    a01 = a.vec(0);
    a23 = a.vec(1);
#endif
    split(a01, a0, a1);
    split(a23, a2, a3);

    r.vec(0) = _mm512_cvtepi8_epi64(a0.native());
    r.vec(1) = _mm512_cvtepi8_epi64(move16_l<8>(a0).eval().native());
    r.vec(2) = _mm512_cvtepi8_epi64(a1.native());
    r.vec(3) = _mm512_cvtepi8_epi64(move16_l<8>(a1).eval().native());
    r.vec(4) = _mm512_cvtepi8_epi64(a2.native());
    r.vec(5) = _mm512_cvtepi8_epi64(move16_l<8>(a2).eval().native());
    r.vec(6) = _mm512_cvtepi8_epi64(a3.native());
    r.vec(7) = _mm512_cvtepi8_epi64(move16_l<8>(a3).eval().native());
    return r;
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_to_int64(const int8<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL_CONV_INSERT(int64<N>, i_to_int64, a)
}

// -----------------------------------------------------------------------------

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif