1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_MAKE_CONST_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_MAKE_CONST_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/null/set.h>
17#include <simdpp/detail/traits.h>
18
19#if _MSC_VER
20#pragma warning(push)
21#pragma warning(disable: 4244)
22#endif
23
24namespace simdpp {
25namespace SIMDPP_ARCH_NAMESPACE {
26namespace detail {
27namespace insn {
28
29#if SIMDPP_USE_NEON_FLT_SP
30template<class VE> SIMDPP_INL
31void i_make_const(float32<4>& v, const expr_vec_make_const<VE,1>& e, unsigned)
32{
33 float rv = e.val(0);
34 v = vld1q_dup_f32(&rv);
35}
36
37template<class VE> SIMDPP_INL
38void i_make_const(float32<4>& v, const expr_vec_make_const<VE,2>& e, unsigned off)
39{
40 float SIMDPP_ALIGN(8) data[2] = {
41 (float) e.val(off+0),
42 (float) e.val(off+1)
43 };
44 float32x2_t half = vld1_f32(data);
45 v = vcombine_f32(half, half);
46}
47#endif
48
// Builds a float32<4> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i); how val() maps
// an index to a stored element is defined by expr_vec_make_const, which is
// declared outside this file.
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(float32<4>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    // Scalar emulation path; values are passed in ascending lane order.
    v = detail::null::make_vec<float32<4>, float>(e.val(off+0), e.val(off+1),
                                                  e.val(off+2), e.val(off+3));
#elif SIMDPP_USE_SSE2
    // _mm_set_ps takes its arguments from the highest lane down to lane 0,
    // hence the reversed index order.
    v = _mm_set_ps(e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
#elif SIMDPP_USE_NEON
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    float SIMDPP_ALIGN(16) data[4] = {
        (float) e.val(off+0),
        (float) e.val(off+1),
        (float) e.val(off+2),
        (float) e.val(off+3)
    };
    v = vld1q_f32(data);
#elif SIMDPP_USE_ALTIVEC
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (__vector float){ float(e.val(off+0)), float(e.val(off+1)),
                          float(e.val(off+2)), float(e.val(off+3)) };
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v4f32){ float(e.val(off+0)), float(e.val(off+1)),
                 float(e.val(off+2)), float(e.val(off+3)) };
#endif
}
73
74#if SIMDPP_USE_AVX
75template<class VE, unsigned N> SIMDPP_INL
76void i_make_const(float32<8>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
77{
78 v = _mm256_set_ps(e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
79 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
80}
81
82#endif
83
84#if SIMDPP_USE_AVX512F
85template<class VE, unsigned N> SIMDPP_INL
86void i_make_const(float32<16>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
87{
88 v = _mm512_set_ps(e.val(off+15), e.val(off+14), e.val(off+13), e.val(off+12),
89 e.val(off+11), e.val(off+10), e.val(off+9), e.val(off+8),
90 e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
91 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
92}
93#endif
94
95// -----------------------------------------------------------------------------
96
// Builds a float64<2> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i).
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(float64<2>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_SSE2
    // _mm_set_pd takes the high lane first, then the low lane.
    v = _mm_set_pd(e.val(off+1), e.val(off+0));
#elif SIMDPP_USE_NEON64
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    double SIMDPP_ALIGN(16) data[2] = {
        (double) e.val(off+0),
        (double) e.val(off+1)
    };
    v = vld1q_f64(data);
#elif SIMDPP_USE_VSX_206
    // Vector initializer list; elements are listed in ascending lane order.
    __vector double r = { double(e.val(off+0)), double(e.val(off+1)) };
    v = r;
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v2f64){ double(e.val(off+0)), double(e.val(off+1)) };
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    // Scalar emulation path for targets that did not match a branch above
    // (the NEON64 and VSX_206 cases are checked earlier in this chain).
    v = detail::null::make_vec<float64<2>, double>(e.val(off+0), e.val(off+1));
#endif
}
117
118#if SIMDPP_USE_AVX
119template<class VE, unsigned N> SIMDPP_INL
120void i_make_const(float64<4>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
121{
122 v = _mm256_set_pd(e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
123}
124#endif
125
126#if SIMDPP_USE_AVX512F
127template<class VE, unsigned N> SIMDPP_INL
128void i_make_const(float64<8>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
129{
130 v = _mm512_set_pd(e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
131 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
132}
133#endif
134
135// -----------------------------------------------------------------------------
136
137#if SIMDPP_USE_NEON
138template<class VE> SIMDPP_INL
139void i_make_const(uint8<16>& v, const expr_vec_make_const<VE,1>& e, unsigned off)
140{
141 uint8_t rv = e.val(off+0);
142 v = vld1q_dup_u8(&rv);
143}
144
145template<class VE> SIMDPP_INL
146void i_make_const(uint8<16>& v, const expr_vec_make_const<VE,2>& e, unsigned off)
147{
148 uint16_t rv = (e.val(off+0) & 0xff) | (e.val(off+1) & 0xff) << 8;
149 v = (uint16<8>) vld1q_dup_u16(&rv);
150}
151
152template<class VE> SIMDPP_INL
153void i_make_const(uint8<16>& v, const expr_vec_make_const<VE,4>& e, unsigned off)
154{
155 uint32_t rv = (e.val(off+0) & 0xff) | (e.val(off+1) & 0xff) << 8 |
156 (e.val(off+2) & 0xff) << 16 | (e.val(off+3) & 0xff) << 24;
157 v = (uint32<4>) vld1q_dup_u32(&rv);
158}
159
160template<class VE> SIMDPP_INL
161void i_make_const(uint8<16>& v, const expr_vec_make_const<VE,8>& e, unsigned off)
162{
163 uint8_t SIMDPP_ALIGN(8) data[8] = {
164 (uint8_t) e.val(off+0), (uint8_t) e.val(off+1),
165 (uint8_t) e.val(off+2), (uint8_t) e.val(off+3),
166 (uint8_t) e.val(off+4), (uint8_t) e.val(off+5),
167 (uint8_t) e.val(off+6), (uint8_t) e.val(off+7)
168 };
169 uint8x8_t half = vld1_u8(data);
170 v = vcombine_u8(half, half);
171}
172#endif
173
// Builds a uint8<16> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i), narrowed to
// 8 bits.
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(uint8<16>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_NULL
    // Scalar emulation path; values are passed in ascending lane order.
    v = detail::null::make_vec<uint8<16>, uint8_t>(
        e.val(off+0), e.val(off+1), e.val(off+2), e.val(off+3),
        e.val(off+4), e.val(off+5), e.val(off+6), e.val(off+7),
        e.val(off+8), e.val(off+9), e.val(off+10), e.val(off+11),
        e.val(off+12), e.val(off+13), e.val(off+14), e.val(off+15));
#elif SIMDPP_USE_SSE2
    // _mm_set_epi8 takes its arguments from the highest lane down to lane 0,
    // hence the reversed index order.
    v = _mm_set_epi8(e.val(off+15), e.val(off+14), e.val(off+13), e.val(off+12),
                     e.val(off+11), e.val(off+10), e.val(off+9), e.val(off+8),
                     e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
                     e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
#elif SIMDPP_USE_NEON
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    uint8_t SIMDPP_ALIGN(16) data[16] = {
        (uint8_t) e.val(off+0), (uint8_t) e.val(off+1),
        (uint8_t) e.val(off+2), (uint8_t) e.val(off+3),
        (uint8_t) e.val(off+4), (uint8_t) e.val(off+5),
        (uint8_t) e.val(off+6), (uint8_t) e.val(off+7),
        (uint8_t) e.val(off+8), (uint8_t) e.val(off+9),
        (uint8_t) e.val(off+10), (uint8_t) e.val(off+11),
        (uint8_t) e.val(off+12), (uint8_t) e.val(off+13),
        (uint8_t) e.val(off+14), (uint8_t) e.val(off+15)
    };
    v = vld1q_u8(data);
#elif SIMDPP_USE_ALTIVEC
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (__vector uint8_t){
        uint8_t(e.val(off+0)), uint8_t(e.val(off+1)), uint8_t(e.val(off+2)), uint8_t(e.val(off+3)),
        uint8_t(e.val(off+4)), uint8_t(e.val(off+5)), uint8_t(e.val(off+6)), uint8_t(e.val(off+7)),
        uint8_t(e.val(off+8)), uint8_t(e.val(off+9)), uint8_t(e.val(off+10)), uint8_t(e.val(off+11)),
        uint8_t(e.val(off+12)), uint8_t(e.val(off+13)), uint8_t(e.val(off+14)), uint8_t(e.val(off+15))
    };
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v16u8){
        uint8_t(e.val(off+0)), uint8_t(e.val(off+1)), uint8_t(e.val(off+2)), uint8_t(e.val(off+3)),
        uint8_t(e.val(off+4)), uint8_t(e.val(off+5)), uint8_t(e.val(off+6)), uint8_t(e.val(off+7)),
        uint8_t(e.val(off+8)), uint8_t(e.val(off+9)), uint8_t(e.val(off+10)), uint8_t(e.val(off+11)),
        uint8_t(e.val(off+12)), uint8_t(e.val(off+13)), uint8_t(e.val(off+14)), uint8_t(e.val(off+15))
    };
#endif
}
216
217#if SIMDPP_USE_AVX2
218template<class VE, unsigned N> SIMDPP_INL
219void i_make_const(uint8<32>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
220{
221 v = _mm256_set_epi8(e.val(off+31), e.val(off+30), e.val(off+29), e.val(off+28),
222 e.val(off+27), e.val(off+26), e.val(off+25), e.val(off+24),
223 e.val(off+23), e.val(off+22), e.val(off+21), e.val(off+20),
224 e.val(off+19), e.val(off+18), e.val(off+17), e.val(off+16),
225 e.val(off+15), e.val(off+14), e.val(off+13), e.val(off+12),
226 e.val(off+11), e.val(off+10), e.val(off+9), e.val(off+8),
227 e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
228 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
229}
230#endif
231
232#if SIMDPP_USE_AVX512BW
// Packs four bytes into one 32-bit word: a1 occupies bits 0-7, a2 bits 8-15,
// a3 bits 16-23 and a4 bits 24-31.
SIMDPP_INL uint32_t make_uint32_uint8(uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4)
{
    // Widen to uint32_t before shifting: a uint8_t operand is promoted to
    // signed int, so `a4 << 24` would shift into the sign bit for a4 >= 0x80.
    return  static_cast<uint32_t>(a1)
         | (static_cast<uint32_t>(a2) << 8)
         | (static_cast<uint32_t>(a3) << 16)
         | (static_cast<uint32_t>(a4) << 24);
}
237
238template<class VE, unsigned N> SIMDPP_INL
239void i_make_const(uint8<64>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
240{
241 v = _mm512_set_epi32(
242 make_uint32_uint8(e.val(off+60), e.val(off+61), e.val(off+62), e.val(off+63)),
243 make_uint32_uint8(e.val(off+56), e.val(off+57), e.val(off+58), e.val(off+59)),
244 make_uint32_uint8(e.val(off+52), e.val(off+53), e.val(off+54), e.val(off+55)),
245 make_uint32_uint8(e.val(off+48), e.val(off+49), e.val(off+50), e.val(off+51)),
246 make_uint32_uint8(e.val(off+44), e.val(off+45), e.val(off+46), e.val(off+47)),
247 make_uint32_uint8(e.val(off+40), e.val(off+41), e.val(off+42), e.val(off+43)),
248 make_uint32_uint8(e.val(off+36), e.val(off+37), e.val(off+38), e.val(off+39)),
249 make_uint32_uint8(e.val(off+32), e.val(off+33), e.val(off+34), e.val(off+35)),
250 make_uint32_uint8(e.val(off+28), e.val(off+29), e.val(off+30), e.val(off+31)),
251 make_uint32_uint8(e.val(off+24), e.val(off+25), e.val(off+26), e.val(off+27)),
252 make_uint32_uint8(e.val(off+20), e.val(off+21), e.val(off+22), e.val(off+23)),
253 make_uint32_uint8(e.val(off+16), e.val(off+17), e.val(off+18), e.val(off+19)),
254 make_uint32_uint8(e.val(off+12), e.val(off+13), e.val(off+14), e.val(off+15)),
255 make_uint32_uint8(e.val(off+8), e.val(off+9), e.val(off+10), e.val(off+11)),
256 make_uint32_uint8(e.val(off+4), e.val(off+5), e.val(off+6), e.val(off+7)),
257 make_uint32_uint8(e.val(off+0), e.val(off+1), e.val(off+2), e.val(off+3))
258 );
259}
260#endif
261
262
263// -----------------------------------------------------------------------------
264
265#if SIMDPP_USE_NEON
266template<class VE> SIMDPP_INL
267void i_make_const(uint16<8>& v, const expr_vec_make_const<VE,1>& e, unsigned off)
268{
269 uint16_t rv = e.val(off+0);
270 v = vld1q_dup_u16(&rv);
271}
272
273template<class VE> SIMDPP_INL
274void i_make_const(uint16<8>& v, const expr_vec_make_const<VE,2>& e, unsigned off)
275{
276 uint32_t rv = (e.val(off+0) & 0xffff) | (e.val(off+1) & 0xffff) << 16;
277 v = (uint32<4>) vld1q_dup_u32(&rv);
278}
279
280template<class VE> SIMDPP_INL
281void i_make_const(uint16<8>& v, const expr_vec_make_const<VE,4>& e, unsigned off)
282{
283 uint16_t SIMDPP_ALIGN(8) data[4] = {
284 (uint16_t) e.val(off+0),
285 (uint16_t) e.val(off+1),
286 (uint16_t) e.val(off+2),
287 (uint16_t) e.val(off+3)
288 };
289 uint16x4_t half = vld1_u16(data);
290 v = vcombine_u16(half, half);
291}
292#endif
293
294
// Builds a uint16<8> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i), narrowed to
// 16 bits.
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(uint16<8>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_NULL
    // Scalar emulation path; values are passed in ascending lane order.
    v = detail::null::make_vec<uint16<8>, uint16_t>(e.val(off+0), e.val(off+1), e.val(off+2), e.val(off+3),
                                                    e.val(off+4), e.val(off+5), e.val(off+6), e.val(off+7));
#elif SIMDPP_USE_SSE2
    // _mm_set_epi16 takes its arguments from the highest lane down to lane 0,
    // hence the reversed index order.
    v = _mm_set_epi16(e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
                      e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
#elif SIMDPP_USE_NEON
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    uint16_t SIMDPP_ALIGN(16) data[8] = {
        (uint16_t) e.val(off+0), (uint16_t) e.val(off+1),
        (uint16_t) e.val(off+2), (uint16_t) e.val(off+3),
        (uint16_t) e.val(off+4), (uint16_t) e.val(off+5),
        (uint16_t) e.val(off+6), (uint16_t) e.val(off+7)
    };
    v = vld1q_u16(data);
#elif SIMDPP_USE_ALTIVEC
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (__vector uint16_t){
        uint16_t(e.val(off+0)), uint16_t(e.val(off+1)), uint16_t(e.val(off+2)), uint16_t(e.val(off+3)),
        uint16_t(e.val(off+4)), uint16_t(e.val(off+5)), uint16_t(e.val(off+6)), uint16_t(e.val(off+7))
    };
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v8u16){
        uint16_t(e.val(off+0)), uint16_t(e.val(off+1)), uint16_t(e.val(off+2)), uint16_t(e.val(off+3)),
        uint16_t(e.val(off+4)), uint16_t(e.val(off+5)), uint16_t(e.val(off+6)), uint16_t(e.val(off+7))
    };
#endif
}
324
325#if SIMDPP_USE_AVX2
326template<class VE, unsigned N> SIMDPP_INL
327void i_make_const(uint16<16>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
328{
329 v = _mm256_set_epi16(e.val(off+15), e.val(off+14), e.val(off+13), e.val(off+12),
330 e.val(off+11), e.val(off+10), e.val(off+9), e.val(off+8),
331 e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
332 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
333}
334#endif
335
336#if SIMDPP_USE_AVX512BW
// Packs two 16-bit values into one 32-bit word: a1 occupies bits 0-15 and
// a2 bits 16-31.
SIMDPP_INL uint32_t make_uint32_uint16(uint16_t a1, uint16_t a2)
{
    // Widen to uint32_t before shifting: a uint16_t operand is promoted to
    // signed int, so `a2 << 16` would shift into the sign bit for a2 >= 0x8000.
    return static_cast<uint32_t>(a1) | (static_cast<uint32_t>(a2) << 16);
}
341
342template<class VE, unsigned N> SIMDPP_INL
343void i_make_const(uint16<32>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
344{
345 v = _mm512_set_epi32(
346 make_uint32_uint16(e.val(off+30), e.val(off+31)),
347 make_uint32_uint16(e.val(off+28), e.val(off+29)),
348 make_uint32_uint16(e.val(off+26), e.val(off+27)),
349 make_uint32_uint16(e.val(off+24), e.val(off+25)),
350 make_uint32_uint16(e.val(off+22), e.val(off+23)),
351 make_uint32_uint16(e.val(off+20), e.val(off+21)),
352 make_uint32_uint16(e.val(off+18), e.val(off+19)),
353 make_uint32_uint16(e.val(off+16), e.val(off+17)),
354 make_uint32_uint16(e.val(off+14), e.val(off+15)),
355 make_uint32_uint16(e.val(off+12), e.val(off+13)),
356 make_uint32_uint16(e.val(off+10), e.val(off+11)),
357 make_uint32_uint16(e.val(off+8), e.val(off+9)),
358 make_uint32_uint16(e.val(off+6), e.val(off+7)),
359 make_uint32_uint16(e.val(off+4), e.val(off+5)),
360 make_uint32_uint16(e.val(off+2), e.val(off+3)),
361 make_uint32_uint16(e.val(off+0), e.val(off+1)));
362}
363#endif
364
365// -----------------------------------------------------------------------------
366
367#if SIMDPP_USE_NEON
368template<class VE> SIMDPP_INL
369void i_make_const(uint32<4>& v, const expr_vec_make_const<VE,1>& e, unsigned off)
370{
371 uint32_t rv = e.val(off+0);
372 v = vld1q_dup_u32(&rv);
373}
374
375template<class VE> SIMDPP_INL
376void i_make_const(uint32<4>& v, const expr_vec_make_const<VE,2>& e, unsigned off)
377{
378 uint32_t SIMDPP_ALIGN(8) data[2] = {
379 (uint32_t) e.val(off+0),
380 (uint32_t) e.val(off+1)
381 };
382 uint32x2_t half = vld1_u32(data);
383 v = vcombine_u32(half, half);
384}
385#endif
386
// Builds a uint32<4> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i), converted to
// 32 bits.
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(uint32<4>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_NULL
    // Scalar emulation path; values are passed in ascending lane order.
    v = detail::null::make_vec<uint32<4>, uint32_t>(e.val(off+0), e.val(off+1), e.val(off+2), e.val(off+3));
#elif SIMDPP_USE_SSE2
    // _mm_set_epi32 takes its arguments from the highest lane down to lane 0,
    // hence the reversed index order.
    v = _mm_set_epi32(e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
#elif SIMDPP_USE_NEON
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    uint32_t SIMDPP_ALIGN(16) data[4] = {
        (uint32_t) e.val(off+0), (uint32_t) e.val(off+1),
        (uint32_t) e.val(off+2), (uint32_t) e.val(off+3)
    };
    v = vld1q_u32(data);
#elif SIMDPP_USE_ALTIVEC
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (__vector uint32_t) { uint32_t(e.val(off+0)), uint32_t(e.val(off+1)),
                              uint32_t(e.val(off+2)), uint32_t(e.val(off+3)) };
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v4u32) { uint32_t(e.val(off+0)), uint32_t(e.val(off+1)),
                  uint32_t(e.val(off+2)), uint32_t(e.val(off+3)) };
#endif
}
408
409#if SIMDPP_USE_AVX2
410template<class VE, unsigned N> SIMDPP_INL
411void i_make_const(uint32<8>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
412{
413 v = _mm256_set_epi32(e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
414 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
415}
416#endif
417
418#if SIMDPP_USE_AVX512F
419template<class VE, unsigned N> SIMDPP_INL
420void i_make_const(uint32<16>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
421{
422 v = _mm512_set_epi32(e.val(off+15), e.val(off+14), e.val(off+13), e.val(off+12),
423 e.val(off+11), e.val(off+10), e.val(off+9), e.val(off+8),
424 e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
425 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
426}
427#endif
428
429// -----------------------------------------------------------------------------
430
431#if SIMDPP_USE_NEON
432template<class VE> SIMDPP_INL
433void i_make_const(uint64<2>& v, const expr_vec_make_const<VE,1>& e, unsigned off)
434{
435 uint64x1_t r0 = vcreate_u64(uint64_t(e.val(off+0)));
436 v = vcombine_u64(r0, r0);
437}
438#endif
439
// Builds a uint64<2> from a constant expression with an arbitrary element
// count N. Lane i of the result is taken from e.val(off + i).
//
// Exactly one branch below is compiled, chosen by the active instruction set.
// NOTE(review): there is no #else branch, so if none of the listed
// SIMDPP_USE_* macros is set the body is empty and v is left untouched.
template<class VE, unsigned N> SIMDPP_INL
void i_make_const(uint64<2>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
{
#if SIMDPP_USE_SSE2
#if SIMDPP_32_BITS && _MSC_VER
    // MSVC does not support _mm_set_epi64x in 32-bit mode
    // Work-around: split each 64-bit value into its high and low 32-bit
    // halves and pass them to _mm_set_epi32 (highest 32-bit lane first).
    uint64_t v1 = e.val(off+1);
    uint64_t v0 = e.val(off+0);
    v = _mm_set_epi32(v1 >> 32, v1 & 0xffffffff, v0 >> 32, v0 & 0xffffffff);
#else
    // _mm_set_epi64x takes the high lane first, then the low lane.
    v = _mm_set_epi64x(e.val(off+1), e.val(off+0));
#endif
#elif SIMDPP_USE_NEON
    // Stage the lanes in a 16-byte aligned buffer and load them in one go.
    uint64_t SIMDPP_ALIGN(16) data[2] = {
        (uint64_t) e.val(off+0),
        (uint64_t) e.val(off+1)
    };
    v = vld1q_u64(data);
#elif SIMDPP_USE_VSX_207
    // Vector initializer list; elements are listed in ascending lane order.
    __vector uint64_t r = { (uint64_t)e.val(off+0), (uint64_t)e.val(off+1) };
    v = r;
#elif SIMDPP_USE_MSA
    // Compiler vector literal; elements are listed in ascending lane order.
    v = (v2u64) { uint64_t(e.val(off+0)), uint64_t(e.val(off+1)) };
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    // Scalar emulation path for targets that did not match a branch above
    // (the VSX_207 case is checked earlier in this chain).
    v = detail::null::make_vec<uint64<2>, uint64_t>(e.val(off+0), e.val(off+1));
#endif
}
467
468#if SIMDPP_USE_AVX2
469template<class VE, unsigned N> SIMDPP_INL
470void i_make_const(uint64<4>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
471{
472#if SIMDPP_32_BITS && _MSC_VER
473 // MSVC does not support _mm256_set_epi64x in 32-bit mode
474 uint64_t v3 = e.val(off+3);
475 uint64_t v2 = e.val(off+2);
476 uint64_t v1 = e.val(off+1);
477 uint64_t v0 = e.val(off+0);
478 v = _mm256_set_epi32(v3 >> 32, v3 & 0xffffffff, v2 >> 32, v2 & 0xffffffff,
479 v1 >> 32, v1 & 0xffffffff, v0 >> 32, v0 & 0xffffffff);
480#else
481 v = _mm256_set_epi64x(e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
482#endif
483}
484#endif
485
486#if SIMDPP_USE_AVX512F
487template<class VE, unsigned N> SIMDPP_INL
488void i_make_const(uint64<8>& v, const expr_vec_make_const<VE,N>& e, unsigned off)
489{
490 v = _mm512_set_epi64(e.val(off+7), e.val(off+6), e.val(off+5), e.val(off+4),
491 e.val(off+3), e.val(off+2), e.val(off+1), e.val(off+0));
492}
493#endif
494
495// -----------------------------------------------------------------------------
496
497template<class V, class VE, unsigned NE> SIMDPP_INL
498void i_make_const(V& v, const expr_vec_make_const<VE,NE>& e, unsigned off)
499{
500 for (unsigned i = 0; i < v.vec_length; ++i) {
501 i_make_const(v.vec(i), e, off + v.base_length * i);
502 }
503}
504
505// -----------------------------------------------------------------------------
506
507template<class V, class VE, unsigned N> SIMDPP_INL
508V i_make_const_any(const expr_vec_make_const<VE,N>& e)
509{
510 typename detail::remove_sign<V>::type r;
511 i_make_const(r, e, 0);
512 return V(r);
513}
514
515// -----------------------------------------------------------------------------
516} // namespace insn
517
518template<class V, class VE, unsigned N> SIMDPP_INL
519void construct_eval(V& v, const expr_vec_make_const<VE, N>& e)
520{
521 v = insn::i_make_const_any<V>(e);
522}
523
524template<class V> SIMDPP_INL
525void construct_eval(V& v, const expr_vec_make_ones& e)
526{
527 (void) e;
528 expr_vec_make_const<uint64_t,1> e2;
529 e2.a[0] = (uint64_t)-1;
530 typename V::uint_vector_type u;
531 insn::i_make_const(u, e2, 0);
532 v = u;
533}
534
535} // namespace detail
536} // namespace SIMDPP_ARCH_NAMESPACE
537} // namespace simdpp
538
539#if _MSC_VER
540#pragma warning(pop)
541#endif
542
543#endif
544
545