1 | /* Copyright (C) 2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_SET_SPLAT_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_SET_SPLAT_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/load.h> |
17 | #include <simdpp/core/zip_lo.h> |
18 | #include <simdpp/detail/altivec/load1.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace insn { |
24 | |
// Forward declaration: the SSE2 paths of the 8-bit and 16-bit overloads
// below implement their splat in terms of this 32-bit overload, which is
// defined later in this file.
static SIMDPP_INL
void i_set_splat(uint32x4&, uint32_t);
27 | |
// Broadcasts the 8-bit value v0 to all 16 lanes of v.
static SIMDPP_INL
void i_set_splat(uint8x16& v, uint8_t v0)
{
#if SIMDPP_USE_NULL
    v = detail::null::make_vec<uint8x16>(v0);
#elif SIMDPP_USE_AVX2
    // Move the byte into lane 0, then use the AVX2 byte-broadcast.
    uint32_t u0 = v0;
    v = _mm_cvtsi32_si128(u0);
    v = _mm_broadcastb_epi8(v.native());
#elif SIMDPP_USE_SSE2
    // Pre-AVX2 SSE has no byte broadcast: replicate the byte into all
    // four bytes of a 32-bit word, then splat that word via the 32-bit
    // overload (forward-declared above).
    uint32_t u0;
    u0 = v0 * 0x01010101;
    uint32x4 u;
    i_set_splat(u, u0);
    v = u;
#elif SIMDPP_USE_NEON
    v = vdupq_n_u8(v0);
#elif SIMDPP_USE_ALTIVEC
    // Load v0 into lane 0, then broadcast lane 0. Only rv[0] needs
    // initialization: all other lanes are overwritten by splat<0>.
    SIMDPP_ALIGN(16) uint8_t rv[16];
    rv[0] = v0;
    v = altivec::load1(v, rv);
    v = splat<0>(v);
#elif SIMDPP_USE_MSA
    v = (v16u8) __msa_fill_b(v0);
#endif
}
54 | |
#if SIMDPP_USE_AVX2
// Broadcasts the 8-bit value v0 to all 32 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(uint8x32& v, uint8_t v0)
{
    // Move the byte into lane 0 of a 128-bit vector, then broadcast it
    // across the full 256-bit register.
    uint8x16 a = _mm_cvtsi32_si128(v0);
    v = _mm256_broadcastb_epi8(a.native());
}
#endif
63 | |
#if SIMDPP_USE_AVX512BW
// Broadcasts the 8-bit value v0 to all 64 lanes of the 512-bit vector v.
SIMDPP_INL void i_set_splat(uint8<64>& v, uint8_t v0)
{
    uint8x16 a = _mm_cvtsi32_si128(v0);
    v = _mm512_broadcastb_epi8(a.native());
}
#endif
71 | |
72 | template<unsigned N> SIMDPP_INL |
73 | void i_set_splat(uint8<N>& v, uint8_t v0) |
74 | { |
75 | uint8v tv; |
76 | i_set_splat(tv, v0); |
77 | for (unsigned i = 0; i < v.vec_length; ++i) { |
78 | v.vec(i) = tv; |
79 | } |
80 | } |
81 | |
82 | // ----------------------------------------------------------------------------- |
83 | |
// Broadcasts the 16-bit value v0 to all 8 lanes of v.
static SIMDPP_INL
void i_set_splat(uint16x8& v, uint16_t v0)
{
#if SIMDPP_USE_NULL
    v = detail::null::make_vec<uint16x8>(v0);
#elif SIMDPP_USE_AVX2
    // Move the value into lane 0, then use the AVX2 word-broadcast.
    uint32_t u0 = v0;
    v = _mm_cvtsi32_si128(u0);
    v = _mm_broadcastw_epi16(v.native());
#elif SIMDPP_USE_SSE2
    // Pre-AVX2 SSE has no 16-bit broadcast: replicate the value into
    // both halves of a 32-bit word and splat it via the 32-bit overload
    // (forward-declared above).
    uint32_t u0;
    u0 = v0 | v0 << 16;
    uint32x4 u;
    i_set_splat(u, u0);
    v = u;
#elif SIMDPP_USE_NEON
    v = vdupq_n_u16(v0);
#elif SIMDPP_USE_ALTIVEC
    // Load v0 into lane 0, then broadcast lane 0. Only rv[0] needs
    // initialization: all other lanes are overwritten by splat<0>.
    SIMDPP_ALIGN(16) uint16_t rv[8];
    rv[0] = v0;
    v = altivec::load1(v, rv);
    v = splat<0>(v);
#elif SIMDPP_USE_MSA
    v = (v8u16) __msa_fill_h(v0);
#endif
}
110 | |
#if SIMDPP_USE_AVX2
// Broadcasts the 16-bit value v0 to all 16 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(uint16x16& v, uint16_t v0)
{
    uint16x8 a = _mm_cvtsi32_si128(v0);
    v = _mm256_broadcastw_epi16(a.native());
}
#endif
119 | |
#if SIMDPP_USE_AVX512BW
// Broadcasts the 16-bit value v0 to all 32 lanes of the 512-bit vector v.
SIMDPP_INL void i_set_splat(uint16<32>& v, uint16_t v0)
{
    uint16x8 a = _mm_cvtsi32_si128(v0);
    v = _mm512_broadcastw_epi16(a.native());
}
#endif
127 | |
128 | template<unsigned N> SIMDPP_INL |
129 | void i_set_splat(uint16<N>& v, uint16_t v0) |
130 | { |
131 | uint16v tv; |
132 | i_set_splat(tv, v0); |
133 | for (unsigned i = 0; i < v.vec_length; ++i) { |
134 | v.vec(i) = tv; |
135 | } |
136 | } |
137 | |
138 | // ----------------------------------------------------------------------------- |
139 | |
// Broadcasts the 32-bit value v0 to all 4 lanes of v.
static SIMDPP_INL
void i_set_splat(uint32x4& v, uint32_t v0)
{
#if SIMDPP_USE_NULL
    v = detail::null::make_vec<uint32x4>(v0);
#elif SIMDPP_USE_AVX2
    v = _mm_cvtsi32_si128(v0);
    v = _mm_broadcastd_epi32(v.native());
#elif SIMDPP_USE_SSE2
    // Move the value into lane 0, then shuffle lane 0 into all lanes.
    v = _mm_cvtsi32_si128(v0);
    v = permute4<0,0,0,0>(v);
#elif SIMDPP_USE_NEON
    v = vdupq_n_u32(v0);
#elif SIMDPP_USE_ALTIVEC
    // Load v0 into lane 0, then broadcast lane 0. Only rv[0] needs
    // initialization: all other lanes are overwritten by splat<0>.
    SIMDPP_ALIGN(16) uint32_t rv[4];
    rv[0] = v0;
    v = altivec::load1(v, rv);
    v = splat<0>(v);
#elif SIMDPP_USE_MSA
    v = (v4u32) __msa_fill_w(v0);
#endif
}
162 | |
#if SIMDPP_USE_AVX2
// Broadcasts the 32-bit value v0 to all 8 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(uint32x8& v, uint32_t v0)
{
    uint32x4 a = _mm_cvtsi32_si128(v0);
    v = _mm256_broadcastd_epi32(a.native());
}
#endif
171 | |
#if SIMDPP_USE_AVX512F
// Broadcasts the 32-bit value v0 to all 16 lanes of the 512-bit vector v.
static SIMDPP_INL
void i_set_splat(uint32<16>& v, uint32_t v0)
{
    v = _mm512_set1_epi32(v0);
}
#endif
179 | |
180 | template<unsigned N> SIMDPP_INL |
181 | void i_set_splat(uint32<N>& v, uint32_t v0) |
182 | { |
183 | uint32v tv; |
184 | i_set_splat(tv, v0); |
185 | for (unsigned i = 0; i < v.vec_length; ++i) { |
186 | v.vec(i) = tv; |
187 | } |
188 | } |
189 | |
190 | // ----------------------------------------------------------------------------- |
191 | |
192 | static SIMDPP_INL |
193 | void i_set_splat(uint64x2& v, uint64_t v0) |
194 | { |
195 | #if SIMDPP_USE_SSE2 |
196 | #if SIMDPP_32_BITS |
197 | uint32x4 va = _mm_cvtsi32_si128(uint32_t(v0)); |
198 | uint32x4 vb = _mm_cvtsi32_si128(uint32_t(v0 >> 32)); |
199 | v = zip4_lo(va, vb); |
200 | v = permute2<0,0>(v); |
201 | #else |
202 | v = _mm_cvtsi64_si128(v0); |
203 | v = permute2<0,0>(v); |
204 | #endif |
205 | #elif SIMDPP_USE_NEON |
206 | v = vdupq_n_u64(v0); |
207 | #elif SIMDPP_USE_VSX_207 |
208 | SIMDPP_ALIGN(16) uint64_t rv[2]; |
209 | rv[0] = v0; |
210 | v = vec_ld(0, reinterpret_cast<const __vector uint64_t*>(rv)); |
211 | v = splat<0>(v); |
212 | #elif SIMDPP_USE_MSA |
213 | #if SIMDPP_64_BITS |
214 | v = (v2u64) __msa_fill_d(v0.native()); |
215 | #else |
216 | uint32_t v0lo = v0; |
217 | uint32_t v0hi = v0 >> 32; |
218 | #pragma GCC diagnostic push |
219 | #pragma GCC diagnostic ignored "-Wuninitialized" |
220 | v4i32 vr; |
221 | vr = __msa_insert_w(vr, 0, v0lo); |
222 | vr = __msa_insert_w(vr, 1, v0hi); |
223 | #pragma GCC diagnostic pop |
224 | v = (int32<4>) vr; |
225 | v = (v2u64) __msa_splat_d((v2i64) v.native(), 0); |
226 | #endif |
227 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
228 | v = detail::null::make_vec<uint64x2>(v0); |
229 | #endif |
230 | } |
231 | |
#if SIMDPP_USE_AVX2
// Broadcasts the 64-bit value v0 to all 4 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(uint64x4& v, uint64_t v0)
{
#if SIMDPP_32_BITS
    // 32-bit builds lack _mm_cvtsi64_si128: assemble the 64-bit value in
    // lane 0 from its two halves, then broadcast it.
    uint32x4 va = _mm_cvtsi32_si128(uint32_t(v0));
    uint32x4 vb = _mm_cvtsi32_si128(uint32_t(v0 >> 32));
    uint64x2 a = (uint64x2) zip4_lo(va, vb);
    v = _mm256_broadcastq_epi64(a.native());
#else
    uint64x2 a = _mm_cvtsi64_si128(v0);
    v = _mm256_broadcastq_epi64(a.native());
#endif
}
#endif
247 | |
#if SIMDPP_USE_AVX512F
// Broadcasts the 64-bit value v0 to all 8 lanes of the 512-bit vector v.
static SIMDPP_INL
void i_set_splat(uint64<8>& v, uint64_t v0)
{
    v = _mm512_set1_epi64(v0);
}
#endif
255 | |
256 | template<unsigned N> SIMDPP_INL |
257 | void i_set_splat(uint64<N>& v, uint64_t v0) |
258 | { |
259 | uint64v tv; |
260 | i_set_splat(tv, v0); |
261 | for (unsigned i = 0; i < v.vec_length; ++i) { |
262 | v.vec(i) = tv; |
263 | } |
264 | } |
265 | |
266 | // ----------------------------------------------------------------------------- |
267 | |
// Broadcasts the float value v0 to all 4 lanes of v.
static SIMDPP_INL
void i_set_splat(float32x4& v, float v0)
{
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    v = detail::null::make_vec<float32x4>(v0);
#elif SIMDPP_USE_SSE2
    v = _mm_set1_ps(v0); // likely in a SSE register anyway
#elif SIMDPP_USE_NEON
    v = vdupq_n_f32(v0);
#elif SIMDPP_USE_ALTIVEC
    // Load v0 into lane 0, then broadcast lane 0. Only rv[0] needs
    // initialization: all other lanes are overwritten by splat<0>.
    SIMDPP_ALIGN(16) float rv[4];
    rv[0] = v0;
    v = altivec::load1(v, rv);
    v = splat<0>(v);
#elif SIMDPP_USE_MSA
    // Load from memory into lane 0, then broadcast lane 0.
    SIMDPP_ALIGN(16) float rv[4];
    rv[0] = v0;
    v = (v4f32) __msa_ld_w(rv, 0);
    v = (v4f32) __msa_splat_w((v4i32) v.native(), 0);
#endif
}
289 | |
#if SIMDPP_USE_AVX
// Broadcasts the float value v0 to all 8 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(float32x8& v, float v0)
{
    v = _mm256_broadcast_ss(&v0);
}
#endif
297 | |
#if SIMDPP_USE_AVX512F
// Broadcasts the float value v0 to all 16 lanes of the 512-bit vector v.
static SIMDPP_INL
void i_set_splat(float32<16>& v, float v0)
{
    // Splat into a 128-bit vector first, then replicate that 128-bit
    // group across the 512-bit register.
    float32<4> a;
    i_set_splat(a, v0);
    v = _mm512_broadcast_f32x4(a.native());
}
#endif
307 | |
308 | template<unsigned N> SIMDPP_INL |
309 | void i_set_splat(float32<N>& v, float v0) |
310 | { |
311 | #ifdef __GNUC__ |
312 | #pragma GCC diagnostic push |
313 | #pragma GCC diagnostic ignored "-Wuninitialized" |
314 | #endif |
315 | // GCC thinks tv is not initialized |
316 | float32v tv; |
317 | i_set_splat(tv, v0); |
318 | for (unsigned i = 0; i < v.vec_length; ++i) { |
319 | v.vec(i) = tv; |
320 | } |
321 | #ifdef __GNUC__ |
322 | #pragma GCC diagnostic pop |
323 | #endif |
324 | } |
325 | |
326 | // ----------------------------------------------------------------------------- |
327 | |
// Broadcasts the double value v0 to both lanes of v.
static SIMDPP_INL
void i_set_splat(float64x2& v, double v0)
{
#if SIMDPP_USE_SSE2
    v = _mm_set1_pd(v0); // likely in a SSE register anyway
#elif SIMDPP_USE_NEON64
    v = vdupq_n_f64(v0);
#elif SIMDPP_USE_VSX_206
    // Load v0 into lane 0 of the vector, then broadcast lane 0. Only
    // rv[0] needs initialization: lane 1 is overwritten by splat<0>.
    SIMDPP_ALIGN(16) double rv[2];
    rv[0] = v0;
    v = vec_ld(0, reinterpret_cast<const __vector double*>(rv));
    v = splat<0>(v);
#elif SIMDPP_USE_MSA
    // Load from memory into lane 0, then broadcast lane 0.
    SIMDPP_ALIGN(16) double rv[2];
    rv[0] = v0;
    v = (v2f64) __msa_ld_d(rv, 0);
    v = (v2f64) __msa_splat_d((v2i64) v.native(), 0);
#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
    // No double-precision vector support: fall back to scalar emulation.
    v = detail::null::make_vec<float64x2>(v0);
#endif
}
349 | |
#if SIMDPP_USE_AVX
// Broadcasts the double value v0 to all 4 lanes of the 256-bit vector v.
static SIMDPP_INL
void i_set_splat(float64x4& v, double v0)
{
    v = _mm256_broadcast_sd(&v0);
}
#endif
357 | |
#if SIMDPP_USE_AVX512F
// Broadcasts the double value v0 to all 8 lanes of the 512-bit vector v.
static SIMDPP_INL
void i_set_splat(float64<8>& v, double v0)
{
    // Splat into a 256-bit vector first, then replicate that 256-bit
    // group across the 512-bit register.
    float64<4> v1;
    i_set_splat(v1, v0);
    v = _mm512_broadcast_f64x4(v1.native());
}
#endif
367 | |
368 | template<unsigned N> SIMDPP_INL |
369 | void i_set_splat(float64<N>& v, double v0) |
370 | { |
371 | float64v tv; |
372 | i_set_splat(tv, v0); |
373 | for (unsigned i = 0; i < v.vec_length; ++i) { |
374 | v.vec(i) = tv; |
375 | } |
376 | } |
377 | |
378 | // ----------------------------------------------------------------------------- |
379 | |
// Splats the scalar x into a vector of type V. The splat is computed in
// the unsigned equivalent of V and then converted, so one set of
// i_set_splat overloads serves both signed and unsigned vectors.
template<class V, class VE> SIMDPP_INL
V i_splat_any(const VE& x)
{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif
    // GCC thinks r is not initialized
    typename detail::remove_sign<V>::type r;
    insn::i_set_splat(r, x);
    return V(r);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
}
395 | |
396 | } // namespace insn |
397 | |
// Evaluates a vec_set_splat expression node by splatting its scalar
// operand into the destination vector.
template<class V, class VE> SIMDPP_INL
void construct_eval(V& v, const expr_vec_set_splat<VE>& e)
{
    v = insn::i_splat_any<V>(e.a);
}
403 | |
// Public entry point for splatting scalar x into a vector of type V.
// Rejects mask types at compile time; masks cannot be splat this way.
template<class V, class VE> SIMDPP_INL
V splat_impl(const VE& x)
{
    static_assert(is_vector<V>::value && !is_mask<V>::value,
                  "V must be a non-mask vector" );
    return insn::i_splat_any<V>(x);
}
411 | |
412 | } // namespace detail |
413 | } // namespace SIMDPP_ARCH_NAMESPACE |
414 | } // namespace simdpp |
415 | |
416 | #endif |
417 | |
418 | |