store_masked.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_masked.h]

/*  Copyright (C) 2016  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_MASKED_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_MASKED_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/load.h>
17	#include <simdpp/core/store.h>
18	#include <simdpp/detail/null/memory.h>
19	#include <simdpp/detail/align.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	static SIMDPP_INL
27	void i_store_masked(char* p, const uint32<`4`>& a, const mask_int32<`4`>& mask)
28	{
29	#if SIMDPP_USE_NULL
30	null::store_masked(p, a, mask);
31	#elif SIMDPP_USE_AVX512VL
32	_mm_mask_store_epi32(p, mask.native(), a.native());
33	#elif SIMDPP_USE_AVX2
34	_mm_maskstore_epi32(reinterpret_cast<int*>(p), mask.native(), a.native());
35	#elif SIMDPP_USE_AVX
36	_mm_maskstore_ps(reinterpret_cast<float*>(p), mask.native(),
37	_mm_castsi128_ps(a.native()));
38	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
39	uint32<`4`> b = load(p);
40	b = blend(a, b, mask);
41	store(p, b);
42	#endif
43	}
44
// Masked store of an 8x32-bit integer vector; this overload is only compiled
// when AVX2 is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
#if SIMDPP_USE_AVX2
static SIMDPP_INL
void i_store_masked(char* p, const uint32<8>& a, const mask_int32<8>& mask)
{
#if SIMDPP_USE_AVX512VL
    _mm256_mask_store_epi32(p, mask.native(), a.native());
#else
    // Plain AVX2: the intrinsic takes an int* destination.
    _mm256_maskstore_epi32(reinterpret_cast<int*>(p), mask.native(), a.native());
#endif
}
#endif
56
// Masked store of a 16x32-bit integer vector; this overload is only compiled
// when AVX512F is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
void i_store_masked(char* p, const uint32<16>& a, const mask_int32<16>& mask)
{
    _mm512_mask_store_epi32(reinterpret_cast<int*>(p), mask.native(), a.native());
}
#endif
64
65	// -----------------------------------------------------------------------------
66
67	static SIMDPP_INL
68	void i_store_masked(char* p, const uint64<`2`>& a, const mask_int64<`2`>& mask)
69	{
70	#if SIMDPP_USE_AVX512VL
71	#if __INTEL_COMPILER
72	_mm_mask_store_epi64(reinterpret_cast<__int64*>(p), mask.native(),
73	a.native());
74	#else
75	_mm_mask_store_epi64(reinterpret_cast<long long*>(p), mask.native(),
76	a.native());
77	#endif
78	#elif SIMDPP_USE_AVX2
79	#if __INTEL_COMPILER
80	_mm_maskstore_epi64(reinterpret_cast<__int64*>(p), mask.native(), a.native());
81	#else
82	_mm_maskstore_epi64(reinterpret_cast<long long*>(p), mask.native(), a.native());
83	#endif
84	#elif SIMDPP_USE_AVX
85	_mm_maskstore_pd(reinterpret_cast<double*>(p), mask.native(), _mm_castsi128_pd(a.native()));
86	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_VSX_207 \|\| SIMDPP_USE_MSA
87	uint64<`2`> b = load(p);
88	b = blend(a, b, mask);
89	store(p, b);
90	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
91	null::store_masked(p, a, mask);
92	#endif
93	}
94
// Masked store of a 4x64-bit integer vector; this overload is only compiled
// when AVX2 is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
//
// The destination pointer is cast to __int64* for the Intel compiler and to
// long long* otherwise, for both the AVX512VL and the plain AVX2 intrinsic.
#if SIMDPP_USE_AVX2
static SIMDPP_INL
void i_store_masked(char* p, const uint64<4>& a, const mask_int64<4>& mask)
{
#if SIMDPP_USE_AVX512VL
#if __INTEL_COMPILER
    _mm256_mask_store_epi64(reinterpret_cast<__int64*>(p), mask.native(),
                            a.native());
#else
    _mm256_mask_store_epi64(reinterpret_cast<long long*>(p), mask.native(),
                            a.native());
#endif
#else
#if __INTEL_COMPILER
    _mm256_maskstore_epi64(reinterpret_cast<__int64*>(p), mask.native(), a.native());
#else
    _mm256_maskstore_epi64(reinterpret_cast<long long*>(p), mask.native(), a.native());
#endif
#endif
}
#endif
116
// Masked store of an 8x64-bit integer vector; this overload is only compiled
// when AVX512F is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
//
// The destination pointer is cast to __int64* for the Intel compiler and to
// long long* otherwise.
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
void i_store_masked(char* p, const uint64<8>& a, const mask_int64<8>& mask)
{
#if __INTEL_COMPILER
    _mm512_mask_store_epi64(reinterpret_cast<__int64*>(p), mask.native(), a.native());
#else
    _mm512_mask_store_epi64(reinterpret_cast<long long*>(p), mask.native(), a.native());
#endif
}
#endif
128
129	// -----------------------------------------------------------------------------
130
131	static SIMDPP_INL
132	void i_store_masked(char* p, const float32<`4`>& a, const mask_float32<`4`>& mask)
133	{
134	#if SIMDPP_USE_NULL
135	null::store_masked(p, a, mask);
136	#elif SIMDPP_USE_AVX512VL
137	_mm_mask_store_ps(reinterpret_cast<float*>(p), mask.native(), a.native());
138	#elif SIMDPP_USE_AVX
139	_mm_maskstore_ps(reinterpret_cast<float*>(p),
140	_mm_castps_si128(mask.native()), a.native());
141	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
142	float32<`4`> b = load(p);
143	b = blend(a, b, mask);
144	store(p, b);
145	#endif
146	}
147
// Masked store of an 8x32-bit float vector; this overload is only compiled
// when AVX is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask; passed as-is on AVX512VL, cast to an integer vector
//         with _mm256_castps_si256 for the plain AVX intrinsic
#if SIMDPP_USE_AVX
static SIMDPP_INL
void i_store_masked(char* p, const float32<8>& a, const mask_float32<8>& mask)
{
#if SIMDPP_USE_AVX512VL
    _mm256_mask_store_ps(reinterpret_cast<float*>(p), mask.native(),
                         a.native());
#else
    _mm256_maskstore_ps(reinterpret_cast<float*>(p),
                        _mm256_castps_si256(mask.native()), a.native());
#endif
}
#endif
161
// Masked store of a 16x32-bit float vector; this overload is only compiled
// when AVX512F is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
void i_store_masked(char* p, const float32<16>& a, const mask_float32<16>& mask)
{
    _mm512_mask_store_ps(reinterpret_cast<float*>(p), mask.native(), a.native());
}
#endif
169
170	// -----------------------------------------------------------------------------
171
172	static SIMDPP_INL
173	void i_store_masked(char* p, const float64<`2`>& a, const mask_float64<`2`>& mask)
174	{
175	#if SIMDPP_USE_AVX512VL
176	_mm_mask_store_pd(reinterpret_cast<double*>(p), mask.native(), a.native());
177	#elif SIMDPP_USE_AVX
178	_mm_maskstore_pd(reinterpret_cast<double*>(p),
179	_mm_castpd_si128(mask.native()), a.native());
180	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON64 \|\| SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
181	float64<`2`> b = load(p);
182	b = blend(a, b, mask);
183	store(p, b);
184	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
185	null::store_masked(p, a, mask);
186	#endif
187	}
188
// Masked store of a 4x64-bit float vector; this overload is only compiled
// when AVX is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask; passed as-is on AVX512VL, cast to an integer vector
//         with _mm256_castpd_si256 for the plain AVX intrinsic
#if SIMDPP_USE_AVX
static SIMDPP_INL
void i_store_masked(char* p, const float64<4>& a, const mask_float64<4>& mask)
{
#if SIMDPP_USE_AVX512VL
    _mm256_mask_store_pd(reinterpret_cast<double*>(p), mask.native(),
                         a.native());
#else
    _mm256_maskstore_pd(reinterpret_cast<double*>(p),
                        _mm256_castpd_si256(mask.native()), a.native());
#endif
}
#endif
202
// Masked store of an 8x64-bit float vector; this overload is only compiled
// when AVX512F is enabled.
//
//  p    - destination address
//  a    - vector holding the values to store
//  mask - per-lane mask handed to the intrinsic via mask.native()
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
void i_store_masked(char* p, const float64<8>& a, const mask_float64<8>& mask)
{
    _mm512_mask_store_pd(reinterpret_cast<double*>(p), mask.native(), a.native());
}
#endif
210
211	// -----------------------------------------------------------------------------
212
213	template<class V, class M>
214	void i_store_masked(char* p, const V& a, const M& mask)
215	{
216	const unsigned veclen = V::base_vector_type::length_bytes;
217
218	for (unsigned i = `0`; i < a.vec_length; ++i) {
219	i_store_masked(p, a.vec(i), mask.vec(i));
220	p += veclen;
221	}
222	}
223
224	} // namespace insn
225	} // namespace detail
226	} // namespace SIMDPP_ARCH_NAMESPACE
227	} // namespace simdpp
228
229	#endif
230
231

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/store_masked.h