1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED2_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_PACKED2_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/align.h>
17#include <simdpp/detail/insn/mem_pack.h>
18#include <simdpp/core/store.h>
19#include <simdpp/detail/null/memory.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace insn {
25
// Forward declarations of the width-specific packing helpers defined at the
// bottom of this file. The i_store_packed2 overloads below dispatch to them
// on targets that lack a native interleaving store.
template<class V> SIMDPP_INL
void v128_store_pack2(char* p, const V& ca, const V& cb);
template<class V> SIMDPP_INL
void v256_store_pack2(char* p, const V& ca, const V& cb);
template<class V> SIMDPP_INL
void v512_store_pack2(char* p, const V& ca, const V& cb);
33
34// -----------------------------------------------------------------------------
35
// Stores two uint8x16 vectors to p with their elements interleaved
// (a0, b0, a1, b1, ...). p must be 16-byte aligned.
static SIMDPP_INL
void i_store_packed2(char* p, const uint8x16& a, const uint8x16& b)
{
    p = detail::assume_aligned(p, 16);
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    detail::null::store_packed2(p, a, b);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    // No native 2-way interleaving store: shuffle in registers, then store.
    v128_store_pack2(p, a, b);
#elif SIMDPP_USE_NEON
    // NEON vst2q_u8 stores the two vectors interleaved in one instruction.
    uint8x16x2_t t;
    t.val[0] = a.native();
    t.val[1] = b.native();
    vst2q_u8(reinterpret_cast<uint8_t*>(p), t);
#endif
}
51
#if SIMDPP_USE_AVX2
// 256-bit byte vectors: interleave in registers, then store two halves.
static SIMDPP_INL
void i_store_packed2(char* p, const uint8x32& a, const uint8x32& b)
{
    v256_store_pack2(p, a, b);
}
#endif
59
#if SIMDPP_USE_AVX512BW
// 512-bit byte vectors require AVX-512BW.
SIMDPP_INL void i_store_packed2(char* p, const uint8<64>& a, const uint8<64>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
66
67// -----------------------------------------------------------------------------
68
// Stores two uint16x8 vectors to p with their elements interleaved.
// p must be 16-byte aligned.
static SIMDPP_INL
void i_store_packed2(char* p, const uint16x8& a, const uint16x8& b)
{
    p = detail::assume_aligned(p, 16);
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    detail::null::store_packed2(p, a, b);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    // No native 2-way interleaving store: shuffle in registers, then store.
    v128_store_pack2(p, a, b);
#elif SIMDPP_USE_NEON
    // NEON vst2q_u16 stores the two vectors interleaved in one instruction.
    uint16x8x2_t t;
    t.val[0] = a.native();
    t.val[1] = b.native();
    vst2q_u16(reinterpret_cast<uint16_t*>(p), t);
#endif
}
84
#if SIMDPP_USE_AVX2
// 256-bit 16-bit-element vectors: pack in registers, store two halves.
static SIMDPP_INL
void i_store_packed2(char* p, const uint16x16& a, const uint16x16& b)
{
    v256_store_pack2(p, a, b);
}
#endif
92
#if SIMDPP_USE_AVX512BW
// 512-bit 16-bit-element vectors require AVX-512BW.
SIMDPP_INL void i_store_packed2(char* p, const uint16<32>& a, const uint16<32>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
99
100// -----------------------------------------------------------------------------
101
// Stores two uint32x4 vectors to p with their elements interleaved.
// p must be 16-byte aligned.
static SIMDPP_INL
void i_store_packed2(char* p, const uint32x4& a, const uint32x4& b)
{
    p = detail::assume_aligned(p, 16);
#if SIMDPP_USE_NULL
    // Scalar reference implementation.
    detail::null::store_packed2(p, a, b);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    // No native 2-way interleaving store: shuffle in registers, then store.
    v128_store_pack2(p, a, b);
#elif SIMDPP_USE_NEON
    // NEON vst2q_u32 stores the two vectors interleaved in one instruction.
    uint32x4x2_t t;
    t.val[0] = a.native();
    t.val[1] = b.native();
    vst2q_u32(reinterpret_cast<uint32_t*>(p), t);
#endif
}
117
#if SIMDPP_USE_AVX2
// 256-bit 32-bit-element vectors: pack in registers, store two halves.
static SIMDPP_INL
void i_store_packed2(char* p, const uint32x8& a, const uint32x8& b)
{
    v256_store_pack2(p, a, b);
}
#endif
125
#if SIMDPP_USE_AVX512F
// 512-bit 32-bit-element vectors are available with base AVX-512F.
static SIMDPP_INL
void i_store_packed2(char* p, const uint32<16>& a, const uint32<16>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
133
134// -----------------------------------------------------------------------------
135
136static SIMDPP_INL
137void i_store_packed2(char* p, const uint64x2& a, const uint64x2& b)
138{
139#if SIMDPP_USE_NEON64
140 uint64x2x2_t t;
141 t.val[0] = a.native();
142 t.val[1] = b.native();
143 vst2q_u64(reinterpret_cast<uint64_t*>(p), t);
144#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
145 v128_store_pack2(p, a, b);
146#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
147 detail::null::store_packed2(p, a, b);
148#endif
149}
150
#if SIMDPP_USE_AVX2
// 256-bit 64-bit-element vectors: pack in registers, store two halves.
static SIMDPP_INL
void i_store_packed2(char* p, const uint64x4& a, const uint64x4& b)
{
    v256_store_pack2(p, a, b);
}
#endif
158
#if SIMDPP_USE_AVX512F
// 512-bit 64-bit-element vectors are available with base AVX-512F.
static SIMDPP_INL
void i_store_packed2(char* p, const uint64<8>& a, const uint64<8>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
166
167// -----------------------------------------------------------------------------
168
// Stores two float32x4 vectors to p with their elements interleaved.
// p must be 16-byte aligned.
static SIMDPP_INL
void i_store_packed2(char* p, const float32x4& a, const float32x4& b)
{
    p = detail::assume_aligned(p, 16);
#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
    // Scalar fallback (NULL backend, or NEON without single-precision FP).
    detail::null::store_packed2(p, a, b);
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
    // No native 2-way interleaving store: shuffle in registers, then store.
    v128_store_pack2(p, a, b);
#elif SIMDPP_USE_NEON
    // NEON vst2q_f32 stores the two vectors interleaved in one instruction.
    float32x4x2_t t;
    t.val[0] = a.native();
    t.val[1] = b.native();
    vst2q_f32(reinterpret_cast<float*>(p), t);
#endif
}
184
#if SIMDPP_USE_AVX
// 256-bit float vectors need only AVX (not AVX2).
static SIMDPP_INL
void i_store_packed2(char* p, const float32x8& a, const float32x8& b)
{
    v256_store_pack2(p, a, b);
}
#endif
192
#if SIMDPP_USE_AVX512F
// 512-bit float vectors are available with base AVX-512F.
static SIMDPP_INL
void i_store_packed2(char* p, const float32<16>& a, const float32<16>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
200
201// -----------------------------------------------------------------------------
202
203static SIMDPP_INL
204void i_store_packed2(char* p, const float64x2& a, const float64x2& b)
205{
206#if SIMDPP_USE_NEON64
207 float64x2x2_t t;
208 t.val[0] = a.native();
209 t.val[1] = b.native();
210 vst2q_f64(reinterpret_cast<double*>(p), t);
211#elif SIMDPP_USE_SSE2 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
212 v128_store_pack2(p, a, b);
213#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
214 detail::null::store_packed2(p, a, b);
215#endif
216}
217
#if SIMDPP_USE_AVX
// 256-bit double vectors need only AVX (not AVX2).
static SIMDPP_INL
void i_store_packed2(char* p, const float64x4& a, const float64x4& b)
{
    v256_store_pack2(p, a, b);
}
#endif
225
#if SIMDPP_USE_AVX512F
// 512-bit double vectors are available with base AVX-512F.
static SIMDPP_INL
void i_store_packed2(char* p, const float64<8>& a, const float64<8>& b)
{
    v512_store_pack2(p, a, b);
}
#endif
233
234// -----------------------------------------------------------------------------
235
236template<class V> SIMDPP_INL
237void v128_store_pack2(char* p, const V& ca, const V& cb)
238{
239 p = detail::assume_aligned(p, 32);
240 V a = ca, b = cb;
241 mem_pack2(a, b);
242 i_store(p, a);
243 i_store(p + 16, b);
244}
245
246template<class V> SIMDPP_INL
247void v256_store_pack2(char* p, const V& ca, const V& cb)
248{
249 p = detail::assume_aligned(p, 32);
250 V a = ca, b = cb;
251 mem_pack2(a, b);
252 i_store(p, a);
253 i_store(p + 32, b);
254}
255
256template<class V> SIMDPP_INL
257void v512_store_pack2(char* p, const V& ca, const V& cb)
258{
259 p = detail::assume_aligned(p, 32);
260 V a = ca, b = cb;
261 mem_pack2(a, b);
262 i_store(p, a);
263 i_store(p + 64, b);
264}
265
266
267template<class V> SIMDPP_INL
268void i_store_packed2(char* p, const V& ca, const V& cb)
269{
270 const unsigned veclen = V::base_vector_type::length_bytes;
271 typename detail::remove_sign<V>::type a = ca, b = cb;
272
273 p = detail::assume_aligned(p, veclen);
274 for (unsigned i = 0; i < V::vec_length; ++i) {
275 i_store_packed2(p, a.vec(i), b.vec(i));
276 p += veclen*2;
277 }
278}
279
280} // namespace insn
281} // namespace detail
282} // namespace SIMDPP_ARCH_NAMESPACE
283} // namespace simdpp
284
285#endif
286