1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/null/memory.h>
17#include <simdpp/detail/align.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21namespace detail {
22namespace insn {
23
24static SIMDPP_INL
25void i_store(char* p, const uint8x16& a)
26{
27 p = detail::assume_aligned(p, 16);
28#if SIMDPP_USE_NULL
29 detail::null::store(p, a);
30#elif SIMDPP_USE_SSE2
31 _mm_store_si128(reinterpret_cast<__m128i*>(p), a.native());
32#elif SIMDPP_USE_NEON
33 vst1q_u64(reinterpret_cast<uint64_t*>(p), vreinterpretq_u64_u8(a.native()));
34#elif SIMDPP_USE_ALTIVEC
35 vec_stl(a.native(), 0, reinterpret_cast<uint8_t*>(p));
36#elif SIMDPP_USE_MSA
37 __msa_st_b((v16i8)a.native(), p, 0);
38#endif
39}
40
41#if SIMDPP_USE_AVX2
42static SIMDPP_INL
43void i_store(char* p, const uint8x32& a)
44{
45 p = detail::assume_aligned(p, 32);
46 _mm256_store_si256(reinterpret_cast<__m256i*>(p), a.native());
47}
48#endif
49
50#if SIMDPP_USE_AVX512BW
51SIMDPP_INL void i_store(char* p, const uint8<64>& a)
52{
53 p = detail::assume_aligned(p, 64);
54 _mm512_store_si512(reinterpret_cast<__m512i*>(p), a.native());
55}
56#endif
57
58// -----------------------------------------------------------------------------
59
60static SIMDPP_INL
61void i_store(char* p, const uint16<8>& a)
62{
63 i_store(p, uint8<16>(a));
64}
65
66#if SIMDPP_USE_AVX2
67static SIMDPP_INL
68void i_store(char* p, const uint16<16>& a)
69{
70 i_store(p, uint8<32>(a));
71}
72#endif
73
74#if SIMDPP_USE_AVX512BW
75SIMDPP_INL void i_store(char* p, const uint16<32>& a)
76{
77 i_store(p, uint8<64>(a));
78}
79#endif
80
81// -----------------------------------------------------------------------------
82
83static SIMDPP_INL
84void i_store(char* p, const uint32<4>& a)
85{
86 i_store(p, uint8<16>(a));
87}
88
89#if SIMDPP_USE_AVX2
90static SIMDPP_INL
91void i_store(char* p, const uint32<8>& a)
92{
93 i_store(p, uint8<32>(a));
94}
95#endif
96
97#if SIMDPP_USE_AVX512F
98static SIMDPP_INL
99void i_store(char* p, const uint32<16>& a)
100{
101 p = detail::assume_aligned(p, 64);
102 _mm512_store_epi32(p, a.native());
103}
104#endif
105
106// -----------------------------------------------------------------------------
107
108static SIMDPP_INL
109void i_store(char* p, const uint64<2>& a)
110{
111#if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
112 p = detail::assume_aligned(p, 16);
113 detail::null::store(p, a);
114#else
115 i_store(p, uint8<16>(a));
116#endif
117}
118
119#if SIMDPP_USE_AVX2
120static SIMDPP_INL
121void i_store(char* p, const uint64<4>& a)
122{
123 i_store(p, uint8<32>(a));
124}
125#endif
126
127#if SIMDPP_USE_AVX512F
128static SIMDPP_INL
129void i_store(char* p, const uint64<8>& a)
130{
131 p = detail::assume_aligned(p, 64);
132 _mm512_store_epi64(p, a.native());
133}
134#endif
135
136// -----------------------------------------------------------------------------
137
138static SIMDPP_INL
139void i_store(char* p, const float32x4& a)
140{
141 p = detail::assume_aligned(p, 16);
142 float* q = reinterpret_cast<float*>(p);
143 (void) q;
144#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
145 detail::null::store(p, a);
146#elif SIMDPP_USE_SSE2
147 _mm_store_ps(q, a.native());
148#elif SIMDPP_USE_NEON
149 vst1q_f32(q, a.native());
150#elif SIMDPP_USE_ALTIVEC
151 vec_stl(a.native(), 0, q);
152#elif SIMDPP_USE_MSA
153 __msa_st_w((v4i32) a.native(), q, 0);
154#endif
155}
156
157#if SIMDPP_USE_AVX
158static SIMDPP_INL
159void i_store(char* p, const float32x8& a)
160{
161 float* q = reinterpret_cast<float*>(p);
162 q = detail::assume_aligned(q, 32);
163 _mm256_store_ps(q, a.native());
164}
165#endif
166
167#if SIMDPP_USE_AVX512F
168static SIMDPP_INL
169void i_store(char* p, const float32<16>& a)
170{
171 p = detail::assume_aligned(p, 64);
172 _mm512_store_ps(p, a.native());
173}
174#endif
175
176// -----------------------------------------------------------------------------
177
178static SIMDPP_INL
179void i_store(char* p, const float64x2& a)
180{
181 p = detail::assume_aligned(p, 16);
182 double* q = reinterpret_cast<double*>(p);
183 (void) q;
184#if SIMDPP_USE_SSE2
185 _mm_store_pd(q, a.native());
186#elif SIMDPP_USE_NEON64
187 vst1q_f64(q, a.native());
188#elif SIMDPP_USE_VSX_206
189 vec_stl(a.native(), 0, q);
190#elif SIMDPP_USE_MSA
191 __msa_st_d((v2i64) a.native(), q, 0);
192#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
193 detail::null::store(p, a);
194#endif
195}
196
197#if SIMDPP_USE_AVX
198static SIMDPP_INL
199void i_store(char* p, const float64x4& a)
200{
201 p = detail::assume_aligned(p, 32);
202 _mm256_store_pd(reinterpret_cast<double*>(p), a.native());
203}
204#endif
205
206#if SIMDPP_USE_AVX512F
207static SIMDPP_INL
208void i_store(char* p, const float64<8>& a)
209{
210 p = detail::assume_aligned(p, 64);
211 _mm512_store_pd(p, a.native());
212}
213#endif
214
215// -----------------------------------------------------------------------------
216
217template<class V> SIMDPP_INL
218void i_store(char* p, const V& ca)
219{
220 const unsigned veclen = V::base_vector_type::length_bytes;
221
222 typename detail::remove_sign<V>::type a = ca;
223 p = detail::assume_aligned(p, veclen);
224 for (unsigned i = 0; i < V::vec_length; ++i) {
225 i_store(p, a.vec(i));
226 p += veclen;
227 }
228}
229
230} // namespace insn
231} // namespace detail
232} // namespace SIMDPP_ARCH_NAMESPACE
233} // namespace simdpp
234
235#endif
236