1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_STORE_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/null/memory.h> |
17 | #include <simdpp/detail/align.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | namespace detail { |
22 | namespace insn { |
23 | |
24 | static SIMDPP_INL |
25 | void i_store(char* p, const uint8x16& a) |
26 | { |
27 | p = detail::assume_aligned(p, 16); |
28 | #if SIMDPP_USE_NULL |
29 | detail::null::store(p, a); |
30 | #elif SIMDPP_USE_SSE2 |
31 | _mm_store_si128(reinterpret_cast<__m128i*>(p), a.native()); |
32 | #elif SIMDPP_USE_NEON |
33 | vst1q_u64(reinterpret_cast<uint64_t*>(p), vreinterpretq_u64_u8(a.native())); |
34 | #elif SIMDPP_USE_ALTIVEC |
35 | vec_stl(a.native(), 0, reinterpret_cast<uint8_t*>(p)); |
36 | #elif SIMDPP_USE_MSA |
37 | __msa_st_b((v16i8)a.native(), p, 0); |
38 | #endif |
39 | } |
40 | |
#if SIMDPP_USE_AVX2
// AVX2 only: stores a 256-bit vector of 8-bit integers to p using
// _mm256_store_si256. p is passed through detail::assume_aligned(p, 32)
// before being cast to the pointer type the intrinsic takes.
static SIMDPP_INL
void i_store(char* p, const uint8x32& a)
{
    __m256i* const dst =
        reinterpret_cast<__m256i*>(detail::assume_aligned(p, 32));
    _mm256_store_si256(dst, a.native());
}
#endif
49 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW only: stores a 512-bit vector of 8-bit integers to p using
// _mm512_store_si512. p is passed through detail::assume_aligned(p, 64)
// before the store.
//
// Declared `static` like the other non-template i_store overloads in this
// file, so that all of them share the same (internal) linkage.
static SIMDPP_INL
void i_store(char* p, const uint8<64>& a)
{
    p = detail::assume_aligned(p, 64);
    _mm512_store_si512(reinterpret_cast<__m512i*>(p), a.native());
}
#endif
57 | |
58 | // ----------------------------------------------------------------------------- |
59 | |
60 | static SIMDPP_INL |
61 | void i_store(char* p, const uint16<8>& a) |
62 | { |
63 | i_store(p, uint8<16>(a)); |
64 | } |
65 | |
#if SIMDPP_USE_AVX2
// AVX2 only: stores a 256-bit vector of 16-bit integers by constructing a
// uint8<32> from a and forwarding to the uint8<32> overload of i_store.
static SIMDPP_INL
void i_store(char* p, const uint16<16>& a)
{
    const uint8<32> bytes(a);
    i_store(p, bytes);
}
#endif
73 | |
#if SIMDPP_USE_AVX512BW
// AVX512BW only: stores a 512-bit vector of 16-bit integers by constructing
// a uint8<64> from a and forwarding to the uint8<64> overload of i_store.
//
// Declared `static` like the other non-template i_store overloads in this
// file, so that all of them share the same (internal) linkage.
static SIMDPP_INL
void i_store(char* p, const uint16<32>& a)
{
    i_store(p, uint8<64>(a));
}
#endif
80 | |
81 | // ----------------------------------------------------------------------------- |
82 | |
83 | static SIMDPP_INL |
84 | void i_store(char* p, const uint32<4>& a) |
85 | { |
86 | i_store(p, uint8<16>(a)); |
87 | } |
88 | |
#if SIMDPP_USE_AVX2
// AVX2 only: stores a 256-bit vector of 32-bit integers by constructing a
// uint8<32> from a and forwarding to the uint8<32> overload of i_store.
static SIMDPP_INL
void i_store(char* p, const uint32<8>& a)
{
    const uint8<32> bytes(a);
    i_store(p, bytes);
}
#endif
96 | |
#if SIMDPP_USE_AVX512F
// AVX512F only: stores a 512-bit vector of 32-bit integers to p using
// _mm512_store_epi32. p is passed through detail::assume_aligned(p, 64)
// before the store.
static SIMDPP_INL
void i_store(char* p, const uint32<16>& a)
{
    char* const dst = detail::assume_aligned(p, 64);
    _mm512_store_epi32(dst, a.native());
}
#endif
105 | |
106 | // ----------------------------------------------------------------------------- |
107 | |
108 | static SIMDPP_INL |
109 | void i_store(char* p, const uint64<2>& a) |
110 | { |
111 | #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
112 | p = detail::assume_aligned(p, 16); |
113 | detail::null::store(p, a); |
114 | #else |
115 | i_store(p, uint8<16>(a)); |
116 | #endif |
117 | } |
118 | |
#if SIMDPP_USE_AVX2
// AVX2 only: stores a 256-bit vector of 64-bit integers by constructing a
// uint8<32> from a and forwarding to the uint8<32> overload of i_store.
static SIMDPP_INL
void i_store(char* p, const uint64<4>& a)
{
    const uint8<32> bytes(a);
    i_store(p, bytes);
}
#endif
126 | |
#if SIMDPP_USE_AVX512F
// AVX512F only: stores a 512-bit vector of 64-bit integers to p using
// _mm512_store_epi64. p is passed through detail::assume_aligned(p, 64)
// before the store.
static SIMDPP_INL
void i_store(char* p, const uint64<8>& a)
{
    char* const dst = detail::assume_aligned(p, 64);
    _mm512_store_epi64(dst, a.native());
}
#endif
135 | |
136 | // ----------------------------------------------------------------------------- |
137 | |
138 | static SIMDPP_INL |
139 | void i_store(char* p, const float32x4& a) |
140 | { |
141 | p = detail::assume_aligned(p, 16); |
142 | float* q = reinterpret_cast<float*>(p); |
143 | (void) q; |
144 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
145 | detail::null::store(p, a); |
146 | #elif SIMDPP_USE_SSE2 |
147 | _mm_store_ps(q, a.native()); |
148 | #elif SIMDPP_USE_NEON |
149 | vst1q_f32(q, a.native()); |
150 | #elif SIMDPP_USE_ALTIVEC |
151 | vec_stl(a.native(), 0, q); |
152 | #elif SIMDPP_USE_MSA |
153 | __msa_st_w((v4i32) a.native(), q, 0); |
154 | #endif |
155 | } |
156 | |
#if SIMDPP_USE_AVX
// AVX only: stores a 256-bit vector of eight 32-bit floats to p using
// _mm256_store_ps. The pointer is cast to float* and then passed through
// detail::assume_aligned(..., 32) before the store.
static SIMDPP_INL
void i_store(char* p, const float32x8& a)
{
    float* const dst =
        detail::assume_aligned(reinterpret_cast<float*>(p), 32);
    _mm256_store_ps(dst, a.native());
}
#endif
166 | |
#if SIMDPP_USE_AVX512F
// AVX512F only: stores a 512-bit vector of sixteen 32-bit floats to p using
// _mm512_store_ps. p is passed through detail::assume_aligned(p, 64)
// before the store.
static SIMDPP_INL
void i_store(char* p, const float32<16>& a)
{
    char* const dst = detail::assume_aligned(p, 64);
    _mm512_store_ps(dst, a.native());
}
#endif
175 | |
176 | // ----------------------------------------------------------------------------- |
177 | |
178 | static SIMDPP_INL |
179 | void i_store(char* p, const float64x2& a) |
180 | { |
181 | p = detail::assume_aligned(p, 16); |
182 | double* q = reinterpret_cast<double*>(p); |
183 | (void) q; |
184 | #if SIMDPP_USE_SSE2 |
185 | _mm_store_pd(q, a.native()); |
186 | #elif SIMDPP_USE_NEON64 |
187 | vst1q_f64(q, a.native()); |
188 | #elif SIMDPP_USE_VSX_206 |
189 | vec_stl(a.native(), 0, q); |
190 | #elif SIMDPP_USE_MSA |
191 | __msa_st_d((v2i64) a.native(), q, 0); |
192 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC |
193 | detail::null::store(p, a); |
194 | #endif |
195 | } |
196 | |
#if SIMDPP_USE_AVX
// AVX only: stores a 256-bit vector of four 64-bit floats to p using
// _mm256_store_pd. p is passed through detail::assume_aligned(p, 32) and
// then cast to double* for the intrinsic.
static SIMDPP_INL
void i_store(char* p, const float64x4& a)
{
    double* const dst =
        reinterpret_cast<double*>(detail::assume_aligned(p, 32));
    _mm256_store_pd(dst, a.native());
}
#endif
205 | |
#if SIMDPP_USE_AVX512F
// AVX512F only: stores a 512-bit vector of eight 64-bit floats to p using
// _mm512_store_pd. p is passed through detail::assume_aligned(p, 64)
// before the store.
static SIMDPP_INL
void i_store(char* p, const float64<8>& a)
{
    char* const dst = detail::assume_aligned(p, 64);
    _mm512_store_pd(dst, a.native());
}
#endif
214 | |
215 | // ----------------------------------------------------------------------------- |
216 | |
217 | template<class V> SIMDPP_INL |
218 | void i_store(char* p, const V& ca) |
219 | { |
220 | const unsigned veclen = V::base_vector_type::length_bytes; |
221 | |
222 | typename detail::remove_sign<V>::type a = ca; |
223 | p = detail::assume_aligned(p, veclen); |
224 | for (unsigned i = 0; i < V::vec_length; ++i) { |
225 | i_store(p, a.vec(i)); |
226 | p += veclen; |
227 | } |
228 | } |
229 | |
230 | } // namespace insn |
231 | } // namespace detail |
232 | } // namespace SIMDPP_ARCH_NAMESPACE |
233 | } // namespace simdpp |
234 | |
235 | #endif |
236 | |