1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STREAM_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_STREAM_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/align.h> |
17 | #include <simdpp/core/store.h> |
18 | #include <simdpp/detail/null/memory.h> |
19 | |
20 | namespace simdpp { |
21 | namespace SIMDPP_ARCH_NAMESPACE { |
22 | namespace detail { |
23 | namespace insn { |
24 | |
25 | static SIMDPP_INL |
26 | void i_stream(char* p, const uint8<16>& a) |
27 | { |
28 | p = detail::assume_aligned(p, 16); |
29 | #if SIMDPP_USE_NULL |
30 | detail::null::store(p, a); |
31 | #elif SIMDPP_USE_SSE2 |
32 | _mm_stream_si128(reinterpret_cast<__m128i*>(p), a.native()); |
33 | #elif SIMDPP_USE_NEON || SIMDPP_USE_MSA |
34 | store(p, a); |
35 | #elif SIMDPP_USE_ALTIVEC |
36 | vec_st(a.native(), 0, reinterpret_cast<uint8_t*>(p)); |
37 | #endif |
38 | } |
39 | |
#if SIMDPP_USE_AVX2
// Writes the 32-byte vector `a` to `p` with the _mm256_stream_si256
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 32) first.
static SIMDPP_INL
void i_stream(char* p, const uint8<32>& a)
{
    __m256i* const dst =
        reinterpret_cast<__m256i*>(detail::assume_aligned(p, 32));
    _mm256_stream_si256(dst, a.native());
}
#endif
48 | |
#if SIMDPP_USE_AVX512BW
// Writes the 64-byte vector `a` to `p` with the _mm512_stream_si512
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
//
// Declared `static SIMDPP_INL` like the other non-template i_stream
// overloads in this header so that all of them share the same linkage.
static SIMDPP_INL
void i_stream(char* p, const uint8<64>& a)
{
    p = detail::assume_aligned(p, 64);
    _mm512_stream_si512(reinterpret_cast<__m512i*>(p), a.native());
}
#endif
56 | |
57 | // ----------------------------------------------------------------------------- |
58 | |
59 | static SIMDPP_INL |
60 | void i_stream(char* p, const uint16<8>& a) |
61 | { |
62 | i_stream(p, uint8<16>(a)); |
63 | } |
64 | |
#if SIMDPP_USE_AVX2
// Converts `a` to uint8<32> and forwards to the uint8<32> overload.
static SIMDPP_INL
void i_stream(char* p, const uint16<16>& a)
{
    const uint8<32> bytes(a);
    i_stream(p, bytes);
}
#endif
72 | |
#if SIMDPP_USE_AVX512BW
// Writes the 64-byte vector `a` to `p` with the _mm512_stream_si512
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
//
// Declared `static SIMDPP_INL` like the other non-template i_stream
// overloads in this header so that all of them share the same linkage.
static SIMDPP_INL
void i_stream(char* p, const uint16<32>& a)
{
    p = detail::assume_aligned(p, 64);
    _mm512_stream_si512(reinterpret_cast<__m512i*>(p), a.native());
}
#endif
80 | |
81 | // ----------------------------------------------------------------------------- |
82 | |
83 | static SIMDPP_INL |
84 | void i_stream(char* p, const uint32<4>& a) |
85 | { |
86 | i_stream(p, uint8<16>(a)); |
87 | } |
88 | |
#if SIMDPP_USE_AVX2
// Converts `a` to uint8<32> and forwards to the uint8<32> overload.
static SIMDPP_INL
void i_stream(char* p, const uint32<8>& a)
{
    const uint8<32> bytes(a);
    i_stream(p, bytes);
}
#endif
96 | |
#if SIMDPP_USE_AVX512F
// Writes the 64-byte vector `a` to `p` with the _mm512_stream_si512
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
static SIMDPP_INL
void i_stream(char* p, const uint32<16>& a)
{
    __m512i* const dst =
        reinterpret_cast<__m512i*>(detail::assume_aligned(p, 64));
    _mm512_stream_si512(dst, a.native());
}
#endif
105 | |
106 | // ----------------------------------------------------------------------------- |
107 | |
108 | static SIMDPP_INL |
109 | void i_stream(char* p, const uint64<2>& a) |
110 | { |
111 | #if (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207) |
112 | p = detail::assume_aligned(p, 16); |
113 | detail::null::store(p, a); |
114 | #else |
115 | i_stream(p, uint8<16>(a)); |
116 | #endif |
117 | } |
118 | |
#if SIMDPP_USE_AVX2
// Converts `a` to uint8<32> and forwards to the uint8<32> overload.
static SIMDPP_INL
void i_stream(char* p, const uint64<4>& a)
{
    const uint8<32> bytes(a);
    i_stream(p, bytes);
}
#endif
126 | |
#if SIMDPP_USE_AVX512F
// Writes the 64-byte vector `a` to `p` with the _mm512_stream_si512
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
static SIMDPP_INL
void i_stream(char* p, const uint64<8>& a)
{
    __m512i* const dst =
        reinterpret_cast<__m512i*>(detail::assume_aligned(p, 64));
    _mm512_stream_si512(dst, a.native());
}
#endif
135 | |
136 | // ----------------------------------------------------------------------------- |
137 | |
138 | static SIMDPP_INL |
139 | void i_stream(char* p, const float32x4& a) |
140 | { |
141 | p = detail::assume_aligned(p, 16); |
142 | float* q = reinterpret_cast<float*>(p); |
143 | (void) q; |
144 | #if SIMDPP_USE_NULL |
145 | detail::null::store(p, a); |
146 | #elif SIMDPP_USE_SSE2 |
147 | _mm_stream_ps(q, a.native()); |
148 | #elif SIMDPP_USE_NEON || SIMDPP_USE_MSA |
149 | store(q, a); |
150 | #elif SIMDPP_USE_ALTIVEC |
151 | vec_st(a.native(), 0, q); |
152 | #endif |
153 | } |
154 | |
#if SIMDPP_USE_AVX
// Writes the eight-float vector `a` to `p` with the _mm256_stream_ps
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 32) first.
static SIMDPP_INL
void i_stream(char* p, const float32x8& a)
{
    float* const dst =
        reinterpret_cast<float*>(detail::assume_aligned(p, 32));
    _mm256_stream_ps(dst, a.native());
}
#endif
163 | |
#if SIMDPP_USE_AVX512F
// Writes the sixteen-float vector `a` to `p` with the _mm512_stream_ps
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
static SIMDPP_INL
void i_stream(char* p, const float32<16>& a)
{
    float* const dst =
        reinterpret_cast<float*>(detail::assume_aligned(p, 64));
    _mm512_stream_ps(dst, a.native());
}
#endif
172 | |
173 | // ----------------------------------------------------------------------------- |
174 | |
175 | static SIMDPP_INL |
176 | void i_stream(char* p, const float64x2& a) |
177 | { |
178 | p = detail::assume_aligned(p, 16); |
179 | #if SIMDPP_USE_SSE2 |
180 | _mm_stream_pd(reinterpret_cast<double*>(p), a.native()); |
181 | #elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA |
182 | store(p, a); |
183 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
184 | detail::null::store(p, a); |
185 | #endif |
186 | } |
187 | |
#if SIMDPP_USE_AVX
// Writes the four-double vector `a` to `p` with the _mm256_stream_pd
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 32) first.
static SIMDPP_INL
void i_stream(char* p, const float64x4& a)
{
    double* const dst =
        reinterpret_cast<double*>(detail::assume_aligned(p, 32));
    _mm256_stream_pd(dst, a.native());
}
#endif
196 | |
#if SIMDPP_USE_AVX512F
// Writes the eight-double vector `a` to `p` with the _mm512_stream_pd
// non-temporal store intrinsic. The address is passed through
// detail::assume_aligned(p, 64) first.
static SIMDPP_INL
void i_stream(char* p, const float64<8>& a)
{
    double* const dst =
        reinterpret_cast<double*>(detail::assume_aligned(p, 64));
    _mm512_stream_pd(dst, a.native());
}
#endif
205 | |
206 | // ----------------------------------------------------------------------------- |
207 | |
208 | template<class V> SIMDPP_INL |
209 | void i_stream(char* p, const V& ca) |
210 | { |
211 | const unsigned veclen = V::base_vector_type::length_bytes; |
212 | |
213 | typename detail::remove_sign<V>::type a = ca; |
214 | p = detail::assume_aligned(p, veclen); |
215 | for (unsigned i = 0; i < V::vec_length; ++i) { |
216 | i_stream(p, a.vec(i)); |
217 | p += veclen; |
218 | } |
219 | } |
220 | |
221 | } // namespace insn |
222 | } // namespace detail |
223 | } // namespace SIMDPP_ARCH_NAMESPACE |
224 | } // namespace simdpp |
225 | |
226 | #endif |
227 | |