1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_STREAM_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_STREAM_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/align.h>
17#include <simdpp/core/store.h>
18#include <simdpp/detail/null/memory.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25static SIMDPP_INL
26void i_stream(char* p, const uint8<16>& a)
27{
28 p = detail::assume_aligned(p, 16);
29#if SIMDPP_USE_NULL
30 detail::null::store(p, a);
31#elif SIMDPP_USE_SSE2
32 _mm_stream_si128(reinterpret_cast<__m128i*>(p), a.native());
33#elif SIMDPP_USE_NEON || SIMDPP_USE_MSA
34 store(p, a);
35#elif SIMDPP_USE_ALTIVEC
36 vec_st(a.native(), 0, reinterpret_cast<uint8_t*>(p));
37#endif
38}
39
#if SIMDPP_USE_AVX2
// AVX2 only: writes the 256-bit vector `a` to `p` with
// _mm256_stream_si256. `p` is run through detail::assume_aligned(p, 32)
// first (presumably a 32-byte alignment requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const uint8<32>& a)
{
    p = detail::assume_aligned(p, 32);
    __m256i* dst = reinterpret_cast<__m256i*>(p);
    _mm256_stream_si256(dst, a.native());
}
#endif
48
#if SIMDPP_USE_AVX512BW
// AVX512BW only: writes the 512-bit vector `a` to `p` with
// _mm512_stream_si512. `p` is run through detail::assume_aligned(p, 64)
// first (presumably a 64-byte alignment requirement on the caller).
//
// Declared `static` to match the linkage of the SSE2/AVX2/AVX512F
// i_stream overloads in this header.
static SIMDPP_INL
void i_stream(char* p, const uint8<64>& a)
{
    p = detail::assume_aligned(p, 64);
    _mm512_stream_si512(reinterpret_cast<__m512i*>(p), a.native());
}
#endif
56
57// -----------------------------------------------------------------------------
58
59static SIMDPP_INL
60void i_stream(char* p, const uint16<8>& a)
61{
62 i_stream(p, uint8<16>(a));
63}
64
#if SIMDPP_USE_AVX2
// AVX2 only: converts the uint16<16> vector to uint8<32> and forwards it
// to the uint8<32> overload, which performs the actual write.
static SIMDPP_INL
void i_stream(char* p, const uint16<16>& a)
{
    const uint8<32> as_bytes(a);
    i_stream(p, as_bytes);
}
#endif
72
#if SIMDPP_USE_AVX512BW
// AVX512BW only: writes the 512-bit vector `a` (32 x 16-bit lanes) to `p`
// with _mm512_stream_si512. `p` is run through
// detail::assume_aligned(p, 64) first (presumably a 64-byte alignment
// requirement on the caller).
//
// Declared `static` to match the linkage of the SSE2/AVX2/AVX512F
// i_stream overloads in this header.
static SIMDPP_INL
void i_stream(char* p, const uint16<32>& a)
{
    p = detail::assume_aligned(p, 64);
    _mm512_stream_si512(reinterpret_cast<__m512i*>(p), a.native());
}
#endif
80
81// -----------------------------------------------------------------------------
82
83static SIMDPP_INL
84void i_stream(char* p, const uint32<4>& a)
85{
86 i_stream(p, uint8<16>(a));
87}
88
#if SIMDPP_USE_AVX2
// AVX2 only: converts the uint32<8> vector to uint8<32> and forwards it
// to the uint8<32> overload, which performs the actual write.
static SIMDPP_INL
void i_stream(char* p, const uint32<8>& a)
{
    const uint8<32> as_bytes(a);
    i_stream(p, as_bytes);
}
#endif
96
#if SIMDPP_USE_AVX512F
// AVX512F only: writes the 512-bit vector `a` (16 x 32-bit lanes) to `p`
// with _mm512_stream_si512. `p` is run through
// detail::assume_aligned(p, 64) first (presumably a 64-byte alignment
// requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const uint32<16>& a)
{
    p = detail::assume_aligned(p, 64);
    __m512i* dst = reinterpret_cast<__m512i*>(p);
    _mm512_stream_si512(dst, a.native());
}
#endif
105
106// -----------------------------------------------------------------------------
107
108static SIMDPP_INL
109void i_stream(char* p, const uint64<2>& a)
110{
111#if (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
112 p = detail::assume_aligned(p, 16);
113 detail::null::store(p, a);
114#else
115 i_stream(p, uint8<16>(a));
116#endif
117}
118
#if SIMDPP_USE_AVX2
// AVX2 only: converts the uint64<4> vector to uint8<32> and forwards it
// to the uint8<32> overload, which performs the actual write.
static SIMDPP_INL
void i_stream(char* p, const uint64<4>& a)
{
    const uint8<32> as_bytes(a);
    i_stream(p, as_bytes);
}
#endif
126
#if SIMDPP_USE_AVX512F
// AVX512F only: writes the 512-bit vector `a` (8 x 64-bit lanes) to `p`
// with _mm512_stream_si512. `p` is run through
// detail::assume_aligned(p, 64) first (presumably a 64-byte alignment
// requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const uint64<8>& a)
{
    p = detail::assume_aligned(p, 64);
    __m512i* dst = reinterpret_cast<__m512i*>(p);
    _mm512_stream_si512(dst, a.native());
}
#endif
135
136// -----------------------------------------------------------------------------
137
138static SIMDPP_INL
139void i_stream(char* p, const float32x4& a)
140{
141 p = detail::assume_aligned(p, 16);
142 float* q = reinterpret_cast<float*>(p);
143 (void) q;
144#if SIMDPP_USE_NULL
145 detail::null::store(p, a);
146#elif SIMDPP_USE_SSE2
147 _mm_stream_ps(q, a.native());
148#elif SIMDPP_USE_NEON || SIMDPP_USE_MSA
149 store(q, a);
150#elif SIMDPP_USE_ALTIVEC
151 vec_st(a.native(), 0, q);
152#endif
153}
154
#if SIMDPP_USE_AVX
// AVX only: writes the float32x8 vector `a` to `p` with _mm256_stream_ps.
// `p` is run through detail::assume_aligned(p, 32) first (presumably a
// 32-byte alignment requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const float32x8& a)
{
    p = detail::assume_aligned(p, 32);
    float* dst = reinterpret_cast<float*>(p);
    _mm256_stream_ps(dst, a.native());
}
#endif
163
#if SIMDPP_USE_AVX512F
// AVX512F only: writes the float32<16> vector `a` to `p` with
// _mm512_stream_ps. `p` is run through detail::assume_aligned(p, 64)
// first (presumably a 64-byte alignment requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const float32<16>& a)
{
    p = detail::assume_aligned(p, 64);
    float* dst = reinterpret_cast<float*>(p);
    _mm512_stream_ps(dst, a.native());
}
#endif
172
173// -----------------------------------------------------------------------------
174
175static SIMDPP_INL
176void i_stream(char* p, const float64x2& a)
177{
178 p = detail::assume_aligned(p, 16);
179#if SIMDPP_USE_SSE2
180 _mm_stream_pd(reinterpret_cast<double*>(p), a.native());
181#elif SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
182 store(p, a);
183#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
184 detail::null::store(p, a);
185#endif
186}
187
#if SIMDPP_USE_AVX
// AVX only: writes the float64x4 vector `a` to `p` with _mm256_stream_pd.
// `p` is run through detail::assume_aligned(p, 32) first (presumably a
// 32-byte alignment requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const float64x4& a)
{
    p = detail::assume_aligned(p, 32);
    double* dst = reinterpret_cast<double*>(p);
    _mm256_stream_pd(dst, a.native());
}
#endif
196
#if SIMDPP_USE_AVX512F
// AVX512F only: writes the float64<8> vector `a` to `p` with
// _mm512_stream_pd. `p` is run through detail::assume_aligned(p, 64)
// first (presumably a 64-byte alignment requirement on the caller).
static SIMDPP_INL
void i_stream(char* p, const float64<8>& a)
{
    p = detail::assume_aligned(p, 64);
    double* dst = reinterpret_cast<double*>(p);
    _mm512_stream_pd(dst, a.native());
}
#endif
205
206// -----------------------------------------------------------------------------
207
208template<class V> SIMDPP_INL
209void i_stream(char* p, const V& ca)
210{
211 const unsigned veclen = V::base_vector_type::length_bytes;
212
213 typename detail::remove_sign<V>::type a = ca;
214 p = detail::assume_aligned(p, veclen);
215 for (unsigned i = 0; i < V::vec_length; ++i) {
216 i_stream(p, a.vec(i));
217 p += veclen;
218 }
219}
220
221} // namespace insn
222} // namespace detail
223} // namespace SIMDPP_ARCH_NAMESPACE
224} // namespace simdpp
225
226#endif
227