/* Copyright (C) 2013 Povilas Kanapickas <povilas@radix.lt>

   Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
   http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_DETAIL_CAST_BITWISE_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_DETAIL_CAST_BITWISE_H

#include <simdpp/types.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {

/* Note that this function invokes undefined behavior that happens to work on
   all compilers the library supports. The only well-defined way to transfer
   bits between unrelated types without breaking strict aliasing rules is
   memcpy(). Unfortunately, some compilers can't fully optimize away the
   overhead of that function, which leads to unnecessary data movement through
   the stack.

   Also note that this function does not fully work with vector types even in
   C++11 mode, where they are trivial types and thus may be placed in a union.
   Vectors containing one or two native vectors are fine, but larger vectors
   containing four or more native vectors result in internal compiler errors
   or miscompiled code on some compilers.
*/
template<class T, class R> SIMDPP_INL
void cast_bitwise(const T& t, R& r)
{
    static_assert(sizeof(R) == sizeof(T), "Size mismatch");
    union {
        T t_union;
        R r_union;
    };
    t_union = t;
    r = r_union;
}
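
/* For reference, a sketch of the well-defined memcpy()-based alternative
   mentioned in the comment above. It is not used here because of the
   optimization issue described there; the name cast_bitwise_memcpy is
   illustrative only and does not exist in the library.

       #include <cstring>

       template<class T, class R> SIMDPP_INL
       void cast_bitwise_memcpy(const T& t, R& r)
       {
           static_assert(sizeof(R) == sizeof(T), "Size mismatch");
           std::memcpy(&r, &t, sizeof(R));
       }
*/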

enum {
    VECTOR_CAST_TYPE_1_TO_1,   // same number of native vectors in source and destination
    VECTOR_CAST_TYPE_SPLIT2,   // each source native vector is split into two destination vectors
    VECTOR_CAST_TYPE_COMBINE2, // two source native vectors are combined into one destination vector
    VECTOR_CAST_TYPE_INVALID
};

#if (__GNUC__ >= 6) && !defined(__INTEL_COMPILER) && !defined(__clang__)
/* native_cast, native_cast_split and native_cast_combine use native vector
   types as class template parameters. On GCC, vector types have alignment
   attributes specified on some architectures. This leads to an "ignored
   attributes" warning, because the attributes are not part of the type.
   Since libsimdpp always uses the same attributes for all native_type
   members, we can safely ignore this warning.
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif

// The Size argument is needed to disambiguate vectors of different size on old
// GNU ABIs.
template<unsigned Size, class NativeT, class NativeR, bool IsVarArray>
struct native_cast;

template<unsigned Size, class T, class R> struct native_cast<Size, T, R, false> {
    static SIMDPP_INL R cast(const T& t) { return R(t); }
};

template<unsigned Size, class T> struct native_cast<Size, T, T, false> {
    static SIMDPP_INL T cast(const T& t) { return t; }
};

template<unsigned Size, class T, class R> struct native_cast<Size, T, R, true> {
    static SIMDPP_INL R cast(const T& t)
    {
        R r;
        cast_bitwise(t, r);
        return r;
    }
};

#define NATIVE_CAST_IMPL(SIZE, T_TYPE, R_TYPE, FUNC) \
template<> struct native_cast<SIZE, T_TYPE, R_TYPE, false> { \
    static SIMDPP_INL R_TYPE cast(const T_TYPE& t) { return FUNC(t); } \
}
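
/* For example, NATIVE_CAST_IMPL(16, __m128, __m128i, _mm_castps_si128) below
   expands (together with the trailing semicolon at the point of use) to:

       template<> struct native_cast<16, __m128, __m128i, false> {
           static SIMDPP_INL __m128i cast(const __m128& t) { return _mm_castps_si128(t); }
       };
*/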

#if SIMDPP_USE_SSE2
NATIVE_CAST_IMPL(16, __m128, __m128i, _mm_castps_si128);
NATIVE_CAST_IMPL(16, __m128, __m128d, _mm_castps_pd);
NATIVE_CAST_IMPL(16, __m128i, __m128, _mm_castsi128_ps);
NATIVE_CAST_IMPL(16, __m128i, __m128d, _mm_castsi128_pd);
NATIVE_CAST_IMPL(16, __m128d, __m128i, _mm_castpd_si128);
NATIVE_CAST_IMPL(16, __m128d, __m128, _mm_castpd_ps);
#endif

#if SIMDPP_USE_AVX
NATIVE_CAST_IMPL(32, __m256, __m256i, _mm256_castps_si256);
NATIVE_CAST_IMPL(32, __m256, __m256d, _mm256_castps_pd);
NATIVE_CAST_IMPL(32, __m256i, __m256, _mm256_castsi256_ps);
NATIVE_CAST_IMPL(32, __m256i, __m256d, _mm256_castsi256_pd);
NATIVE_CAST_IMPL(32, __m256d, __m256i, _mm256_castpd_si256);
NATIVE_CAST_IMPL(32, __m256d, __m256, _mm256_castpd_ps);
#endif

#if SIMDPP_USE_AVX512F
NATIVE_CAST_IMPL(64, __m512, __m512i, _mm512_castps_si512);
NATIVE_CAST_IMPL(64, __m512, __m512d, _mm512_castps_pd);
NATIVE_CAST_IMPL(64, __m512i, __m512, _mm512_castsi512_ps);
NATIVE_CAST_IMPL(64, __m512i, __m512d, _mm512_castsi512_pd);
NATIVE_CAST_IMPL(64, __m512d, __m512i, _mm512_castpd_si512);
NATIVE_CAST_IMPL(64, __m512d, __m512, _mm512_castpd_ps);
#endif

#if SIMDPP_USE_NEON
NATIVE_CAST_IMPL(16, float32x4_t, uint64x2_t, vreinterpretq_u64_f32);
NATIVE_CAST_IMPL(16, float32x4_t, int64x2_t, vreinterpretq_s64_f32);
NATIVE_CAST_IMPL(16, float32x4_t, uint32x4_t, vreinterpretq_u32_f32);
NATIVE_CAST_IMPL(16, float32x4_t, int32x4_t, vreinterpretq_s32_f32);
NATIVE_CAST_IMPL(16, float32x4_t, uint16x8_t, vreinterpretq_u16_f32);
NATIVE_CAST_IMPL(16, float32x4_t, int16x8_t, vreinterpretq_s16_f32);
NATIVE_CAST_IMPL(16, float32x4_t, uint8x16_t, vreinterpretq_u8_f32);
NATIVE_CAST_IMPL(16, float32x4_t, int8x16_t, vreinterpretq_s8_f32);

NATIVE_CAST_IMPL(16, uint64x2_t, int64x2_t, vreinterpretq_s64_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, uint32x4_t, vreinterpretq_u32_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, int32x4_t, vreinterpretq_s32_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, uint16x8_t, vreinterpretq_u16_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, int16x8_t, vreinterpretq_s16_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, uint8x16_t, vreinterpretq_u8_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, int8x16_t, vreinterpretq_s8_u64);
NATIVE_CAST_IMPL(16, uint64x2_t, float32x4_t, vreinterpretq_f32_u64);

NATIVE_CAST_IMPL(16, int64x2_t, uint64x2_t, vreinterpretq_u64_s64);
NATIVE_CAST_IMPL(16, int64x2_t, uint32x4_t, vreinterpretq_u32_s64);
NATIVE_CAST_IMPL(16, int64x2_t, int32x4_t, vreinterpretq_s32_s64);
NATIVE_CAST_IMPL(16, int64x2_t, uint16x8_t, vreinterpretq_u16_s64);
NATIVE_CAST_IMPL(16, int64x2_t, int16x8_t, vreinterpretq_s16_s64);
NATIVE_CAST_IMPL(16, int64x2_t, uint8x16_t, vreinterpretq_u8_s64);
NATIVE_CAST_IMPL(16, int64x2_t, int8x16_t, vreinterpretq_s8_s64);
NATIVE_CAST_IMPL(16, int64x2_t, float32x4_t, vreinterpretq_f32_s64);

NATIVE_CAST_IMPL(16, uint32x4_t, uint64x2_t, vreinterpretq_u64_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, int64x2_t, vreinterpretq_s64_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, int32x4_t, vreinterpretq_s32_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, uint16x8_t, vreinterpretq_u16_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, int16x8_t, vreinterpretq_s16_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, uint8x16_t, vreinterpretq_u8_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, int8x16_t, vreinterpretq_s8_u32);
NATIVE_CAST_IMPL(16, uint32x4_t, float32x4_t, vreinterpretq_f32_u32);

NATIVE_CAST_IMPL(16, int32x4_t, uint64x2_t, vreinterpretq_u64_s32);
NATIVE_CAST_IMPL(16, int32x4_t, int64x2_t, vreinterpretq_s64_s32);
NATIVE_CAST_IMPL(16, int32x4_t, uint32x4_t, vreinterpretq_u32_s32);
NATIVE_CAST_IMPL(16, int32x4_t, uint16x8_t, vreinterpretq_u16_s32);
NATIVE_CAST_IMPL(16, int32x4_t, int16x8_t, vreinterpretq_s16_s32);
NATIVE_CAST_IMPL(16, int32x4_t, uint8x16_t, vreinterpretq_u8_s32);
NATIVE_CAST_IMPL(16, int32x4_t, int8x16_t, vreinterpretq_s8_s32);
NATIVE_CAST_IMPL(16, int32x4_t, float32x4_t, vreinterpretq_f32_s32);

NATIVE_CAST_IMPL(16, uint16x8_t, uint64x2_t, vreinterpretq_u64_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, int64x2_t, vreinterpretq_s64_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, uint32x4_t, vreinterpretq_u32_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, int32x4_t, vreinterpretq_s32_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, int16x8_t, vreinterpretq_s16_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, uint8x16_t, vreinterpretq_u8_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, int8x16_t, vreinterpretq_s8_u16);
NATIVE_CAST_IMPL(16, uint16x8_t, float32x4_t, vreinterpretq_f32_u16);

NATIVE_CAST_IMPL(16, int16x8_t, uint64x2_t, vreinterpretq_u64_s16);
NATIVE_CAST_IMPL(16, int16x8_t, int64x2_t, vreinterpretq_s64_s16);
NATIVE_CAST_IMPL(16, int16x8_t, uint32x4_t, vreinterpretq_u32_s16);
NATIVE_CAST_IMPL(16, int16x8_t, int32x4_t, vreinterpretq_s32_s16);
NATIVE_CAST_IMPL(16, int16x8_t, uint16x8_t, vreinterpretq_u16_s16);
NATIVE_CAST_IMPL(16, int16x8_t, uint8x16_t, vreinterpretq_u8_s16);
NATIVE_CAST_IMPL(16, int16x8_t, int8x16_t, vreinterpretq_s8_s16);
NATIVE_CAST_IMPL(16, int16x8_t, float32x4_t, vreinterpretq_f32_s16);

NATIVE_CAST_IMPL(16, uint8x16_t, uint64x2_t, vreinterpretq_u64_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, int64x2_t, vreinterpretq_s64_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, uint32x4_t, vreinterpretq_u32_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, int32x4_t, vreinterpretq_s32_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, uint16x8_t, vreinterpretq_u16_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, int16x8_t, vreinterpretq_s16_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, int8x16_t, vreinterpretq_s8_u8);
NATIVE_CAST_IMPL(16, uint8x16_t, float32x4_t, vreinterpretq_f32_u8);

NATIVE_CAST_IMPL(16, int8x16_t, uint64x2_t, vreinterpretq_u64_s8);
NATIVE_CAST_IMPL(16, int8x16_t, int64x2_t, vreinterpretq_s64_s8);
NATIVE_CAST_IMPL(16, int8x16_t, uint32x4_t, vreinterpretq_u32_s8);
NATIVE_CAST_IMPL(16, int8x16_t, int32x4_t, vreinterpretq_s32_s8);
NATIVE_CAST_IMPL(16, int8x16_t, uint16x8_t, vreinterpretq_u16_s8);
NATIVE_CAST_IMPL(16, int8x16_t, int16x8_t, vreinterpretq_s16_s8);
NATIVE_CAST_IMPL(16, int8x16_t, uint8x16_t, vreinterpretq_u8_s8);
NATIVE_CAST_IMPL(16, int8x16_t, float32x4_t, vreinterpretq_f32_s8);
#endif

#if SIMDPP_USE_NEON64
NATIVE_CAST_IMPL(16, float64x2_t, uint64x2_t, vreinterpretq_u64_f64);
NATIVE_CAST_IMPL(16, float64x2_t, int64x2_t, vreinterpretq_s64_f64);
NATIVE_CAST_IMPL(16, float64x2_t, uint32x4_t, vreinterpretq_u32_f64);
NATIVE_CAST_IMPL(16, float64x2_t, int32x4_t, vreinterpretq_s32_f64);
NATIVE_CAST_IMPL(16, float64x2_t, uint16x8_t, vreinterpretq_u16_f64);
NATIVE_CAST_IMPL(16, float64x2_t, int16x8_t, vreinterpretq_s16_f64);
NATIVE_CAST_IMPL(16, float64x2_t, uint8x16_t, vreinterpretq_u8_f64);
NATIVE_CAST_IMPL(16, float64x2_t, int8x16_t, vreinterpretq_s8_f64);
NATIVE_CAST_IMPL(16, float64x2_t, float32x4_t, vreinterpretq_f32_f64);

NATIVE_CAST_IMPL(16, uint64x2_t, float64x2_t, vreinterpretq_f64_u64);
NATIVE_CAST_IMPL(16, int64x2_t, float64x2_t, vreinterpretq_f64_s64);
NATIVE_CAST_IMPL(16, uint32x4_t, float64x2_t, vreinterpretq_f64_u32);
NATIVE_CAST_IMPL(16, int32x4_t, float64x2_t, vreinterpretq_f64_s32);
NATIVE_CAST_IMPL(16, uint16x8_t, float64x2_t, vreinterpretq_f64_u16);
NATIVE_CAST_IMPL(16, int16x8_t, float64x2_t, vreinterpretq_f64_s16);
NATIVE_CAST_IMPL(16, uint8x16_t, float64x2_t, vreinterpretq_f64_u8);
NATIVE_CAST_IMPL(16, int8x16_t, float64x2_t, vreinterpretq_f64_s8);
NATIVE_CAST_IMPL(16, float32x4_t, float64x2_t, vreinterpretq_f64_f32);
#endif
#undef NATIVE_CAST_IMPL
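
// native_cast_split casts a single native vector into two native vectors of
// half the size; native_cast_combine performs the reverse operation. SizeT and
// SizeR are the sizes in bytes of the source and destination native vector,
// respectively, and are needed for the same disambiguation reason as in
// native_cast.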

template<unsigned SizeT, class NativeT, class NativeR> struct native_cast_split;
template<unsigned SizeR, class NativeT, class NativeR> struct native_cast_combine;

#if SIMDPP_USE_AVX
template<> struct native_cast_split<32, __m256, __m128i> {
    static SIMDPP_INL void cast(const __m256& t, __m128i& r0, __m128i& r1)
    {
        r0 = _mm_castps_si128(_mm256_castps256_ps128(t));
        r1 = _mm_castps_si128(_mm256_extractf128_ps(t, 1));
    }
};

template<> struct native_cast_split<32, __m256d, __m128i> {
    static SIMDPP_INL void cast(const __m256d& t, __m128i& r0, __m128i& r1)
    {
        r0 = _mm_castpd_si128(_mm256_castpd256_pd128(t));
        r1 = _mm_castpd_si128(_mm256_extractf128_pd(t, 1));
    }
};

template<> struct native_cast_combine<32, __m128i, __m256> {
    static SIMDPP_INL __m256 cast(const __m128i& t0, const __m128i& t1)
    {
        __m256 r = _mm256_castsi256_ps(_mm256_castsi128_si256(t0));
        r = _mm256_insertf128_ps(r, _mm_castsi128_ps(t1), 1);
        return r;
    }
};

template<> struct native_cast_combine<32, __m128i, __m256d> {
    static SIMDPP_INL __m256d cast(const __m128i& t0, const __m128i& t1)
    {
        __m256d r = _mm256_castsi256_pd(_mm256_castsi128_si256(t0));
        r = _mm256_insertf128_pd(r, _mm_castsi128_pd(t1), 1);
        return r;
    }
};
#endif

#if SIMDPP_USE_AVX512F
template<> struct native_cast_split<64, __m512i, __m256i> {
    static SIMDPP_INL void cast(const __m512i& t, __m256i& r0, __m256i& r1)
    {
        r0 = _mm512_castsi512_si256(t);
        r1 = _mm512_extracti64x4_epi64(t, 1);
    }
};

template<> struct native_cast_split<64, __m512, __m256i> {
    static SIMDPP_INL void cast(const __m512& t, __m256i& r0, __m256i& r1)
    {
        r0 = _mm256_castps_si256(_mm512_castps512_ps256(t));
        r1 = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castps_pd(t), 1));
    }
};

template<> struct native_cast_split<64, __m512d, __m256i> {
    static SIMDPP_INL void cast(const __m512d& t, __m256i& r0, __m256i& r1)
    {
        r0 = _mm256_castpd_si256(_mm512_castpd512_pd256(t));
        r1 = _mm256_castpd_si256(_mm512_extractf64x4_pd(t, 1));
    }
};

template<> struct native_cast_combine<64, __m256i, __m512i> {
    static SIMDPP_INL __m512i cast(const __m256i& t0, const __m256i& t1)
    {
        __m512i r = _mm512_castsi256_si512(t0);
        return _mm512_inserti64x4(r, t1, 1);
    }
};

template<> struct native_cast_combine<64, __m256i, __m512> {
    static SIMDPP_INL __m512 cast(const __m256i& t0, const __m256i& t1)
    {
        __m512d r = _mm512_castsi512_pd(_mm512_castsi256_si512(t0));
        r = _mm512_insertf64x4(r, _mm256_castsi256_pd(t1), 1);
        return _mm512_castpd_ps(r);
    }
};

template<> struct native_cast_combine<64, __m256i, __m512d> {
    static SIMDPP_INL __m512d cast(const __m256i& t0, const __m256i& t1)
    {
        __m512d r = _mm512_castsi512_pd(_mm512_castsi256_si512(t0));
        r = _mm512_insertf64x4(r, _mm256_castsi256_pd(t1), 1);
        return r;
    }
};
#endif

template<unsigned CastType>
struct cast_bitwise_vector_impl;
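
// Detects whether a native type is a vararray. Such types cannot be cast with
// the reinterpret intrinsics above and go through cast_bitwise() instead (see
// the native_cast<Size, T, R, true> specialization).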

template<class T>
struct is_vararray : std::false_type {};

template<class T, unsigned N>
struct is_vararray<vararray<T, N>> : std::true_type {};

template<>
struct cast_bitwise_vector_impl<VECTOR_CAST_TYPE_1_TO_1> {
    template<class T, class R> SIMDPP_INL static
    void cast(const T& t, R& r)
    {
        using NativeT = typename T::base_vector_type::native_type;
        using NativeR = typename R::base_vector_type::native_type;
        const bool is_arg_vararray =
            is_vararray<NativeT>::value || is_vararray<NativeR>::value;
        using CastImpl = native_cast<sizeof(NativeT), NativeT,
                                     NativeR, is_arg_vararray>;

        for (unsigned i = 0; i < T::vec_length; ++i) {
            r.vec(i) = CastImpl::cast(t.vec(i).native());
        }
    }
};

template<>
struct cast_bitwise_vector_impl<VECTOR_CAST_TYPE_SPLIT2> {
    template<class T, class R> SIMDPP_INL static
    void cast(const T& t, R& r)
    {
        using NativeT = typename T::base_vector_type::native_type;
        using NativeR = typename R::base_vector_type::native_type;
        using CastImpl = native_cast_split<sizeof(NativeT), NativeT, NativeR>;

        for (unsigned i = 0; i < T::vec_length; ++i) {
            NativeR r0, r1;
            CastImpl::cast(t.vec(i).native(), r0, r1);
            r.vec(i*2) = r0;
            r.vec(i*2+1) = r1;
        }
    }
};

template<>
struct cast_bitwise_vector_impl<VECTOR_CAST_TYPE_COMBINE2> {
    template<class T, class R> SIMDPP_INL static
    void cast(const T& t, R& r)
    {
        using NativeT = typename T::base_vector_type::native_type;
        using NativeR = typename R::base_vector_type::native_type;
        using CastImpl = native_cast_combine<sizeof(NativeR), NativeT, NativeR>;

        for (unsigned i = 0; i < R::vec_length; ++i) {
            r.vec(i) = CastImpl::cast(t.vec(i*2).native(),
                                      t.vec(i*2+1).native());
        }
    }
};

template<class T, class R> SIMDPP_INL
void cast_bitwise_vector(const T& t, R& r)
{
    static_assert(sizeof(R) == sizeof(T), "Size mismatch");
    const unsigned vector_cast_type =
        T::vec_length == R::vec_length ? VECTOR_CAST_TYPE_1_TO_1 :
        T::vec_length == R::vec_length*2 ? VECTOR_CAST_TYPE_COMBINE2 :
        T::vec_length*2 == R::vec_length ? VECTOR_CAST_TYPE_SPLIT2 :
        VECTOR_CAST_TYPE_INVALID;

    cast_bitwise_vector_impl<vector_cast_type>::cast(t, r);
}
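
/* Usage sketch (hypothetical caller; the actual callers live elsewhere in the
   library): reinterpret the bits of one vector as a vector of another element
   type of the same total size, without any value conversion.

       uint32<8> a = make_uint(1, 2, 3, 4, 5, 6, 7, 8);
       float32<8> b;
       cast_bitwise_vector(a, b); // b now holds the bit patterns of a
*/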

#if (__GNUC__ >= 6) && !defined(__INTEL_COMPILER) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif

} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif