1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_PERMUTE2_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/make_shuffle_bytes_mask.h>
17#include <simdpp/core/permute4.h>
18#include <simdpp/detail/null/shuffle.h>
19#include <simdpp/detail/shuffle/neon_int16x8.h>
20#include <simdpp/detail/shuffle/neon_int32x4.h>
21#include <simdpp/detail/shuffle/neon_int64x2.h>
22#include <simdpp/detail/shuffle/shuffle_mask.h>
23#include <simdpp/detail/vector_array_macros.h>
24
25namespace simdpp {
26namespace SIMDPP_ARCH_NAMESPACE {
27namespace detail {
28namespace insn {
29
30
31template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
32uint16<N> i_permute2(const uint16<N>& a)
33{
34 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
35 return i_permute4<s0,s1,s0+2,s1+2>(a);
36}
37
38
39template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
40uint32<N> i_permute2(const uint32<N>& a)
41{
42 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
43 return i_permute4<s0,s1,s0+2,s1+2>(a);
44}
45
46
47template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
48float32<N> i_permute2(const float32<N>& a)
49{
50 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
51 return i_permute4<s0,s1,s0+2,s1+2>(a);
52}
53
54
55template<unsigned s0, unsigned s1> SIMDPP_INL
56uint64x2 i_permute2(const uint64x2& a)
57{
58 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
59#if SIMDPP_USE_SSE2 || SIMDPP_USE_MSA
60 return (uint64x2) i_permute4<s0*2, s0*2+1, s1*2, s1*2+1>(int32x4(a));
61#elif SIMDPP_USE_NEON
62 return detail::neon_shuffle_int64x2::permute2<s0,s1>(a);
63#elif SIMDPP_USE_VSX_207
64 return vec_xxpermdi(a.native(), a.native(),
65 SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1));
66#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
67 return detail::null::permute<s0,s1>(a);
68#endif
69}
70
#if SIMDPP_USE_AVX2
// Two-element permute of a 256-bit vector of 64-bit integers (AVX2 only).
// Forwards to the four-element permute with selectors s0, s1 for the first
// pair and s0+2, s1+2 for the second pair.
template<unsigned s0, unsigned s1> SIMDPP_INL
uint64x4 i_permute2(const uint64x4& a)
{
    static_assert(s0 < 2, "Selector out of range");
    static_assert(s1 < 2, "Selector out of range");
    return i_permute4<s0, s1, 2 + s0, 2 + s1>(a);
}
#endif
79
#if SIMDPP_USE_AVX512F
// Two-element permute of a 512-bit vector of 64-bit integers (AVX-512F only).
// Forwards to the four-element permute with selectors s0, s1, s0+2, s1+2.
template<unsigned s0, unsigned s1> SIMDPP_INL
uint64<8> i_permute2(const uint64<8>& a)
{
    static_assert(s0 < 2, "Selector out of range");
    static_assert(s1 < 2, "Selector out of range");
    return i_permute4<s0, s1, 2 + s0, 2 + s1>(a);
}
#endif
88
89template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
90uint64<N> i_permute2(const uint64<N>& a)
91{
92 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
93 SIMDPP_VEC_ARRAY_IMPL1(uint64<N>, (i_permute2<s0,s1>), a);
94}
95
96// -----------------------------------------------------------------------------
97
98template<unsigned s0, unsigned s1> SIMDPP_INL
99float64x2 i_permute2(const float64x2& a)
100{
101 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
102#if SIMDPP_USE_SSE2
103 return _mm_shuffle_pd(a.native(), a.native(), SIMDPP_SHUFFLE_MASK_2x2(s0, s1));
104#elif SIMDPP_USE_VSX_206
105 return vec_xxpermdi(a.native(), a.native(),
106 SIMDPP_VSX_SHUFFLE_MASK_2x2(s0, s1));
107#elif SIMDPP_USE_NEON64 || SIMDPP_USE_MSA
108 return float64x2(i_permute2<s0,s1>(int64x2(a)));
109#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
110 return detail::null::permute<s0,s1>(a);
111#endif
112}
113
#if SIMDPP_USE_AVX
// Two-element permute of a 256-bit vector of 64-bit floats (AVX and later).
template<unsigned s0, unsigned s1> SIMDPP_INL
float64x4 i_permute2(const float64x4& a)
{
    static_assert(s0 < 2, "Selector out of range");
    static_assert(s1 < 2, "Selector out of range");
#if SIMDPP_USE_AVX2
    // Immediate holds a 2-bit source index per result element:
    // s0, s1 for the first pair and s0+2, s1+2 for the second pair.
    const unsigned imm = s0 | (s1 << 2) | ((s0 + 2) << 4) | ((s1 + 2) << 6);
    return _mm256_permute4x64_pd(a.native(), imm);
#else // SIMDPP_USE_AVX
    // Immediate holds one selector bit per result element; the s0, s1
    // pattern is repeated for the second pair.
    const unsigned imm = s0 | (s1 << 1) | (s0 << 2) | (s1 << 3);
    return _mm256_permute_pd(a.native(), imm);
#endif
}
#endif
126
#if SIMDPP_USE_AVX512F
// Two-element permute of a 512-bit vector of 64-bit floats (AVX-512F only).
template<unsigned s0, unsigned s1> SIMDPP_INL
float64<8> i_permute2(const float64<8>& a)
{
    static_assert(s0 < 2, "Selector out of range");
    static_assert(s1 < 2, "Selector out of range");
    // Two selector bits (s0 in bit 0, s1 in bit 1) describe one pair; the
    // same pattern is replicated into bits 2-3, 4-5 and 6-7 for the other
    // three pairs.
    const unsigned pair_bits = s0 | (s1 << 1);
    const unsigned imm = pair_bits
                       | (pair_bits << 2)
                       | (pair_bits << 4)
                       | (pair_bits << 6);
    return _mm512_permute_pd(a.native(), imm);
}
#endif
135
136template<unsigned s0, unsigned s1, unsigned N> SIMDPP_INL
137float64<N> i_permute2(const float64<N>& a)
138{
139 static_assert(s0 < 2 && s1 < 2, "Selector out of range");
140 SIMDPP_VEC_ARRAY_IMPL1(float64<N>, (i_permute2<s0,s1>), a);
141}
142
143} // namespace insn
144} // namespace detail
145} // namespace SIMDPP_ARCH_NAMESPACE
146} // namespace simdpp
147
148#endif
149
150