1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_SPLAT_N_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_SPLAT_N_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/splat_n.h> |
17 | #include <simdpp/detail/expr/splat_n.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | /** Broadcasts the specified 8-bit value to all elements within 128-bit lanes. |
23 | |
24 | @code |
25 | r0 = a[s] |
26 | r1 = a[s] |
27 | ... |
28 | rN = a[s] |
29 | @endcode |
30 | |
31 | @par 128-bit version: |
32 | @icost{SSE2-SSE3, 7} |
33 | @icost{SSSE3-AVX, 1-2} |
34 | @icost{AVX2, 2} |
35 | |
36 | @par 256-bit version: |
37 | The lower and higher 128-bit halves are processed as if 128-bit instruction |
38 | was applied to each of them separately. |
39 | |
40 | @icost{SSE2-SSE3, 14} |
41 | @icost{SSSE3-AVX, 2-3} |
42 | @icost{NEON, ALTIVEC, 2} |
43 | */ |
44 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
45 | int8<N, expr_splat16<s,int8<N,E>>> splat16(const int8<N,E>& a) |
46 | { |
47 | static_assert(s < 16, "Access out of bounds" ); |
48 | return { { a } }; |
49 | } |
50 | |
51 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
52 | uint8<N, expr_splat16<s,uint8<N,E>>> splat16(const uint8<N,E>& a) |
53 | { |
54 | static_assert(s < 16, "Access out of bounds" ); |
55 | return { { a } }; |
56 | } |
57 | |
58 | /** Broadcasts the specified 16-bit value to all elements within 128-bit lanes. |
59 | |
60 | @code |
61 | r0 = a[s] |
62 | r1 = a[s] |
63 | ... |
64 | r7 = a[s] |
65 | @endcode |
66 | |
67 | @par 128-bit version: |
68 | @icost{SSE2-SSE3, 3} |
69 | @icost{SSSE3-AVX, 1-2} |
70 | @icost{AVX2, 2} |
71 | |
72 | @par 256-bit version: |
73 | @icost{SSE2-SSE3, 6} |
74 | @icost{SSSE3-AVX, 2-3} |
75 | @icost{AVX2, NEON, ALTIVEC, 2} |
76 | */ |
77 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
78 | int16<N, expr_splat8<s,int16<N,E>>> splat8(const int16<N,E>& a) |
79 | { |
80 | static_assert(s < 8, "Access out of bounds" ); |
81 | return { { a } }; |
82 | } |
83 | |
84 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
85 | uint16<N, expr_splat8<s,uint16<N,E>>> splat8(const uint16<N,E>& a) |
86 | { |
87 | static_assert(s < 8, "Access out of bounds" ); |
88 | return { { a } }; |
89 | } |
90 | |
91 | /** Broadcasts the specified 32-bit value to all elements within 128-bit lanes. |
92 | |
93 | @code |
94 | r0 = a[s] |
95 | r1 = a[s] |
96 | r2 = a[s] |
97 | r3 = a[s] |
98 | @endcode |
99 | |
100 | @par 256-bit version: |
101 | @icost{NEON, ALTIVEC, 2} |
102 | */ |
103 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
104 | int32<N, expr_splat4<s,int32<N,E>>> splat4(const int32<N,E>& a) |
105 | { |
106 | static_assert(s < 4, "Access out of bounds" ); |
107 | return { { a } }; |
108 | } |
109 | |
110 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
111 | uint32<N, expr_splat4<s,uint32<N,E>>> splat4(const uint32<N,E>& a) |
112 | { |
113 | static_assert(s < 4, "Access out of bounds" ); |
114 | return { { a } }; |
115 | } |
116 | |
117 | /** Broadcasts the specified 64-bit value to all elements within 128-bit lanes. |
118 | |
119 | @code |
120 | r0 = a[s] |
121 | r1 = a[s] |
122 | @endcode |
123 | |
124 | @par 128-bit version: |
125 | @icost{ALTIVEC, 1-2} |
126 | |
127 | @par 256-bit version: |
128 | @icost{SSE2-AVX, NEON, 2} |
129 | @icost{ALTIVEC, 2-3} |
130 | */ |
131 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
132 | int64<N, expr_splat2<s,int64<N,E>>> splat2(const int64<N,E>& a) |
133 | { |
134 | static_assert(s < 2, "Access out of bounds" ); |
135 | return { { a } }; |
136 | } |
137 | |
138 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
139 | uint64<N, expr_splat2<s,uint64<N,E>>> splat2(const uint64<N,E>& a) |
140 | { |
141 | static_assert(s < 2, "Access out of bounds" ); |
142 | return { { a } }; |
143 | } |
144 | |
145 | /** Broadcasts the specified 32-bit value to all elements within 128-bit lanes. |
146 | |
147 | @code |
148 | r0 = a[s] |
149 | r1 = a[s] |
150 | r2 = a[s] |
151 | r3 = a[s] |
152 | @endcode |
153 | |
154 | @par 256-bit version: |
155 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
156 | */ |
157 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
158 | float32<N, expr_splat4<s,float32<N,E>>> splat4(const float32<N,E>& a) |
159 | { |
160 | static_assert(s < 4, "Access out of bounds" ); |
161 | return { { a } }; |
162 | } |
163 | |
164 | /** Broadcasts the specified 64-bit value to all elements within 128-bit lanes. |
165 | |
166 | @code |
167 | r0 = a[s] |
168 | r1 = a[s] |
169 | @endcode |
170 | |
171 | @par 128-bit version: |
172 | @novec{NEON, ALTIVEC} |
173 | |
174 | @par 256-bit version: |
175 | @icost{SSE2-AVX, 2} |
176 | @novec{NEON, ALTIVEC} |
177 | */ |
178 | template<unsigned s, unsigned N, class E> SIMDPP_INL |
179 | float64<N, expr_splat2<s,float64<N,E>>> splat2(const float64<N,E>& a) |
180 | { |
181 | static_assert(s < 2, "Access out of bounds" ); |
182 | return { { a } }; |
183 | } |
184 | |
185 | } // namespace SIMDPP_ARCH_NAMESPACE |
186 | } // namespace simdpp |
187 | |
188 | #endif |
189 | |
190 | |