1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_SPLAT_N_H
9#define LIBSIMDPP_SIMDPP_CORE_SPLAT_N_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/splat_n.h>
17#include <simdpp/detail/expr/splat_n.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21
22/** Broadcasts the specified 8-bit value to all elements within 128-bit lanes.
23
24 @code
25 r0 = a[s]
26 r1 = a[s]
27 ...
28 rN = a[s]
29 @endcode
30
31 @par 128-bit version:
32 @icost{SSE2-SSE3, 7}
33 @icost{SSSE3-AVX, 1-2}
34 @icost{AVX2, 2}
35
36 @par 256-bit version:
    The lower and upper 128-bit halves are processed as if the 128-bit version
    were applied to each of them separately.
39
40 @icost{SSE2-SSE3, 14}
41 @icost{SSSE3-AVX, 2-3}
42 @icost{NEON, ALTIVEC, 2}
43*/
44template<unsigned s, unsigned N, class E> SIMDPP_INL
45int8<N, expr_splat16<s,int8<N,E>>> splat16(const int8<N,E>& a)
46{
47 static_assert(s < 16, "Access out of bounds");
48 return { { a } };
49}
50
51template<unsigned s, unsigned N, class E> SIMDPP_INL
52uint8<N, expr_splat16<s,uint8<N,E>>> splat16(const uint8<N,E>& a)
53{
54 static_assert(s < 16, "Access out of bounds");
55 return { { a } };
56}
57
58/** Broadcasts the specified 16-bit value to all elements within 128-bit lanes.
59
60 @code
61 r0 = a[s]
62 r1 = a[s]
63 ...
64 r7 = a[s]
65 @endcode
66
67 @par 128-bit version:
68 @icost{SSE2-SSE3, 3}
69 @icost{SSSE3-AVX, 1-2}
70 @icost{AVX2, 2}
71
72 @par 256-bit version:
73 @icost{SSE2-SSE3, 6}
74 @icost{SSSE3-AVX, 2-3}
75 @icost{AVX2, NEON, ALTIVEC, 2}
76*/
77template<unsigned s, unsigned N, class E> SIMDPP_INL
78int16<N, expr_splat8<s,int16<N,E>>> splat8(const int16<N,E>& a)
79{
80 static_assert(s < 8, "Access out of bounds");
81 return { { a } };
82}
83
84template<unsigned s, unsigned N, class E> SIMDPP_INL
85uint16<N, expr_splat8<s,uint16<N,E>>> splat8(const uint16<N,E>& a)
86{
87 static_assert(s < 8, "Access out of bounds");
88 return { { a } };
89}
90
91/** Broadcasts the specified 32-bit value to all elements within 128-bit lanes.
92
93 @code
94 r0 = a[s]
95 r1 = a[s]
96 r2 = a[s]
97 r3 = a[s]
98 @endcode
99
100 @par 256-bit version:
101 @icost{NEON, ALTIVEC, 2}
102*/
103template<unsigned s, unsigned N, class E> SIMDPP_INL
104int32<N, expr_splat4<s,int32<N,E>>> splat4(const int32<N,E>& a)
105{
106 static_assert(s < 4, "Access out of bounds");
107 return { { a } };
108}
109
110template<unsigned s, unsigned N, class E> SIMDPP_INL
111uint32<N, expr_splat4<s,uint32<N,E>>> splat4(const uint32<N,E>& a)
112{
113 static_assert(s < 4, "Access out of bounds");
114 return { { a } };
115}
116
117/** Broadcasts the specified 64-bit value to all elements within 128-bit lanes.
118
119 @code
120 r0 = a[s]
121 r1 = a[s]
122 @endcode
123
124 @par 128-bit version:
125 @icost{ALTIVEC, 1-2}
126
127 @par 256-bit version:
128 @icost{SSE2-AVX, NEON, 2}
129 @icost{ALTIVEC, 2-3}
130*/
131template<unsigned s, unsigned N, class E> SIMDPP_INL
132int64<N, expr_splat2<s,int64<N,E>>> splat2(const int64<N,E>& a)
133{
134 static_assert(s < 2, "Access out of bounds");
135 return { { a } };
136}
137
138template<unsigned s, unsigned N, class E> SIMDPP_INL
139uint64<N, expr_splat2<s,uint64<N,E>>> splat2(const uint64<N,E>& a)
140{
141 static_assert(s < 2, "Access out of bounds");
142 return { { a } };
143}
144
/** Broadcasts the specified 32-bit floating-point value to all elements within 128-bit lanes.
146
147 @code
148 r0 = a[s]
149 r1 = a[s]
150 r2 = a[s]
151 r3 = a[s]
152 @endcode
153
154 @par 256-bit version:
155 @icost{SSE2-AVX, NEON, ALTIVEC, 2}
156*/
157template<unsigned s, unsigned N, class E> SIMDPP_INL
158float32<N, expr_splat4<s,float32<N,E>>> splat4(const float32<N,E>& a)
159{
160 static_assert(s < 4, "Access out of bounds");
161 return { { a } };
162}
163
/** Broadcasts the specified 64-bit floating-point value to all elements within 128-bit lanes.
165
166 @code
167 r0 = a[s]
168 r1 = a[s]
169 @endcode
170
171 @par 128-bit version:
172 @novec{NEON, ALTIVEC}
173
174 @par 256-bit version:
175 @icost{SSE2-AVX, 2}
176 @novec{NEON, ALTIVEC}
177*/
178template<unsigned s, unsigned N, class E> SIMDPP_INL
179float64<N, expr_splat2<s,float64<N,E>>> splat2(const float64<N,E>& a)
180{
181 static_assert(s < 2, "Access out of bounds");
182 return { { a } };
183}
184
185} // namespace SIMDPP_ARCH_NAMESPACE
186} // namespace simdpp
187
188#endif
189
190