1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_BLEND_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_BLEND_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/blend.h> |
17 | #include <simdpp/detail/get_expr.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | namespace detail { |
23 | |
24 | /* Blend is a special function in that the type of the returned expression |
25 | depends on three arguments. |
26 | |
27 | As always, we want to reduce the number of overloads that need to be |
28 | created in order to match a specific case of an expression tree containing |
29 | 'blend' nodes. In this case we do the following in an attempt to achieve |
30 | that: |
31 | |
32 | * the first and the second types have the same type as the expression |
33 | itself, except that signed integer vectors are converted to unsigned |
34 | * the third type is the same as the expression itself, except when it is |
35 | a mask. In that case it is converted to floating-point mask if the |
36 | expression is floating-point expression and to integer mask otherwise |
37 | * TODO |
38 | |
39 | So, as a result, the following tuples of types will appear as the arguments |
40 | of the returned expression: |
41 | |
42 | * mask_int8, mask_int8, mask_int8 |
43 | * uint8, uint8, uint8 |
44 | * uint8, uint8, mask_int8 |
45 | * mask_int16, mask_int16, mask_int16 |
46 | * uint16, uint16, uint16 |
* uint16, uint16, mask_int16
48 | * mask_int32, mask_int32, mask_int32 |
49 | * mask_float32, mask_float32, mask_float32 |
50 | * uint32, uint32, uint32 |
51 | * uint32, uint32, mask_int32 |
52 | * float32, float32, float32 |
53 | * float32, float32, mask_float32 |
54 | * mask_int64, mask_int64, mask_int64 |
55 | * mask_float64, mask_float64, mask_float64 |
56 | * uint64, uint64, uint64 |
57 | * uint64, uint64, mask_int64 |
58 | * float64, float64, float64 |
59 | * float64, float64, mask_float64 |
60 | |
61 | The type of the returned expression is governed by the usual rules |
62 | (see simdpp/types/tag.h) |
63 | */ |
64 | |
65 | template<class V1, class V2, class V3> |
66 | class get_expr_blend { |
67 | |
68 | // (size_tag) get the size tag of the resulting expression |
69 | static const unsigned size_tag_t1 = V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag; |
70 | static const unsigned size_tag = size_tag_t1 > V3::size_tag ? size_tag_t1 : V3::size_tag; |
71 | |
72 | // (type_tag_t2) get the type tag of the first pair of parameters. We |
73 | // compute it by applying the promotion rules to the first two parameters, |
74 | // i.e. type_tag_t2 == get_expr2<V1,V2>::type::type_tag |
75 | static const unsigned type_tag_t1 = V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag; |
76 | static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT || |
77 | type_tag_t1 == SIMDPP_TAG_MASK_FLOAT; |
78 | static const unsigned type_tag_t2 = (is_mask_op1 && V1::size_tag != V2::size_tag) |
79 | ? SIMDPP_TAG_UINT : type_tag_t1; |
80 | |
81 | // (type_tag) get the type tag of the expression. We compute it by applying |
82 | // the promotion rules to the pair that includes the third parameter and |
83 | // the result of the first promotion. |
84 | // I.e. type_tag == get_expr2<get_expr2<V1,V2>::type, V3>::type::type_tag |
85 | static const unsigned type_tag_t3 = type_tag_t2 > V3::type_tag ? type_tag_t2 : V3::type_tag; |
86 | static const bool is_mask_op2 = type_tag_t3 == SIMDPP_TAG_MASK_INT || |
87 | type_tag_t3 == SIMDPP_TAG_MASK_FLOAT; |
88 | static const unsigned type_tag = (is_mask_op2 && V3::size_tag != size_tag_t1) |
89 | ? SIMDPP_TAG_UINT : type_tag_t3; |
90 | |
91 | // strip signed types |
92 | static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag; |
93 | |
94 | |
95 | static const bool is_v3_mask = V3::type_tag == SIMDPP_TAG_MASK_INT || |
96 | V3::type_tag == SIMDPP_TAG_MASK_FLOAT; |
97 | static const bool is_v12_float = v12_type_tag == SIMDPP_TAG_FLOAT || |
98 | v12_type_tag == SIMDPP_TAG_MASK_FLOAT; |
99 | |
100 | // if third parameter is a mask and its size tag matches the size tag of the |
101 | // first two parameters, then convert the mask to float mask if the |
102 | // expression is float and to integer mask otherwise |
103 | static const unsigned v3_type_tag = (!is_v3_mask || size_tag != V3::size_tag) ? v12_type_tag : |
104 | is_v12_float ? SIMDPP_TAG_MASK_FLOAT : |
105 | SIMDPP_TAG_MASK_INT; |
106 | |
107 | |
108 | public: |
109 | using v1_final_type = typename type_of_tag<v12_type_tag + size_tag, |
110 | V1::length_bytes, void>::type; |
111 | |
112 | using v2_final_type = typename type_of_tag<v12_type_tag + size_tag, |
113 | V1::length_bytes, void>::type; |
114 | |
115 | using v3_final_type = typename type_of_tag<v3_type_tag + size_tag, |
116 | V1::length_bytes, void>::type; |
117 | |
118 | using type = typename type_of_tag<type_tag + size_tag, V1::length_bytes, |
119 | expr_blend<V1, V2, V3>>::type; |
120 | }; |
121 | |
122 | } // namespace detail |
123 | |
124 | /** Composes a vector from two sources according to a mask. Each element within |
125 | the mask must have either all bits set or all bits unset. |
126 | |
127 | @code |
128 | r0 = (mask0 == 0xff ) ? on0 : off0 |
129 | ... |
130 | rN = (maskN == 0xff ) ? onN : offN |
131 | @endcode |
132 | |
133 | @todo icost |
134 | |
135 | @par int16 |
136 | |
137 | @par 128-bit version: |
138 | @icost{SSE2-AVX, 3} |
139 | |
140 | @par 256-bit version: |
141 | @icost{SSE2-AVX, 6} |
142 | @icost{NEON, ALTIVEC, 2} |
143 | |
144 | @par int32 |
145 | |
146 | @par 128-bit version: |
147 | @icost{SSE2-AVX, 3} |
148 | |
149 | @par 256-bit version: |
150 | @icost{SSE2-AVX, 6} |
151 | @icost{NEON, ALTIVEC, 2} |
152 | |
153 | @par int64 |
154 | |
155 | @par 128-bit version: |
156 | @icost{SSE2-AVX, 3} |
157 | |
158 | @par 256-bit version: |
159 | @icost{SSE2-AVX, 6} |
160 | @icost{NEON, ALTIVEC, 2} |
161 | |
162 | @par float32 |
163 | |
164 | @par 128-bit version: |
165 | @icost{SSE2-SSE4.1, 3} |
166 | |
167 | @par 256-bit version: |
168 | @icost{SSE2-SSE4.1, 6} |
169 | @icost{NEON, ALTIVEC, 2} |
170 | |
171 | @par float64 |
172 | |
173 | @par 128-bit version: |
174 | @icost{SSE2-SSE4.1, 3} |
175 | @novec{NEON, ALTIVEC} |
176 | |
177 | @par 256-bit version: |
178 | @icost{SSE2-SSE4.1, 6} |
179 | @novec{NEON, ALTIVEC} |
180 | */ |
181 | template<unsigned N, class V1, class V2, class V3> SIMDPP_INL |
182 | typename detail::get_expr_blend<V1, V2, V3>::type |
183 | blend(const any_vec<N,V1>& on, const any_vec<N,V2>& off, |
184 | const any_vec<N,V3>& mask) |
185 | { |
186 | return { { on.wrapped(), off.wrapped(), mask.wrapped() } }; |
187 | } |
188 | |
189 | } // namespace SIMDPP_ARCH_NAMESPACE |
190 | } // namespace simdpp |
191 | |
192 | #endif |
193 | |
194 | |