1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_CORE_BLEND_H
9#define LIBSIMDPP_SIMDPP_CORE_BLEND_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/insn/blend.h>
17#include <simdpp/detail/get_expr.h>
18
19namespace simdpp {
20namespace SIMDPP_ARCH_NAMESPACE {
21
22namespace detail {
23
24/* Blend is a special function in that the type of the returned expression
25 depends on three arguments.
26
27 As always, we want to reduce the number of overloads that need to be
28 created in order to match a specific case of an expression tree containing
29 'blend' nodes. In this case we do the following in an attempt to achieve
30 that:
31
32 * the first and the second types have the same type as the expression
33 itself, except that signed integer vectors are converted to unsigned
34 * the third type is the same as the expression itself, except when it is
35 a mask. In that case it is converted to floating-point mask if the
36 expression is floating-point expression and to integer mask otherwise
37 * TODO
38
39 So, as a result, the following tuples of types will appear as the arguments
40 of the returned expression:
41
42 * mask_int8, mask_int8, mask_int8
43 * uint8, uint8, uint8
44 * uint8, uint8, mask_int8
45 * mask_int16, mask_int16, mask_int16
46 * uint16, uint16, uint16
47 * uint16, uint16, mask_uint16
48 * mask_int32, mask_int32, mask_int32
49 * mask_float32, mask_float32, mask_float32
50 * uint32, uint32, uint32
51 * uint32, uint32, mask_int32
52 * float32, float32, float32
53 * float32, float32, mask_float32
54 * mask_int64, mask_int64, mask_int64
55 * mask_float64, mask_float64, mask_float64
56 * uint64, uint64, uint64
57 * uint64, uint64, mask_int64
58 * float64, float64, float64
59 * float64, float64, mask_float64
60
61 The type of the returned expression is governed by the usual rules
62 (see simdpp/types/tag.h)
63*/
64
65template<class V1, class V2, class V3>
66class get_expr_blend {
67
68 // (size_tag) get the size tag of the resulting expression
69 static const unsigned size_tag_t1 = V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag;
70 static const unsigned size_tag = size_tag_t1 > V3::size_tag ? size_tag_t1 : V3::size_tag;
71
72 // (type_tag_t2) get the type tag of the first pair of parameters. We
73 // compute it by applying the promotion rules to the first two parameters,
74 // i.e. type_tag_t2 == get_expr2<V1,V2>::type::type_tag
75 static const unsigned type_tag_t1 = V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag;
76 static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT ||
77 type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
78 static const unsigned type_tag_t2 = (is_mask_op1 && V1::size_tag != V2::size_tag)
79 ? SIMDPP_TAG_UINT : type_tag_t1;
80
81 // (type_tag) get the type tag of the expression. We compute it by applying
82 // the promotion rules to the pair that includes the third parameter and
83 // the result of the first promotion.
84 // I.e. type_tag == get_expr2<get_expr2<V1,V2>::type, V3>::type::type_tag
85 static const unsigned type_tag_t3 = type_tag_t2 > V3::type_tag ? type_tag_t2 : V3::type_tag;
86 static const bool is_mask_op2 = type_tag_t3 == SIMDPP_TAG_MASK_INT ||
87 type_tag_t3 == SIMDPP_TAG_MASK_FLOAT;
88 static const unsigned type_tag = (is_mask_op2 && V3::size_tag != size_tag_t1)
89 ? SIMDPP_TAG_UINT : type_tag_t3;
90
91 // strip signed types
92 static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag;
93
94
95 static const bool is_v3_mask = V3::type_tag == SIMDPP_TAG_MASK_INT ||
96 V3::type_tag == SIMDPP_TAG_MASK_FLOAT;
97 static const bool is_v12_float = v12_type_tag == SIMDPP_TAG_FLOAT ||
98 v12_type_tag == SIMDPP_TAG_MASK_FLOAT;
99
100 // if third parameter is a mask and its size tag matches the size tag of the
101 // first two parameters, then convert the mask to float mask if the
102 // expression is float and to integer mask otherwise
103 static const unsigned v3_type_tag = (!is_v3_mask || size_tag != V3::size_tag) ? v12_type_tag :
104 is_v12_float ? SIMDPP_TAG_MASK_FLOAT :
105 SIMDPP_TAG_MASK_INT;
106
107
108public:
109 using v1_final_type = typename type_of_tag<v12_type_tag + size_tag,
110 V1::length_bytes, void>::type;
111
112 using v2_final_type = typename type_of_tag<v12_type_tag + size_tag,
113 V1::length_bytes, void>::type;
114
115 using v3_final_type = typename type_of_tag<v3_type_tag + size_tag,
116 V1::length_bytes, void>::type;
117
118 using type = typename type_of_tag<type_tag + size_tag, V1::length_bytes,
119 expr_blend<V1, V2, V3>>::type;
120};
121
122} // namespace detail
123
124/** Composes a vector from two sources according to a mask. Each element within
125 the mask must have either all bits set or all bits unset.
126
127 @code
128 r0 = (mask0 == 0xff ) ? on0 : off0
129 ...
130 rN = (maskN == 0xff ) ? onN : offN
131 @endcode
132
133 @todo icost
134
135 @par int16
136
137 @par 128-bit version:
138 @icost{SSE2-AVX, 3}
139
140 @par 256-bit version:
141 @icost{SSE2-AVX, 6}
142 @icost{NEON, ALTIVEC, 2}
143
144 @par int32
145
146 @par 128-bit version:
147 @icost{SSE2-AVX, 3}
148
149 @par 256-bit version:
150 @icost{SSE2-AVX, 6}
151 @icost{NEON, ALTIVEC, 2}
152
153 @par int64
154
155 @par 128-bit version:
156 @icost{SSE2-AVX, 3}
157
158 @par 256-bit version:
159 @icost{SSE2-AVX, 6}
160 @icost{NEON, ALTIVEC, 2}
161
162 @par float32
163
164 @par 128-bit version:
165 @icost{SSE2-SSE4.1, 3}
166
167 @par 256-bit version:
168 @icost{SSE2-SSE4.1, 6}
169 @icost{NEON, ALTIVEC, 2}
170
171 @par float64
172
173 @par 128-bit version:
174 @icost{SSE2-SSE4.1, 3}
175 @novec{NEON, ALTIVEC}
176
177 @par 256-bit version:
178 @icost{SSE2-SSE4.1, 6}
179 @novec{NEON, ALTIVEC}
180*/
181template<unsigned N, class V1, class V2, class V3> SIMDPP_INL
182typename detail::get_expr_blend<V1, V2, V3>::type
183 blend(const any_vec<N,V1>& on, const any_vec<N,V2>& off,
184 const any_vec<N,V3>& mask)
185{
186 return { { on.wrapped(), off.wrapped(), mask.wrapped() } };
187}
188
189} // namespace SIMDPP_ARCH_NAMESPACE
190} // namespace simdpp
191
192#endif
193
194