blend.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/core/blend.h]

1	/*  Copyright (C) 2013-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_CORE_BLEND_H
9	#define LIBSIMDPP_SIMDPP_CORE_BLEND_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/detail/insn/blend.h>
17	#include <simdpp/detail/get_expr.h>
18
19	namespace simdpp {
20	namespace SIMDPP_ARCH_NAMESPACE {
21
22	namespace detail {
23
24	/*  Blend is a special function in that the type of the returned expression
25	depends on three arguments.
26
27	As always, we want to reduce the number of overloads that need to be
28	created in order to match a specific case of an expression tree containing
29	'blend' nodes. In this case we do the following in an attempt to achieve
30	that:
31
32	* the first and the second types have the same type as the expression
33	itself, except that signed integer vectors are converted to unsigned
34	* the third type is the same as the expression itself, except when it is
35	a mask. In that case it is converted to floating-point mask if the
36	expression is floating-point expression and to integer mask otherwise
37	* TODO
38
39	So, as a result, the following tuples of types will appear as the arguments
40	of the returned expression:
41
42	* mask_int8, mask_int8, mask_int8
43	* uint8, uint8, uint8
44	* uint8, uint8, mask_int8
45	* mask_int16, mask_int16, mask_int16
46	* uint16, uint16, uint16
47	* uint16, uint16, mask_int16
48	* mask_int32, mask_int32, mask_int32
49	* mask_float32, mask_float32, mask_float32
50	* uint32, uint32, uint32
51	* uint32, uint32, mask_int32
52	* float32, float32, float32
53	* float32, float32, mask_float32
54	* mask_int64, mask_int64, mask_int64
55	* mask_float64, mask_float64, mask_float64
56	* uint64, uint64, uint64
57	* uint64, uint64, mask_int64
58	* float64, float64, float64
59	* float64, float64, mask_float64
60
61	The type of the returned expression is governed by the usual rules
62	(see simdpp/types/tag.h)
63	*/
64
65	template<class V1, class V2, class V3>
66	class get_expr_blend {
67
68	// (size_tag) get the size tag of the resulting expression
69	static const unsigned size_tag_t1 = V1::size_tag > V2::size_tag ? V1::size_tag : V2::size_tag;
70	static const unsigned size_tag = size_tag_t1 > V3::size_tag ? size_tag_t1 : V3::size_tag;
71
72	// (type_tag_t2) get the type tag of the first pair of parameters. We
73	// compute it by applying the promotion rules to the first two parameters,
74	// i.e. type_tag_t2 == get_expr2<V1,V2>::type::type_tag
75	static const unsigned type_tag_t1 = V1::type_tag > V2::type_tag ? V1::type_tag : V2::type_tag;
76	static const bool is_mask_op1 = type_tag_t1 == SIMDPP_TAG_MASK_INT \|\|
77	type_tag_t1 == SIMDPP_TAG_MASK_FLOAT;
78	static const unsigned type_tag_t2 = (is_mask_op1 && V1::size_tag != V2::size_tag)
79	? SIMDPP_TAG_UINT : type_tag_t1;
80
81	// (type_tag) get the type tag of the expression. We compute it by applying
82	// the promotion rules to the pair that includes the third parameter and
83	// the result of the first promotion.
84	// I.e. type_tag == get_expr2<get_expr2<V1,V2>::type, V3>::type::type_tag
85	static const unsigned type_tag_t3 = type_tag_t2 > V3::type_tag ? type_tag_t2 : V3::type_tag;
86	static const bool is_mask_op2 = type_tag_t3 == SIMDPP_TAG_MASK_INT \|\|
87	type_tag_t3 == SIMDPP_TAG_MASK_FLOAT;
88	static const unsigned type_tag = (is_mask_op2 && V3::size_tag != size_tag_t1)
89	? SIMDPP_TAG_UINT : type_tag_t3;
90
91	// strip signed types
92	static const unsigned v12_type_tag = type_tag == SIMDPP_TAG_INT ? SIMDPP_TAG_UINT : type_tag;
93
94
95	static const bool is_v3_mask = V3::type_tag == SIMDPP_TAG_MASK_INT \|\|
96	V3::type_tag == SIMDPP_TAG_MASK_FLOAT;
97	static const bool is_v12_float = v12_type_tag == SIMDPP_TAG_FLOAT \|\|
98	v12_type_tag == SIMDPP_TAG_MASK_FLOAT;
99
100	// if third parameter is a mask and its size tag matches the size tag of the
101	// first two parameters, then convert the mask to float mask if the
102	// expression is float and to integer mask otherwise
103	static const unsigned v3_type_tag = (!is_v3_mask \|\| size_tag != V3::size_tag) ? v12_type_tag :
104	is_v12_float ? SIMDPP_TAG_MASK_FLOAT :
105	SIMDPP_TAG_MASK_INT;
106
107
108	public:
109	using v1_final_type = typename type_of_tag<v12_type_tag + size_tag,
110	V1::length_bytes, void>::type;
111
112	using v2_final_type = typename type_of_tag<v12_type_tag + size_tag,
113	V1::length_bytes, void>::type;
114
115	using v3_final_type = typename type_of_tag<v3_type_tag + size_tag,
116	V1::length_bytes, void>::type;
117
118	using type = typename type_of_tag<type_tag + size_tag, V1::length_bytes,
119	expr_blend<V1, V2, V3>>::type;
120	};
121
122	} // namespace detail
123
124	/** Composes a vector from two sources according to a mask. Each element within
125	the mask must have either all bits set or all bits unset.
126
127	@code
128	r0 = (mask0 == 0xff ) ? on0 : off0
129	...
130	rN = (maskN == 0xff ) ? onN : offN
131	@endcode
132
133	@todo icost
134
135	@par int16
136
137	@par 128-bit version:
138	@icost{SSE2-AVX, 3}
139
140	@par 256-bit version:
141	@icost{SSE2-AVX, 6}
142	@icost{NEON, ALTIVEC, 2}
143
144	@par int32
145
146	@par 128-bit version:
147	@icost{SSE2-AVX, 3}
148
149	@par 256-bit version:
150	@icost{SSE2-AVX, 6}
151	@icost{NEON, ALTIVEC, 2}
152
153	@par int64
154
155	@par 128-bit version:
156	@icost{SSE2-AVX, 3}
157
158	@par 256-bit version:
159	@icost{SSE2-AVX, 6}
160	@icost{NEON, ALTIVEC, 2}
161
162	@par float32
163
164	@par 128-bit version:
165	@icost{SSE2-SSE4.1, 3}
166
167	@par 256-bit version:
168	@icost{SSE2-SSE4.1, 6}
169	@icost{NEON, ALTIVEC, 2}
170
171	@par float64
172
173	@par 128-bit version:
174	@icost{SSE2-SSE4.1, 3}
175	@novec{NEON, ALTIVEC}
176
177	@par 256-bit version:
178	@icost{SSE2-SSE4.1, 6}
179	@novec{NEON, ALTIVEC}
180	*/
181	template<unsigned N, class V1, class V2, class V3> SIMDPP_INL
182	typename detail::get_expr_blend<V1, V2, V3>::type
183	blend(const any_vec<N,V1>& on, const any_vec<N,V2>& off,
184	const any_vec<N,V3>& mask)
185	{
186	return { { on.wrapped(), off.wrapped(), mask.wrapped() } };
187	}
188
189	} // namespace SIMDPP_ARCH_NAMESPACE
190	} // namespace simdpp
191
192	#endif
193
194

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/core/blend.h