/*  Copyright (C) 2012-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_AVG_TRUNC_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_AVG_TRUNC_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/core/bit_and.h>
#include <simdpp/core/i_add.h>
#include <simdpp/core/i_shift_r.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

template<class V> SIMDPP_INL V v_emul_avg_trunc(const V& a, const V& b);
template<class V> SIMDPP_INL V v_emul_avg_trunc_i8(const V& a, const V& b);
template<class V> SIMDPP_INL V v_emul_avg_trunc_i16(const V& a, const V& b);
template<class V> SIMDPP_INL V v_emul_avg_trunc_i32(const V& a, const V& b);

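// i_avg_trunc computes the element-wise truncating average (a + b) >> 1
// without overflowing the element type. NEON has native instructions for
// this (vhadd), as does MSA (ave); SSE2/AltiVec/AVX2/AVX-512 only provide a
// *rounding* average (e.g. _mm_avg_epu8 computes (a + b + 1) >> 1), so the
// truncating variant is emulated via v_emul_avg_trunc below.
//
// Illustrative use through the public API (a sketch, assuming simd.h is
// included and a SIMD architecture is enabled):
//     simdpp::uint8<16> a = simdpp::make_uint(10);
//     simdpp::uint8<16> b = simdpp::make_uint(5);
//     simdpp::uint8<16> r = simdpp::avg_trunc(a, b); // each lane: (10+5)/2 == 7
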
static SIMDPP_INL
uint8x16 i_avg_trunc(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    uint8x16 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (uint16_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_u8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_u_b(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_avg_trunc(const uint8x32& a, const uint8x32& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_avg_trunc(const uint8<64>& a, const uint8<64>& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
int8x16 i_avg_trunc(const int8x16& a, const int8x16& b)
{
#if SIMDPP_USE_NULL
    int8x16 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (int16_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc_i8(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_s8(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_s_b(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int8x32 i_avg_trunc(const int8x32& a, const int8x32& b)
{
    return v_emul_avg_trunc_i8(a, b);
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL int8<64> i_avg_trunc(const int8<64>& a, const int8<64>& b)
{
    return v_emul_avg_trunc_i8(a, b);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint16x8 i_avg_trunc(const uint16x8& a, const uint16x8& b)
{
#if SIMDPP_USE_NULL
    uint16x8 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (uint32_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_u16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_u_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16x16 i_avg_trunc(const uint16x16& a, const uint16x16& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_avg_trunc(const uint16<32>& a, const uint16<32>& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
int16x8 i_avg_trunc(const int16x8& a, const int16x8& b)
{
#if SIMDPP_USE_NULL
    int16x8 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (int32_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc_i16(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_s16(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_s_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int16x16 i_avg_trunc(const int16x16& a, const int16x16& b)
{
    return v_emul_avg_trunc_i16(a, b);
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL int16<32> i_avg_trunc(const int16<32>& a, const int16<32>& b)
{
    return v_emul_avg_trunc_i16(a, b);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint32x4 i_avg_trunc(const uint32x4& a, const uint32x4& b)
{
#if SIMDPP_USE_NULL
    uint32x4 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (uint64_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_u32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_u_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32x8 i_avg_trunc(const uint32x8& a, const uint32x8& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_avg_trunc(const uint32<16>& a, const uint32<16>& b)
{
    return v_emul_avg_trunc(a, b);
}
#endif

// -----------------------------------------------------------------------------

static SIMDPP_INL
int32x4 i_avg_trunc(const int32x4& a, const int32x4& b)
{
#if SIMDPP_USE_NULL
    int32x4 r;
    for (unsigned i = 0; i < a.length; i++) {
        r.el(i) = (int64_t(a.el(i)) + b.el(i)) >> 1;
    }
    return r;
#elif SIMDPP_USE_SSE2 || SIMDPP_USE_ALTIVEC
    return v_emul_avg_trunc_i32(a, b);
#elif SIMDPP_USE_NEON
    return vhaddq_s32(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_ave_s_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int32x8 i_avg_trunc(const int32x8& a, const int32x8& b)
{
    return v_emul_avg_trunc_i32(a, b);
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int32<16> i_avg_trunc(const int32<16>& a, const int32<16>& b)
{
    return v_emul_avg_trunc_i32(a, b);
}
#endif

// -----------------------------------------------------------------------------

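// Vectors wider than the native SIMD width are handled generically: the
// SIMDPP_VEC_ARRAY_IMPL2 macro applies i_avg_trunc to each native-width
// sub-vector of the operands in turn.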
template<class V> SIMDPP_INL
V i_avg_trunc(const V& a, const V& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(V, i_avg_trunc, a, b);
}

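// Overflow-free truncating average of unsigned vectors, using the identity
//     (x + y) >> 1 == (x & y) + ((x ^ y) >> 1).
// x & y collects the bits set in both operands (each counts twice in the sum,
// hence once in the average); x ^ y collects the bits set in exactly one
// operand (each counts once in the sum, hence half in the average). Worked
// 8-bit example: avg_trunc(250, 8) == (250 & 8) + ((250 ^ 8) >> 1)
// == 8 + (242 >> 1) == 8 + 121 == 129 == (250 + 8) / 2, with no intermediate
// result wider than 8 bits.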
template<class V> SIMDPP_INL
V v_emul_avg_trunc(const V& a, const V& b)
{
    // (x & y) + ((x ^ y) >> 1)
    V x1 = bit_and(a, b);
    V x2 = bit_xor(a, b);
    return add(x1, shift_r<1>(x2));
}

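// The signed variants below reuse the unsigned emulation by biasing the
// operands into the unsigned range: XOR with the sign-bit constant (0x80,
// 0x8000 or 0x80000000) is equivalent to adding it modulo 2^N, which maps
// INT_MIN..INT_MAX onto 0..UINT_MAX while preserving order. Biasing both
// operands shifts their average by exactly the same constant, so the same
// XOR applied to the unsigned average undoes the bias.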
template<class V> SIMDPP_INL
V v_emul_avg_trunc_i8(const V& a, const V& b)
{
    typename V::uint_vector_type a2, b2, r, bias;
    bias = make_uint(0x80);

    a2 = bit_xor(a, bias); // add 0x80: bias into unsigned range
    b2 = bit_xor(b, bias); // add 0x80
    r = v_emul_avg_trunc(a2, b2); // average as unsigned
    r = bit_xor(r, bias); // subtract 0x80: undo the bias
    return r;
}

template<class V> SIMDPP_INL
V v_emul_avg_trunc_i16(const V& a, const V& b)
{
    typename V::uint_vector_type a2, b2, r, bias;
    bias = make_uint(0x8000);

    a2 = bit_xor(a, bias); // add 0x8000: bias into unsigned range
    b2 = bit_xor(b, bias); // add 0x8000
    r = v_emul_avg_trunc(a2, b2); // average as unsigned
    r = bit_xor(r, bias); // subtract 0x8000: undo the bias
    return r;
}

template<class V> SIMDPP_INL
V v_emul_avg_trunc_i32(const V& a, const V& b)
{
    typename V::uint_vector_type a2, b2, r, bias;
    bias = make_uint(0x80000000);

    a2 = bit_xor(a, bias); // add 0x80000000: bias into unsigned range
    b2 = bit_xor(b, bias); // add 0x80000000
    r = v_emul_avg_trunc(a2, b2); // average as unsigned
    r = bit_xor(r, bias); // subtract 0x80000000: undo the bias
    return r;
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif