1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_AVG_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_AVG_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/bit_xor.h> |
17 | #include <simdpp/core/bit_and.h> |
18 | #include <simdpp/core/i_add.h> |
19 | #include <simdpp/core/i_shift_r.h> |
20 | #include <simdpp/detail/vector_array_macros.h> |
21 | |
22 | namespace simdpp { |
23 | namespace SIMDPP_ARCH_NAMESPACE { |
24 | namespace detail { |
25 | namespace insn { |
26 | |
27 | template<class V> SIMDPP_INL |
28 | V v_emul_avg_u32(const V& a, const V& b); |
29 | template<class V> SIMDPP_INL |
30 | V v_emul_avg_i32(const V& a, const V& b); |
31 | |
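// Rounding average: each i_avg overload computes (a + b + 1) >> 1 per
// element. The scalar fallback widens each element before summing so the
// intermediate a + b + 1 cannot overflow; the native instructions used
// below implement the same rounding semantics in hardware.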
32 | static SIMDPP_INL |
33 | uint8x16 i_avg(const uint8x16& a, const uint8x16& b) |
34 | { |
35 | #if SIMDPP_USE_NULL |
36 | uint8x16 r; |
37 | for (unsigned i = 0; i < a.length; i++) { |
38 | r.el(i) = (uint16_t(a.el(i)) + b.el(i) + 1) >> 1; |
39 | } |
40 | return r; |
41 | #elif SIMDPP_USE_SSE2 |
42 | return _mm_avg_epu8(a.native(), b.native()); |
43 | #elif SIMDPP_USE_NEON |
44 | return vrhaddq_u8(a.native(), b.native()); |
45 | #elif SIMDPP_USE_ALTIVEC |
46 | return vec_avg(a.native(), b.native()); |
47 | #elif SIMDPP_USE_MSA |
48 | return __msa_aver_u_b(a.native(), b.native()); |
49 | #endif |
50 | } |
51 | |
52 | #if SIMDPP_USE_AVX2 |
53 | static SIMDPP_INL |
54 | uint8x32 i_avg(const uint8x32& a, const uint8x32& b) |
55 | { |
56 | return _mm256_avg_epu8(a.native(), b.native()); |
57 | } |
58 | #endif |
59 | |
60 | #if SIMDPP_USE_AVX512BW |
61 | SIMDPP_INL uint8<64> i_avg(const uint8<64>& a, const uint8<64>& b) |
62 | { |
63 | return _mm512_avg_epu8(a.native(), b.native()); |
64 | } |
65 | #endif |
66 | |
67 | // ----------------------------------------------------------------------------- |
68 | |
69 | static SIMDPP_INL |
70 | int8x16 i_avg(const int8x16& a, const int8x16& b) |
71 | { |
72 | #if SIMDPP_USE_NULL |
73 | int8x16 r; |
74 | for (unsigned i = 0; i < a.length; i++) { |
75 | r.el(i) = (int16_t(a.el(i)) + b.el(i) + 1) >> 1; |
76 | } |
77 | return r; |
78 | #elif SIMDPP_USE_SSE2 |
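    // SSE2 has no signed byte average, only PAVGB. XOR with the sign bit
    // 0x80 is the same as adding 0x80 modulo 256, which biases the signed
    // range [-128, 127] onto the unsigned range [0, 255] while preserving
    // order. Since avg(a + c, b + c) == avg(a, b) + c for any constant c,
    // averaging the biased values as unsigned and then removing the bias
    // yields the signed average.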
79 | uint8x16 a2, b2, bias, r; |
80 | bias = make_uint(0x80); |
81 | a2 = bit_xor(a, bias); // add |
82 | b2 = bit_xor(b, bias); // add |
83 | r = i_avg(a2, b2); // unsigned |
84 | r = bit_xor(r, bias); // sub |
85 | return r; |
86 | #elif SIMDPP_USE_NEON |
87 | return vrhaddq_s8(a.native(), b.native()); |
88 | #elif SIMDPP_USE_ALTIVEC |
89 | return vec_avg(a.native(), b.native()); |
90 | #elif SIMDPP_USE_MSA |
91 | return __msa_aver_s_b(a.native(), b.native()); |
92 | #endif |
93 | } |
94 | |
95 | #if SIMDPP_USE_AVX2 |
96 | static SIMDPP_INL |
97 | int8x32 i_avg(const int8x32& a, const int8x32& b) |
98 | { |
99 | uint8x32 a2, b2, bias, r; |
100 | bias = make_uint(0x80); |
101 | a2 = bit_xor(a, bias); // add |
102 | b2 = bit_xor(b, bias); // add |
103 | r = i_avg(a2, b2); // unsigned |
104 | r = bit_xor(r, bias); // sub |
105 | return r; |
106 | } |
107 | #endif |
108 | |
109 | #if SIMDPP_USE_AVX512BW |
SIMDPP_INL int8<64> i_avg(const int8<64>& a, const int8<64>& b)
111 | { |
112 | uint8<64> a2, b2, bias, r; |
113 | bias = make_uint(0x80); |
114 | a2 = bit_xor(a, bias); // add |
115 | b2 = bit_xor(b, bias); // add |
116 | r = i_avg(a2, b2); // unsigned |
117 | r = bit_xor(r, bias); // sub |
118 | return r; |
119 | } |
120 | #endif |
121 | |
122 | // ----------------------------------------------------------------------------- |
123 | |
124 | static SIMDPP_INL |
125 | uint16x8 i_avg(const uint16x8& a, const uint16x8& b) |
126 | { |
127 | #if SIMDPP_USE_NULL |
128 | uint16x8 r; |
129 | for (unsigned i = 0; i < a.length; i++) { |
130 | r.el(i) = (uint32_t(a.el(i)) + b.el(i) + 1) >> 1; |
131 | } |
132 | return r; |
133 | #elif SIMDPP_USE_SSE2 |
134 | return _mm_avg_epu16(a.native(), b.native()); |
135 | #elif SIMDPP_USE_NEON |
136 | return vrhaddq_u16(a.native(), b.native()); |
137 | #elif SIMDPP_USE_ALTIVEC |
138 | return vec_avg(a.native(), b.native()); |
139 | #elif SIMDPP_USE_MSA |
140 | return __msa_aver_u_h(a.native(), b.native()); |
141 | #endif |
142 | } |
143 | |
144 | #if SIMDPP_USE_AVX2 |
145 | static SIMDPP_INL |
146 | uint16x16 i_avg(const uint16x16& a, const uint16x16& b) |
147 | { |
148 | return _mm256_avg_epu16(a.native(), b.native()); |
149 | } |
150 | #endif |
151 | |
152 | #if SIMDPP_USE_AVX512BW |
153 | SIMDPP_INL uint16<32> i_avg(const uint16<32>& a, const uint16<32>& b) |
154 | { |
155 | return _mm512_avg_epu16(a.native(), b.native()); |
156 | } |
157 | #endif |
158 | |
159 | // ----------------------------------------------------------------------------- |
160 | |
161 | static SIMDPP_INL |
162 | int16x8 i_avg(const int16x8& a, const int16x8& b) |
163 | { |
164 | #if SIMDPP_USE_NULL |
165 | int16x8 r; |
166 | for (unsigned i = 0; i < a.length; i++) { |
167 | r.el(i) = (int32_t(a.el(i)) + b.el(i) + 1) >> 1; |
168 | } |
169 | return r; |
170 | #elif SIMDPP_USE_SSE2 |
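    // Same sign-bit bias trick as in the 8-bit signed case above, using
    // the 16-bit sign bit 0x8000.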
171 | uint16x8 a2, b2, r; |
172 | a2 = bit_xor(a, 0x8000); // add |
173 | b2 = bit_xor(b, 0x8000); // add |
174 | r = i_avg(a2, b2); // unsigned |
175 | r = bit_xor(r, 0x8000); // sub |
176 | return r; |
177 | #elif SIMDPP_USE_NEON |
178 | return vrhaddq_s16(a.native(), b.native()); |
179 | #elif SIMDPP_USE_ALTIVEC |
180 | return vec_avg(a.native(), b.native()); |
181 | #elif SIMDPP_USE_MSA |
182 | return __msa_aver_s_h(a.native(), b.native()); |
183 | #endif |
184 | } |
185 | |
186 | #if SIMDPP_USE_AVX2 |
187 | static SIMDPP_INL |
188 | int16x16 i_avg(const int16x16& a, const int16x16& b) |
189 | { |
190 | uint16x16 a2, b2, r; |
191 | a2 = bit_xor(a, 0x8000); // add |
192 | b2 = bit_xor(b, 0x8000); // add |
193 | r = i_avg(a2, b2); // unsigned |
194 | r = bit_xor(r, 0x8000); // sub |
195 | return r; |
196 | } |
197 | #endif |
198 | |
199 | #if SIMDPP_USE_AVX512BW |
200 | SIMDPP_INL int16<32> i_avg(const int16<32>& a, const int16<32>& b) |
201 | { |
202 | uint16<32> a2, b2, r; |
203 | a2 = bit_xor(a, 0x8000); // add |
204 | b2 = bit_xor(b, 0x8000); // add |
205 | r = i_avg(a2, b2); // unsigned |
206 | r = bit_xor(r, 0x8000); // sub |
207 | return r; |
208 | } |
209 | #endif |
210 | |
211 | // ----------------------------------------------------------------------------- |
212 | |
213 | static SIMDPP_INL |
214 | uint32x4 i_avg(const uint32x4& a, const uint32x4& b) |
215 | { |
216 | #if SIMDPP_USE_NULL |
217 | uint32x4 r; |
218 | for (unsigned i = 0; i < a.length; i++) { |
219 | r.el(i) = (uint64_t(a.el(i)) + b.el(i) + 1) >> 1; |
220 | } |
221 | return r; |
222 | #elif SIMDPP_USE_SSE2 |
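    // x86 provides packed averages only for 8-bit and 16-bit elements
    // (PAVGB/PAVGW), so every SSE/AVX width falls back to the bitwise
    // emulation defined at the bottom of this file.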
223 | return v_emul_avg_u32(a, b); |
224 | #elif SIMDPP_USE_NEON |
225 | return vrhaddq_u32(a.native(), b.native()); |
226 | #elif SIMDPP_USE_ALTIVEC |
227 | return vec_avg(a.native(), b.native()); |
228 | #elif SIMDPP_USE_MSA |
229 | return __msa_aver_u_w(a.native(), b.native()); |
230 | #endif |
231 | } |
232 | |
233 | #if SIMDPP_USE_AVX2 |
234 | static SIMDPP_INL |
235 | uint32x8 i_avg(const uint32x8& a, const uint32x8& b) |
236 | { |
237 | return v_emul_avg_u32(a, b); |
238 | } |
239 | #endif |
240 | |
241 | #if SIMDPP_USE_AVX512F |
242 | static SIMDPP_INL |
243 | uint32<16> i_avg(const uint32<16>& a, const uint32<16>& b) |
244 | { |
245 | return v_emul_avg_u32(a, b); |
246 | } |
247 | #endif |
248 | |
249 | // ----------------------------------------------------------------------------- |
250 | |
251 | static SIMDPP_INL |
252 | int32x4 i_avg(const int32x4& a, const int32x4& b) |
253 | { |
254 | #if SIMDPP_USE_NULL |
255 | int32x4 r; |
256 | for (unsigned i = 0; i < a.length; i++) { |
257 | r.el(i) = (int64_t(a.el(i)) + b.el(i) + 1) >> 1; |
258 | } |
259 | return r; |
260 | #elif SIMDPP_USE_SSE2 |
    return v_emul_avg_i32(a, b);
263 | #elif SIMDPP_USE_NEON |
264 | return vrhaddq_s32(a.native(), b.native()); |
265 | #elif SIMDPP_USE_ALTIVEC |
266 | return vec_avg(a.native(), b.native()); |
267 | #elif SIMDPP_USE_MSA |
268 | return __msa_aver_s_w(a.native(), b.native()); |
269 | #endif |
270 | } |
271 | |
272 | #if SIMDPP_USE_AVX2 |
273 | static SIMDPP_INL |
274 | int32x8 i_avg(const int32x8& a, const int32x8& b) |
275 | { |
276 | return v_emul_avg_i32(a, b); |
277 | } |
278 | #endif |
279 | |
280 | #if SIMDPP_USE_AVX512F |
281 | static SIMDPP_INL |
282 | int32<16> i_avg(const int32<16>& a, const int32<16>& b) |
283 | { |
284 | return v_emul_avg_i32(a, b); |
285 | } |
286 | #endif |
287 | |
288 | // ----------------------------------------------------------------------------- |
289 | |
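// For vector types wider than any native vector, SIMDPP_VEC_ARRAY_IMPL2
// applies i_avg to each native-width sub-vector in turn.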
290 | template<class V> SIMDPP_INL |
291 | V i_avg(const V& a, const V& b) |
292 | { |
293 | SIMDPP_VEC_ARRAY_IMPL2(V, i_avg, a, b); |
294 | } |
295 | |
// generic emulation implementations (forward-declared at the top of this file)
297 | |
298 | template<class V> SIMDPP_INL |
299 | V v_emul_avg_u32(const V& a, const V& b) |
300 | { |
    // avg(x, y) == (x & y) + ((x ^ y) >> 1) + ((x ^ y) & 1)
    // Since x + y == 2 * (x & y) + (x ^ y), this equals (x + y + 1) >> 1
    // while never overflowing the element type.
302 | V x1, x2, round; |
303 | x1 = bit_and(a, b); |
304 | x2 = bit_xor(a, b); |
305 | round = bit_and(x2, 1); |
306 | x1 = add(x1, shift_r<1>(x2)); |
307 | x1 = add(x1, round); |
308 | return x1; |
309 | } |
310 | |
311 | template<class V> SIMDPP_INL |
312 | V v_emul_avg_i32(const V& a, const V& b) |
313 | { |
314 | using VI = typename V::uint_vector_type; |
315 | VI a2, b2, r; |
316 | a2 = bit_xor(a, 0x80000000); // add |
317 | b2 = bit_xor(b, 0x80000000); // add |
318 | r = v_emul_avg_u32(a2, b2); // unsigned |
319 | r = bit_xor(r, 0x80000000); // sub |
320 | return r; |
321 | } |
322 | |
323 | } // namespace insn |
324 | } // namespace detail |
325 | } // namespace SIMDPP_ARCH_NAMESPACE |
326 | } // namespace simdpp |
327 | |
328 | #endif |