i_reduce_or.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_reduce_or.h]

/*  Copyright (C) 2016  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_OR_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_OR_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/bit_or.h>
17	#include <simdpp/core/extract.h>
18	#include <simdpp/core/move_l.h>
19	#include <simdpp/core/make_uint.h>
20
21	namespace simdpp {
22	namespace SIMDPP_ARCH_NAMESPACE {
23	namespace detail {
24	namespace insn {
25
26	static SIMDPP_INL
27	uint8_t i_reduce_or(const uint8x16& a)
28	{
29	#if SIMDPP_USE_NULL
30	uint8_t r = a.el(`0`);
31	for (unsigned i = `1`; i < a.length; i++) {
32	r \|= a.el(i);
33	}
34	return r;
35	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
36	uint8x16 r = bit_or(a, move16_l<`8`>(a));
37	r = bit_or(r, move16_l<`4`>(r));
38	r = bit_or(r, move16_l<`2`>(r));
39	r = bit_or(r, move16_l<`1`>(r));
40	return extract<`0`>(r);
41	#endif
42	}
43
#if SIMDPP_USE_AVX2
/**
 * Reduces a uint8x32 vector to a single value by OR-ing all of its elements.
 *
 * The two 128-bit halves are OR-ed together and the result is handed to the
 * uint8x16 overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint8_t i_reduce_or(const uint8x32& a)
{
    uint8x16 r = detail::extract128<0>(a);
    r = bit_or(r, detail::extract128<1>(a));
    return i_reduce_or(r);
}
#endif
53
#if SIMDPP_USE_AVX512BW
/**
 * Reduces a uint8<64> vector to a single value by OR-ing all of its elements.
 *
 * The two 256-bit halves are OR-ed together and the result is handed to the
 * uint8<32> overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
SIMDPP_INL uint8_t i_reduce_or(const uint8<64>& a)
{
    uint8<32> r = detail::extract256<0>(a);
    r = bit_or(r, detail::extract256<1>(a));
    return i_reduce_or(r);
}
#endif
62
63	template<unsigned N>
64	SIMDPP_INL uint8_t i_reduce_or(const uint8<N>& a)
65	{
66	#if SIMDPP_USE_NULL
67	uint8_t r = `0`;
68	for (unsigned j = `0`; j < a.vec_length; ++j) {
69	for (unsigned i = `0`; i < a.base_length; i++) {
70	r \|= a.vec(j).el(i);
71	}
72	}
73	return r;
74	#else
75	uint8v r = a.vec(`0`);
76	for (unsigned j = `1`; j < a.vec_length; ++j) {
77	r = bit_or(r, a.vec(j));
78	}
79	return i_reduce_or(r);
80	#endif
81	}
82
83	// -----------------------------------------------------------------------------
84
85	static SIMDPP_INL
86	uint16_t i_reduce_or(const uint16x8& a)
87	{
88	#if SIMDPP_USE_NULL
89	uint16_t r = a.el(`0`);
90	for (unsigned i = `0`; i < a.length; i++) {
91	r \|= a.el(i);
92	}
93	return r;
94	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
95	uint16x8 r = bit_or(a, move8_l<`4`>(a));
96	r = bit_or(r, move8_l<`2`>(r));
97	r = bit_or(r, move8_l<`1`>(r));
98	return extract<`0`>(r);
99	#endif
100	}
101
#if SIMDPP_USE_AVX2
/**
 * Reduces a uint16x16 vector to a single value by OR-ing all of its elements.
 *
 * The two 128-bit halves are OR-ed together and the result is handed to the
 * uint16x8 overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint16_t i_reduce_or(const uint16x16& a)
{
    uint16x8 r = detail::extract128<0>(a);
    r = bit_or(r, detail::extract128<1>(a));
    return i_reduce_or(r);
}
#endif
111
#if SIMDPP_USE_AVX512BW
/**
 * Reduces a uint16<32> vector to a single value by OR-ing all of its elements.
 *
 * The two 256-bit halves are OR-ed together and the result is handed to the
 * uint16<16> overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
SIMDPP_INL uint16_t i_reduce_or(const uint16<32>& a)
{
    uint16<16> r = detail::extract256<0>(a);
    r = bit_or(r, detail::extract256<1>(a));
    return i_reduce_or(r);
}
#endif
120
121	template<unsigned N>
122	SIMDPP_INL uint16_t i_reduce_or(const uint16<N>& a)
123	{
124	#if SIMDPP_USE_NULL
125	uint16_t r = `0`;
126	for (unsigned j = `0`; j < a.vec_length; ++j) {
127	for (unsigned i = `0`; i < a.base_length; i++) {
128	r \|= a.vec(j).el(i);
129	}
130	}
131	return r;
132	#else
133	uint16v r = a.vec(`0`);
134	for (unsigned j = `1`; j < a.vec_length; ++j) {
135	r = bit_or(r, a.vec(j));
136	}
137	return i_reduce_or(r);
138	#endif
139	}
140
141	// -----------------------------------------------------------------------------
142
143	static SIMDPP_INL
144	uint32_t i_reduce_or(const uint32x4& a)
145	{
146	#if SIMDPP_USE_NULL
147	uint32_t r = a.el(`0`);
148	for (unsigned i = `0`; i < a.length; i++) {
149	r \|= a.el(i);
150	}
151	return r;
152	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
153	uint32x4 r = bit_or(a, move4_l<`2`>(a));
154	r = bit_or(r, move4_l<`1`>(r));
155	return extract<`0`>(r);
156	#endif
157	}
158
#if SIMDPP_USE_AVX2
/**
 * Reduces a uint32x8 vector to a single value by OR-ing all of its elements.
 *
 * The two 128-bit halves are OR-ed together and the result is handed to the
 * uint32x4 overload, mirroring the 8- and 16-bit AVX2 overloads instead of
 * repeating the 128-bit fold inline.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint32_t i_reduce_or(const uint32x8& a)
{
    uint32x4 r = detail::extract128<0>(a);
    r = bit_or(r, detail::extract128<1>(a));
    return i_reduce_or(r);
}
#endif
170
#if SIMDPP_USE_AVX512F
/**
 * Reduces a uint32<16> vector to a single value by OR-ing all of its elements.
 *
 * The two 256-bit halves are OR-ed together and the result is handed to the
 * 256-bit overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint32_t i_reduce_or(const uint32<16>& a)
{
    return i_reduce_or(bit_or(detail::extract256<0>(a),
                              detail::extract256<1>(a)));
}
#endif
178
179	template<unsigned N>
180	SIMDPP_INL uint32_t i_reduce_or(const uint32<N>& a)
181	{
182	#if SIMDPP_USE_NULL
183	uint32_t r = `0`;
184	for (unsigned j = `0`; j < a.vec_length; ++j) {
185	for (unsigned i = `0`; i < a.base_length; i++) {
186	r \|= a.vec(j).el(i);
187	}
188	}
189	return r;
190	#else
191	uint32v r = a.vec(`0`);
192	for (unsigned j = `1`; j < a.vec_length; ++j) {
193	r = bit_or(r, a.vec(j));
194	}
195	return i_reduce_or(r);
196	#endif
197	}
198
199	// -----------------------------------------------------------------------------
200
201	static SIMDPP_INL
202	uint64_t i_reduce_or(const uint64x2& a)
203	{
204	#if SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_VSX_207 \|\| SIMDPP_USE_MSA
205	uint64x2 r = bit_or(a, move2_l<`1`>(a));
206	return extract<`0`>(r);
207	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
208	uint64_t r = a.el(`0`);
209	for (unsigned i = `0`; i < a.length; i++) {
210	r \|= a.el(i);
211	}
212	return r;
213	#endif
214	}
215
#if SIMDPP_USE_AVX2
/**
 * Reduces a uint64x4 vector to a single value by OR-ing all of its elements.
 *
 * The two 128-bit halves are OR-ed together and the result is handed to the
 * uint64x2 overload, mirroring the 8- and 16-bit AVX2 overloads instead of
 * repeating the 128-bit fold inline.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint64_t i_reduce_or(const uint64x4& a)
{
    uint64x2 r = detail::extract128<0>(a);
    r = bit_or(r, detail::extract128<1>(a));
    return i_reduce_or(r);
}
#endif
226
#if SIMDPP_USE_AVX512F
/**
 * Reduces a uint64<8> vector to a single value by OR-ing all of its elements.
 *
 * The two 256-bit halves are OR-ed together and the result is handed to the
 * 256-bit overload.
 *
 * @param a vector whose elements are combined
 * @return bitwise OR of every element of @a a
 */
static SIMDPP_INL
uint64_t i_reduce_or(const uint64<8>& a)
{
    return i_reduce_or(bit_or(detail::extract256<0>(a),
                              detail::extract256<1>(a)));
}
#endif
234
235	template<unsigned N>
236	SIMDPP_INL uint64_t i_reduce_or(const uint64<N>& a)
237	{
238	#if SIMDPP_USE_NULL
239	uint64_t r = `0`;
240	for (unsigned j = `0`; j < a.vec_length; ++j) {
241	for (unsigned i = `0`; i < a.base_length; i++) {
242	r \|= a.vec(j).el(i);
243	}
244	}
245	return r;
246	#else
247	uint64v r = a.vec(`0`);
248	for (unsigned j = `1`; j < a.vec_length; ++j) {
249	r = bit_or(r, a.vec(j));
250	}
251	return i_reduce_or(r);
252	#endif
253	}
254
255	// -----------------------------------------------------------------------------
256
257	} // namespace insn
258	} // namespace detail
259	} // namespace SIMDPP_ARCH_NAMESPACE
260	} // namespace simdpp
261
262	#endif
263
264

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_reduce_or.h