1/* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_OR_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_OR_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/bit_or.h>
17#include <simdpp/core/extract.h>
18#include <simdpp/core/move_l.h>
19#include <simdpp/core/make_uint.h>
20
21namespace simdpp {
22namespace SIMDPP_ARCH_NAMESPACE {
23namespace detail {
24namespace insn {
25
26static SIMDPP_INL
27uint8_t i_reduce_or(const uint8x16& a)
28{
29#if SIMDPP_USE_NULL
30 uint8_t r = a.el(0);
31 for (unsigned i = 1; i < a.length; i++) {
32 r |= a.el(i);
33 }
34 return r;
35#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
36 uint8x16 r = bit_or(a, move16_l<8>(a));
37 r = bit_or(r, move16_l<4>(r));
38 r = bit_or(r, move16_l<2>(r));
39 r = bit_or(r, move16_l<1>(r));
40 return extract<0>(r);
41#endif
42}
43
#if SIMDPP_USE_AVX2
/** Bitwise-ORs all 32 bytes of @a a: the two 128-bit halves are OR-ed
    together and the result is handed to the 16-element overload.
*/
static SIMDPP_INL
uint8_t i_reduce_or(const uint8x32& a)
{
    const uint8x16 lower = detail::extract128<0>(a);
    const uint8x16 upper = detail::extract128<1>(a);
    const uint8x16 merged = bit_or(lower, upper);
    return i_reduce_or(merged);
}
#endif
53
#if SIMDPP_USE_AVX512BW
/** Bitwise-ORs all 64 bytes of @a a: the two 256-bit halves are OR-ed
    together and the result is handed to the 32-element overload.
*/
SIMDPP_INL uint8_t i_reduce_or(const uint8<64>& a)
{
    const uint8<32> lower = detail::extract256<0>(a);
    const uint8<32> upper = detail::extract256<1>(a);
    const uint8<32> merged = bit_or(lower, upper);
    return i_reduce_or(merged);
}
#endif
62
63template<unsigned N>
64SIMDPP_INL uint8_t i_reduce_or(const uint8<N>& a)
65{
66#if SIMDPP_USE_NULL
67 uint8_t r = 0;
68 for (unsigned j = 0; j < a.vec_length; ++j) {
69 for (unsigned i = 0; i < a.base_length; i++) {
70 r |= a.vec(j).el(i);
71 }
72 }
73 return r;
74#else
75 uint8v r = a.vec(0);
76 for (unsigned j = 1; j < a.vec_length; ++j) {
77 r = bit_or(r, a.vec(j));
78 }
79 return i_reduce_or(r);
80#endif
81}
82
83// -----------------------------------------------------------------------------
84
85static SIMDPP_INL
86uint16_t i_reduce_or(const uint16x8& a)
87{
88#if SIMDPP_USE_NULL
89 uint16_t r = a.el(0);
90 for (unsigned i = 0; i < a.length; i++) {
91 r |= a.el(i);
92 }
93 return r;
94#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
95 uint16x8 r = bit_or(a, move8_l<4>(a));
96 r = bit_or(r, move8_l<2>(r));
97 r = bit_or(r, move8_l<1>(r));
98 return extract<0>(r);
99#endif
100}
101
#if SIMDPP_USE_AVX2
/** Bitwise-ORs all 16 16-bit elements of @a a: the two 128-bit halves are
    OR-ed together and the result is handed to the 8-element overload.
*/
static SIMDPP_INL
uint16_t i_reduce_or(const uint16x16& a)
{
    const uint16x8 lower = detail::extract128<0>(a);
    const uint16x8 upper = detail::extract128<1>(a);
    const uint16x8 merged = bit_or(lower, upper);
    return i_reduce_or(merged);
}
#endif
111
#if SIMDPP_USE_AVX512BW
/** Bitwise-ORs all 32 16-bit elements of @a a: the two 256-bit halves are
    OR-ed together and the result is handed to the 16-element overload.
*/
SIMDPP_INL uint16_t i_reduce_or(const uint16<32>& a)
{
    const uint16<16> lower = detail::extract256<0>(a);
    const uint16<16> upper = detail::extract256<1>(a);
    const uint16<16> merged = bit_or(lower, upper);
    return i_reduce_or(merged);
}
#endif
120
121template<unsigned N>
122SIMDPP_INL uint16_t i_reduce_or(const uint16<N>& a)
123{
124#if SIMDPP_USE_NULL
125 uint16_t r = 0;
126 for (unsigned j = 0; j < a.vec_length; ++j) {
127 for (unsigned i = 0; i < a.base_length; i++) {
128 r |= a.vec(j).el(i);
129 }
130 }
131 return r;
132#else
133 uint16v r = a.vec(0);
134 for (unsigned j = 1; j < a.vec_length; ++j) {
135 r = bit_or(r, a.vec(j));
136 }
137 return i_reduce_or(r);
138#endif
139}
140
141// -----------------------------------------------------------------------------
142
143static SIMDPP_INL
144uint32_t i_reduce_or(const uint32x4& a)
145{
146#if SIMDPP_USE_NULL
147 uint32_t r = a.el(0);
148 for (unsigned i = 0; i < a.length; i++) {
149 r |= a.el(i);
150 }
151 return r;
152#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
153 uint32x4 r = bit_or(a, move4_l<2>(a));
154 r = bit_or(r, move4_l<1>(r));
155 return extract<0>(r);
156#endif
157}
158
#if SIMDPP_USE_AVX2
/** Bitwise-ORs all 8 32-bit elements of @a a.

    The two 128-bit halves are OR-ed together, then the 4-element result is
    folded in place (offsets 2, 1) and element 0 is returned.
*/
static SIMDPP_INL
uint32_t i_reduce_or(const uint32x8& a)
{
    const uint32x4 lower = detail::extract128<0>(a);
    const uint32x4 upper = detail::extract128<1>(a);
    const uint32x4 merged = bit_or(lower, upper);
    const uint32x4 fold2 = bit_or(merged, move4_l<2>(merged));
    const uint32x4 fold1 = bit_or(fold2, move4_l<1>(fold2));
    return extract<0>(fold1);
}
#endif
170
#if SIMDPP_USE_AVX512F
/** Bitwise-ORs all 16 32-bit elements of @a a: the two 256-bit halves are
    OR-ed together and the result is reduced by another i_reduce_or overload.
*/
static SIMDPP_INL
uint32_t i_reduce_or(const uint32<16>& a)
{
    const uint32<8> lower = extract256<0>(a);
    const uint32<8> upper = extract256<1>(a);
    return i_reduce_or(bit_or(lower, upper));
}
#endif
178
179template<unsigned N>
180SIMDPP_INL uint32_t i_reduce_or(const uint32<N>& a)
181{
182#if SIMDPP_USE_NULL
183 uint32_t r = 0;
184 for (unsigned j = 0; j < a.vec_length; ++j) {
185 for (unsigned i = 0; i < a.base_length; i++) {
186 r |= a.vec(j).el(i);
187 }
188 }
189 return r;
190#else
191 uint32v r = a.vec(0);
192 for (unsigned j = 1; j < a.vec_length; ++j) {
193 r = bit_or(r, a.vec(j));
194 }
195 return i_reduce_or(r);
196#endif
197}
198
199// -----------------------------------------------------------------------------
200
201static SIMDPP_INL
202uint64_t i_reduce_or(const uint64x2& a)
203{
204#if SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
205 uint64x2 r = bit_or(a, move2_l<1>(a));
206 return extract<0>(r);
207#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
208 uint64_t r = a.el(0);
209 for (unsigned i = 0; i < a.length; i++) {
210 r |= a.el(i);
211 }
212 return r;
213#endif
214}
215
#if SIMDPP_USE_AVX2
/** Bitwise-ORs all 4 64-bit elements of @a a.

    The two 128-bit halves are OR-ed together, then the 2-element result is
    folded once (offset 1) and element 0 is returned.
*/
static SIMDPP_INL
uint64_t i_reduce_or(const uint64x4& a)
{
    const uint64x2 lower = detail::extract128<0>(a);
    const uint64x2 upper = detail::extract128<1>(a);
    const uint64x2 merged = bit_or(lower, upper);
    const uint64x2 fold1 = bit_or(merged, move2_l<1>(merged));
    return extract<0>(fold1);
}
#endif
226
#if SIMDPP_USE_AVX512F
/** Bitwise-ORs all 8 64-bit elements of @a a: the two 256-bit halves are
    OR-ed together and the result is reduced by another i_reduce_or overload.
*/
static SIMDPP_INL
uint64_t i_reduce_or(const uint64<8>& a)
{
    const uint64<4> lower = extract256<0>(a);
    const uint64<4> upper = extract256<1>(a);
    return i_reduce_or(bit_or(lower, upper));
}
#endif
234
235template<unsigned N>
236SIMDPP_INL uint64_t i_reduce_or(const uint64<N>& a)
237{
238#if SIMDPP_USE_NULL
239 uint64_t r = 0;
240 for (unsigned j = 0; j < a.vec_length; ++j) {
241 for (unsigned i = 0; i < a.base_length; i++) {
242 r |= a.vec(j).el(i);
243 }
244 }
245 return r;
246#else
247 uint64v r = a.vec(0);
248 for (unsigned j = 1; j < a.vec_length; ++j) {
249 r = bit_or(r, a.vec(j));
250 }
251 return i_reduce_or(r);
252#endif
253}
254
255// -----------------------------------------------------------------------------
256
257} // namespace insn
258} // namespace detail
259} // namespace SIMDPP_ARCH_NAMESPACE
260} // namespace simdpp
261
262#endif
263
264