bit_or.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/bit_or.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_BIT_OR_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/to_mask.h>
17	#include <simdpp/detail/null/bitwise.h>
18	#include <simdpp/detail/vector_array_macros.h>
19
20	namespace simdpp {
21	namespace SIMDPP_ARCH_NAMESPACE {
22	namespace detail {
23	namespace insn {
24
25	// -----------------------------------------------------------------------------
26	// uint8, uint8
27	static SIMDPP_INL
28	uint8<`16`> i_bit_or(const uint8<`16`>& a, const uint8<`16`>& b)
29	{
30	#if SIMDPP_USE_NULL
31	return detail::null::bit_or(uint8x16(a), uint8x16(b));
32	#elif SIMDPP_USE_SSE2
33	return _mm_or_si128(a.native(), b.native());
34	#elif SIMDPP_USE_NEON
35	return vorrq_u8(a.native(), b.native());
36	#elif SIMDPP_USE_ALTIVEC
37	return vec_or(a.native(), b.native());
38	#elif SIMDPP_USE_MSA
39	return __msa_or_v(a.native(), b.native());
40	#endif
41	}
42
#if SIMDPP_USE_AVX2
// Bitwise OR of two uint8<32> vectors via _mm256_or_si256.
// Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL uint8<32> i_bit_or(const uint8<32>& a, const uint8<32>& b)
{
    const uint8<32> result = _mm256_or_si256(a.native(), b.native());
    return result;
}
#endif
50
#if SIMDPP_USE_AVX512BW
// Bitwise OR of two uint8<64> vectors via _mm512_or_si512.
// Only compiled when SIMDPP_USE_AVX512BW is set.
// Declared `static` so its linkage agrees with the `static SIMDPP_INL`
// i_bit_or overloads in this header (e.g. the uint8<16> and uint8<32> ones).
static SIMDPP_INL uint8<64> i_bit_or(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif
57
58	// -----------------------------------------------------------------------------
59	// mask_int8, mask_int8
60	static SIMDPP_INL
61	mask_int8<`16`> i_bit_or(const mask_int8<`16`>& a, const mask_int8<`16`>& b)
62	{
63	#if SIMDPP_USE_NULL
64	return detail::null::bit_or_mm(a, b);
65	#elif SIMDPP_USE_AVX512VL
66	return a.native() \| b.native();
67	#else
68	return to_mask(i_bit_or(uint8<`16`>(a), uint8<`16`>(b)));
69	#endif
70	}
71
#if SIMDPP_USE_AVX2
// Bitwise OR of two mask_int8<32> masks (only compiled under SIMDPP_USE_AVX2).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as uint8<32> vectors and passed through to_mask.
static SIMDPP_INL mask_int8<32> i_bit_or(const mask_int8<32>& a, const mask_int8<32>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const uint8<32> lhs(a);
    const uint8<32> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
83
#if SIMDPP_USE_AVX512BW
// Bitwise OR of two mask_int8<64> masks: the native() values are combined
// with operator|. Only compiled when SIMDPP_USE_AVX512BW is set.
// Declared `static` so its linkage agrees with the `static SIMDPP_INL`
// i_bit_or overloads in this header (e.g. the mask_int8<16> one).
static SIMDPP_INL mask_int8<64> i_bit_or(const mask_int8<64>& a, const mask_int8<64>& b)
{
    return a.native() | b.native();
}
#endif
90
91	// -----------------------------------------------------------------------------
92	// uint16, uint16
93	static SIMDPP_INL
94	uint16<`8`> i_bit_or(const uint16<`8`>& a, const uint16<`8`>& b)
95	{
96	return uint16<`8`>(i_bit_or(uint8<`16`>(a), uint8<`16`>(b)));
97	}
98
#if SIMDPP_USE_AVX2
// Bitwise OR of two uint16<16> vectors via _mm256_or_si256.
// Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL uint16<16> i_bit_or(const uint16<16>& a, const uint16<16>& b)
{
    const uint16<16> result = _mm256_or_si256(a.native(), b.native());
    return result;
}
#endif
106
#if SIMDPP_USE_AVX512BW
// Bitwise OR of two uint16<32> vectors via _mm512_or_si512.
// Only compiled when SIMDPP_USE_AVX512BW is set.
// Declared `static` so its linkage agrees with the `static SIMDPP_INL`
// i_bit_or overloads in this header (e.g. the uint16<8> and uint16<16> ones).
static SIMDPP_INL uint16<32> i_bit_or(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_or_si512(a.native(), b.native());
}
#endif
113
114	// -----------------------------------------------------------------------------
115	// mask_int16, mask_int16
116	static SIMDPP_INL
117	mask_int16<`8`> i_bit_or(const mask_int16<`8`>& a, const mask_int16<`8`>& b)
118	{
119	#if SIMDPP_USE_NULL
120	return detail::null::bit_or_mm(a, b);
121	#elif SIMDPP_USE_AVX512VL
122	return a.native() \| b.native();
123	#else
124	return to_mask(i_bit_or(uint16<`8`>(a), uint16<`8`>(b)));
125	#endif
126	}
127
#if SIMDPP_USE_AVX2
// Bitwise OR of two mask_int16<16> masks (only compiled under SIMDPP_USE_AVX2).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as uint16<16> vectors and passed through to_mask.
static SIMDPP_INL mask_int16<16> i_bit_or(const mask_int16<16>& a, const mask_int16<16>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const uint16<16> lhs(a);
    const uint16<16> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
139
#if SIMDPP_USE_AVX512BW
// Bitwise OR of two mask_int16<32> masks: the native() values are combined
// with operator|. Only compiled when SIMDPP_USE_AVX512BW is set.
// Declared `static` so its linkage agrees with the `static SIMDPP_INL`
// i_bit_or overloads in this header (e.g. the mask_int16<8> one).
static SIMDPP_INL mask_int16<32> i_bit_or(const mask_int16<32>& a, const mask_int16<32>& b)
{
    return a.native() | b.native();
}
#endif
146
147	// -----------------------------------------------------------------------------
148	// uint32, uint32
149	static SIMDPP_INL
150	uint32<`4`> i_bit_or(const uint32<`4`>& a, const uint32<`4`>& b)
151	{
152	return uint32<`4`>(i_bit_or(uint8<`16`>(a), uint8<`16`>(b)));
153	}
154
#if SIMDPP_USE_AVX2
// Bitwise OR of two uint32<8> vectors via _mm256_or_si256.
// Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL uint32<8> i_bit_or(const uint32<8>& a, const uint32<8>& b)
{
    const uint32<8> result = _mm256_or_si256(a.native(), b.native());
    return result;
}
#endif
162
#if SIMDPP_USE_AVX512F
// Bitwise OR of two uint32<16> vectors via _mm512_or_epi32.
// Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL uint32<16> i_bit_or(const uint32<16>& a, const uint32<16>& b)
{
    const uint32<16> result = _mm512_or_epi32(a.native(), b.native());
    return result;
}
#endif
170
171	// -----------------------------------------------------------------------------
172	// mask_int32, mask_int32
173	static SIMDPP_INL
174	mask_int32<`4`> i_bit_or(const mask_int32<`4`>& a, const mask_int32<`4`>& b)
175	{
176	#if SIMDPP_USE_NULL
177	return detail::null::bit_or_mm(a, b);
178	#elif SIMDPP_USE_AVX512VL
179	return a.native() \| b.native();
180	#else
181	return to_mask(i_bit_or(uint32<`4`>(a), uint32<`4`>(b)));
182	#endif
183	}
184
#if SIMDPP_USE_AVX2
// Bitwise OR of two mask_int32<8> masks (only compiled under SIMDPP_USE_AVX2).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as uint32<8> vectors and passed through to_mask.
static SIMDPP_INL mask_int32<8> i_bit_or(const mask_int32<8>& a, const mask_int32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const uint32<8> lhs(a);
    const uint32<8> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
196
#if SIMDPP_USE_AVX512F
// Bitwise OR of two mask_int32<16> masks via _mm512_kor on their native()
// values. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL mask_int32<16> i_bit_or(const mask_int32<16>& a, const mask_int32<16>& b)
{
    const mask_int32<16> result = _mm512_kor(a.native(), b.native());
    return result;
}
#endif
204
205	// -----------------------------------------------------------------------------
206	// uint64, uint64
207	static SIMDPP_INL
208	uint64<`2`> i_bit_or(const uint64<`2`>& a, const uint64<`2`>& b)
209	{
210	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
211	return detail::null::bit_or(a, b);
212	#else
213	return uint64<`2`>(i_bit_or(uint8<`16`>(a), uint8<`16`>(b)));
214	#endif
215	}
216
#if SIMDPP_USE_AVX2
// Bitwise OR of two uint64<4> vectors via _mm256_or_si256.
// Only compiled when SIMDPP_USE_AVX2 is set.
static SIMDPP_INL uint64<4> i_bit_or(const uint64<4>& a, const uint64<4>& b)
{
    const uint64<4> result = _mm256_or_si256(a.native(), b.native());
    return result;
}
#endif
224
#if SIMDPP_USE_AVX512F
// Bitwise OR of two uint64<8> vectors via _mm512_or_epi64.
// Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL uint64<8> i_bit_or(const uint64<8>& a, const uint64<8>& b)
{
    const uint64<8> result = _mm512_or_epi64(a.native(), b.native());
    return result;
}
#endif
232
233	// -----------------------------------------------------------------------------
234	// mask_int64, mask_int64
235	static SIMDPP_INL
236	mask_int64<`2`> i_bit_or(const mask_int64<`2`>& a, const mask_int64<`2`>& b)
237	{
238	#if SIMDPP_USE_NULL \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
239	return detail::null::bit_or_mm(a, b);
240	#elif SIMDPP_USE_AVX512VL
241	return a.native() \| b.native();
242	#else
243	return to_mask(i_bit_or(uint64<`2`>(a), uint64<`2`>(b)));
244	#endif
245	}
246
#if SIMDPP_USE_AVX2
// Bitwise OR of two mask_int64<4> masks (only compiled under SIMDPP_USE_AVX2).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as uint64<4> vectors and passed through to_mask.
static SIMDPP_INL mask_int64<4> i_bit_or(const mask_int64<4>& a, const mask_int64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const uint64<4> lhs(a);
    const uint64<4> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
258
#if SIMDPP_USE_AVX512F
// Bitwise OR of two mask_int64<8> masks via _mm512_kor on their native()
// values. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL mask_int64<8> i_bit_or(const mask_int64<8>& a, const mask_int64<8>& b)
{
    const mask_int64<8> result = _mm512_kor(a.native(), b.native());
    return result;
}
#endif
266
267	// -----------------------------------------------------------------------------
268	// float32, float32
269	static SIMDPP_INL
270	float32<`4`> i_bit_or(const float32<`4`>& a, const float32<`4`>& b)
271	{
272	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
273	return detail::null::bit_or(a, b);
274	#elif SIMDPP_USE_SSE2
275	return _mm_or_ps(a.native(), b.native());
276	#elif SIMDPP_USE_NEON
277	return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.native()),
278	vreinterpretq_u32_f32(b.native())));
279	#elif SIMDPP_USE_ALTIVEC
280	return vec_or(a.native(), b.native());
281	#elif SIMDPP_USE_MSA
282	return (float32<`4`>) i_bit_or(uint8<`16`>(a), uint8<`16`>(b));
283	#endif
284	}
285
#if SIMDPP_USE_AVX
// Bitwise OR of two float32<8> vectors via _mm256_or_ps.
// Only compiled when SIMDPP_USE_AVX is set.
static SIMDPP_INL float32<8> i_bit_or(const float32<8>& a, const float32<8>& b)
{
    const float32<8> result = _mm256_or_ps(a.native(), b.native());
    return result;
}
#endif
293
#if SIMDPP_USE_AVX512F
// Bitwise OR of two float32<16> vectors (only compiled under
// SIMDPP_USE_AVX512F). _mm512_or_ps is used when SIMDPP_USE_AVX512DQ is set;
// otherwise the operands are routed through the uint32<16> overload.
static SIMDPP_INL float32<16> i_bit_or(const float32<16>& a, const float32<16>& b)
{
#if SIMDPP_USE_AVX512DQ
    const float32<16> result = _mm512_or_ps(a.native(), b.native());
    return result;
#else
    const uint32<16> lhs(a);
    const uint32<16> rhs(b);
    return float32<16>(i_bit_or(lhs, rhs));
#endif
}
#endif
305
306	// -----------------------------------------------------------------------------
307	// mask_float32, mask_float32
308	static SIMDPP_INL
309	mask_float32<`4`> i_bit_or(const mask_float32<`4`>& a, const mask_float32<`4`>& b)
310	{
311	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
312	return detail::null::bit_or_mm(a, b);
313	#elif SIMDPP_USE_AVX512VL
314	return a.native() \| b.native();
315	#else
316	return to_mask(i_bit_or(float32<`4`>(a), float32<`4`>(b)));
317	#endif
318	}
319
#if SIMDPP_USE_AVX
// Bitwise OR of two mask_float32<8> masks (only compiled under SIMDPP_USE_AVX).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as float32<8> vectors and passed through to_mask.
static SIMDPP_INL mask_float32<8> i_bit_or(const mask_float32<8>& a, const mask_float32<8>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const float32<8> lhs(a);
    const float32<8> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
331
#if SIMDPP_USE_AVX512F
// Bitwise OR of two mask_float32<16> masks via _mm512_kor on their native()
// values. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL mask_float32<16> i_bit_or(const mask_float32<16>& a, const mask_float32<16>& b)
{
    const mask_float32<16> result = _mm512_kor(a.native(), b.native());
    return result;
}
#endif
339
340	// -----------------------------------------------------------------------------
341	// float64, float64
342	static SIMDPP_INL
343	float64<`2`> i_bit_or(const float64<`2`>& a, const float64<`2`>& b)
344	{
345	#if SIMDPP_USE_SSE2
346	return _mm_or_pd(a.native(), b.native());
347	#elif SIMDPP_USE_NEON64
348	return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.native()),
349	vreinterpretq_u64_f64(b.native())));
350	#elif SIMDPP_USE_VSX_206
351	return vec_or(a.native(), b.native());
352	#elif SIMDPP_USE_MSA
353	return (float64<`2`>) i_bit_or(uint8<`16`>(a), uint8<`16`>(b));
354	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_ALTIVEC
355	return detail::null::bit_or(a, b);
356	#endif
357	}
358
#if SIMDPP_USE_AVX
// Bitwise OR of two float64<4> vectors via _mm256_or_pd.
// Only compiled when SIMDPP_USE_AVX is set.
static SIMDPP_INL float64<4> i_bit_or(const float64<4>& a, const float64<4>& b)
{
    const float64<4> result = _mm256_or_pd(a.native(), b.native());
    return result;
}
#endif
366
#if SIMDPP_USE_AVX512F
// Bitwise OR of two float64<8> vectors (only compiled under
// SIMDPP_USE_AVX512F). _mm512_or_pd is used when SIMDPP_USE_AVX512DQ is set;
// otherwise the operands are routed through the uint64<8> overload.
static SIMDPP_INL float64<8> i_bit_or(const float64<8>& a, const float64<8>& b)
{
#if SIMDPP_USE_AVX512DQ
    const float64<8> result = _mm512_or_pd(a.native(), b.native());
    return result;
#else
    const uint64<8> lhs(a);
    const uint64<8> rhs(b);
    return float64<8>(i_bit_or(lhs, rhs));
#endif
}
#endif
378
379	// -----------------------------------------------------------------------------
380	// mask_float64, mask_float64
381	static SIMDPP_INL
382	mask_float64<`2`> i_bit_or(const mask_float64<`2`>& a, const mask_float64<`2`>& b)
383	{
384	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_206)
385	return detail::null::bit_or_mm(a, b);
386	#elif SIMDPP_USE_AVX512VL
387	return a.native() \| b.native();
388	#else
389	return to_mask(i_bit_or(float64<`2`>(a), float64<`2`>(b)));
390	#endif
391	}
392
#if SIMDPP_USE_AVX
// Bitwise OR of two mask_float64<4> masks (only compiled under SIMDPP_USE_AVX).
// With SIMDPP_USE_AVX512VL the native() values are ORed directly; otherwise
// the masks are ORed as float64<4> vectors and passed through to_mask.
static SIMDPP_INL mask_float64<4> i_bit_or(const mask_float64<4>& a, const mask_float64<4>& b)
{
#if SIMDPP_USE_AVX512VL
    return a.native() | b.native();
#else
    const float64<4> lhs(a);
    const float64<4> rhs(b);
    return to_mask(i_bit_or(lhs, rhs));
#endif
}
#endif
404
#if SIMDPP_USE_AVX512F
// Bitwise OR of two mask_float64<8> masks via _mm512_kor on their native()
// values. Only compiled when SIMDPP_USE_AVX512F is set.
static SIMDPP_INL mask_float64<8> i_bit_or(const mask_float64<8>& a, const mask_float64<8>& b)
{
    const mask_float64<8> result = _mm512_kor(a.native(), b.native());
    return result;
}
#endif
412
413	// -----------------------------------------------------------------------------
414
415	template<class V, class VM> SIMDPP_INL
416	V i_bit_or(const V& a, const VM& b)
417	{
418	SIMDPP_VEC_ARRAY_IMPL2(V, i_bit_or, a, b)
419	}
420
421	} // namespace insn
422	} // namespace detail
423	} // namespace SIMDPP_ARCH_NAMESPACE
424	} // namespace simdpp
425
426	#endif
427
428
429

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/bit_or.h