i_min.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_min.h]

1	/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MIN_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MIN_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/blend.h>
17	#include <simdpp/core/cmp_lt.h>
18	#include <simdpp/core/bit_xor.h>
19	#include <simdpp/detail/not_implemented.h>
20	#include <simdpp/detail/null/math.h>
21	#include <simdpp/detail/vector_array_macros.h>
22
23	namespace simdpp {
24	namespace SIMDPP_ARCH_NAMESPACE {
25	namespace detail {
26	namespace insn {
27
28
29	static SIMDPP_INL
30	int8x16 i_min(const int8x16& a, const int8x16& b)
31	{
32	#if SIMDPP_USE_NULL
33	return detail::null::min(a, b);
34	#elif SIMDPP_USE_SSE4_1
35	return _mm_min_epi8(a.native(), b.native());
36	#elif SIMDPP_USE_SSE2
37	int8x16 ca = bit_xor(a, `0x80`);
38	int8x16 cb = bit_xor(b, `0x80`);
39	int8x16 r = _mm_min_epu8(ca.native(), cb.native());
40	return bit_xor(r, `0x80`);
41	#elif SIMDPP_USE_NEON
42	return vminq_s8(a.native(), b.native());
43	#elif SIMDPP_USE_ALTIVEC
44	return vec_min(a.native(), b.native());
45	#elif SIMDPP_USE_MSA
46	return __msa_min_s_b(a.native(), b.native());
47	#endif
48	}
49
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 32-lane signed 8-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL int8x32 i_min(const int8x32& a, const int8x32& b)
{
    return _mm256_min_epi8(a.native(), b.native());
}
#endif
57
#if SIMDPP_USE_AVX512BW
// Element-wise minimum of two 64-lane signed 8-bit vectors; only compiled
// when the AVX-512BW backend is enabled.
SIMDPP_INL int8<64> i_min(const int8<64>& a, const int8<64>& b)
{
    return _mm512_min_epi8(a.native(), b.native());
}
#endif
64
65	template<unsigned N> SIMDPP_INL
66	int8<N> i_min(const int8<N>& a, const int8<N>& b)
67	{
68	SIMDPP_VEC_ARRAY_IMPL2(int8<N>, i_min, a, b);
69	}
70
71	// -----------------------------------------------------------------------------
72
73	static SIMDPP_INL
74	uint8x16 i_min(const uint8x16& a, const uint8x16& b)
75	{
76	#if SIMDPP_USE_NULL
77	return detail::null::min(a, b);
78	#elif SIMDPP_USE_SSE2
79	return _mm_min_epu8(a.native(), b.native());
80	#elif SIMDPP_USE_NEON
81	return vminq_u8(a.native(), b.native());
82	#elif SIMDPP_USE_ALTIVEC
83	return vec_min(a.native(), b.native());
84	#elif SIMDPP_USE_MSA
85	return __msa_min_u_b(a.native(), b.native());
86	#endif
87	}
88
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 32-lane unsigned 8-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL uint8x32 i_min(const uint8x32& a, const uint8x32& b)
{
    return _mm256_min_epu8(a.native(), b.native());
}
#endif
96
#if SIMDPP_USE_AVX512BW
// Element-wise minimum of two 64-lane unsigned 8-bit vectors; only compiled
// when the AVX-512BW backend is enabled.
SIMDPP_INL uint8<64> i_min(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_min_epu8(a.native(), b.native());
}
#endif
103
104	template<unsigned N> SIMDPP_INL
105	uint8<N> i_min(const uint8<N>& a, const uint8<N>& b)
106	{
107	SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_min, a, b);
108	}
109
110	// -----------------------------------------------------------------------------
111
112	static SIMDPP_INL
113	int16x8 i_min(const int16x8& a, const int16x8& b)
114	{
115	#if SIMDPP_USE_NULL
116	return detail::null::min(a, b);
117	#elif SIMDPP_USE_SSE2
118	return _mm_min_epi16(a.native(), b.native());
119	#elif SIMDPP_USE_NEON
120	return vminq_s16(a.native(), b.native());
121	#elif SIMDPP_USE_ALTIVEC
122	return vec_min(a.native(), b.native());
123	#elif SIMDPP_USE_MSA
124	return __msa_min_s_h(a.native(), b.native());
125	#endif
126	}
127
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 16-lane signed 16-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL int16x16 i_min(const int16x16& a, const int16x16& b)
{
    return _mm256_min_epi16(a.native(), b.native());
}
#endif
135
#if SIMDPP_USE_AVX512BW
// Element-wise minimum of two 32-lane signed 16-bit vectors; only compiled
// when the AVX-512BW backend is enabled.
SIMDPP_INL int16<32> i_min(const int16<32>& a, const int16<32>& b)
{
    return _mm512_min_epi16(a.native(), b.native());
}
#endif
142
143	template<unsigned N> SIMDPP_INL
144	int16<N> i_min(const int16<N>& a, const int16<N>& b)
145	{
146	SIMDPP_VEC_ARRAY_IMPL2(int16<N>, i_min, a, b);
147	}
148
149	// -----------------------------------------------------------------------------
150
151	static SIMDPP_INL
152	uint16x8 i_min(const uint16x8& a, const uint16x8& b)
153	{
154	#if SIMDPP_USE_NULL
155	return detail::null::min(a, b);
156	#elif SIMDPP_USE_SSE4_1
157	return _mm_min_epu16(a.native(), b.native());
158	#elif SIMDPP_USE_SSE2
159	int16x8 ca = bit_xor(a, `0x8000`);
160	int16x8 cb = bit_xor(b, `0x8000`);
161	int16x8 r = _mm_min_epi16(ca.native(), cb.native());
162	return bit_xor(r, `0x8000`);
163	#elif SIMDPP_USE_NEON
164	return vminq_u16(a.native(), b.native());
165	#elif SIMDPP_USE_ALTIVEC
166	return vec_min(a.native(), b.native());
167	#elif SIMDPP_USE_MSA
168	return __msa_min_u_h(a.native(), b.native());
169	#endif
170	}
171
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 16-lane unsigned 16-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL uint16x16 i_min(const uint16x16& a, const uint16x16& b)
{
    return _mm256_min_epu16(a.native(), b.native());
}
#endif
179
#if SIMDPP_USE_AVX512BW
// Element-wise minimum of two 32-lane unsigned 16-bit vectors; only compiled
// when the AVX-512BW backend is enabled.
SIMDPP_INL uint16<32> i_min(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_min_epu16(a.native(), b.native());
}
#endif
186
187	template<unsigned N> SIMDPP_INL
188	uint16<N> i_min(const uint16<N>& a, const uint16<N>& b)
189	{
190	SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_min, a, b);
191	}
192
193	// -----------------------------------------------------------------------------
194
195	static SIMDPP_INL
196	int32x4 i_min(const int32x4& a, const int32x4& b)
197	{
198	#if SIMDPP_USE_NULL
199	return detail::null::min(a, b);
200	#elif SIMDPP_USE_SSE4_1
201	return _mm_min_epi32(a.native(), b.native());
202	#elif SIMDPP_USE_SSE2
203	mask_int32x4 mask = cmp_lt(a, b);
204	return blend(a, b, mask);
205	#elif SIMDPP_USE_NEON
206	return vminq_s32(a.native(), b.native());
207	#elif SIMDPP_USE_ALTIVEC
208	return vec_min(a.native(), b.native());
209	#elif SIMDPP_USE_MSA
210	return __msa_min_s_w(a.native(), b.native());
211	#endif
212	}
213
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 8-lane signed 32-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL int32x8 i_min(const int32x8& a, const int32x8& b)
{
    return _mm256_min_epi32(a.native(), b.native());
}
#endif
221
#if SIMDPP_USE_AVX512F
// Element-wise minimum of two 16-lane signed 32-bit vectors; only compiled
// when the AVX-512F backend is enabled.
static SIMDPP_INL
int32<16> i_min(const int32<16>& a, const int32<16>& b)
{
    return _mm512_min_epi32(a.native(), b.native());
}
#endif
229
230	template<unsigned N> SIMDPP_INL
231	int32<N> i_min(const int32<N>& a, const int32<N>& b)
232	{
233	SIMDPP_VEC_ARRAY_IMPL2(int32<N>, i_min, a, b);
234	}
235
236	// -----------------------------------------------------------------------------
237
238	static SIMDPP_INL
239	uint32x4 i_min(const uint32x4& a, const uint32x4& b)
240	{
241	#if SIMDPP_USE_NULL
242	return detail::null::min(a, b);
243	#elif SIMDPP_USE_SSE4_1
244	return _mm_min_epu32(a.native(), b.native());
245	#elif SIMDPP_USE_SSE2
246	mask_int32x4 mask = cmp_lt(a, b);
247	return blend(a, b, mask);
248	#elif SIMDPP_USE_NEON
249	return vminq_u32(a.native(), b.native());
250	#elif SIMDPP_USE_ALTIVEC
251	return vec_min(a.native(), b.native());
252	#elif SIMDPP_USE_MSA
253	return __msa_min_u_w(a.native(), b.native());
254	#endif
255	}
256
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 8-lane unsigned 32-bit vectors; only compiled
// when the AVX2 backend is enabled.
static SIMDPP_INL uint32x8 i_min(const uint32x8& a, const uint32x8& b)
{
    return _mm256_min_epu32(a.native(), b.native());
}
#endif
264
#if SIMDPP_USE_AVX512F
// Element-wise minimum of two 16-lane unsigned 32-bit vectors; only compiled
// when the AVX-512F backend is enabled.
static SIMDPP_INL
uint32<16> i_min(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_min_epu32(a.native(), b.native());
}
#endif
272
273	template<unsigned N> SIMDPP_INL
274	uint32<N> i_min(const uint32<N>& a, const uint32<N>& b)
275	{
276	SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_min, a, b);
277	}
278
279	// -----------------------------------------------------------------------------
280
281	static SIMDPP_INL
282	int64x2 i_min(const int64x2& a, const int64x2& b)
283	{
284	#if SIMDPP_USE_AVX512VL
285	return _mm_min_epi64(a.native(), b.native());
286	#elif SIMDPP_USE_AVX2 \|\| SIMDPP_USE_NEON64
287	mask_int64x2 mask = cmp_lt(a, b);
288	return blend(a, b, mask);
289	#elif SIMDPP_USE_VSX_207
290	return vec_min(a.native(), b.native());
291	#elif SIMDPP_USE_MSA
292	return __msa_min_s_d(a.native(), b.native());
293	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
294	return detail::null::min(a, b);
295	#else
296	return SIMDPP_NOT_IMPLEMENTED2(a, b);
297	#endif
298	}
299
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 4-lane signed 64-bit vectors; only compiled
// when the AVX2 backend is enabled. Uses the native instruction when
// AVX-512VL is available, otherwise a compare-and-blend.
static SIMDPP_INL int64x4 i_min(const int64x4& a, const int64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_min_epi64(a.native(), b.native());
#else
    mask_int64x4 a_is_less = cmp_lt(a, b);
    return blend(a, b, a_is_less);
#endif
}
#endif
312
#if SIMDPP_USE_AVX512F
// Element-wise minimum of two 8-lane signed 64-bit vectors; only compiled
// when the AVX-512F backend is enabled.
static SIMDPP_INL
int64<8> i_min(const int64<8>& a, const int64<8>& b)
{
    return _mm512_min_epi64(a.native(), b.native());
}
#endif
320
321	template<unsigned N> SIMDPP_INL
322	int64<N> i_min(const int64<N>& a, const int64<N>& b)
323	{
324	SIMDPP_VEC_ARRAY_IMPL2(int64<N>, i_min, a, b);
325	}
326
327	// -----------------------------------------------------------------------------
328
329	static SIMDPP_INL
330	uint64x2 i_min(const uint64x2& a, const uint64x2& b)
331	{
332	#if SIMDPP_USE_AVX512VL
333	return _mm_min_epu64(a.native(), b.native());
334	#elif SIMDPP_USE_AVX2 \|\| SIMDPP_USE_NEON64
335	mask_int64x2 mask = cmp_lt(a, b);
336	return blend(a, b, mask);
337	#elif SIMDPP_USE_VSX_207
338	return vec_min(a.native(), b.native());
339	#elif SIMDPP_USE_MSA
340	return __msa_min_u_d(a.native(), b.native());
341	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_ALTIVEC
342	return detail::null::min(a, b);
343	#else
344	return SIMDPP_NOT_IMPLEMENTED2(a, b);
345	#endif
346	}
347
#if SIMDPP_USE_AVX2
// Element-wise minimum of two 4-lane unsigned 64-bit vectors; only compiled
// when the AVX2 backend is enabled. Uses the native instruction when
// AVX-512VL is available, otherwise a compare-and-blend.
static SIMDPP_INL uint64x4 i_min(const uint64x4& a, const uint64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_min_epu64(a.native(), b.native());
#else
    mask_int64x4 a_is_less = cmp_lt(a, b);
    return blend(a, b, a_is_less);
#endif
}
#endif
360
#if SIMDPP_USE_AVX512F
// Element-wise minimum of two 8-lane unsigned 64-bit vectors; only compiled
// when the AVX-512F backend is enabled.
static SIMDPP_INL
uint64<8> i_min(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_min_epu64(a.native(), b.native());
}
#endif
368
369	template<unsigned N> SIMDPP_INL
370	uint64<N> i_min(const uint64<N>& a, const uint64<N>& b)
371	{
372	SIMDPP_VEC_ARRAY_IMPL2(uint64<N>, i_min, a, b);
373	}
374
375	} // namespace insn
376	} // namespace detail
377	} // namespace SIMDPP_ARCH_NAMESPACE
378	} // namespace simdpp
379
380	#endif
381
382

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/i_min.h