/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MIN_H
#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MIN_H

#ifndef LIBSIMDPP_SIMD_H
    #error "This file must be included through simd.h"
#endif

#include <simdpp/types.h>
#include <simdpp/core/blend.h>
#include <simdpp/core/cmp_lt.h>
#include <simdpp/core/bit_xor.h>
#include <simdpp/detail/not_implemented.h>
#include <simdpp/detail/null/math.h>
#include <simdpp/detail/vector_array_macros.h>

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {
namespace insn {

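// These i_min overloads back the public simdpp::min() for integer vectors.
// A minimal usage sketch through the public wrapper (assuming the simdpp::min
// entry point and make_int helper from the public headers; values shown for
// illustration only):
//
//     #include <simdpp/simd.h>
//     using namespace simdpp;
//
//     int32<4> a = make_int(1, -2, 3, -4);
//     int32<4> b = make_int(0, 5, -6, 7);
//     int32<4> r = min(a, b); // r contains 0, -2, -6, -4
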
static SIMDPP_INL
int8x16 i_min(const int8x16& a, const int8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_min_epi8(a.native(), b.native());
#elif SIMDPP_USE_SSE2
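    // SSE2 has no signed 8-bit min (_mm_min_epi8 is SSE4.1). Flipping the
    // sign bit maps signed order onto unsigned order, so an unsigned min on
    // the biased values, un-biased afterwards, yields the signed minimum.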
    int8x16 ca = bit_xor(a, 0x80);
    int8x16 cb = bit_xor(b, 0x80);
    int8x16 r = _mm_min_epu8(ca.native(), cb.native());
    return bit_xor(r, 0x80);
#elif SIMDPP_USE_NEON
    return vminq_s8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_s_b(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int8x32 i_min(const int8x32& a, const int8x32& b)
{
    return _mm256_min_epi8(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL int8<64> i_min(const int8<64>& a, const int8<64>& b)
{
    return _mm512_min_epi8(a.native(), b.native());
}
#endif

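// Generic case: vectors wider than the native width are stored as an array
// of native-width sub-vectors; SIMDPP_VEC_ARRAY_IMPL2 applies i_min to each
// pair of sub-vectors in turn.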
template<unsigned N> SIMDPP_INL
int8<N> i_min(const int8<N>& a, const int8<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int8<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint8x16 i_min(const uint8x16& a, const uint8x16& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_min_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vminq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_u_b(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_min(const uint8x32& a, const uint8x32& b)
{
    return _mm256_min_epu8(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_min(const uint8<64>& a, const uint8<64>& b)
{
    return _mm512_min_epu8(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
uint8<N> i_min(const uint8<N>& a, const uint8<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
int16x8 i_min(const int16x8& a, const int16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_min_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vminq_s16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_s_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int16x16 i_min(const int16x16& a, const int16x16& b)
{
    return _mm256_min_epi16(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL int16<32> i_min(const int16<32>& a, const int16<32>& b)
{
    return _mm512_min_epi16(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
int16<N> i_min(const int16<N>& a, const int16<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int16<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint16x8 i_min(const uint16x8& a, const uint16x8& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_min_epu16(a.native(), b.native());
#elif SIMDPP_USE_SSE2
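    // The converse trick to the signed 8-bit case: SSE2 has no unsigned
    // 16-bit min (_mm_min_epu16 is SSE4.1), so bias both operands by 0x8000
    // and reuse the signed 16-bit min.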
    int16x8 ca = bit_xor(a, 0x8000);
    int16x8 cb = bit_xor(b, 0x8000);
    int16x8 r = _mm_min_epi16(ca.native(), cb.native());
    return bit_xor(r, 0x8000);
#elif SIMDPP_USE_NEON
    return vminq_u16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_u_h(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16x16 i_min(const uint16x16& a, const uint16x16& b)
{
    return _mm256_min_epu16(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_min(const uint16<32>& a, const uint16<32>& b)
{
    return _mm512_min_epu16(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
uint16<N> i_min(const uint16<N>& a, const uint16<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
int32x4 i_min(const int32x4& a, const int32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_min_epi32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
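    // No 32-bit min instruction until SSE4.1; select elementwise instead:
    // wherever a < b, take a, otherwise take b.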
    mask_int32x4 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_NEON
    return vminq_s32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_s_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int32x8 i_min(const int32x8& a, const int32x8& b)
{
    return _mm256_min_epi32(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int32<16> i_min(const int32<16>& a, const int32<16>& b)
{
    return _mm512_min_epi32(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
int32<N> i_min(const int32<N>& a, const int32<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int32<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint32x4 i_min(const uint32x4& a, const uint32x4& b)
{
#if SIMDPP_USE_NULL
    return detail::null::min(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_min_epu32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    mask_int32x4 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_NEON
    return vminq_u32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_u_w(a.native(), b.native());
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32x8 i_min(const uint32x8& a, const uint32x8& b)
{
    return _mm256_min_epu32(a.native(), b.native());
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_min(const uint32<16>& a, const uint32<16>& b)
{
    return _mm512_min_epu32(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
uint32<N> i_min(const uint32<N>& a, const uint32<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
int64x2 i_min(const int64x2& a, const int64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_min_epi64(a.native(), b.native());
#elif SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64
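    // A native 64-bit min only arrives with AVX512VL; AVX2 and AArch64 NEON
    // do provide a 64-bit compare, so fall back to compare-and-blend there.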
    mask_int64x2 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_VSX_207
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_s_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::min(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64x4 i_min(const int64x4& a, const int64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_min_epi64(a.native(), b.native());
#else
    mask_int64x4 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<8> i_min(const int64<8>& a, const int64<8>& b)
{
    return _mm512_min_epi64(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
int64<N> i_min(const int64<N>& a, const int64<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int64<N>, i_min, a, b);
}

// -----------------------------------------------------------------------------

static SIMDPP_INL
uint64x2 i_min(const uint64x2& a, const uint64x2& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm_min_epu64(a.native(), b.native());
#elif SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64
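    // Same compare-and-blend fallback as the signed case; cmp_lt performs
    // the unsigned 64-bit comparison on these targets.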
    mask_int64x2 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_VSX_207
    return vec_min(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_min_u_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    return detail::null::min(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}

#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64x4 i_min(const uint64x4& a, const uint64x4& b)
{
#if SIMDPP_USE_AVX512VL
    return _mm256_min_epu64(a.native(), b.native());
#else
    mask_int64x4 mask = cmp_lt(a, b);
    return blend(a, b, mask);
#endif
}
#endif

#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_min(const uint64<8>& a, const uint64<8>& b)
{
    return _mm512_min_epu64(a.native(), b.native());
}
#endif

template<unsigned N> SIMDPP_INL
uint64<N> i_min(const uint64<N>& a, const uint64<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint64<N>, i_min, a, b);
}

} // namespace insn
} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

#endif