1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MAX_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_MAX_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/bit_xor.h>
17#include <simdpp/core/blend.h>
18#include <simdpp/core/cmp_gt.h>
19#include <simdpp/detail/null/math.h>
20#include <simdpp/detail/vector_array_macros.h>
21
22namespace simdpp {
23namespace SIMDPP_ARCH_NAMESPACE {
24namespace detail {
25namespace insn {
26
27
static SIMDPP_INL
int8x16 i_max(const int8x16& a, const int8x16& b)
{
    // Element-wise signed 8-bit maximum of two 128-bit vectors,
    // dispatched per target instruction set.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_max_epi8(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    // SSE2 has no signed 8-bit max, only unsigned (_mm_max_epu8).
    // Flipping the sign bit maps signed order onto unsigned order,
    // so: flip, take unsigned max, flip back.
    int8x16 ca = bit_xor(a, 0x80);
    int8x16 cb = bit_xor(b, 0x80);
    int8x16 r = _mm_max_epu8(ca.native(), cb.native());
    return bit_xor(r, 0x80);
#elif SIMDPP_USE_NEON
    return vmaxq_s8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_s_b(a.native(), b.native());
#endif
}
48
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int8x32 i_max(const int8x32& a, const int8x32& b)
{
    // AVX2 provides a direct signed 8-bit maximum over the full
    // 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epi8(lhs, rhs);
}
#endif
56
#if SIMDPP_USE_AVX512BW
SIMDPP_INL int8<64> i_max(const int8<64>& a, const int8<64>& b)
{
    // AVX-512BW covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epi8(lhs, rhs);
}
#endif
63
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
int8<N> i_max(const int8<N>& a, const int8<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int8<N>, i_max, a, b);
}
69
70// -----------------------------------------------------------------------------
71
static SIMDPP_INL
uint8x16 i_max(const uint8x16& a, const uint8x16& b)
{
    // Element-wise unsigned 8-bit maximum of two 128-bit vectors.
    // Unlike the signed case, SSE2 already has this natively.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_max_epu8(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vmaxq_u8(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_u_b(a.native(), b.native());
#endif
}
87
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8x32 i_max(const uint8x32& a, const uint8x32& b)
{
    // Direct unsigned 8-bit maximum over the full 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epu8(lhs, rhs);
}
#endif
95
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8<64> i_max(const uint8<64>& a, const uint8<64>& b)
{
    // AVX-512BW covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epu8(lhs, rhs);
}
#endif
102
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
uint8<N> i_max(const uint8<N>& a, const uint8<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint8<N>, i_max, a, b);
}
108
109// -----------------------------------------------------------------------------
110
static SIMDPP_INL
int16x8 i_max(const int16x8& a, const int16x8& b)
{
    // Element-wise signed 16-bit maximum of two 128-bit vectors.
    // SSE2 has this natively, so no emulation is needed here.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE2
    return _mm_max_epi16(a.native(), b.native());
#elif SIMDPP_USE_NEON
    return vmaxq_s16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_s_h(a.native(), b.native());
#endif
}
126
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int16x16 i_max(const int16x16& a, const int16x16& b)
{
    // Direct signed 16-bit maximum over the full 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epi16(lhs, rhs);
}
#endif
134
#if SIMDPP_USE_AVX512BW
SIMDPP_INL int16<32> i_max(const int16<32>& a, const int16<32>& b)
{
    // AVX-512BW covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epi16(lhs, rhs);
}
#endif
141
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
int16<N> i_max(const int16<N>& a, const int16<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int16<N>, i_max, a, b);
}
147
148// -----------------------------------------------------------------------------
149
static SIMDPP_INL
uint16x8 i_max(const uint16x8& a, const uint16x8& b)
{
    // Element-wise unsigned 16-bit maximum of two 128-bit vectors.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_max_epu16(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    // SSE2 has no unsigned 16-bit max, only signed (_mm_max_epi16).
    // Flipping the sign bit maps unsigned order onto signed order,
    // so: flip, take signed max, flip back.
    int16x8 ca = bit_xor(a, 0x8000);
    int16x8 cb = bit_xor(b, 0x8000);
    int16x8 r = _mm_max_epi16(ca.native(), cb.native());
    return bit_xor(r, 0x8000);
#elif SIMDPP_USE_NEON
    return vmaxq_u16(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_u_h(a.native(), b.native());
#endif
}
170
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16x16 i_max(const uint16x16& a, const uint16x16& b)
{
    // Direct unsigned 16-bit maximum over the full 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epu16(lhs, rhs);
}
#endif
178
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16<32> i_max(const uint16<32>& a, const uint16<32>& b)
{
    // AVX-512BW covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epu16(lhs, rhs);
}
#endif
185
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
uint16<N> i_max(const uint16<N>& a, const uint16<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint16<N>, i_max, a, b);
}
191
192// -----------------------------------------------------------------------------
193
static SIMDPP_INL
int32x4 i_max(const int32x4& a, const int32x4& b)
{
    // Element-wise signed 32-bit maximum of two 128-bit vectors.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_max_epi32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    // SSE2 has no 32-bit max instruction at all; emulate by selecting
    // a where a > b, otherwise b.
    mask_int32x4 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_NEON
    return vmaxq_s32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_s_w(a.native(), b.native());
#endif
}
212
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int32x8 i_max(const int32x8& a, const int32x8& b)
{
    // Direct signed 32-bit maximum over the full 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epi32(lhs, rhs);
}
#endif
220
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int32<16> i_max(const int32<16>& a, const int32<16>& b)
{
    // AVX-512F covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epi32(lhs, rhs);
}
#endif
228
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
int32<N> i_max(const int32<N>& a, const int32<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int32<N>, i_max, a, b);
}
234
235// -----------------------------------------------------------------------------
236
static SIMDPP_INL
uint32x4 i_max(const uint32x4& a, const uint32x4& b)
{
    // Element-wise unsigned 32-bit maximum of two 128-bit vectors.
#if SIMDPP_USE_NULL
    return detail::null::max(a, b);
#elif SIMDPP_USE_SSE4_1
    return _mm_max_epu32(a.native(), b.native());
#elif SIMDPP_USE_SSE2
    // SSE2 has no 32-bit max; emulate with compare + select. cmp_gt
    // on unsigned operands handles the signed/unsigned bias internally.
    mask_int32x4 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_NEON
    return vmaxq_u32(a.native(), b.native());
#elif SIMDPP_USE_ALTIVEC
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_u_w(a.native(), b.native());
#endif
}
255
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32x8 i_max(const uint32x8& a, const uint32x8& b)
{
    // Direct unsigned 32-bit maximum over the full 256-bit vector.
    __m256i lhs = a.native();
    __m256i rhs = b.native();
    return _mm256_max_epu32(lhs, rhs);
}
#endif
263
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32<16> i_max(const uint32<16>& a, const uint32<16>& b)
{
    // AVX-512F covers the whole 512-bit vector with one instruction.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epu32(lhs, rhs);
}
#endif
271
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
uint32<N> i_max(const uint32<N>& a, const uint32<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint32<N>, i_max, a, b);
}
277
278// -----------------------------------------------------------------------------
279
static SIMDPP_INL
int64x2 i_max(const int64x2& a, const int64x2& b)
{
    // Element-wise signed 64-bit maximum. Few ISAs provide a native
    // 64-bit max, so several branches synthesize it.
#if SIMDPP_USE_AVX512VL
    return _mm_max_epi64(a.native(), b.native());
#elif SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64
    // These targets have a 64-bit compare but no 64-bit max: select a
    // where a > b, otherwise b.
    mask_int64x2 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_VSX_207
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_s_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    // Scalar fallback (ALTIVEC here lacks 64-bit vector support).
    return detail::null::max(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}
298
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64x4 i_max(const int64x4& a, const int64x4& b)
{
    // Signed 64-bit maximum over a 256-bit vector.
#if SIMDPP_USE_AVX512VL
    return _mm256_max_epi64(a.native(), b.native());
#else
    // Plain AVX2 has no 64-bit max; emulate with compare + select.
    mask_int64x4 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#endif
}
#endif
311
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64<8> i_max(const int64<8>& a, const int64<8>& b)
{
    // AVX-512F provides a native signed 64-bit maximum.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epi64(lhs, rhs);
}
#endif
319
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
int64<N> i_max(const int64<N>& a, const int64<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(int64<N>, i_max, a, b);
}
325
326// -----------------------------------------------------------------------------
327
static SIMDPP_INL
uint64x2 i_max(const uint64x2& a, const uint64x2& b)
{
    // Element-wise unsigned 64-bit maximum. Few ISAs provide a native
    // 64-bit max, so several branches synthesize it.
#if SIMDPP_USE_AVX512VL
    return _mm_max_epu64(a.native(), b.native());
#elif SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64
    // No direct instruction: select a where a > b, otherwise b. The
    // unsigned cmp_gt overload handles the sign-bias internally.
    mask_int64x2 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#elif SIMDPP_USE_VSX_207
    return vec_max(a.native(), b.native());
#elif SIMDPP_USE_MSA
    return __msa_max_u_d(a.native(), b.native());
#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
    // Scalar fallback (ALTIVEC here lacks 64-bit vector support).
    return detail::null::max(a, b);
#else
    return SIMDPP_NOT_IMPLEMENTED2(a, b);
#endif
}
346
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64x4 i_max(const uint64x4& a, const uint64x4& b)
{
    // Unsigned 64-bit maximum over a 256-bit vector.
#if SIMDPP_USE_AVX512VL
    return _mm256_max_epu64(a.native(), b.native());
#else
    // Plain AVX2 has no 64-bit max; emulate with compare + select.
    mask_int64x4 mask = cmp_gt(a, b);
    return blend(a, b, mask);
#endif
}
#endif
359
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64<8> i_max(const uint64<8>& a, const uint64<8>& b)
{
    // AVX-512F provides a native unsigned 64-bit maximum.
    __m512i lhs = a.native();
    __m512i rhs = b.native();
    return _mm512_max_epu64(lhs, rhs);
}
#endif
367
// Generic width: split the vector into native-sized parts and apply
// i_max to each part in turn.
template<unsigned N> SIMDPP_INL
uint64<N> i_max(const uint64<N>& a, const uint64<N>& b)
{
    SIMDPP_VEC_ARRAY_IMPL2(uint64<N>, i_max, a, b);
}
373
374} // namespace insn
375} // namespace detail
376} // namespace SIMDPP_ARCH_NAMESPACE
377} // namespace simdpp
378
379#endif
380
381