1/* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_MIN_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_MIN_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/i_min.h>
17#include <simdpp/core/extract.h>
18#include <simdpp/core/move_l.h>
19#include <simdpp/core/make_uint.h>
20#include <simdpp/detail/mem_block.h>
21#include <limits>
22
23namespace simdpp {
24namespace SIMDPP_ARCH_NAMESPACE {
25namespace detail {
26namespace insn {
27
28static SIMDPP_INL
29uint8_t i_reduce_min(const uint8x16& a)
30{
31#if SIMDPP_USE_NULL
32 uint8_t r = a.el(0);
33 for (unsigned i = 0; i < a.length; i++) {
34 r = r < a.el(i) ? r : a.el(i);
35 }
36 return r;
37#elif SIMDPP_USE_NEON64
38 return vminvq_u8(a.native());
39#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
40 uint8x16 r = min(a, move16_l<8>(a));
41 r = min(r, move16_l<4>(r));
42 r = min(r, move16_l<2>(r));
43 r = min(r, move16_l<1>(r));
44 return extract<0>(r);
45#endif
46}
47
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint8_t i_reduce_min(const uint8<32>& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    uint8x16 lo = detail::extract128<0>(a);
    uint8x16 hi = detail::extract128<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
57
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint8_t i_reduce_min(const uint8<64>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint8<32> lo = detail::extract256<0>(a);
    uint8<32> hi = detail::extract256<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
66
67template<unsigned N>
68SIMDPP_INL uint8_t i_reduce_min(const uint8<N>& a)
69{
70#if SIMDPP_USE_NULL
71 uint8_t r = std::numeric_limits<uint8_t>::max();
72 for (unsigned j = 0; j < a.vec_length; ++j) {
73 for (unsigned i = 0; i < a.base_length; i++) {
74 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
75 }
76 }
77 return r;
78#else
79 uint8v r = a.vec(0);
80 for (unsigned j = 1; j < a.vec_length; ++j) {
81 r = min(r, a.vec(j));
82 }
83 return i_reduce_min(r);
84#endif
85}
86
87// -----------------------------------------------------------------------------
88
89static SIMDPP_INL
90int8_t i_reduce_min(const int8x16& a)
91{
92#if SIMDPP_USE_NULL
93 int8_t r = a.el(0);
94 for (unsigned i = 0; i < a.length; i++) {
95 r = r < a.el(i) ? r : a.el(i);
96 }
97 return r;
98#elif SIMDPP_USE_NEON64
99 return vminvq_s8(a.native());
100#elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
101 int8x16 r = min(a, move16_l<8>(a));
102 r = min(r, move16_l<4>(r));
103 r = min(r, move16_l<2>(r));
104 r = min(r, move16_l<1>(r));
105 return extract<0>(r);
106#elif SIMDPP_USE_SSE2
107 // no instruction for int8 min available, only for uint8
108 uint8x16 ca = bit_xor(a, 0x80);
109 return i_reduce_min(ca) ^ 0x80;
110#endif
111}
112
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int8_t i_reduce_min(const int8x32& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    int8x16 lo = detail::extract128<0>(a);
    int8x16 hi = detail::extract128<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
122
#if SIMDPP_USE_AVX512BW
SIMDPP_INL int8_t i_reduce_min(const int8<64>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    int8<32> lo = detail::extract256<0>(a);
    int8<32> hi = detail::extract256<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
131
132template<unsigned N>
133SIMDPP_INL int8_t i_reduce_min(const int8<N>& a)
134{
135#if SIMDPP_USE_NULL
136 int8_t r = std::numeric_limits<int8_t>::max();
137 for (unsigned j = 0; j < a.vec_length; ++j) {
138 for (unsigned i = 0; i < a.base_length; i++) {
139 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
140 }
141 }
142 return r;
143#elif SIMDPP_USE_SSE2 && !SIMDPP_USE_SSE4_1
144 // no instruction for int8 min available, only for uint8
145 uint8x16 r = bit_xor(a.vec(0), 0x80);
146 for (unsigned j = 1; j < a.vec_length; ++j) {
147 uint8x16 ca = bit_xor(a.vec(j), 0x80);
148 r = min(r, ca);
149 }
150 return i_reduce_min(r) ^ 0x80;
151#else
152 int8v r = a.vec(0);
153 for (unsigned j = 1; j < a.vec_length; ++j) {
154 r = min(r, a.vec(j));
155 }
156 return i_reduce_min(r);
157#endif
158}
159
160// -----------------------------------------------------------------------------
// Forward declaration: the SSE2 code path of the uint16x8 overload below
// reduces through the signed int16x8 overload, which is defined later in
// this file.
static SIMDPP_INL
int16_t i_reduce_min(const int16x8& a);
163
164static SIMDPP_INL
165uint16_t i_reduce_min(const uint16x8& a)
166{
167#if SIMDPP_USE_NULL
168 uint16_t r = a.el(0);
169 for (unsigned i = 0; i < a.length; i++) {
170 r = r < a.el(i) ? r : a.el(i);
171 }
172 return r;
173#elif SIMDPP_USE_NEON64
174 return vminvq_u16(a.native());
175#elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
176 uint16x8 r = min(a, move8_l<4>(a));
177 r = min(r, move8_l<2>(r));
178 r = min(r, move8_l<1>(r));
179 return extract<0>(r);
180#elif SIMDPP_USE_SSE2
181 // no instruction for uint16 min available, only for int16
182 int16x8 ca = bit_xor(a, 0x8000);
183 return i_reduce_min(ca) ^ 0x8000;
184#endif
185}
186
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint16_t i_reduce_min(const uint16x16& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    uint16x8 lo = detail::extract128<0>(a);
    uint16x8 hi = detail::extract128<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
196
#if SIMDPP_USE_AVX512BW
SIMDPP_INL uint16_t i_reduce_min(const uint16<32>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint16<16> lo = detail::extract256<0>(a);
    uint16<16> hi = detail::extract256<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
205
206template<unsigned N>
207SIMDPP_INL uint16_t i_reduce_min(const uint16<N>& a)
208{
209#if SIMDPP_USE_NULL
210 uint16_t r = std::numeric_limits<uint16_t>::max();
211 for (unsigned j = 0; j < a.vec_length; ++j) {
212 for (unsigned i = 0; i < a.base_length; i++) {
213 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
214 }
215 }
216 return r;
217#elif SIMDPP_USE_SSE2 && !SIMDPP_USE_SSE4_1
218 // no instruction for uint16 min available, only for int16
219 int16x8 r = bit_xor(a.vec(0), 0x8000);
220 for (unsigned j = 1; j < a.vec_length; ++j) {
221 int16x8 ca = bit_xor(a.vec(j), 0x8000);
222 r = min(r, ca);
223 }
224 return i_reduce_min(r) ^ 0x8000;
225#else
226 uint16v r = a.vec(0);
227 for (unsigned j = 1; j < a.vec_length; ++j) {
228 r = min(r, a.vec(j));
229 }
230 return i_reduce_min(r);
231#endif
232}
233
234// -----------------------------------------------------------------------------
235
236static SIMDPP_INL
237int16_t i_reduce_min(const int16x8& a)
238{
239#if SIMDPP_USE_NULL
240 int16_t r = a.el(0);
241 for (unsigned i = 0; i < a.length; i++) {
242 r = r < a.el(i) ? r : a.el(i);
243 }
244 return r;
245#elif SIMDPP_USE_NEON64
246 return vminvq_s16(a.native());
247#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
248 int16x8 r = min(a, move8_l<4>(a));
249 r = min(r, move8_l<2>(r));
250 r = min(r, move8_l<1>(r));
251 return extract<0>(r);
252#endif
253}
254
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int16_t i_reduce_min(const int16x16& a)
{
    // Fold the two 128-bit halves together, then reduce within 128 bits.
    int16x8 lo = detail::extract128<0>(a);
    int16x8 hi = detail::extract128<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
264
#if SIMDPP_USE_AVX512BW
SIMDPP_INL int16_t i_reduce_min(const int16<32>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    int16<16> lo = detail::extract256<0>(a);
    int16<16> hi = detail::extract256<1>(a);
    return i_reduce_min(min(lo, hi));
}
#endif
273
274template<unsigned N>
275SIMDPP_INL int16_t i_reduce_min(const int16<N>& a)
276{
277#if SIMDPP_USE_NULL
278 int16_t r = std::numeric_limits<int16_t>::max();
279 for (unsigned j = 0; j < a.vec_length; ++j) {
280 for (unsigned i = 0; i < a.base_length; i++) {
281 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
282 }
283 }
284 return r;
285#else
286 int16v r = a.vec(0);
287 for (unsigned j = 1; j < a.vec_length; ++j) {
288 r = min(r, a.vec(j));
289 }
290 return i_reduce_min(r);
291#endif
292}
293
294// -----------------------------------------------------------------------------
295
296static SIMDPP_INL
297uint32_t i_reduce_min(const uint32x4& a)
298{
299#if SIMDPP_USE_NULL
300 uint32_t r = a.el(0);
301 for (unsigned i = 0; i < a.length; i++) {
302 r = r < a.el(i) ? r : a.el(i);
303 }
304 return r;
305#elif SIMDPP_USE_NEON64
306 return vminvq_u32(a.native());
307#elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
308 uint32x4 r = min(a, move4_l<2>(a));
309 r = min(r, move4_l<1>(r));
310 return extract<0>(r);
311#elif SIMDPP_USE_SSE2
312 mem_block<uint32x4> b = a;
313 uint32_t r = b[0];
314 for (unsigned i = 1; i < b.length; i++) {
315 r = r < b[i] ? r : b[i];
316 }
317 return r;
318#endif
319}
320
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint32_t i_reduce_min(const uint32x8& a)
{
    // Fold the two 128-bit halves, then tree-reduce within 128 bits.
    uint32x4 folded = detail::extract128<0>(a);
    folded = min(folded, detail::extract128<1>(a));
    folded = min(folded, move4_l<2>(folded));
    folded = min(folded, move4_l<1>(folded));
    return extract<0>(folded);
}
#endif
332
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint32_t i_reduce_min(const uint32<16>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint32<8> halves = min(extract256<0>(a), extract256<1>(a));
    return i_reduce_min(halves);
}
#endif
340
341template<unsigned N>
342SIMDPP_INL uint32_t i_reduce_min(const uint32<N>& a)
343{
344#if SIMDPP_USE_NULL
345 uint32_t r = std::numeric_limits<uint32_t>::max();
346 for (unsigned j = 0; j < a.vec_length; ++j) {
347 for (unsigned i = 0; i < a.base_length; i++) {
348 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
349 }
350 }
351 return r;
352#else
353 uint32v r = a.vec(0);
354 for (unsigned j = 1; j < a.vec_length; ++j) {
355 r = min(r, a.vec(j));
356 }
357 return i_reduce_min(r);
358#endif
359}
360
361// -----------------------------------------------------------------------------
362
363static SIMDPP_INL
364int32_t i_reduce_min(const int32x4& a)
365{
366#if SIMDPP_USE_NULL
367 int32_t r = a.el(0);
368 for (unsigned i = 1; i < a.length; i++) {
369 r = r < a.el(i) ? r : a.el(i);
370 }
371 return r;
372#elif SIMDPP_USE_NEON64
373 return vminvq_s32(a.native());
374#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
375 int32x4 r = min(a, move4_l<2>(a));
376 r = min(r, move4_l<1>(r));
377 return extract<0>(r);
378#endif
379}
380
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int32_t i_reduce_min(const int32x8& a)
{
    // Fold the two 128-bit halves, then tree-reduce within 128 bits.
    int32x4 folded = detail::extract128<0>(a);
    folded = min(folded, detail::extract128<1>(a));
    folded = min(folded, move4_l<2>(folded));
    folded = min(folded, move4_l<1>(folded));
    return extract<0>(folded);
}
#endif
392
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int32_t i_reduce_min(const int32<16>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    int32<8> halves = min(extract256<0>(a), extract256<1>(a));
    return i_reduce_min(halves);
}
#endif
400
401template<unsigned N>
402SIMDPP_INL int32_t i_reduce_min(const int32<N>& a)
403{
404#if SIMDPP_USE_NULL
405 int32_t r = std::numeric_limits<int32_t>::max();
406 for (unsigned j = 0; j < a.vec_length; ++j) {
407 for (unsigned i = 0; i < a.base_length; i++) {
408 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
409 }
410 }
411 return r;
412#else
413 int32v r = a.vec(0);
414 for (unsigned j = 1; j < a.vec_length; ++j) {
415 r = min(r, a.vec(j));
416 }
417 return i_reduce_min(r);
418#endif
419}
420
421// -----------------------------------------------------------------------------
422
423static SIMDPP_INL
424uint64_t i_reduce_min(const uint64x2& a)
425{
426#if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
427 uint64x2 r = min(a, move2_l<1>(a));
428 return extract<0>(r);
429#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32
430 mem_block<uint64x2> b = a;
431 return b[0] < b[1] ? b[0] : b[1];
432#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
433 uint64_t r = a.el(0);
434 for (unsigned i = 0; i < a.length; i++) {
435 r = r < a.el(i) ? r : a.el(i);
436 }
437 return r;
438#else
439 return SIMDPP_NOT_IMPLEMENTED1(a);
440#endif
441}
442
#if SIMDPP_USE_AVX2
static SIMDPP_INL
uint64_t i_reduce_min(const uint64x4& a)
{
    // Fold the two 128-bit halves, then fold the high lane onto the low one.
    uint64x2 folded = detail::extract128<0>(a);
    folded = min(folded, detail::extract128<1>(a));
    folded = min(folded, move2_l<1>(folded));
    return extract<0>(folded);
}
#endif
453
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
uint64_t i_reduce_min(const uint64<8>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    uint64<4> halves = min(extract256<0>(a), extract256<1>(a));
    return i_reduce_min(halves);
}
#endif
461
462template<unsigned N>
463SIMDPP_INL uint64_t i_reduce_min(const uint64<N>& a)
464{
465#if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
466 uint64v r = a.vec(0);
467 for (unsigned j = 1; j < a.vec_length; ++j) {
468 r = min(r, a.vec(j));
469 }
470 return i_reduce_min(r);
471#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32
472 uint64_t r = std::numeric_limits<uint64_t>::max();
473 for (unsigned j = 0; j < a.vec_length; ++j) {
474 mem_block<uint64v> b = a.vec(j);
475 for (unsigned i = 0; i < a.base_length; i++) {
476 r = r < b[i] ? r : b[i];
477 }
478 }
479 return r;
480#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
481 uint64_t r = std::numeric_limits<uint64_t>::max();
482 for (unsigned j = 0; j < a.vec_length; ++j) {
483 for (unsigned i = 0; i < a.base_length; i++) {
484 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
485 }
486 }
487 return r;
488#else
489 return SIMDPP_NOT_IMPLEMENTED1(a);
490#endif
491}
492
493// -----------------------------------------------------------------------------
494
495static SIMDPP_INL
496int64_t i_reduce_min(const int64x2& a)
497{
498#if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
499 int64x2 r = min(a, move2_l<1>(a));
500 return extract<0>(r);
501#elif SIMDPP_USE_SSE2
502 mem_block<int64x2> b = a;
503 return b[0] < b[1] ? b[0] : b[1];
504#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
505 int64_t r = a.el(0);
506 for (unsigned i = 0; i < a.length; i++) {
507 r = r < a.el(i) ? r : a.el(i);
508 }
509 return r;
510#else
511 return SIMDPP_NOT_IMPLEMENTED1(a);
512#endif
513}
514
#if SIMDPP_USE_AVX2
static SIMDPP_INL
int64_t i_reduce_min(const int64x4& a)
{
    // Fold the two 128-bit halves, then fold the high lane onto the low one.
    int64x2 folded = detail::extract128<0>(a);
    folded = min(folded, detail::extract128<1>(a));
    folded = min(folded, move2_l<1>(folded));
    return extract<0>(folded);
}
#endif
525
#if SIMDPP_USE_AVX512F
static SIMDPP_INL
int64_t i_reduce_min(const int64<8>& a)
{
    // Fold the two 256-bit halves together, then reduce within 256 bits.
    int64<4> halves = min(extract256<0>(a), extract256<1>(a));
    return i_reduce_min(halves);
}
#endif
533
534template<unsigned N>
535SIMDPP_INL int64_t i_reduce_min(const int64<N>& a)
536{
537#if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
538 int64v r = a.vec(0);
539 for (unsigned j = 1; j < a.vec_length; ++j) {
540 r = min(r, a.vec(j));
541 }
542 return i_reduce_min(r);
543#elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32
544 int64_t r = std::numeric_limits<int64_t>::max();
545 for (unsigned j = 0; j < a.vec_length; ++j) {
546 mem_block<int64v> b = a.vec(j);
547 for (unsigned i = 0; i < a.base_length; i++) {
548 r = r < b[i] ? r : b[i];
549 }
550 }
551 return r;
552#elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC
553 int64_t r = std::numeric_limits<int64_t>::max();
554 for (unsigned j = 0; j < a.vec_length; ++j) {
555 for (unsigned i = 0; i < a.base_length; i++) {
556 r = r < a.vec(j).el(i) ? r : a.vec(j).el(i);
557 }
558 }
559 return r;
560#else
561 return SIMDPP_NOT_IMPLEMENTED1(a);
562#endif
563}
564
565// -----------------------------------------------------------------------------
566
567} // namespace insn
568} // namespace detail
569} // namespace SIMDPP_ARCH_NAMESPACE
570} // namespace simdpp
571
572#endif
573
574