1/* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MIN_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MIN_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/f_min.h>
17#include <simdpp/core/extract.h>
18#include <simdpp/core/permute2.h>
19#include <simdpp/detail/extract128.h>
20#include <simdpp/detail/workarounds.h>
21
22namespace simdpp {
23namespace SIMDPP_ARCH_NAMESPACE {
24namespace detail {
25namespace insn {
26
27
28static SIMDPP_INL
29float i_reduce_min(const float32x4& a)
30{
31#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
32 float r = a.el(0);
33 for (unsigned i = 1; i < a.length; i++) {
34 r = r < a.el(i) ? r : a.el(i); // TODO nan
35 }
36 return r;
37#elif SIMDPP_USE_SSE2
38 float32x4 b = _mm_movehl_ps(a.native(), a.native());
39 b = min(a, b);
40 b = min(b, permute2<1,0>(b));
41 return _mm_cvtss_f32(b.native());
42#elif SIMDPP_USE_NEON64
43 return vminnmvq_f32(a.native());
44#elif SIMDPP_USE_NEON_FLT_SP
45 float32x2_t a2 = vpmin_f32(vget_low_f32(a.native()), vget_high_f32(a.native()));
46 a2 = vpmin_f32(a2, a2);
47 return vget_lane_f32(a2, 0);
48#elif SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA
49 float32x4 b = min(a, move4_l<1>(a));
50 b = min(b, move4_l<2>(b));
51 return extract<0>(b);
52#endif
53}
54
55#if SIMDPP_USE_AVX
56static SIMDPP_INL
57float i_reduce_min(const float32x8& a)
58{
59 float32x4 ah = detail::extract128<1>(a);
60 float32x4 al = detail::extract128<0>(a);
61 al = min(al, ah);
62 return i_reduce_min(al);
63}
64#endif
65
66#if SIMDPP_USE_AVX512F
67static SIMDPP_INL
68float i_reduce_min(const float32<16>& a)
69{
70 return i_reduce_min(min(extract256<0>(a), extract256<1>(a)));
71}
72#endif
73
74template<unsigned N>
75SIMDPP_INL float i_reduce_min(const float32<N>& a)
76{
77 float32v r = a.vec(0);
78 for (unsigned i = 1; i < a.vec_length; ++i)
79 r = min(r, a.vec(i));
80 return i_reduce_min(r);
81}
82
83// -----------------------------------------------------------------------------
84
85static SIMDPP_INL
86double i_reduce_min(const float64x2& a)
87{
88#if SIMDPP_USE_SSE2
89 float64x2 b = min(a, permute2<1,1>(a));
90 return _mm_cvtsd_f64(b.native());
91#elif SIMDPP_USE_NEON64
92 return vminnmvq_f64(a.native());
93#elif SIMDPP_USE_VSX_206 || SIMDPP_USE_MSA
94 float64x2 b = min(a, permute2<1,1>(a));
95 return extract<0>(b);
96#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
97 double r = a.el(0);
98 for (unsigned i = 1; i < a.length; i++) {
99 r = r < a.el(i) ? r : a.el(i); // TODO nan
100 }
101 return r;
102#endif
103}
104
105#if SIMDPP_USE_AVX
106static SIMDPP_INL
107double i_reduce_min(const float64x4& a)
108{
109 float64x2 ah = detail::extract128<1>(a);
110 float64x2 al = detail::extract128<0>(a);
111 al = min(al, ah);
112 return i_reduce_min(al);
113}
114#endif
115
116#if SIMDPP_USE_AVX512F
117static SIMDPP_INL
118double i_reduce_min(const float64<8>& a)
119{
120 return i_reduce_min(min(extract256<0>(a), extract256<1>(a)));
121}
122#endif
123
124template<unsigned N>
125SIMDPP_INL double i_reduce_min(const float64<N>& a)
126{
127 float64v r = a.vec(0);
128 for (unsigned i = 1; i < a.vec_length; ++i)
129 r = min(r, a.vec(i));
130 return i_reduce_min(r);
131}
132
133} // namespace insn
134} // namespace detail
135} // namespace SIMDPP_ARCH_NAMESPACE
136} // namespace simdpp
137
138#endif
139
140