1 | /* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_CORE_I_MAX_H |
9 | #define LIBSIMDPP_SIMDPP_CORE_I_MAX_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/detail/insn/i_max.h> |
17 | #include <simdpp/core/detail/scalar_arg_impl.h> |
18 | |
19 | namespace simdpp { |
20 | namespace SIMDPP_ARCH_NAMESPACE { |
21 | |
22 | |
23 | /** Computes maximum of the signed 8-bit values. |
24 | |
25 | @code |
26 | r0 = max(a0, b0) |
27 | ... |
28 | rN = max(aN, bN) |
29 | @endcode |
30 | |
31 | @par 128-bit version: |
32 | @icost{SSE2-SSSE3, 4} |
33 | |
34 | @par 256-bit version: |
35 | @icost{SSE2-SSSE3, 8} |
36 | @icost{SSE4.1-AVX, NEON, ALTIVEC, 2} |
37 | */ |
38 | template<unsigned N, class E1, class E2> SIMDPP_INL |
39 | int8<N,expr_empty> max(const int8<N,E1>& a, const int8<N,E2>& b) |
40 | { |
41 | return detail::insn::i_max(a.eval(), b.eval()); |
42 | } |
43 | |
44 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, int8, int8) |
45 | |
46 | /** Computes maximum of the unsigned 8-bit values. |
47 | |
48 | @code |
49 | r0 = max(a0, b0) |
50 | ... |
51 | rN = max(aN, bN) |
52 | @endcode |
53 | |
54 | @par 256-bit version: |
55 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
56 | */ |
57 | template<unsigned N, class E1, class E2> SIMDPP_INL |
58 | uint8<N,expr_empty> max(const uint8<N,E1>& a, const uint8<N,E2>& b) |
59 | { |
60 | return detail::insn::i_max(a.eval(), b.eval()); |
61 | } |
62 | |
63 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint8, uint8) |
64 | |
65 | /** Computes maximum of the signed 16-bit values. |
66 | |
67 | @code |
68 | r0 = max(a0, b0) |
69 | ... |
70 | rN = max(aN, bN) |
71 | @endcode |
72 | |
73 | @par 256-bit version: |
74 | @icost{SSE2-AVX, NEON, ALTIVEC, 2} |
75 | */ |
76 | template<unsigned N, class E1, class E2> SIMDPP_INL |
77 | int16<N,expr_empty> max(const int16<N,E1>& a, const int16<N,E2>& b) |
78 | { |
79 | return detail::insn::i_max(a.eval(), b.eval()); |
80 | } |
81 | |
82 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, int16, int16) |
83 | |
84 | /** Computes maximum of the unsigned 16-bit values. |
85 | |
86 | @code |
87 | r0 = max(a0, b0) |
88 | ... |
89 | rN = max(aN, bN) |
90 | @endcode |
91 | |
92 | @par 128-bit version: |
93 | @icost{SSE2-SSSE3, 6-7} |
94 | |
95 | @par 256-bit version: |
96 | @icost{SSE2-SSSE3, 12-13} |
97 | @icost{SSE4.1-AVX, NEON, ALTIVEC, 2} |
98 | */ |
99 | template<unsigned N, class E1, class E2> SIMDPP_INL |
100 | uint16<N,expr_empty> max(const uint16<N,E1>& a, const uint16<N,E2>& b) |
101 | { |
102 | return detail::insn::i_max(a.eval(), b.eval()); |
103 | } |
104 | |
105 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint16, uint16) |
106 | |
107 | /** Computes maximum of the signed 32-bit values. |
108 | |
109 | @code |
110 | r0 = max(a0, b0) |
111 | ... |
112 | rN = max(aN, bN) |
113 | @endcode |
114 | |
115 | @par 128-bit version: |
116 | @icost{SSE2-SSSE3, 4} |
117 | |
118 | @par 256-bit version: |
119 | @icost{SSE2-SSSE3, 8} |
120 | @icost{SSE4.1-AVX, NEON, ALTIVEC, 2} |
121 | */ |
122 | template<unsigned N, class E1, class E2> SIMDPP_INL |
123 | int32<N,expr_empty> max(const int32<N,E1>& a, const int32<N,E2>& b) |
124 | { |
125 | return detail::insn::i_max(a.eval(), b.eval()); |
126 | } |
127 | |
128 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, int32, int32) |
129 | |
130 | /** Computes maximum of the unsigned 32-bit values. |
131 | |
132 | @code |
133 | r0 = max(a0, b0) |
134 | ... |
135 | rN = max(aN, bN) |
136 | @endcode |
137 | |
138 | @par 128-bit version: |
139 | @icost{SSE2-SSSE3, 6-7} |
140 | |
141 | @par 256-bit version: |
142 | @icost{SSE2-SSSE3, 12-13} |
143 | @icost{SSE4.1-AVX, NEON, ALTIVEC, 2} |
144 | */ |
145 | template<unsigned N, class E1, class E2> SIMDPP_INL |
146 | uint32<N,expr_empty> max(const uint32<N,E1>& a, const uint32<N,E2>& b) |
147 | { |
148 | return detail::insn::i_max(a.eval(), b.eval()); |
149 | } |
150 | |
151 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint32, uint32) |
152 | |
153 | /** Computes maximum of the signed 64-bit values. |
154 | |
155 | @code |
156 | r0 = max(a0, b0) |
157 | ... |
158 | rN = max(aN, bN) |
159 | @endcode |
160 | |
161 | Supported since AVX2, NEON64. Not supported on ALTIVEC. |
162 | */ |
163 | template<unsigned N, class E1, class E2> SIMDPP_INL |
164 | int64<N,expr_empty> max(const int64<N,E1>& a, const int64<N,E2>& b) |
165 | { |
166 | return detail::insn::i_max(a.eval(), b.eval()); |
167 | } |
168 | |
169 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, int64, int64) |
170 | |
171 | /** Computes maximum of the unsigned 64-bit values. |
172 | |
173 | @code |
174 | r0 = max(a0, b0) |
175 | ... |
176 | rN = max(aN, bN) |
177 | @endcode |
178 | |
179 | Supported since AVX2, NEON64. Not supported on ALTIVEC. |
180 | */ |
181 | template<unsigned N, class E1, class E2> SIMDPP_INL |
182 | uint64<N,expr_empty> max(const uint64<N,E1>& a, const uint64<N,E2>& b) |
183 | { |
184 | return detail::insn::i_max(a.eval(), b.eval()); |
185 | } |
186 | |
187 | SIMDPP_SCALAR_ARG_IMPL_VEC(max, uint64, uint64) |
188 | |
189 | } // namespace SIMDPP_ARCH_NAMESPACE |
190 | } // namespace simdpp |
191 | |
192 | #endif |
193 | |
194 | |