1 | /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_DETAIL_NULL_MATH_H |
9 | #define LIBSIMDPP_DETAIL_NULL_MATH_H |
10 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC |
11 | |
12 | #ifndef LIBSIMDPP_SIMD_H |
13 | #error "This file must be included through simd.h" |
14 | #endif |
15 | |
16 | #include <simdpp/types.h> |
17 | #include <simdpp/core/cast.h> |
18 | |
19 | #include <cmath> |
20 | #include <cstdlib> |
21 | #include <limits> |
22 | |
23 | // On certain versions of MSVC min and max are defined as macros. |
24 | #if _MSC_VER |
25 | #ifdef min |
26 | #undef min |
27 | #endif |
28 | |
29 | #ifdef max |
30 | #undef max |
31 | #endif |
32 | #endif |
33 | |
34 | namespace simdpp { |
35 | namespace SIMDPP_ARCH_NAMESPACE { |
36 | namespace detail { |
37 | namespace null { |
38 | |
/** Builds a per-element NaN mask for @a a.

    Element i of the result is set to 1 when std::isnan() reports element i
    of @a a as NaN, and to 0 otherwise.
*/
template<class V> SIMDPP_INL
typename V::mask_vector_type isnan(const V& a)
{
    typename V::mask_vector_type mask;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (std::isnan(a.el(idx))) {
            mask.el(idx) = 1;
        } else {
            mask.el(idx) = 0;
        }
    }
    return mask;
}
48 | |
/** Builds a per-element mask that flags positions where either input is NaN.

    Element i of the result is 1 when std::isnan() is true for element i of
    @a a or of @a b, and 0 when both elements are not NaN.
*/
template<class V> SIMDPP_INL
typename V::mask_vector_type isnan2(const V& a, const V& b)
{
    typename V::mask_vector_type mask;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (std::isnan(a.el(idx)) || std::isnan(b.el(idx))) {
            mask.el(idx) = 1;
        } else {
            mask.el(idx) = 0;
        }
    }
    return mask;
}
58 | |
/** Computes the absolute value of each element of @a a.

    Each result element is produced by calling std::abs() on the matching
    source element.
*/
template<class V> SIMDPP_INL
V abs(const V& a)
{
    V result;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        result.el(idx) = std::abs(a.el(idx));
    }
    return result;
}
68 | |
/** Clamps @a t to the range representable by @a T and converts it to @a T.

    @tparam T  destination type whose numeric limits define the clamp range
    @tparam U  type in which the comparison is carried out; both limits of
               @a T are converted to @a U first, so @a U is expected to be
               able to represent them
    @param t   value to clamp
    @return    @a t if it lies within the limits of @a T, otherwise the
               nearest limit

    The lower bound is numeric_limits<T>::lowest() rather than min(): the two
    are identical for integer types, but for floating-point types min() is the
    smallest positive normal value and would clamp every negative input to a
    tiny positive number.
*/
template<class T, class U> SIMDPP_INL
T saturate(U t)
{
    const U lo = std::numeric_limits<T>::lowest();
    const U hi = std::numeric_limits<T>::max();
    if (t < lo) {
        t = lo;
    }
    if (t > hi) {
        t = hi;
    }
    // t is within [lo, hi] here, so the narrowing conversion is value-preserving.
    return static_cast<T>(t);
}
78 | |
/** Adds the elements of @a a and @a b pairwise.

    Element i of the result is a.el(i) + b.el(i), evaluated with the element
    type's own + operator.
*/
template<class V> SIMDPP_INL
V add(const V& a, const V& b)
{
    V sum;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        sum.el(idx) = a.el(idx) + b.el(idx);
    }
    return sum;
}
88 | |
89 | template<class V> SIMDPP_INL |
90 | V add_sat(const V& a, const V& b) |
91 | { |
92 | V r; |
93 | for (unsigned i = 0; i < V::length; i++) { |
94 | r.el(i) = saturate<typename V::element_type>(int32_t(a.el(i)) + b.el(i)); |
95 | } |
96 | return r; |
97 | } |
98 | |
/** Subtracts the elements of @a b from those of @a a pairwise.

    Element i of the result is a.el(i) - b.el(i), evaluated with the element
    type's own - operator.
*/
template<class V> SIMDPP_INL
V sub(const V& a, const V& b)
{
    V diff;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        diff.el(idx) = a.el(idx) - b.el(idx);
    }
    return diff;
}
108 | |
109 | template<class V> SIMDPP_INL |
110 | V sub_sat(const V& a, const V& b) |
111 | { |
112 | V r; |
113 | for (unsigned i = 0; i < V::length; i++) { |
114 | r.el(i) = saturate<typename V::element_type>(int32_t(a.el(i)) - b.el(i)); |
115 | } |
116 | return r; |
117 | } |
118 | |
/** Negates every element of @a a.

    Element i of the result is -a.el(i).
*/
template<class V> SIMDPP_INL
V neg(const V& a)
{
    V negated;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        negated.el(idx) = -a.el(idx);
    }
    return negated;
}
128 | |
/** Multiplies the elements of @a a and @a b pairwise.

    Element i of the result is a.el(i) * b.el(i), evaluated with the element
    type's own * operator.
*/
template<class V> SIMDPP_INL
V mul(const V& a, const V& b)
{
    V product;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        product.el(idx) = a.el(idx) * b.el(idx);
    }
    return product;
}
138 | |
/** Per-element fused multiply-add: a * b + c.

    Each result element is obtained from a single std::fma() call on the
    matching elements of @a a, @a b and @a c.
*/
template<class V> SIMDPP_INL
V fmadd(const V& a, const V& b, const V& c)
{
    V result;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        result.el(idx) = std::fma(a.el(idx), b.el(idx), c.el(idx));
    }
    return result;
}
148 | |
/** Per-element fused multiply-subtract: a * b - c.

    Implemented as std::fma() with the third operand negated, i.e.
    fma(a, b, -c) for each element.
*/
template<class V> SIMDPP_INL
V fmsub(const V& a, const V& b, const V& c)
{
    V result;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        result.el(idx) = std::fma(a.el(idx), b.el(idx), -c.el(idx));
    }
    return result;
}
158 | |
/** Divides the elements of @a a by those of @a b using only the low @a P
    bits of each operand.

    Both operands are masked down to their @a P least significant bits, the
    quotient is computed on the masked values, and the stored result is
    masked to @a P bits again.

    @tparam P  number of low bits taken into account. The mask is built by
               shifting a 64-bit value left by @a P, so @a P must be less
               than 64.
*/
template<unsigned P, class V> SIMDPP_INL
V div_p(const V& a, const V& b)
{
    // All-ones shifted left by P and inverted: only the low P bits are set.
    const uint64_t low_bits = ~(~uint64_t(0) << P);

    V quotient;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        const uint64_t num = a.el(idx) & low_bits;
        const uint64_t den = b.el(idx) & low_bits;
        quotient.el(idx) = num / den;
        quotient.el(idx) &= low_bits;
    }
    return quotient;
}
175 | |
176 | |
/** Shifts every element of @a a right by the same number of bits.

    Element i of the result is a.el(i) >> shift, using the element type's
    built-in >> operator.
*/
template<class V> SIMDPP_INL
V shift_r(const V& a, unsigned shift)
{
    V shifted;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        shifted.el(idx) = a.el(idx) >> shift;
    }
    return shifted;
}
186 | |
/** Shifts each element of @a a right by a per-element bit count.

    Element i of the result is a.el(i) >> shift.el(i); the shift amounts are
    read from the matching positions of @a shift.
*/
template<class V, class S> SIMDPP_INL
V shift_r_v(const V& a, const S& shift)
{
    V shifted;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        shifted.el(idx) = a.el(idx) >> shift.el(idx);
    }
    return shifted;
}
196 | |
/** Shifts every element of @a a left by the same number of bits.

    Element i of the result is a.el(i) << shift, using the element type's
    built-in << operator.
*/
template<class V> SIMDPP_INL
V shift_l(const V& a, unsigned shift)
{
    V shifted;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        shifted.el(idx) = a.el(idx) << shift;
    }
    return shifted;
}
206 | |
/** Shifts each element of @a a left by a per-element bit count.

    Element i of the result is a.el(i) << shift.el(i); the shift amounts are
    read from the matching positions of @a shift.
*/
template<class V, class S> SIMDPP_INL
V shift_l_v(const V& a, const S& shift)
{
    V shifted;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        shifted.el(idx) = a.el(idx) << shift.el(idx);
    }
    return shifted;
}
216 | |
/** Selects the smaller of each pair of elements from @a a and @a b.

    The element of @a a is taken when a.el(i) <= b.el(i) holds; in every
    other case (including when the comparison is false because an operand is
    unordered) the element of @a b is taken.
*/
template<class V> SIMDPP_INL
V min(const V& a, const V& b)
{
    V smaller;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (a.el(idx) <= b.el(idx)) {
            smaller.el(idx) = a.el(idx);
        } else {
            smaller.el(idx) = b.el(idx);
        }
    }
    return smaller;
}
226 | |
/** Selects the larger of each pair of elements from @a a and @a b.

    The element of @a a is taken when a.el(i) >= b.el(i) holds; in every
    other case (including when the comparison is false because an operand is
    unordered) the element of @a b is taken.
*/
template<class V> SIMDPP_INL
V max(const V& a, const V& b)
{
    V larger;
    for (unsigned idx = 0; idx < V::length; ++idx) {
        if (a.el(idx) >= b.el(idx)) {
            larger.el(idx) = a.el(idx);
        } else {
            larger.el(idx) = b.el(idx);
        }
    }
    return larger;
}
236 | |
237 | } // namespace null |
238 | } // namespace detail |
239 | } // namespace SIMDPP_ARCH_NAMESPACE |
240 | } // namespace simdpp |
241 | |
242 | #endif |
243 | #endif |
244 | |