1 | /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_SQRT_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_SQRT_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <cmath> |
16 | #include <simdpp/types.h> |
17 | #include <simdpp/core/f_rsqrt_e.h> |
18 | #include <simdpp/core/f_rsqrt_rh.h> |
19 | #include <simdpp/detail/null/math.h> |
20 | #include <simdpp/detail/vector_array_macros.h> |
21 | |
22 | namespace simdpp { |
23 | namespace SIMDPP_ARCH_NAMESPACE { |
24 | namespace detail { |
25 | namespace insn { |
26 | |
27 | |
28 | static SIMDPP_INL |
29 | float32x4 i_sqrt(const float32x4& a) |
30 | { |
31 | #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP |
32 | float32x4 r; |
33 | for (unsigned i = 0; i < a.length; i++) { |
34 | r.el(i) = std::sqrt(a.el(i)); |
35 | } |
36 | return r; |
37 | #elif SIMDPP_USE_SSE2 |
38 | return _mm_sqrt_ps(a.native()); |
39 | #elif SIMPDP_USE_NEON64 |
40 | return vsqrtq_f32(a.native()); |
41 | #elif SIMDPP_USE_NEON_FLT_SP || SIMDPP_USE_ALTIVEC |
42 | float32x4 x; |
43 | x = rsqrt_e(a); |
44 | x = rsqrt_rh(x, a); |
45 | return mul(a, x); |
46 | #elif SIMDPP_USE_MSA |
47 | return __msa_fsqrt_w(a.native()); |
48 | #endif |
49 | } |
50 | |
#if SIMDPP_USE_AVX
// Square root of every element of a float32x8 vector. Only compiled when
// SIMDPP_USE_AVX is enabled; the work is done by the _mm256_sqrt_ps intrinsic
// on the vector's native representation.
static SIMDPP_INL
float32x8 i_sqrt(const float32x8& a)
{
    float32x8 result = _mm256_sqrt_ps(a.native());
    return result;
}
#endif
58 | |
#if SIMDPP_USE_AVX512F
// Square root of every element of a float32<16> vector. Only compiled when
// SIMDPP_USE_AVX512F is enabled; the work is done by the _mm512_sqrt_ps
// intrinsic on the vector's native representation.
static SIMDPP_INL
float32<16> i_sqrt(const float32<16>& a)
{
    float32<16> result = _mm512_sqrt_ps(a.native());
    return result;
}
#endif
66 | |
// Generic overload for float32 vectors of arbitrary length N. Being a
// template, it is only chosen when none of the non-template float32 i_sqrt
// overloads matches the argument type exactly.
//
// The entire body, including the return statement, is generated by
// SIMDPP_VEC_ARRAY_IMPL1 (simdpp/detail/vector_array_macros.h).
// NOTE(review): presumably the macro calls i_sqrt on each native sub-vector
// of a and assembles the results into a float32<N> -- confirm against the
// macro definition.
template<unsigned N> SIMDPP_INL
float32<N> i_sqrt(const float32<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(float32<N>, i_sqrt, a);
}
72 | |
73 | // ----------------------------------------------------------------------------- |
74 | |
75 | static SIMDPP_INL |
76 | float64x2 i_sqrt(const float64x2& a) |
77 | { |
78 | #if SIMDPP_USE_SSE2 |
79 | return _mm_sqrt_pd(a.native()); |
80 | #elif SIMDPP_USE_NEON64 |
81 | return vsqrtq_f64(a.native()); |
82 | #elif SIMDPP_USE_VSX_206 |
83 | return vec_sqrt(a.native()); |
84 | #elif SIMDPP_USE_MSA |
85 | return __msa_fsqrt_d(a.native()); |
86 | #elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC |
87 | float64x2 r; |
88 | for (unsigned i = 0; i < a.length; i++) { |
89 | r.el(i) = std::sqrt(a.el(i)); |
90 | } |
91 | return r; |
92 | #endif |
93 | } |
94 | |
#if SIMDPP_USE_AVX
// Square root of every element of a float64x4 vector. Only compiled when
// SIMDPP_USE_AVX is enabled; the work is done by the _mm256_sqrt_pd intrinsic
// on the vector's native representation.
static SIMDPP_INL
float64x4 i_sqrt(const float64x4& a)
{
    float64x4 result = _mm256_sqrt_pd(a.native());
    return result;
}
#endif
102 | |
#if SIMDPP_USE_AVX512F
// Square root of every element of a float64<8> vector. Only compiled when
// SIMDPP_USE_AVX512F is enabled; the work is done by the _mm512_sqrt_pd
// intrinsic on the vector's native representation.
static SIMDPP_INL
float64<8> i_sqrt(const float64<8>& a)
{
    float64<8> result = _mm512_sqrt_pd(a.native());
    return result;
}
#endif
110 | |
// Generic overload for float64 vectors of arbitrary length N. Being a
// template, it is only chosen when none of the non-template float64 i_sqrt
// overloads matches the argument type exactly.
//
// The entire body, including the return statement, is generated by
// SIMDPP_VEC_ARRAY_IMPL1 (simdpp/detail/vector_array_macros.h).
// NOTE(review): presumably the macro calls i_sqrt on each native sub-vector
// of a and assembles the results into a float64<N> -- confirm against the
// macro definition.
template<unsigned N> SIMDPP_INL
float64<N> i_sqrt(const float64<N>& a)
{
    SIMDPP_VEC_ARRAY_IMPL1(float64<N>, i_sqrt, a);
}
116 | |
117 | |
118 | } // namespace insn |
119 | } // namespace detail |
120 | } // namespace SIMDPP_ARCH_NAMESPACE |
121 | } // namespace simdpp |
122 | |
123 | #endif |
124 | |
125 | |