1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_SQRT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_SQRT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <cmath>
16#include <simdpp/types.h>
17#include <simdpp/core/f_rsqrt_e.h>
18#include <simdpp/core/f_rsqrt_rh.h>
19#include <simdpp/detail/null/math.h>
20#include <simdpp/detail/vector_array_macros.h>
21
22namespace simdpp {
23namespace SIMDPP_ARCH_NAMESPACE {
24namespace detail {
25namespace insn {
26
27
28static SIMDPP_INL
29float32x4 i_sqrt(const float32x4& a)
30{
31#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
32 float32x4 r;
33 for (unsigned i = 0; i < a.length; i++) {
34 r.el(i) = std::sqrt(a.el(i));
35 }
36 return r;
37#elif SIMDPP_USE_SSE2
38 return _mm_sqrt_ps(a.native());
39#elif SIMPDP_USE_NEON64
40 return vsqrtq_f32(a.native());
41#elif SIMDPP_USE_NEON_FLT_SP || SIMDPP_USE_ALTIVEC
42 float32x4 x;
43 x = rsqrt_e(a);
44 x = rsqrt_rh(x, a);
45 return mul(a, x);
46#elif SIMDPP_USE_MSA
47 return __msa_fsqrt_w(a.native());
48#endif
49}
50
#if SIMDPP_USE_AVX
// Element-wise square root of an eight-element float32 vector.
// Only compiled when SIMDPP_USE_AVX is enabled; forwards the native value of
// `a` to _mm256_sqrt_ps and wraps the result back into a float32x8.
static SIMDPP_INL
float32x8 i_sqrt(const float32x8& a)
{
    float32x8 roots = _mm256_sqrt_ps(a.native());
    return roots;
}
#endif
58
#if SIMDPP_USE_AVX512F
// Element-wise square root of a sixteen-element float32 vector.
// Only compiled when SIMDPP_USE_AVX512F is enabled; forwards the native value
// of `a` to _mm512_sqrt_ps and wraps the result back into a float32<16>.
static SIMDPP_INL
float32<16> i_sqrt(const float32<16>& a)
{
    float32<16> roots = _mm512_sqrt_ps(a.native());
    return roots;
}
#endif
66
67template<unsigned N> SIMDPP_INL
68float32<N> i_sqrt(const float32<N>& a)
69{
70 SIMDPP_VEC_ARRAY_IMPL1(float32<N>, i_sqrt, a);
71}
72
73// -----------------------------------------------------------------------------
74
75static SIMDPP_INL
76float64x2 i_sqrt(const float64x2& a)
77{
78#if SIMDPP_USE_SSE2
79 return _mm_sqrt_pd(a.native());
80#elif SIMDPP_USE_NEON64
81 return vsqrtq_f64(a.native());
82#elif SIMDPP_USE_VSX_206
83 return vec_sqrt(a.native());
84#elif SIMDPP_USE_MSA
85 return __msa_fsqrt_d(a.native());
86#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
87 float64x2 r;
88 for (unsigned i = 0; i < a.length; i++) {
89 r.el(i) = std::sqrt(a.el(i));
90 }
91 return r;
92#endif
93}
94
#if SIMDPP_USE_AVX
// Element-wise square root of a four-element float64 vector.
// Only compiled when SIMDPP_USE_AVX is enabled; forwards the native value of
// `a` to _mm256_sqrt_pd and wraps the result back into a float64x4.
static SIMDPP_INL
float64x4 i_sqrt(const float64x4& a)
{
    float64x4 roots = _mm256_sqrt_pd(a.native());
    return roots;
}
#endif
102
#if SIMDPP_USE_AVX512F
// Element-wise square root of an eight-element float64 vector.
// Only compiled when SIMDPP_USE_AVX512F is enabled; forwards the native value
// of `a` to _mm512_sqrt_pd and wraps the result back into a float64<8>.
static SIMDPP_INL
float64<8> i_sqrt(const float64<8>& a)
{
    float64<8> roots = _mm512_sqrt_pd(a.native());
    return roots;
}
#endif
110
111template<unsigned N> SIMDPP_INL
112float64<N> i_sqrt(const float64<N>& a)
113{
114 SIMDPP_VEC_ARRAY_IMPL1(float64<N>, i_sqrt, a);
115}
116
117
118} // namespace insn
119} // namespace detail
120} // namespace SIMDPP_ARCH_NAMESPACE
121} // namespace simdpp
122
123#endif
124
125