1/* Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_RSQRT_RH_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_RSQRT_RH_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <cmath>
16#include <simdpp/types.h>
17#include <simdpp/core/f_mul.h>
18#include <simdpp/core/f_sub.h>
19#include <simdpp/detail/null/math.h>
20#include <simdpp/detail/vector_array_macros.h>
21
22namespace simdpp {
23namespace SIMDPP_ARCH_NAMESPACE {
24namespace detail {
25namespace insn {
26
27template<class V> SIMDPP_INL
28V v_rsqrt_rh(const V& cx, const V& a)
29{
30 V x2, r, x = cx;
31
32 x2 = mul(x, x);
33 r = mul(a, x2);
34 r = sub(3.0, r);
35 x = mul(x, 0.5);
36 r = mul(x, r);
37
38 return r;
39}
40
41static SIMDPP_INL
42float32x4 i_rsqrt_rh(const float32x4& cx, const float32x4& a)
43{
44 // x_n = x*(3-d*x*x)/2
45 float32<4> x = cx;
46#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
47 float32x4 r;
48 for (unsigned i = 0; i < cx.length; i++) {
49 float ix = x.el(i);
50 float ia = a.el(i);
51 r.el(i) = ix * (3.0f - ia*ix*ix) * 0.5f;
52 }
53 return r;
54#elif SIMDPP_USE_SSE2 || SIMDPP_USE_MSA
55 return v_rsqrt_rh(x, a);
56#elif SIMDPP_USE_NEON_FLT_SP
57 float32x4 x2, r;
58
59 x2 = mul(x, x);
60 r = vrsqrtsq_f32(a.native(), x2.native());
61 x = mul(x, r);
62
63 return x;
64#elif SIMDPP_USE_ALTIVEC
65 float32x4 x2, r, xp5, c3;
66
67 c3 = make_float(3.0f);
68
69 x2 = mul(x, x);
70 // r = (c3 - a*x2)
71 r = vec_nmsub(a.native(), x2.native(), c3.native());
72 xp5 = mul(x, 0.5);
73 r = mul(xp5, r);
74
75 return r;
76#endif
77}
78
79#if SIMDPP_USE_AVX
80static SIMDPP_INL
81float32x8 i_rsqrt_rh(const float32x8& x, const float32x8& a)
82{
83 return v_rsqrt_rh(x, a);
84}
85#endif
86
87#if SIMDPP_USE_AVX512F
88static SIMDPP_INL
89float32<16> i_rsqrt_rh(const float32<16>& x, const float32<16>& a)
90{
91 return v_rsqrt_rh(x, a);
92}
93#endif
94
95
96template<unsigned N> SIMDPP_INL
97float32<N> i_rsqrt_rh(const float32<N>& x, const float32<N>& a)
98{
99 SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_rsqrt_rh, x, a);
100}
101
102} // namespace insn
103} // namespace detail
104} // namespace SIMDPP_ARCH_NAMESPACE
105} // namespace simdpp
106
107#endif
108
109