f_rsqrt_rh.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_rsqrt_rh.h]

1	/ Copyright (C) 2013-2014 Povilas Kanapickas <povilas@radix.lt>*
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_RSQRT_RH_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_RSQRT_RH_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <cmath>
16	#include <simdpp/types.h>
17	#include <simdpp/core/f_mul.h>
18	#include <simdpp/core/f_sub.h>
19	#include <simdpp/detail/null/math.h>
20	#include <simdpp/detail/vector_array_macros.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27	template<class V> SIMDPP_INL
28	V v_rsqrt_rh(const V& cx, const V& a)
29	{
30	V x2, r, x = cx;
31
32	x2 = mul(x, x);
33	r = mul(a, x2);
34	r = sub(`3.0`, r);
35	x = mul(x, `0.5`);
36	r = mul(x, r);
37
38	return r;
39	}
40
41	static SIMDPP_INL
42	float32x4 i_rsqrt_rh(const float32x4& cx, const float32x4& a)
43	{
44	// x_n = x(3-dxx)/2*
45	float32<`4`> x = cx;
46	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
47	float32x4 r;
48	for (unsigned i = `0`; i < cx.length; i++) {
49	float ix = x.el(i);
50	float ia = a.el(i);
51	r.el(i) = ix * (`3.0f` - iaixix) * `0.5f`;
52	}
53	return r;
54	#elif SIMDPP_USE_SSE2 \|\| SIMDPP_USE_MSA
55	return v_rsqrt_rh(x, a);
56	#elif SIMDPP_USE_NEON_FLT_SP
57	float32x4 x2, r;
58
59	x2 = mul(x, x);
60	r = vrsqrtsq_f32(a.native(), x2.native());
61	x = mul(x, r);
62
63	return x;
64	#elif SIMDPP_USE_ALTIVEC
65	float32x4 x2, r, xp5, c3;
66
67	c3 = make_float(`3.0f`);
68
69	x2 = mul(x, x);
70	// r = (c3 - ax2)*
71	r = vec_nmsub(a.native(), x2.native(), c3.native());
72	xp5 = mul(x, `0.5`);
73	r = mul(xp5, r);
74
75	return r;
76	#endif
77	}
78
79	#if SIMDPP_USE_AVX
80	static SIMDPP_INL
81	float32x8 i_rsqrt_rh(const float32x8& x, const float32x8& a)
82	{
83	return v_rsqrt_rh(x, a);
84	}
85	#endif
86
87	#if SIMDPP_USE_AVX512F
88	static SIMDPP_INL
89	float32<`16`> i_rsqrt_rh(const float32<`16`>& x, const float32<`16`>& a)
90	{
91	return v_rsqrt_rh(x, a);
92	}
93	#endif
94
95
96	template<unsigned N> SIMDPP_INL
97	float32<N> i_rsqrt_rh(const float32<N>& x, const float32<N>& a)
98	{
99	SIMDPP_VEC_ARRAY_IMPL2(float32<N>, i_rsqrt_rh, x, a);
100	}
101
102	} // namespace insn
103	} // namespace detail
104	} // namespace SIMDPP_ARCH_NAMESPACE
105	} // namespace simdpp
106
107	#endif
108
109

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_rsqrt_rh.h