f_div.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_div.h]

/*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_DIV_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_DIV_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/f_rcp_e.h>
17	#include <simdpp/core/f_rcp_rh.h>
18	#include <simdpp/detail/vector_array_macros.h>
19
20	namespace simdpp {
21	namespace SIMDPP_ARCH_NAMESPACE {
22	namespace detail {
23	namespace insn {
24
25
26	static SIMDPP_INL
27	float32x4 i_div(const float32x4& a, const float32x4& b)
28	{
29	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
30	float32x4 r;
31	for (unsigned i = `0`; i < a.length; i++) {
32	r.el(i) = a.el(i) / b.el(i);
33	}
34	return r;
35	#elif SIMDPP_USE_SSE2
36	return _mm_div_ps(a.native(), b.native());
37	#elif SIMDPP_USE_NEON64
38	return vdivq_f32(a.native(), b.native());
39	#elif SIMDPP_USE_NEON_FLT_SP
40	float32x4 x;
41	x = rcp_e(b);
42	x = rcp_rh(x, b);
43	x = rcp_rh(x, b);
44	return mul(a, x);
45	#elif SIMDPP_USE_VSX_206
46	return vec_div(a.native(), b.native());
47	#elif SIMDPP_USE_ALTIVEC
48	float32x4 x;
49	x = rcp_e(b);
50	x = rcp_rh(x, b);
51	x = rcp_rh(x, b); // TODO: check how many approximation steps are needed
52	return mul(a, x);
53	#elif SIMDPP_USE_MSA
54	return __msa_fdiv_w(a.native(), b.native());
55	#endif
56	}
57
#if SIMDPP_USE_AVX
/*  Lane-wise division of two float32x8 vectors.
    Only compiled when SIMDPP_USE_AVX is set; forwards to _mm256_div_ps.
*/
static SIMDPP_INL
float32x8 i_div(const float32x8& a, const float32x8& b)
{
    float32x8 quotient = _mm256_div_ps(a.native(), b.native());
    return quotient;
}
#endif
65
#if SIMDPP_USE_AVX512F
/*  Lane-wise division of two float32<16> vectors.
    Only compiled when SIMDPP_USE_AVX512F is set; forwards to _mm512_div_ps.
*/
static SIMDPP_INL
float32<16> i_div(const float32<16>& a, const float32<16>& b)
{
    return _mm512_div_ps(a.native(), b.native());
}
#endif
73
74	// -----------------------------------------------------------------------------
75
76	static SIMDPP_INL
77	float64x2 i_div(const float64x2& a, const float64x2& b)
78	{
79	#if SIMDPP_USE_SSE2
80	return _mm_div_pd(a.native(), b.native());
81	#elif SIMDPP_USE_NEON64
82	return vdivq_f64(a.native(), b.native());
83	#elif SIMDPP_USE_VSX_206
84	return vec_div(a.native(), b.native());
85	#elif SIMDPP_USE_MSA
86	return __msa_fdiv_d(a.native(), b.native());
87	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
88	float64x2 r;
89	for (unsigned i = `0`; i < a.length; i++) {
90	r.el(i) = a.el(i) / b.el(i);
91	}
92	return r;
93	#endif
94	}
95
#if SIMDPP_USE_AVX
/*  Lane-wise division of two float64x4 vectors.
    Only compiled when SIMDPP_USE_AVX is set; forwards to _mm256_div_pd.
*/
static SIMDPP_INL
float64x4 i_div(const float64x4& a, const float64x4& b)
{
    float64x4 quotient = _mm256_div_pd(a.native(), b.native());
    return quotient;
}
#endif
103
#if SIMDPP_USE_AVX512F
/*  Lane-wise division of two float64<8> vectors.
    Only compiled when SIMDPP_USE_AVX512F is set; forwards to _mm512_div_pd.
*/
static SIMDPP_INL
float64<8> i_div(const float64<8>& a, const float64<8>& b)
{
    return _mm512_div_pd(a.native(), b.native());
}
#endif
111
112	// -----------------------------------------------------------------------------
113
114	template<class V> SIMDPP_INL
115	V i_div(const V& a, const V& b)
116	{
117	SIMDPP_VEC_ARRAY_IMPL2(V, i_div, a, b);
118	}
119
120	} // namespace insn
121	} // namespace detail
122	} // namespace SIMDPP_ARCH_NAMESPACE
123	} // namespace simdpp
124
125	#endif
126
127

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_div.h