1/* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_DIV_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_DIV_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/core/f_rcp_e.h>
17#include <simdpp/core/f_rcp_rh.h>
18#include <simdpp/detail/vector_array_macros.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25
26static SIMDPP_INL
27float32x4 i_div(const float32x4& a, const float32x4& b)
28{
29#if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
30 float32x4 r;
31 for (unsigned i = 0; i < a.length; i++) {
32 r.el(i) = a.el(i) / b.el(i);
33 }
34 return r;
35#elif SIMDPP_USE_SSE2
36 return _mm_div_ps(a.native(), b.native());
37#elif SIMDPP_USE_NEON64
38 return vdivq_f32(a.native(), b.native());
39#elif SIMDPP_USE_NEON_FLT_SP
40 float32x4 x;
41 x = rcp_e(b);
42 x = rcp_rh(x, b);
43 x = rcp_rh(x, b);
44 return mul(a, x);
45#elif SIMDPP_USE_VSX_206
46 return vec_div(a.native(), b.native());
47#elif SIMDPP_USE_ALTIVEC
48 float32x4 x;
49 x = rcp_e(b);
50 x = rcp_rh(x, b);
51 x = rcp_rh(x, b); // TODO: check how many approximation steps are needed
52 return mul(a, x);
53#elif SIMDPP_USE_MSA
54 return __msa_fdiv_w(a.native(), b.native());
55#endif
56}
57
#if SIMDPP_USE_AVX
// Lane-wise division of two 8 x float32 vectors; returns a / b.
// Only compiled when SIMDPP_USE_AVX is set; forwards to _mm256_div_ps.
static SIMDPP_INL
float32x8 i_div(const float32x8& a, const float32x8& b)
{
    float32x8 quot = _mm256_div_ps(a.native(), b.native());
    return quot;
}
#endif
65
#if SIMDPP_USE_AVX512F
// Lane-wise division of two 16 x float32 vectors; returns a / b.
// Only compiled when SIMDPP_USE_AVX512F is set; forwards to _mm512_div_ps.
static SIMDPP_INL
float32<16> i_div(const float32<16>& a, const float32<16>& b)
{
    float32<16> quot = _mm512_div_ps(a.native(), b.native());
    return quot;
}
#endif
73
74// -----------------------------------------------------------------------------
75
76static SIMDPP_INL
77float64x2 i_div(const float64x2& a, const float64x2& b)
78{
79#if SIMDPP_USE_SSE2
80 return _mm_div_pd(a.native(), b.native());
81#elif SIMDPP_USE_NEON64
82 return vdivq_f64(a.native(), b.native());
83#elif SIMDPP_USE_VSX_206
84 return vec_div(a.native(), b.native());
85#elif SIMDPP_USE_MSA
86 return __msa_fdiv_d(a.native(), b.native());
87#elif SIMDPP_USE_NULL || SIMDPP_USE_NEON32 || SIMDPP_USE_ALTIVEC
88 float64x2 r;
89 for (unsigned i = 0; i < a.length; i++) {
90 r.el(i) = a.el(i) / b.el(i);
91 }
92 return r;
93#endif
94}
95
#if SIMDPP_USE_AVX
// Lane-wise division of two 4 x float64 vectors; returns a / b.
// Only compiled when SIMDPP_USE_AVX is set; forwards to _mm256_div_pd.
static SIMDPP_INL
float64x4 i_div(const float64x4& a, const float64x4& b)
{
    float64x4 quot = _mm256_div_pd(a.native(), b.native());
    return quot;
}
#endif
103
#if SIMDPP_USE_AVX512F
// Lane-wise division of two 8 x float64 vectors; returns a / b.
// Only compiled when SIMDPP_USE_AVX512F is set; forwards to _mm512_div_pd.
static SIMDPP_INL
float64<8> i_div(const float64<8>& a, const float64<8>& b)
{
    float64<8> quot = _mm512_div_pd(a.native(), b.native());
    return quot;
}
#endif
111
112// -----------------------------------------------------------------------------
113
114template<class V> SIMDPP_INL
115V i_div(const V& a, const V& b)
116{
117 SIMDPP_VEC_ARRAY_IMPL2(V, i_div, a, b);
118}
119
120} // namespace insn
121} // namespace detail
122} // namespace SIMDPP_ARCH_NAMESPACE
123} // namespace simdpp
124
125#endif
126
127