f_reduce_mul.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_reduce_mul.h]

1	/*  Copyright (C) 2016  Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MUL_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MUL_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/extract.h>
17	#include <simdpp/core/f_mul.h>
18	#include <simdpp/core/permute2.h>
19	#include <simdpp/detail/extract128.h>
20	#include <simdpp/detail/workarounds.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27
28	static SIMDPP_INL
29	float i_reduce_mul(const float32x4& a)
30	{
31	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
32	float r = a.el(`0`);
33	for (unsigned i = `1`; i < a.length; i++) {
34	r *= a.el(i);
35	}
36	return r;
37	#elif SIMDPP_USE_SSE2
38	float32x4 b = _mm_movehl_ps(a.native(), a.native());
39	b = mul(a, b);
40	b = mul(b, permute2<`1`,`0`>(b));
41	return _mm_cvtss_f32(b.native());
42	#elif SIMDPP_USE_NEON_FLT_SP
43	float32x2_t a2 = vmul_f32(vget_low_f32(a.native()), vget_high_f32(a.native()));
44	a2 = vmul_f32(a2, vext_f32(a2, a2, `1`));
45	return vget_lane_f32(a2, `0`);
46	#elif SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
47	float32x4 b = a;
48	b = mul(b, move4_l<`1`>(b));
49	b = mul(b, move4_l<`2`>(b));
50	return extract<`0`>(b);
51	#endif
52	}
53
#if SIMDPP_USE_AVX
/** Returns the product of all lanes of an 8-lane single-precision vector.

    The two 128-bit halves are multiplied lane-wise and the resulting 4-lane
    vector is handed to the float32x4 overload.

    @param a vector whose lanes are multiplied together
    @return the product as a scalar float
*/
static SIMDPP_INL
float i_reduce_mul(const float32x8& a)
{
    const float32x4 upper = detail::extract128<1>(a);
    const float32x4 lower = detail::extract128<0>(a);
    const float32x4 halves_product = mul(lower, upper);
    return i_reduce_mul(halves_product);
}
#endif
64
#if SIMDPP_USE_AVX512F
/** Returns the product of all lanes of a 16-lane single-precision vector.

    The two 256-bit halves are multiplied lane-wise and the result is reduced
    further by another i_reduce_mul overload.

    @param a vector whose lanes are multiplied together
    @return the product as a scalar float
*/
static SIMDPP_INL
float i_reduce_mul(const float32<16>& a)
{
    const float32x8 halves_product = mul(extract256<0>(a), extract256<1>(a));
    return i_reduce_mul(halves_product);
}
#endif
72
73	template<unsigned N>
74	SIMDPP_INL float i_reduce_mul(const float32<N>& a)
75	{
76	float32v r = a.vec(`0`);
77	for (unsigned i = `1`; i < a.vec_length; ++i)
78	r = mul(r, a.vec(i));
79	return i_reduce_mul(r);
80	}
81
82	// -----------------------------------------------------------------------------
83
84	static SIMDPP_INL
85	double i_reduce_mul(const float64x2& a)
86	{
87	#if SIMDPP_USE_SSE2
88	float64x2 b = mul(a, permute2<`1`,`0`>(a));
89	return _mm_cvtsd_f64(b.native());
90	#elif SIMDPP_USE_NEON64
91	float64x1_t a2 = vmul_f64(vget_low_f64(a.native()), vget_high_f64(a.native()));
92	return vget_lane_f64(a2, `0`);
93	#elif SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
94	float64x2 b = mul(a, permute2<`1`,`1`>(a));
95	return extract<`0`>(b);
96	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
97	double r = a.el(`0`);
98	for (unsigned i = `1`; i < a.length; i++) {
99	r *= a.el(i);
100	}
101	return r;
102	#endif
103	}
104
#if SIMDPP_USE_AVX
/** Returns the product of all lanes of a 4-lane double-precision vector.

    The two 128-bit halves are multiplied lane-wise and the resulting 2-lane
    vector is handed to the float64x2 overload.

    @param a vector whose lanes are multiplied together
    @return the product as a scalar double
*/
static SIMDPP_INL
double i_reduce_mul(const float64x4& a)
{
    const float64x2 upper = detail::extract128<1>(a);
    const float64x2 lower = detail::extract128<0>(a);
    const float64x2 halves_product = mul(lower, upper);
    return i_reduce_mul(halves_product);
}
#endif
115
#if SIMDPP_USE_AVX512F
/** Returns the product of all lanes of an 8-lane double-precision vector.

    The two 256-bit halves are multiplied lane-wise and the result is reduced
    further by another i_reduce_mul overload.

    @param a vector whose lanes are multiplied together
    @return the product as a scalar double
*/
static SIMDPP_INL
double i_reduce_mul(const float64<8>& a)
{
    const float64x4 halves_product = mul(extract256<0>(a), extract256<1>(a));
    return i_reduce_mul(halves_product);
}
#endif
123
124	template<unsigned N>
125	SIMDPP_INL double i_reduce_mul(const float64<N>& a)
126	{
127	float64v r = a.vec(`0`);
128	for (unsigned i = `1`; i < a.vec_length; ++i)
129	r = mul(r, a.vec(i));
130	return i_reduce_mul(r);
131	}
132
133	} // namespace insn
134	} // namespace detail
135	} // namespace SIMDPP_ARCH_NAMESPACE
136	} // namespace simdpp
137
138	#endif
139
140

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_reduce_mul.h