f_reduce_max.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_reduce_max.h]

1	/*  Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MAX_H
9	#define LIBSIMDPP_SIMDPP_DETAIL_INSN_F_REDUCE_MAX_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/extract.h>
17	#include <simdpp/core/f_max.h>
18	#include <simdpp/core/permute2.h>
19	#include <simdpp/detail/extract128.h>
20	#include <simdpp/detail/workarounds.h>
21
22	namespace simdpp {
23	namespace SIMDPP_ARCH_NAMESPACE {
24	namespace detail {
25	namespace insn {
26
27
28	static SIMDPP_INL
29	float i_reduce_max(const float32x4& a)
30	{
31	#if SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON_NO_FLT_SP
32	float r = a.el(`0`);
33	for (unsigned i = `1`; i < a.length; i++) {
34	r = r > a.el(i) ? r : a.el(i); // TODO nan
35	}
36	return r;
37	#elif SIMDPP_USE_SSE2
38	float32x4 b = _mm_movehl_ps(a.native(), a.native());
39	b = max(a, b);
40	b = max(b, permute2<`1`,`1`>(b));
41	return _mm_cvtss_f32(b.native());
42	#elif SIMDPP_USE_NEON64
43	return vmaxnmvq_f32(a.native());
44	#elif SIMDPP_USE_NEON_FLT_SP
45	float32x2_t a2 = vpmax_f32(vget_low_f32(a.native()), vget_high_f32(a.native()));
46	a2 = vpmax_f32(a2, a2);
47	return vget_lane_f32(a2, `0`);
48	#elif SIMDPP_USE_ALTIVEC \|\| SIMDPP_USE_MSA
49	float32x4 b = a;
50	b = max(b, move4_l<`1`>(b));
51	b = max(b, move4_l<`2`>(b));
52	return extract<`0`>(b);
53	#endif
54	}
55
#if SIMDPP_USE_AVX
// Reduces an 8-element single-precision vector: the two 128-bit halves are
// combined lane-wise with max and the 4-element result is handed to the
// float32x4 overload.
static SIMDPP_INL
float i_reduce_max(const float32x8& a)
{
    const float32x4 lower = detail::extract128<0>(a);
    const float32x4 upper = detail::extract128<1>(a);
    const float32x4 folded = max(lower, upper);
    return i_reduce_max(folded);
}
#endif
66
#if SIMDPP_USE_AVX512F
// Reduces a 16-element single-precision vector: the two 256-bit halves are
// combined lane-wise with max and the result is reduced further by another
// i_reduce_max overload.
static SIMDPP_INL
float i_reduce_max(const float32<16>& a)
{
    const float32<8> lower = extract256<0>(a);
    const float32<8> upper = extract256<1>(a);
    return i_reduce_max(max(lower, upper));
}
#endif
74
75	template<unsigned N>
76	SIMDPP_INL float i_reduce_max(const float32<N>& a)
77	{
78	float32v r = a.vec(`0`);
79	for (unsigned i = `1`; i < a.vec_length; ++i)
80	r = max(r, a.vec(i));
81	return i_reduce_max(r);
82	}
83
84	// -----------------------------------------------------------------------------
85
86	static SIMDPP_INL
87	double i_reduce_max(const float64x2& a)
88	{
89	#if SIMDPP_USE_SSE2
90	float64x2 b = max(a, permute2<`1`,`1`>(a));
91	return _mm_cvtsd_f64(b.native());
92	#elif SIMDPP_USE_NEON64
93	return vmaxnmvq_f64(a.native());
94	#elif SIMDPP_USE_VSX_206 \|\| SIMDPP_USE_MSA
95	float64x2 b = max(a, permute2<`1`,`1`>(a));
96	return extract<`0`>(b);
97	#elif SIMDPP_USE_NULL \|\| SIMDPP_USE_NEON32 \|\| SIMDPP_USE_ALTIVEC
98	double r = a.el(`0`);
99	for (unsigned i = `1`; i < a.length; i++) {
100	r = r > a.el(i) ? r : a.el(i); // TODO nan
101	}
102	return r;
103	#endif
104	}
105
#if SIMDPP_USE_AVX
// Reduces a 4-element double-precision vector: the two 128-bit halves are
// combined lane-wise with max and the 2-element result is handed to the
// float64x2 overload.
static SIMDPP_INL
double i_reduce_max(const float64x4& a)
{
    const float64x2 lower = detail::extract128<0>(a);
    const float64x2 upper = detail::extract128<1>(a);
    const float64x2 folded = max(lower, upper);
    return i_reduce_max(folded);
}
#endif
116
#if SIMDPP_USE_AVX512F
// Reduces an 8-element double-precision vector: the two 256-bit halves are
// combined lane-wise with max and the result is reduced further by another
// i_reduce_max overload.
static SIMDPP_INL
double i_reduce_max(const float64<8>& a)
{
    const float64<4> lower = extract256<0>(a);
    const float64<4> upper = extract256<1>(a);
    return i_reduce_max(max(lower, upper));
}
#endif
124
125	template<unsigned N>
126	SIMDPP_INL double i_reduce_max(const float64<N>& a)
127	{
128	float64v r = a.vec(`0`);
129	for (unsigned i = `1`; i < a.vec_length; ++i)
130	r = max(r, a.vec(i));
131	return i_reduce_max(r);
132	}
133
134	} // namespace insn
135	} // namespace detail
136	} // namespace SIMDPP_ARCH_NAMESPACE
137	} // namespace simdpp
138
139	#endif
140
141

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/insn/f_reduce_max.h