for_each.h source code [bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/for_each.h]

1	/* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>
2
3	Distributed under the Boost Software License, Version 1.0.
4	(See accompanying file LICENSE_1_0.txt or copy at
5	http://www.boost.org/LICENSE_1_0.txt)
6	*/
7
8	#ifndef LIBSIMDPP_DETAIL_FOR_EACH_H
9	#define LIBSIMDPP_DETAIL_FOR_EACH_H
10
11	#ifndef LIBSIMDPP_SIMD_H
12	#error "This file must be included through simd.h"
13	#endif
14
15	#include <simdpp/types.h>
16	#include <simdpp/core/extract.h>
17	#include <simdpp/detail/mem_block.h>
18	#include <type_traits>
19
20	namespace simdpp {
21	namespace SIMDPP_ARCH_NAMESPACE {
22	namespace detail {
23
24	#if SIMDPP_USE_SSE2 \|\| SIMDPP_USE_NEON \|\| SIMDPP_USE_MSA
25	template<class V, class F> SIMDPP_INL
26	void foreach_impl(std::integral_constant<unsigned, `2`>,
27	const V& v, F function)
28	{
29	function(extract<`0`>(v));
30	function(extract<`1`>(v));
31	}
32
33	template<class V, class F> SIMDPP_INL
34	void foreach_impl(std::integral_constant<unsigned, `4`>,
35	const V& v, F function)
36	{
37	function(extract<`0`>(v));
38	function(extract<`1`>(v));
39	function(extract<`2`>(v));
40	function(extract<`3`>(v));
41	}
42
43	template<unsigned N, class V, class F> SIMDPP_INL
44	void foreach_impl(std::integral_constant<unsigned, N>,
45	const V& v, F function)
46	{
47	// When we're operating on more than 4-5 elements it makes sense to move
48	// the vector to memory and load data from there. This has higher latency,
49	// but this is masked by extracting the first several elements directly
50	// from the SIMD register set. For the rest of elements it's very likely
51	// that loading through memory has higher throughput.
52	//
53	// Recent x86 (since Sandy Bridge) and NEON (since Cortex A73) processors
54	// are able to sustain more than one load memory access per cycle.
55	// All x86 processors (at least up to Skylake, newer not checked) are only
56	// able to sustain single cross domain data access instruction per cycle.
57
58	// TODO: needs tuning on ARM and MIPS
59	function(extract<`0`>(v));
60	function(extract<`1`>(v));
61	mem_block<V> mem(v);
62	for (unsigned i = `2`; i < N; ++i)
63	function(mem[i]);
64	}
65	#else
66	template<unsigned N, class V, class F> SIMDPP_INL
67	void foreach_impl(std::integral_constant<unsigned, N>,
68	const V& v, F function)
69	{
70	mem_block<V> mem(v);
71	for (unsigned i = `0`; i < N; ++i)
72	function(mem[i]);
73	}
74	#endif
75
76	template<unsigned N, class V, class F> SIMDPP_INL
77	void for_each(const any_vec<N, V>& v, F function)
78	{
79	using size_tag = std::integral_constant<unsigned, V::base_vector_type::length>;
80	for (unsigned i = `0`; i < V::vec_length; ++i)
81	foreach_impl(size_tag(), v.wrapped().vec(i), function);
82	}
83
84
85	} // namespace detail
86	} // namespace SIMDPP_ARCH_NAMESPACE
87	} // namespace simdpp
88
89	#endif
90

Browse the source code of bsFramework/Source/Foundation/bsfUtility/ThirdParty/simdpp/detail/for_each.h