parallel_reduce.h source code [Godot/thirdparty/embree/common/algorithms/parallel_reduce.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "parallel_for.h"
7
8	namespace embree
9	{
10	template<typename Index, typename Value, typename Func, typename Reduction>
11	__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
12	{
13	return func(range<Index>(first,last));
14	}
15
16	template<typename Index, typename Value, typename Func, typename Reduction>
17	__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
18	{
19	return func(range<Index>(first,last));
20	}
21
22	template<typename Index, typename Value, typename Func, typename Reduction>
23	__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
24	{
25	const Index maxTasks = `512`;
26	const Index threadCount = (Index) TaskScheduler::threadCount();
27	taskCount = min(taskCount,threadCount,maxTasks);
28
29	/ parallel invocation of all tasks /
30	dynamic_large_stack_array(Value,values,taskCount,`8192`); // consumes at most 8192 bytes on the stack
31	parallel_for(taskCount, [&](const Index taskIndex) {
32	const Index k0 = first+(taskIndex+`0`)*(last-first)/taskCount;
33	const Index k1 = first+(taskIndex+`1`)*(last-first)/taskCount;
34	values[taskIndex] = func(range<Index>(k0,k1));
35	});
36
37	/ perform reduction over all tasks /
38	Value v = identity;
39	for (Index i=`0`; i<taskCount; i++) v = reduction(v,values[i]);
40	return v;
41	}
42
43	template<typename Index, typename Value, typename Func, typename Reduction>
44	__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
45	{
46	#if defined(TASKING_INTERNAL)
47
48	/ fast path for small number of iterations /
49	Index taskCount = (last-first+minStepSize-`1`)/minStepSize;
50	if (likely(taskCount == `1`)) {
51	return func(range<Index>(first,last));
52	}
53	return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
54
55	#elif defined(TASKING_TBB)
56	#if TBB_INTERFACE_VERSION >= 12002
57	tbb::task_group_context context;
58	const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
59	[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
60	reduction,context);
61	// -- GODOT start --
62	// if (context.is_group_execution_cancelled())
63	// throw std::runtime_error("task cancelled");
64	// -- GODOT end --
65	return v;
66	#else
67	const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
68	[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
69	reduction);
70	// -- GODOT start --
71	// if (tbb::task::self().is_cancelled())
72	// throw std::runtime_error("task cancelled");
73	// -- GODOT end --
74	return v;
75	#endif
76	#else // TASKING_PPL
77	struct AlignedValue
78	{
79	char storage[__alignof(Value)+sizeof(Value)];
80	static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - `1`) % a); };
81	Value* getValuePtr() { return reinterpret_cast<Value>(alignUp(uintptr_t(storage), __alignof*(Value))); }
82	const Value* getValuePtr() const { return reinterpret_cast<Value>(alignUp(uintptr_t(storage), __alignof*(Value))); }
83	AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
84	AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
85	AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
86	AlignedValue& operator = (const AlignedValue& v) { getValuePtr() = v.getValuePtr(); return *this; };
87	AlignedValue& operator = (const AlignedValue&& v) { getValuePtr() = v.getValuePtr(); return *this; };
88	operator Value() const { return *getValuePtr(); }
89	};
90
91	struct Iterator_Index
92	{
93	Index v;
94	typedef std::forward_iterator_tag iterator_category;
95	typedef AlignedValue value_type;
96	typedef Index difference_type;
97	typedef Index distance_type;
98	typedef AlignedValue* pointer;
99	typedef AlignedValue& reference;
100	__forceinline Iterator_Index() {}
101	__forceinline Iterator_Index(Index v) : v(v) {}
102	__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
103	__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
104	__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
105	__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
106	};
107
108	auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
109	assert(begin.v < end.v);
110	return reduction(start, func(range<Index>(begin.v, end.v)));
111	};
112	const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
113	return v;
114	#endif
115	}
116
117	template<typename Index, typename Value, typename Func, typename Reduction>
118	__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
119	{
120	if (likely(last-first < parallel_threshold)) {
121	return func(range<Index>(first,last));
122	} else {
123	return parallel_reduce(first,last,minStepSize,identity,func,reduction);
124	}
125	}
126
127	template<typename Index, typename Value, typename Func, typename Reduction>
128	__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
129	{
130	return parallel_reduce(range.begin(),range.end(),minStepSize,parallel_threshold,identity,func,reduction);
131	}
132
133	template<typename Index, typename Value, typename Func, typename Reduction>
134	__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
135	{
136	auto funcr = [&] ( const range<Index> r ) {
137	Value v = identity;
138	for (Index i=r.begin(); i<r.end(); i++)
139	v = reduction(v,func(i));
140	return v;
141	};
142	return parallel_reduce(first,last,Index(`1`),identity,funcr,reduction);
143	}
144
145	template<typename Index, typename Value, typename Func, typename Reduction>
146	__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
147	{
148	return parallel_reduce(range.begin(),range.end(),Index(`1`),identity,func,reduction);
149	}
150	}
151

Browse the source code of Godot/thirdparty/embree/common/algorithms/parallel_reduce.h