parallel_for_for_prefix_sum.h source code [Godot/thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "parallel_for_for.h"
7	#include "parallel_prefix_sum.h"
8
9	namespace embree
10	{
11	template<typename Value>
12	struct ParallelForForPrefixSumState : public ParallelForForState
13	{
14	__forceinline ParallelForForPrefixSumState () {}
15
16	template<typename ArrayArray>
17	__forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
18	: ParallelForForState(array2,minStepSize) {}
19
20	template<typename SizeFunc>
21	__forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize)
22	: ParallelForForState(numArrays,getSize,minStepSize) {}
23
24	ParallelPrefixSumState<Value> prefix_state;
25	};
26
27	template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
28	__forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
29	const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction)
30	{
31	/ calculate number of tasks to use /
32	const size_t taskCount = state.taskCount;
33
34	/ perform parallel prefix sum /
35	parallel_for(taskCount, [&](const size_t taskIndex)
36	{
37	const size_t k0 = (taskIndex+`0`)*state.size()/taskCount;
38	const size_t k1 = (taskIndex+`1`)*state.size()/taskCount;
39	size_t i0 = state.i0[taskIndex];
40	size_t j0 = state.j0[taskIndex];
41
42	/ iterate over arrays /
43	size_t k=k0;
44	Value N=identity;
45	for (size_t i=i0; k<k1; i++) {
46	const size_t size = getSize(i);
47	const size_t r0 = j0, r1 = min(size,r0+k1-k);
48	if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k));
49	k+=r1-r0; j0 = `0`;
50	}
51	state.prefix_state.counts[taskIndex] = N;
52	});
53
54	/ calculate prefix sum /
55	Value sum=identity;
56	for (size_t i=`0`; i<taskCount; i++)
57	{
58	const Value c = state.prefix_state.counts[i];
59	state.prefix_state.sums[i] = sum;
60	sum=reduction(sum,c);
61	}
62
63	return sum;
64	}
65
66	template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
67	__forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
68	const SizeFunc& getSize,
69	const Value& identity, const Func& func, const Reduction& reduction)
70	{
71	/ calculate number of tasks to use /
72	const size_t taskCount = state.taskCount;
73	/ perform parallel prefix sum /
74	parallel_for(taskCount, [&](const size_t taskIndex)
75	{
76	const size_t k0 = (taskIndex+`0`)*state.size()/taskCount;
77	const size_t k1 = (taskIndex+`1`)*state.size()/taskCount;
78	size_t i0 = state.i0[taskIndex];
79	size_t j0 = state.j0[taskIndex];
80
81	/ iterate over arrays /
82	size_t k=k0;
83	Value N=identity;
84	for (size_t i=i0; k<k1; i++) {
85	const size_t size = getSize(i);
86	const size_t r0 = j0, r1 = min(size,r0+k1-k);
87	if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N)));
88	k+=r1-r0; j0 = `0`;
89	}
90	state.prefix_state.counts[taskIndex] = N;
91	});
92
93	/ calculate prefix sum /
94	Value sum=identity;
95	for (size_t i=`0`; i<taskCount; i++)
96	{
97	const Value c = state.prefix_state.counts[i];
98	state.prefix_state.sums[i] = sum;
99	sum=reduction(sum,c);
100	}
101
102	return sum;
103	}
104
105	template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
106	__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state,
107	ArrayArray& array2, Index minStepSize,
108	const Value& identity, const Func& func, const Reduction& reduction)
109	{
110	return parallel_for_for_prefix_sum0_(state,minStepSize,
111	[&](Index i) { return array2[i] ? array2[i]->size() : `0`; },
112	identity,
113	[&](Index i, const range<Index>& r, Index k) { return func(array2[i], r, k, i); },
114	reduction);
115	}
116
117	template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
118	__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state,
119	ArrayArray& array2, Index minStepSize,
120	const Value& identity, const Func& func, const Reduction& reduction)
121	{
122	return parallel_for_for_prefix_sum1_(state,minStepSize,
123	[&](Index i) { return array2[i] ? array2[i]->size() : `0`; },
124	identity,
125	[&](Index i, const range<Index>& r, Index k, const Value& base) { return func(array2[i], r, k, i, base); },
126	reduction);
127	}
128
129	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
130	__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
131	const Value& identity, const Func& func, const Reduction& reduction)
132	{
133	return parallel_for_for_prefix_sum0(state,array2,size_t(`1`),identity,func,reduction);
134	}
135
136	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
137	__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
138	const Value& identity, const Func& func, const Reduction& reduction)
139	{
140	return parallel_for_for_prefix_sum1(state,array2,size_t(`1`),identity,func,reduction);
141	}
142	}
143

Browse the source code of Godot/thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h