parallel_for_for.h source code [Godot/thirdparty/embree/common/algorithms/parallel_for_for.h]

1	// Copyright 2009-2021 Intel Corporation
2	// SPDX-License-Identifier: Apache-2.0
3
4	#pragma once
5
6	#include "parallel_for.h"
7
8	namespace embree
9	{
10	template<typename ArrayArray, typename Func>
11	__forceinline void sequential_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
12	{
13	size_t k=`0`;
14	for (size_t i=`0`; i!=array2.size(); ++i) {
15	const size_t N = array2[i]->size();
16	if (N) func(array2[i],range<size_t>(`0`,N),k);
17	k+=N;
18	}
19	}
20
21	class ParallelForForState
22	{
23	public:
24
25	enum { MAX_TASKS = `64` };
26
27	__forceinline ParallelForForState ()
28	: taskCount(`0`) {}
29
30	template<typename ArrayArray>
31	__forceinline ParallelForForState (ArrayArray& array2, const size_t minStepSize) {
32	init(array2,minStepSize);
33	}
34
35	template<typename SizeFunc>
36	__forceinline ParallelForForState (const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize) {
37	init(numArrays,getSize,minStepSize);
38	}
39
40	template<typename SizeFunc>
41	__forceinline void init ( const size_t numArrays, const SizeFunc& getSize, const size_t minStepSize )
42	{
43	/ first calculate total number of elements /
44	size_t N = `0`;
45	for (size_t i=`0`; i<numArrays; i++) {
46	N += getSize(i);
47	}
48	this->N = N;
49
50	/ calculate number of tasks to use /
51	const size_t numThreads = TaskScheduler::threadCount();
52	const size_t numBlocks = (N+minStepSize-`1`)/minStepSize;
53	taskCount = max(size_t(`1`),min(numThreads,numBlocks,size_t(ParallelForForState::MAX_TASKS)));
54
55	/ calculate start (i,j) for each task /
56	size_t taskIndex = `0`;
57	i0[taskIndex] = `0`;
58	j0[taskIndex] = `0`;
59	size_t k0 = (++taskIndex)*N/taskCount;
60	for (size_t i=`0`, k=`0`; taskIndex < taskCount; i++)
61	{
62	assert(i<numArrays);
63	size_t j=`0`, M = getSize(i);
64	while (j<M && k+M-j >= k0 && taskIndex < taskCount) {
65	assert(taskIndex<taskCount);
66	i0[taskIndex] = i;
67	j0[taskIndex] = j += k0-k;
68	k=k0;
69	k0 = (++taskIndex)*N/taskCount;
70	}
71	k+=M-j;
72	}
73	}
74
75	template<typename ArrayArray>
76	__forceinline void init ( ArrayArray& array2, const size_t minStepSize )
77	{
78	init(array2.size(),[&](size_t i) { return array2[i] ? array2[i]->size() : `0`; },minStepSize);
79	}
80
81	__forceinline size_t size() const {
82	return N;
83	}
84
85	public:
86	size_t i0[MAX_TASKS];
87	size_t j0[MAX_TASKS];
88	size_t taskCount;
89	size_t N;
90	};
91
92	template<typename ArrayArray, typename Func>
93	__forceinline void parallel_for_for( ArrayArray& array2, const size_t minStepSize, const Func& func )
94	{
95	ParallelForForState state(array2,minStepSize);
96
97	parallel_for(state.taskCount, [&](const size_t taskIndex)
98	{
99	/ calculate range /
100	const size_t k0 = (taskIndex+`0`)*state.size()/state.taskCount;
101	const size_t k1 = (taskIndex+`1`)*state.size()/state.taskCount;
102	size_t i0 = state.i0[taskIndex];
103	size_t j0 = state.j0[taskIndex];
104
105	/ iterate over arrays /
106	size_t k=k0;
107	for (size_t i=i0; k<k1; i++) {
108	const size_t N = array2[i] ? array2[i]->size() : `0`;
109	const size_t r0 = j0, r1 = min(N,r0+k1-k);
110	if (r1 > r0) func(array2[i],range<size_t>(r0,r1),k);
111	k+=r1-r0; j0 = `0`;
112	}
113	});
114	}
115
116	template<typename ArrayArray, typename Func>
117	__forceinline void parallel_for_for( ArrayArray& array2, const Func& func )
118	{
119	parallel_for_for(array2,`1`,func);
120	}
121
122	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
123	__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const size_t minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
124	{
125	ParallelForForState state(array2,minStepSize);
126	Value temp[ParallelForForState::MAX_TASKS];
127
128	for (size_t i=`0`; i<state.taskCount; i++)
129	temp[i] = identity;
130
131	parallel_for(state.taskCount, [&](const size_t taskIndex)
132	{
133	/ calculate range /
134	const size_t k0 = (taskIndex+`0`)*state.size()/state.taskCount;
135	const size_t k1 = (taskIndex+`1`)*state.size()/state.taskCount;
136	size_t i0 = state.i0[taskIndex];
137	size_t j0 = state.j0[taskIndex];
138
139	/ iterate over arrays /
140	size_t k=k0;
141	for (size_t i=i0; k<k1; i++) {
142	const size_t N = array2[i] ? array2[i]->size() : `0`;
143	const size_t r0 = j0, r1 = min(N,r0+k1-k);
144	if (r1 > r0) temp[taskIndex] = reduction(temp[taskIndex],func(array2[i],range<size_t>(r0,r1),k));
145	k+=r1-r0; j0 = `0`;
146	}
147	});
148
149	Value ret = identity;
150	for (size_t i=`0`; i<state.taskCount; i++)
151	ret = reduction(ret,temp[i]);
152	return ret;
153	}
154
155	template<typename ArrayArray, typename Value, typename Func, typename Reduction>
156	__forceinline Value parallel_for_for_reduce( ArrayArray& array2, const Value& identity, const Func& func, const Reduction& reduction)
157	{
158	return parallel_for_for_reduce(array2,`1`,identity,func,reduction);
159	}
160	}
161

Browse the source code of Godot/thirdparty/embree/common/algorithms/parallel_for_for.h