1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#pragma once
5
6#include "parallel_for_for.h"
7#include "parallel_prefix_sum.h"
8
9namespace embree
10{
11 template<typename Value>
12 struct ParallelForForPrefixSumState : public ParallelForForState
13 {
14 __forceinline ParallelForForPrefixSumState () {}
15
16 template<typename ArrayArray>
17 __forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
18 : ParallelForForState(array2,minStepSize) {}
19
20 template<typename SizeFunc>
21 __forceinline ParallelForForPrefixSumState (size_t numArrays, const SizeFunc& getSize, const size_t minStepSize)
22 : ParallelForForState(numArrays,getSize,minStepSize) {}
23
24 ParallelPrefixSumState<Value> prefix_state;
25 };
26
27 template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
28 __forceinline Value parallel_for_for_prefix_sum0_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
29 const SizeFunc& getSize, const Value& identity, const Func& func, const Reduction& reduction)
30 {
31 /* calculate number of tasks to use */
32 const size_t taskCount = state.taskCount;
33
34 /* perform parallel prefix sum */
35 parallel_for(taskCount, [&](const size_t taskIndex)
36 {
37 const size_t k0 = (taskIndex+0)*state.size()/taskCount;
38 const size_t k1 = (taskIndex+1)*state.size()/taskCount;
39 size_t i0 = state.i0[taskIndex];
40 size_t j0 = state.j0[taskIndex];
41
42 /* iterate over arrays */
43 size_t k=k0;
44 Value N=identity;
45 for (size_t i=i0; k<k1; i++) {
46 const size_t size = getSize(i);
47 const size_t r0 = j0, r1 = min(size,r0+k1-k);
48 if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k));
49 k+=r1-r0; j0 = 0;
50 }
51 state.prefix_state.counts[taskIndex] = N;
52 });
53
54 /* calculate prefix sum */
55 Value sum=identity;
56 for (size_t i=0; i<taskCount; i++)
57 {
58 const Value c = state.prefix_state.counts[i];
59 state.prefix_state.sums[i] = sum;
60 sum=reduction(sum,c);
61 }
62
63 return sum;
64 }
65
66 template<typename SizeFunc, typename Index, typename Value, typename Func, typename Reduction>
67 __forceinline Value parallel_for_for_prefix_sum1_( ParallelForForPrefixSumState<Value>& state, Index minStepSize,
68 const SizeFunc& getSize,
69 const Value& identity, const Func& func, const Reduction& reduction)
70 {
71 /* calculate number of tasks to use */
72 const size_t taskCount = state.taskCount;
73 /* perform parallel prefix sum */
74 parallel_for(taskCount, [&](const size_t taskIndex)
75 {
76 const size_t k0 = (taskIndex+0)*state.size()/taskCount;
77 const size_t k1 = (taskIndex+1)*state.size()/taskCount;
78 size_t i0 = state.i0[taskIndex];
79 size_t j0 = state.j0[taskIndex];
80
81 /* iterate over arrays */
82 size_t k=k0;
83 Value N=identity;
84 for (size_t i=i0; k<k1; i++) {
85 const size_t size = getSize(i);
86 const size_t r0 = j0, r1 = min(size,r0+k1-k);
87 if (r1 > r0) N = reduction(N, func((Index)i,range<Index>((Index)r0,(Index)r1),(Index)k,reduction(state.prefix_state.sums[taskIndex],N)));
88 k+=r1-r0; j0 = 0;
89 }
90 state.prefix_state.counts[taskIndex] = N;
91 });
92
93 /* calculate prefix sum */
94 Value sum=identity;
95 for (size_t i=0; i<taskCount; i++)
96 {
97 const Value c = state.prefix_state.counts[i];
98 state.prefix_state.sums[i] = sum;
99 sum=reduction(sum,c);
100 }
101
102 return sum;
103 }
104
105 template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
106 __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state,
107 ArrayArray& array2, Index minStepSize,
108 const Value& identity, const Func& func, const Reduction& reduction)
109 {
110 return parallel_for_for_prefix_sum0_(state,minStepSize,
111 [&](Index i) { return array2[i] ? array2[i]->size() : 0; },
112 identity,
113 [&](Index i, const range<Index>& r, Index k) { return func(array2[i], r, k, i); },
114 reduction);
115 }
116
117 template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
118 __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state,
119 ArrayArray& array2, Index minStepSize,
120 const Value& identity, const Func& func, const Reduction& reduction)
121 {
122 return parallel_for_for_prefix_sum1_(state,minStepSize,
123 [&](Index i) { return array2[i] ? array2[i]->size() : 0; },
124 identity,
125 [&](Index i, const range<Index>& r, Index k, const Value& base) { return func(array2[i], r, k, i, base); },
126 reduction);
127 }
128
129 template<typename ArrayArray, typename Value, typename Func, typename Reduction>
130 __forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
131 const Value& identity, const Func& func, const Reduction& reduction)
132 {
133 return parallel_for_for_prefix_sum0(state,array2,size_t(1),identity,func,reduction);
134 }
135
136 template<typename ArrayArray, typename Value, typename Func, typename Reduction>
137 __forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
138 const Value& identity, const Func& func, const Reduction& reduction)
139 {
140 return parallel_for_for_prefix_sum1(state,array2,size_t(1),identity,func,reduction);
141 }
142}
143