1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_invoke_H
18#define __TBB_parallel_invoke_H
19
20#include "task.h"
21#include "tbb_profiling.h"
22
23#if __TBB_VARIADIC_PARALLEL_INVOKE
24 #include <utility> // std::forward
25#endif
26
27namespace tbb {
28
29#if !__TBB_TASK_GROUP_CONTEXT
30 /** Dummy to avoid cluttering the bulk of the header with enormous amount of ifdefs. **/
31 struct task_group_context {
32 task_group_context(tbb::internal::string_index){}
33 };
34#endif /* __TBB_TASK_GROUP_CONTEXT */
35
36//! @cond INTERNAL
37namespace internal {
38 // Simple task object, executing user method
39 template<typename function>
40 class function_invoker : public task{
41 public:
42 function_invoker(const function& _function) : my_function(_function) {}
43 private:
44 const function &my_function;
45 task* execute() __TBB_override
46 {
47 my_function();
48 return NULL;
49 }
50 };
51
52 // The class spawns two or three child tasks
53 template <size_t N, typename function1, typename function2, typename function3>
54 class spawner : public task {
55 private:
56 const function1& my_func1;
57 const function2& my_func2;
58 const function3& my_func3;
59 bool is_recycled;
60
61 task* execute () __TBB_override {
62 if(is_recycled){
63 return NULL;
64 }else{
65 __TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
66 set_ref_count(N);
67 recycle_as_safe_continuation();
68 internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
69 __TBB_ASSERT(invoker2, "Child task allocation failed");
70 spawn(*invoker2);
71 size_t n = N; // To prevent compiler warnings
72 if (n>2) {
73 internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
74 __TBB_ASSERT(invoker3, "Child task allocation failed");
75 spawn(*invoker3);
76 }
77 my_func1();
78 is_recycled = true;
79 return NULL;
80 }
81 } // execute
82
83 public:
84 spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
85 };
86
87 // Creates and spawns child tasks
88 class parallel_invoke_helper : public empty_task {
89 public:
90 // Dummy functor class
91 class parallel_invoke_noop {
92 public:
93 void operator() () const {}
94 };
95 // Creates a helper object with user-defined number of children expected
96 parallel_invoke_helper(int number_of_children)
97 {
98 set_ref_count(number_of_children + 1);
99 }
100
101#if __TBB_VARIADIC_PARALLEL_INVOKE
102 void add_children() {}
103 void add_children(tbb::task_group_context&) {}
104
105 template <typename function>
106 void add_children(function&& _func)
107 {
108 internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(std::forward<function>(_func));
109 __TBB_ASSERT(invoker, "Child task allocation failed");
110 spawn(*invoker);
111 }
112
113 template<typename function>
114 void add_children(function&& _func, tbb::task_group_context&)
115 {
116 add_children(std::forward<function>(_func));
117 }
118
119 // Adds child(ren) task(s) and spawns them
120 template <typename function1, typename function2, typename... function>
121 void add_children(function1&& _func1, function2&& _func2, function&&... _func)
122 {
123 // The third argument is dummy, it is ignored actually.
124 parallel_invoke_noop noop;
125 typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type;
126 spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward<function1>(_func1), std::forward<function2>(_func2), noop);
127 spawn(sub_root);
128 add_children(std::forward<function>(_func)...);
129 }
130#else
131 // Adds child task and spawns it
132 template <typename function>
133 void add_children (const function &_func)
134 {
135 internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
136 __TBB_ASSERT(invoker, "Child task allocation failed");
137 spawn(*invoker);
138 }
139
140 // Adds a task with multiple child tasks and spawns it
141 // two arguments
142 template <typename function1, typename function2>
143 void add_children (const function1& _func1, const function2& _func2)
144 {
145 // The third argument is dummy, it is ignored actually.
146 parallel_invoke_noop noop;
147 internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
148 spawn(sub_root);
149 }
150 // three arguments
151 template <typename function1, typename function2, typename function3>
152 void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
153 {
154 internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
155 spawn(sub_root);
156 }
157#endif // __TBB_VARIADIC_PARALLEL_INVOKE
158
159 // Waits for all child tasks
160 template <typename F0>
161 void run_and_finish(const F0& f0)
162 {
163 internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
164 __TBB_ASSERT(invoker, "Child task allocation failed");
165 spawn_and_wait_for_all(*invoker);
166 }
167 };
168 // The class destroys root if exception occurred as well as in normal case
169 class parallel_invoke_cleaner: internal::no_copy {
170 public:
171#if __TBB_TASK_GROUP_CONTEXT
172 parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
173 : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
174#else
175 parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
176 : root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
177#endif /* !__TBB_TASK_GROUP_CONTEXT */
178 {}
179
180 ~parallel_invoke_cleaner(){
181 root.destroy(root);
182 }
183 internal::parallel_invoke_helper& root;
184 };
185
186#if __TBB_VARIADIC_PARALLEL_INVOKE
187// Determine whether the last parameter in a pack is task_group_context
188 template<typename... T> struct impl_selector; // to workaround a GCC bug
189
190 template<typename T1, typename... T> struct impl_selector<T1, T...> {
191 typedef typename impl_selector<T...>::type type;
192 };
193
194 template<typename T> struct impl_selector<T> {
195 typedef false_type type;
196 };
197 template<> struct impl_selector<task_group_context&> {
198 typedef true_type type;
199 };
200
201 // Select task_group_context parameter from the back of a pack
202 inline task_group_context& get_context( task_group_context& tgc ) { return tgc; }
203
204 template<typename T1, typename... T>
205 task_group_context& get_context( T1&& /*ignored*/, T&&... t )
206 { return get_context( std::forward<T>(t)... ); }
207
208 // task_group_context is known to be at the back of the parameter pack
209 template<typename F0, typename F1, typename... F>
210 void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) {
211 __TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?");
212 // # of child tasks: f0, f1, and a task for each two elements of the pack except the last
213 const size_t number_of_children = 2 + sizeof...(F)/2;
214 parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward<F>(f)...));
215 parallel_invoke_helper& root = cleaner.root;
216
217 root.add_children(std::forward<F>(f)...);
218 root.add_children(std::forward<F1>(f1));
219 root.run_and_finish(std::forward<F0>(f0));
220 }
221
222 // task_group_context is not in the pack, needs to be added
223 template<typename F0, typename F1, typename... F>
224 void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) {
225 tbb::task_group_context context(PARALLEL_INVOKE);
226 // Add context to the arguments, and redirect to the other overload
227 parallel_invoke_impl(true_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)..., context);
228 }
229#endif
230} // namespace internal
231//! @endcond
232
233/** \name parallel_invoke
234 **/
235//@{
236//! Executes a list of tasks in parallel and waits for all tasks to complete.
237/** @ingroup algorithms */
238
239#if __TBB_VARIADIC_PARALLEL_INVOKE
240
241// parallel_invoke for two or more arguments via variadic templates
242// presence of task_group_context is defined automatically
243template<typename F0, typename F1, typename... F>
244void parallel_invoke(F0&& f0, F1&& f1, F&&... f) {
245 typedef typename internal::impl_selector<internal::false_type, F...>::type selector_type;
246 internal::parallel_invoke_impl(selector_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)...);
247}
248
249#else
250
251// parallel_invoke with user-defined context
252// two arguments
253template<typename F0, typename F1 >
254void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
255 internal::parallel_invoke_cleaner cleaner(2, context);
256 internal::parallel_invoke_helper& root = cleaner.root;
257
258 root.add_children(f1);
259
260 root.run_and_finish(f0);
261}
262
263// three arguments
264template<typename F0, typename F1, typename F2 >
265void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
266 internal::parallel_invoke_cleaner cleaner(3, context);
267 internal::parallel_invoke_helper& root = cleaner.root;
268
269 root.add_children(f2);
270 root.add_children(f1);
271
272 root.run_and_finish(f0);
273}
274
275// four arguments
276template<typename F0, typename F1, typename F2, typename F3>
277void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
278 tbb::task_group_context& context)
279{
280 internal::parallel_invoke_cleaner cleaner(4, context);
281 internal::parallel_invoke_helper& root = cleaner.root;
282
283 root.add_children(f3);
284 root.add_children(f2);
285 root.add_children(f1);
286
287 root.run_and_finish(f0);
288}
289
290// five arguments
291template<typename F0, typename F1, typename F2, typename F3, typename F4 >
292void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
293 tbb::task_group_context& context)
294{
295 internal::parallel_invoke_cleaner cleaner(3, context);
296 internal::parallel_invoke_helper& root = cleaner.root;
297
298 root.add_children(f4, f3);
299 root.add_children(f2, f1);
300
301 root.run_and_finish(f0);
302}
303
304// six arguments
305template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
306void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
307 tbb::task_group_context& context)
308{
309 internal::parallel_invoke_cleaner cleaner(3, context);
310 internal::parallel_invoke_helper& root = cleaner.root;
311
312 root.add_children(f5, f4, f3);
313 root.add_children(f2, f1);
314
315 root.run_and_finish(f0);
316}
317
318// seven arguments
319template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
320void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
321 const F5& f5, const F6& f6,
322 tbb::task_group_context& context)
323{
324 internal::parallel_invoke_cleaner cleaner(3, context);
325 internal::parallel_invoke_helper& root = cleaner.root;
326
327 root.add_children(f6, f5, f4);
328 root.add_children(f3, f2, f1);
329
330 root.run_and_finish(f0);
331}
332
333// eight arguments
334template<typename F0, typename F1, typename F2, typename F3, typename F4,
335 typename F5, typename F6, typename F7>
336void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
337 const F5& f5, const F6& f6, const F7& f7,
338 tbb::task_group_context& context)
339{
340 internal::parallel_invoke_cleaner cleaner(4, context);
341 internal::parallel_invoke_helper& root = cleaner.root;
342
343 root.add_children(f7, f6, f5);
344 root.add_children(f4, f3);
345 root.add_children(f2, f1);
346
347 root.run_and_finish(f0);
348}
349
350// nine arguments
351template<typename F0, typename F1, typename F2, typename F3, typename F4,
352 typename F5, typename F6, typename F7, typename F8>
353void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
354 const F5& f5, const F6& f6, const F7& f7, const F8& f8,
355 tbb::task_group_context& context)
356{
357 internal::parallel_invoke_cleaner cleaner(4, context);
358 internal::parallel_invoke_helper& root = cleaner.root;
359
360 root.add_children(f8, f7, f6);
361 root.add_children(f5, f4, f3);
362 root.add_children(f2, f1);
363
364 root.run_and_finish(f0);
365}
366
367// ten arguments
368template<typename F0, typename F1, typename F2, typename F3, typename F4,
369 typename F5, typename F6, typename F7, typename F8, typename F9>
370void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
371 const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
372 tbb::task_group_context& context)
373{
374 internal::parallel_invoke_cleaner cleaner(4, context);
375 internal::parallel_invoke_helper& root = cleaner.root;
376
377 root.add_children(f9, f8, f7);
378 root.add_children(f6, f5, f4);
379 root.add_children(f3, f2, f1);
380
381 root.run_and_finish(f0);
382}
383
384// two arguments
385template<typename F0, typename F1>
386void parallel_invoke(const F0& f0, const F1& f1) {
387 task_group_context context(internal::PARALLEL_INVOKE);
388 parallel_invoke<F0, F1>(f0, f1, context);
389}
390// three arguments
391template<typename F0, typename F1, typename F2>
392void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
393 task_group_context context(internal::PARALLEL_INVOKE);
394 parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
395}
396// four arguments
397template<typename F0, typename F1, typename F2, typename F3 >
398void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
399 task_group_context context(internal::PARALLEL_INVOKE);
400 parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
401}
402// five arguments
403template<typename F0, typename F1, typename F2, typename F3, typename F4>
404void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
405 task_group_context context(internal::PARALLEL_INVOKE);
406 parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
407}
408// six arguments
409template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
410void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
411 task_group_context context(internal::PARALLEL_INVOKE);
412 parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
413}
414// seven arguments
415template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
416void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
417 const F5& f5, const F6& f6)
418{
419 task_group_context context(internal::PARALLEL_INVOKE);
420 parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
421}
422// eight arguments
423template<typename F0, typename F1, typename F2, typename F3, typename F4,
424 typename F5, typename F6, typename F7>
425void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
426 const F5& f5, const F6& f6, const F7& f7)
427{
428 task_group_context context(internal::PARALLEL_INVOKE);
429 parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
430}
431// nine arguments
432template<typename F0, typename F1, typename F2, typename F3, typename F4,
433 typename F5, typename F6, typename F7, typename F8>
434void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
435 const F5& f5, const F6& f6, const F7& f7, const F8& f8)
436{
437 task_group_context context(internal::PARALLEL_INVOKE);
438 parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
439}
440// ten arguments
441template<typename F0, typename F1, typename F2, typename F3, typename F4,
442 typename F5, typename F6, typename F7, typename F8, typename F9>
443void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
444 const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
445{
446 task_group_context context(internal::PARALLEL_INVOKE);
447 parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
448}
449#endif // __TBB_VARIADIC_PARALLEL_INVOKE
450//@}
451
452} // namespace
453
454#endif /* __TBB_parallel_invoke_H */
455