1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_parallel_reduce_H |
18 | #define __TBB_parallel_reduce_H |
19 | |
20 | #include <new> |
21 | #include "task.h" |
22 | #include "aligned_space.h" |
23 | #include "partitioner.h" |
24 | #include "tbb_profiling.h" |
25 | |
26 | namespace tbb { |
27 | |
28 | namespace interface9 { |
29 | //! @cond INTERNAL |
30 | namespace internal { |
31 | |
32 | using namespace tbb::internal; |
33 | |
/** Values for reduction_context: position of a start_reduce task within the task tree. */
enum {
    root_task, left_child, right_child
};

/** Represented as a char, not enum, for compactness. */
typedef char reduction_context;
41 | |
42 | //! Task type used to combine the partial results of parallel_reduce. |
43 | /** @ingroup algorithms */ |
template<typename Body>
class finish_reduce: public flag_task {
    //! True when the right child was stolen and constructed a private Body copy in zombie_space.
    bool has_right_zombie;
    //! Position of the associated start_reduce in the task tree (root_task/left_child/right_child).
    const reduction_context my_context;
    //! Pointer to body, or NULL if the left child has not yet finished.
    Body* my_body;
    //! Raw storage for the right child's split-off Body; constructed only on steal (see has_right_zombie).
    aligned_space<Body> zombie_space;
    finish_reduce( reduction_context context_ ) :
        has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
        my_context(context_),
        my_body(NULL)
    {
    }
    ~finish_reduce() {
        // Destroy the zombie here (not in execute) so it is also cleaned up on cancellation.
        if( has_right_zombie )
            zombie_space.begin()->~Body();
    }
    task* execute() __TBB_override {
        if( has_right_zombie ) {
            // Right child was stolen.
            Body* s = zombie_space.begin();
            my_body->join( *s );
            // Body::join() won't be called if canceled. Defer destruction to destructor
        }
        if( my_context==left_child )
            // Publish the merged body to the grandparent continuation with release semantics,
            // pairing with the acquire load in start_reduce::execute.
            itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
        return NULL;
    }
    template<typename Range,typename Body_, typename Partitioner>
    friend class start_reduce;
};
75 | |
//! allocate right task with new parent
/** Declared here because start_reduce::offer_work uses it; defined (inline) below. */
void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);
78 | |
79 | //! Task type used to split the work of parallel_reduce. |
80 | /** @ingroup algorithms */ |
template<typename Range, typename Body, typename Partitioner>
class start_reduce: public task {
    typedef finish_reduce<Body> finish_type;
    //! Body to apply; redirected to a zombie copy in the parent when this right task is stolen.
    Body* my_body;
    //! Subrange this task is responsible for.
    Range my_range;
    typename Partitioner::task_partition_type my_partition;
    //! Position in the task tree (root_task/left_child/right_child).
    reduction_context my_context;
    task* execute() __TBB_override;
    //! Update affinity info, if any
    void note_affinity( affinity_id id ) __TBB_override {
        my_partition.note_affinity( id );
    }
    template<typename Body_>
    friend class finish_reduce;

public:
    //! Constructor used for root task
    start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
        my_body(body),
        my_range(range),
        my_partition(partitioner),
        my_context(root_task)
    {
    }
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) :
        my_body(parent_.my_body),
        my_range(parent_.my_range, split_obj),
        my_partition(parent_.my_partition, split_obj),
        my_context(right_child)
    {
        my_partition.set_affinity(*this);
        parent_.my_context = left_child;
    }
    //! Construct right child from the given range as response to the demand.
    /** parent_ remains left child.  Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
        my_body(parent_.my_body),
        my_range(r),
        my_partition(parent_.my_partition, split()),
        my_context(right_child)
    {
        my_partition.set_affinity(*this);
        my_partition.align_depth( d ); // TODO: move into constructor of partitioner
        parent_.my_context = left_child;
    }
    static void run( const Range& range, Body& body, Partitioner& partitioner ) {
        if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
            task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else
            // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
            // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
            task_group_context context(PARALLEL_REDUCE);
            task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
        }
    }
#if __TBB_TASK_GROUP_CONTEXT
    static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
        if( !range.empty() )
            task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
    }
#endif /* __TBB_TASK_GROUP_CONTEXT */
    //! Run body for range
    void run_body( Range &r ) { (*my_body)( r ); }

    //! spawn right task, serves as callback for partitioner
    // TODO: remove code duplication from 'offer_work' methods
    void offer_work(typename Partitioner::split_type& split_obj) {
        task *tasks[2];
        // tasks[0] = new finish continuation (becomes this task's parent), tasks[1] = right sibling.
        allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
        new((void*)tasks[0]) finish_type(my_context);
        new((void*)tasks[1]) start_reduce(*this, split_obj);
        spawn(*tasks[1]);
    }
    //! spawn right task, serves as callback for partitioner
    void offer_work(const Range& r, depth_t d = 0) {
        task *tasks[2];
        allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
        new((void*)tasks[0]) finish_type(my_context);
        new((void*)tasks[1]) start_reduce(*this, r, d);
        spawn(*tasks[1]);
    }
};
167 | |
//! allocate right task with new parent
// TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) {
    // tasks[0]: continuation (finish) task that becomes the new parent of start_reduce_task.
    tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
    start_reduce_task->set_parent(tasks[0]);
    // Two children will report to the continuation: the caller and the new right sibling.
    tasks[0]->set_ref_count(2);
    // tasks[1]: raw memory for the right sibling, allocated as a child of the continuation.
    tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
}
176 | |
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute() {
    my_partition.check_being_stolen( *this );
    if( my_context==right_child ) {
        finish_type* parent_ptr = static_cast<finish_type*>(parent());
        // Acquire pairs with the release store performed by the left child (below) and by
        // finish_reduce::execute.  If the left body has not been published yet, both children
        // may run concurrently, so split off a private Body copy into the parent's zombie_space.
        if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
            my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
            parent_ptr->has_right_zombie = true;
        }
    } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling
    my_partition.execute(*this, my_range);
    if( my_context==left_child ) {
        finish_type* parent_ptr = static_cast<finish_type*>(parent());
        __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
        // Publish the left body so a subsequently-started right child can reuse it in place.
        itt_store_word_with_release(parent_ptr->my_body, my_body );
    }
    return NULL;
}
195 | |
//! Task type used to combine the partial results of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_deterministic_reduce: public task {
    //! Left child's body; receives the merged result in execute().
    Body &my_left_body;
    //! Right child's body, split from the left one eagerly at construction.
    Body my_right_body;

    finish_deterministic_reduce( Body &body ) :
        my_left_body( body ),
        my_right_body( body, split() )
    {
    }
    //! Runs after both children complete: fold the right result into the left body.
    task* execute() __TBB_override {
        my_left_body.join( my_right_body );
        return NULL;
    }
    template<typename Range,typename Body_, typename Partitioner>
    friend class start_deterministic_reduce;
};
215 | |
216 | //! Task type used to split the work of parallel_deterministic_reduce. |
217 | /** @ingroup algorithms */ |
218 | template<typename Range, typename Body, typename Partitioner> |
219 | class start_deterministic_reduce: public task { |
220 | typedef finish_deterministic_reduce<Body> finish_type; |
221 | Body &my_body; |
222 | Range my_range; |
223 | typename Partitioner::task_partition_type my_partition; |
224 | task* execute() __TBB_override; |
225 | |
226 | //! Constructor used for root task |
227 | start_deterministic_reduce( const Range& range, Body& body, Partitioner& partitioner ) : |
228 | my_body( body ), |
229 | my_range( range ), |
230 | my_partition( partitioner ) |
231 | { |
232 | } |
233 | //! Splitting constructor used to generate children. |
234 | /** parent_ becomes left child. Newly constructed object is right child. */ |
235 | start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c, typename Partitioner::split_type& split_obj ) : |
236 | my_body( c.my_right_body ), |
237 | my_range( parent_.my_range, split_obj ), |
238 | my_partition( parent_.my_partition, split_obj ) |
239 | { |
240 | } |
241 | |
242 | public: |
243 | static void run( const Range& range, Body& body, Partitioner& partitioner ) { |
244 | if( !range.empty() ) { |
245 | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP |
246 | task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body,partitioner) ); |
247 | #else |
248 | // Bound context prevents exceptions from body to affect nesting or sibling algorithms, |
249 | // and allows users to handle exceptions safely by wrapping parallel_for in the try-block. |
250 | task_group_context context(PARALLEL_REDUCE); |
251 | task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) ); |
252 | #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ |
253 | } |
254 | } |
255 | #if __TBB_TASK_GROUP_CONTEXT |
256 | static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) { |
257 | if( !range.empty() ) |
258 | task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) ); |
259 | } |
260 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
261 | |
262 | void offer_work( typename Partitioner::split_type& split_obj) { |
263 | task* tasks[2]; |
264 | allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_deterministic_reduce), sizeof(finish_type)); |
265 | new((void*)tasks[0]) finish_type(my_body); |
266 | new((void*)tasks[1]) start_deterministic_reduce(*this, *static_cast<finish_type*>(tasks[0]), split_obj); |
267 | spawn(*tasks[1]); |
268 | } |
269 | |
270 | void run_body( Range &r ) { my_body(r); } |
271 | }; |
272 | |
template<typename Range, typename Body, typename Partitioner>
task* start_deterministic_reduce<Range,Body, Partitioner>::execute() {
    // Delegate range splitting to the partitioner; it calls back offer_work()/run_body().
    my_partition.execute(*this, my_range);
    return NULL;
}
278 | } // namespace internal |
279 | //! @endcond |
280 | } //namespace interfaceX |
281 | |
282 | //! @cond INTERNAL |
283 | namespace internal { |
284 | using interface9::internal::start_reduce; |
285 | using interface9::internal::start_deterministic_reduce; |
286 | //! Auxiliary class for parallel_reduce; for internal use only. |
287 | /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" |
288 | using given \ref parallel_reduce_lambda_req "anonymous function objects". |
289 | **/ |
290 | /** @ingroup algorithms */ |
291 | template<typename Range, typename Value, typename RealBody, typename Reduction> |
292 | class lambda_reduce_body { |
293 | |
294 | //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced |
295 | // (might require some performance measurements) |
296 | |
297 | const Value& identity_element; |
298 | const RealBody& my_real_body; |
299 | const Reduction& my_reduction; |
300 | Value my_value; |
301 | lambda_reduce_body& operator= ( const lambda_reduce_body& other ); |
302 | public: |
303 | lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) |
304 | : identity_element(identity) |
305 | , my_real_body(body) |
306 | , my_reduction(reduction) |
307 | , my_value(identity) |
308 | { } |
309 | lambda_reduce_body( const lambda_reduce_body& other ) |
310 | : identity_element(other.identity_element) |
311 | , my_real_body(other.my_real_body) |
312 | , my_reduction(other.my_reduction) |
313 | , my_value(other.my_value) |
314 | { } |
315 | lambda_reduce_body( lambda_reduce_body& other, tbb::split ) |
316 | : identity_element(other.identity_element) |
317 | , my_real_body(other.my_real_body) |
318 | , my_reduction(other.my_reduction) |
319 | , my_value(other.identity_element) |
320 | { } |
321 | void operator()(Range& range) { |
322 | my_value = my_real_body(range, const_cast<const Value&>(my_value)); |
323 | } |
324 | void join( lambda_reduce_body& rhs ) { |
325 | my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); |
326 | } |
327 | Value result() const { |
328 | return my_value; |
329 | } |
330 | }; |
331 | |
332 | } // namespace internal |
333 | //! @endcond |
334 | |
335 | // Requirements on Range concept are documented in blocked_range.h |
336 | |
337 | /** \page parallel_reduce_body_req Requirements on parallel_reduce body |
338 | Class \c Body implementing the concept of parallel_reduce body must define: |
339 | - \code Body::Body( Body&, split ); \endcode Splitting constructor. |
340 | Must be able to run concurrently with operator() and method \c join |
341 | - \code Body::~Body(); \endcode Destructor |
342 | - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r |
343 | and accumulating the result |
344 | - \code void Body::join( Body& b ); \endcode Join results. |
345 | The result in \c b should be merged into the result of \c this |
346 | **/ |
347 | |
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    - \code Value RealBody::operator()( Range& r, const Value& x ) const \endcode
        Accumulate the result over subrange \c r, starting from the initial value \c x,
        and return the updated value
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const \endcode
        Combine the partial results \c x and \c y and return the merged value
**/
351 | |
352 | /** \name parallel_reduce |
353 | See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ |
354 | //@{ |
355 | |
//! Parallel iteration with reduction and default partitioner.
/** Body accumulates over subranges and partial results are merged via Body::join;
    see \ref parallel_reduce_body_req "requirements on Body".
    @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body ) {
    internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}

//! Parallel iteration with reduction and simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and affinity_partitioner
/** Note: affinity_partitioner is taken by non-const reference because it carries
    state reused between calls.
    @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
390 | |
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** The supplied task_group_context governs cancellation and exception propagation.
    @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
427 | |
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with reduction and default partitioner.
/** Returns the reduction over \c range of \c real_body, seeded with \c identity
    and combined with \c reduction.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}

//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}
484 | |
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** The supplied task_group_context governs cancellation and exception propagation.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
541 | |
//! Parallel iteration with deterministic reduction and default simple partitioner.
/** Split/join points do not depend on work stealing, so the result is reproducible
    across runs for a given range and body.
    @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
}
562 | |
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** The supplied task_group_context governs cancellation and exception propagation.
    @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
    internal::start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
585 | |
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with deterministic reduction and default simple partitioner.
// TODO: consider making static_partitioner the default
/** Delegates to the simple_partitioner overload below.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
                          ::run(range, body, partitioner);
    return body.result();
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
                          ::run(range, body, partitioner);
    return body.result();
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** Delegates to the simple_partitioner overload below.
    @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     task_group_context& context ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
                          ::run(range, body, partitioner, context);
    return body.result();
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const static_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
                          ::run(range, body, partitioner, context);
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
647 | //@} |
648 | |
649 | } // namespace tbb |
650 | |
651 | #endif /* __TBB_parallel_reduce_H */ |
652 | |