1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_scan_H
18#define __TBB_parallel_scan_H
19
20#include "task.h"
21#include "aligned_space.h"
22#include <new>
23#include "partitioner.h"
24
25namespace tbb {
26
27//! Used to indicate that the initial scan is being performed.
28/** @ingroup algorithms */
29struct pre_scan_tag {
30 static bool is_final_scan() {return false;}
31 operator bool() {return is_final_scan();}
32};
33
34//! Used to indicate that the final scan is being performed.
35/** @ingroup algorithms */
36struct final_scan_tag {
37 static bool is_final_scan() {return true;}
38 operator bool() {return is_final_scan();}
39};
40
41//! @cond INTERNAL
42namespace internal {
43
44 //! Performs final scan for a leaf
45 /** @ingroup algorithms */
46 template<typename Range, typename Body>
47 class final_sum: public task {
48 public:
49 Body my_body;
50 private:
51 aligned_space<Range> my_range;
52 //! Where to put result of last subrange, or NULL if not last subrange.
53 Body* my_stuff_last;
54 public:
55 final_sum( Body& body_ ) :
56 my_body(body_,split())
57 {
58 poison_pointer(my_stuff_last);
59 }
60 ~final_sum() {
61 my_range.begin()->~Range();
62 }
63 void finish_construction( const Range& range_, Body* stuff_last_ ) {
64 new( my_range.begin() ) Range(range_);
65 my_stuff_last = stuff_last_;
66 }
67 private:
68 task* execute() __TBB_override {
69 my_body( *my_range.begin(), final_scan_tag() );
70 if( my_stuff_last )
71 my_stuff_last->assign(my_body);
72 return NULL;
73 }
74 };
75
76 //! Split work to be done in the scan.
77 /** @ingroup algorithms */
78 template<typename Range, typename Body>
79 class sum_node: public task {
80 typedef final_sum<Range,Body> final_sum_type;
81 public:
82 final_sum_type *my_incoming;
83 final_sum_type *my_body;
84 Body *my_stuff_last;
85 private:
86 final_sum_type *my_left_sum;
87 sum_node *my_left;
88 sum_node *my_right;
89 bool my_left_is_final;
90 Range my_range;
91 sum_node( const Range range_, bool left_is_final_ ) :
92 my_stuff_last(NULL),
93 my_left_sum(NULL),
94 my_left(NULL),
95 my_right(NULL),
96 my_left_is_final(left_is_final_),
97 my_range(range_)
98 {
99 // Poison fields that will be set by second pass.
100 poison_pointer(my_body);
101 poison_pointer(my_incoming);
102 }
103 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
104 if( !n ) {
105 f.recycle_as_child_of( *this );
106 f.finish_construction( range_, stuff_last_ );
107 return &f;
108 } else {
109 n->my_body = &f;
110 n->my_incoming = incoming_;
111 n->my_stuff_last = stuff_last_;
112 return n;
113 }
114 }
115 task* execute() __TBB_override {
116 if( my_body ) {
117 if( my_incoming )
118 my_left_sum->my_body.reverse_join( my_incoming->my_body );
119 recycle_as_continuation();
120 sum_node& c = *this;
121 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
122 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
123 set_ref_count( (a!=NULL)+(b!=NULL) );
124 my_body = NULL;
125 if( a ) spawn(*b);
126 else a = b;
127 return a;
128 } else {
129 return NULL;
130 }
131 }
132 template<typename Range_,typename Body_,typename Partitioner_>
133 friend class start_scan;
134
135 template<typename Range_,typename Body_>
136 friend class finish_scan;
137 };
138
139 //! Combine partial results
140 /** @ingroup algorithms */
141 template<typename Range, typename Body>
142 class finish_scan: public task {
143 typedef sum_node<Range,Body> sum_node_type;
144 typedef final_sum<Range,Body> final_sum_type;
145 final_sum_type** const my_sum;
146 sum_node_type*& my_return_slot;
147 public:
148 final_sum_type* my_right_zombie;
149 sum_node_type& my_result;
150
151 task* execute() __TBB_override {
152 __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
153 if( my_result.my_left )
154 my_result.my_left_is_final = false;
155 if( my_right_zombie && my_sum )
156 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
157 __TBB_ASSERT( !my_return_slot, NULL );
158 if( my_right_zombie || my_result.my_right ) {
159 my_return_slot = &my_result;
160 } else {
161 destroy( my_result );
162 }
163 if( my_right_zombie && !my_sum && !my_result.my_right ) {
164 destroy(*my_right_zombie);
165 my_right_zombie = NULL;
166 }
167 return NULL;
168 }
169
170 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
171 my_sum(sum_),
172 my_return_slot(return_slot_),
173 my_right_zombie(NULL),
174 my_result(result_)
175 {
176 __TBB_ASSERT( !my_return_slot, NULL );
177 }
178 };
179
180 //! Initial task to split the work
181 /** @ingroup algorithms */
182 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
183 class start_scan: public task {
184 typedef sum_node<Range,Body> sum_node_type;
185 typedef final_sum<Range,Body> final_sum_type;
186 final_sum_type* my_body;
187 /** Non-null if caller is requesting total. */
188 final_sum_type** my_sum;
189 sum_node_type** my_return_slot;
190 /** Null if computing root. */
191 sum_node_type* my_parent_sum;
192 bool my_is_final;
193 bool my_is_right_child;
194 Range my_range;
195 typename Partitioner::partition_type my_partition;
196 task* execute() __TBB_override ;
197 public:
198 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
199 my_body(parent_.my_body),
200 my_sum(parent_.my_sum),
201 my_return_slot(&return_slot_),
202 my_parent_sum(parent_sum_),
203 my_is_final(parent_.my_is_final),
204 my_is_right_child(false),
205 my_range(parent_.my_range,split()),
206 my_partition(parent_.my_partition,split())
207 {
208 __TBB_ASSERT( !*my_return_slot, NULL );
209 }
210
211 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
212 my_body(&body_),
213 my_sum(NULL),
214 my_return_slot(&return_slot_),
215 my_parent_sum(NULL),
216 my_is_final(true),
217 my_is_right_child(false),
218 my_range(range_),
219 my_partition(partitioner_)
220 {
221 __TBB_ASSERT( !*my_return_slot, NULL );
222 }
223
224 static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
225 if( !range_.empty() ) {
226 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
227 internal::sum_node<Range,Body>* root = NULL;
228 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
229 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
230 /*my_return_slot=*/root,
231 range_,
232 *temp_body,
233 partitioner_ );
234 temp_body->my_body.reverse_join(body_);
235 task::spawn_root_and_wait( pass1 );
236 if( root ) {
237 root->my_body = temp_body;
238 root->my_incoming = NULL;
239 root->my_stuff_last = &body_;
240 task::spawn_root_and_wait( *root );
241 } else {
242 body_.assign(temp_body->my_body);
243 temp_body->finish_construction( range_, NULL );
244 temp_body->destroy(*temp_body);
245 }
246 }
247 }
248 };
249
250 template<typename Range, typename Body, typename Partitioner>
251 task* start_scan<Range,Body,Partitioner>::execute() {
252 typedef internal::finish_scan<Range,Body> finish_pass1_type;
253 finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
254 // Inspecting p->result.left_sum would ordinarily be a race condition.
255 // But we inspect it only if we are not a stolen task, in which case we
256 // know that task assigning to p->result.left_sum has completed.
257 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
258 if( treat_as_stolen ) {
259 // Invocation is for right child that has been really stolen or needs to be virtually stolen
260 p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
261 my_is_final = false;
262 }
263 task* next_task = NULL;
264 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
265 if( my_is_final )
266 (my_body->my_body)( my_range, final_scan_tag() );
267 else if( my_sum )
268 (my_body->my_body)( my_range, pre_scan_tag() );
269 if( my_sum )
270 *my_sum = my_body;
271 __TBB_ASSERT( !*my_return_slot, NULL );
272 } else {
273 sum_node_type* result;
274 if( my_parent_sum )
275 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
276 else
277 result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
278 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
279 // Split off right child
280 start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
281 b.my_is_right_child = true;
282 // Left child is recycling of *this. Must recycle this before spawning b,
283 // otherwise b might complete and decrement c.ref_count() to zero, which
284 // would cause c.execute() to run prematurely.
285 recycle_as_child_of(c);
286 c.set_ref_count(2);
287 c.spawn(b);
288 my_sum = &result->my_left_sum;
289 my_return_slot = &result->my_left;
290 my_is_right_child = false;
291 next_task = this;
292 my_parent_sum = result;
293 __TBB_ASSERT( !*my_return_slot, NULL );
294 }
295 return next_task;
296 }
297
298 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
299 class lambda_scan_body : no_assign {
300 Value my_sum;
301 const Value& identity_element;
302 const Scan& my_scan;
303 const ReverseJoin& my_reverse_join;
304 public:
305 lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
306 : my_sum(identity)
307 , identity_element(identity)
308 , my_scan(scan)
309 , my_reverse_join(rev_join) {}
310
311 lambda_scan_body( lambda_scan_body& b, split )
312 : my_sum(b.identity_element)
313 , identity_element(b.identity_element)
314 , my_scan(b.my_scan)
315 , my_reverse_join(b.my_reverse_join) {}
316
317 template<typename Tag>
318 void operator()( const Range& r, Tag tag ) {
319 my_sum = my_scan(r, my_sum, tag);
320 }
321
322 void reverse_join( lambda_scan_body& a ) {
323 my_sum = my_reverse_join(a.my_sum, my_sum);
324 }
325
326 void assign( lambda_scan_body& b ) {
327 my_sum = b.my_sum;
328 }
329
330 Value result() const {
331 return my_sum;
332 }
333 };
334} // namespace internal
335//! @endcond
336
337// Requirements on Range concept are documented in blocked_range.h
338
339/** \page parallel_scan_body_req Requirements on parallel_scan body
340 Class \c Body implementing the concept of parallel_scan body must define:
341 - \code Body::Body( Body&, split ); \endcode Splitting constructor.
342 Split \c b so that \c this and \c b can accumulate separately
343 - \code Body::~Body(); \endcode Destructor
344 - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
345 Preprocess iterations for range \c r
346 - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
347 Do final processing for iterations of range \c r
348 - \code void Body::reverse_join( Body& a ); \endcode
349 Merge preprocessing state of \c a into \c this, where \c a was
350 created earlier from \c b by b's splitting constructor
351**/
352
353/** \name parallel_scan
354 See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
355//@{
356
357//! Parallel prefix with default partitioner
358/** @ingroup algorithms **/
359template<typename Range, typename Body>
360void parallel_scan( const Range& range, Body& body ) {
361 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
362}
363
364//! Parallel prefix with simple_partitioner
365/** @ingroup algorithms **/
366template<typename Range, typename Body>
367void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
368 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
369}
370
371//! Parallel prefix with auto_partitioner
372/** @ingroup algorithms **/
373template<typename Range, typename Body>
374void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
375 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
376}
377
378//! Parallel prefix with default partitioner
379/** @ingroup algorithms **/
380template<typename Range, typename Value, typename Scan, typename ReverseJoin>
381Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
382 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
383 tbb::parallel_scan(range,body,__TBB_DEFAULT_PARTITIONER());
384 return body.result();
385}
386
387//! Parallel prefix with simple_partitioner
388/** @ingroup algorithms **/
389template<typename Range, typename Value, typename Scan, typename ReverseJoin>
390Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
391 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
392 tbb::parallel_scan(range,body,partitioner);
393 return body.result();
394}
395
396//! Parallel prefix with auto_partitioner
397/** @ingroup algorithms **/
398template<typename Range, typename Value, typename Scan, typename ReverseJoin>
399Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
400 internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
401 tbb::parallel_scan(range,body,partitioner);
402 return body.result();
403}
404
405//@}
406
407} // namespace tbb
408
409#endif /* __TBB_parallel_scan_H */
410
411