1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_parallel_scan_H |
18 | #define __TBB_parallel_scan_H |
19 | |
20 | #include "task.h" |
21 | #include "aligned_space.h" |
22 | #include <new> |
23 | #include "partitioner.h" |
24 | |
25 | namespace tbb { |
26 | |
27 | //! Used to indicate that the initial scan is being performed. |
28 | /** @ingroup algorithms */ |
29 | struct pre_scan_tag { |
30 | static bool is_final_scan() {return false;} |
31 | operator bool() {return is_final_scan();} |
32 | }; |
33 | |
34 | //! Used to indicate that the final scan is being performed. |
35 | /** @ingroup algorithms */ |
36 | struct final_scan_tag { |
37 | static bool is_final_scan() {return true;} |
38 | operator bool() {return is_final_scan();} |
39 | }; |
40 | |
41 | //! @cond INTERNAL |
42 | namespace internal { |
43 | |
44 | //! Performs final scan for a leaf |
45 | /** @ingroup algorithms */ |
46 | template<typename Range, typename Body> |
47 | class final_sum: public task { |
48 | public: |
49 | Body my_body; |
50 | private: |
51 | aligned_space<Range> my_range; |
52 | //! Where to put result of last subrange, or NULL if not last subrange. |
53 | Body* my_stuff_last; |
54 | public: |
55 | final_sum( Body& body_ ) : |
56 | my_body(body_,split()) |
57 | { |
58 | poison_pointer(my_stuff_last); |
59 | } |
60 | ~final_sum() { |
61 | my_range.begin()->~Range(); |
62 | } |
63 | void finish_construction( const Range& range_, Body* stuff_last_ ) { |
64 | new( my_range.begin() ) Range(range_); |
65 | my_stuff_last = stuff_last_; |
66 | } |
67 | private: |
68 | task* execute() __TBB_override { |
69 | my_body( *my_range.begin(), final_scan_tag() ); |
70 | if( my_stuff_last ) |
71 | my_stuff_last->assign(my_body); |
72 | return NULL; |
73 | } |
74 | }; |
75 | |
76 | //! Split work to be done in the scan. |
77 | /** @ingroup algorithms */ |
78 | template<typename Range, typename Body> |
79 | class sum_node: public task { |
80 | typedef final_sum<Range,Body> final_sum_type; |
81 | public: |
82 | final_sum_type *my_incoming; |
83 | final_sum_type *my_body; |
84 | Body *my_stuff_last; |
85 | private: |
86 | final_sum_type *my_left_sum; |
87 | sum_node *my_left; |
88 | sum_node *my_right; |
89 | bool my_left_is_final; |
90 | Range my_range; |
91 | sum_node( const Range range_, bool left_is_final_ ) : |
92 | my_stuff_last(NULL), |
93 | my_left_sum(NULL), |
94 | my_left(NULL), |
95 | my_right(NULL), |
96 | my_left_is_final(left_is_final_), |
97 | my_range(range_) |
98 | { |
99 | // Poison fields that will be set by second pass. |
100 | poison_pointer(my_body); |
101 | poison_pointer(my_incoming); |
102 | } |
103 | task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) { |
104 | if( !n ) { |
105 | f.recycle_as_child_of( *this ); |
106 | f.finish_construction( range_, stuff_last_ ); |
107 | return &f; |
108 | } else { |
109 | n->my_body = &f; |
110 | n->my_incoming = incoming_; |
111 | n->my_stuff_last = stuff_last_; |
112 | return n; |
113 | } |
114 | } |
115 | task* execute() __TBB_override { |
116 | if( my_body ) { |
117 | if( my_incoming ) |
118 | my_left_sum->my_body.reverse_join( my_incoming->my_body ); |
119 | recycle_as_continuation(); |
120 | sum_node& c = *this; |
121 | task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last); |
122 | task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL); |
123 | set_ref_count( (a!=NULL)+(b!=NULL) ); |
124 | my_body = NULL; |
125 | if( a ) spawn(*b); |
126 | else a = b; |
127 | return a; |
128 | } else { |
129 | return NULL; |
130 | } |
131 | } |
132 | template<typename Range_,typename Body_,typename Partitioner_> |
133 | friend class start_scan; |
134 | |
135 | template<typename Range_,typename Body_> |
136 | friend class finish_scan; |
137 | }; |
138 | |
139 | //! Combine partial results |
140 | /** @ingroup algorithms */ |
141 | template<typename Range, typename Body> |
142 | class finish_scan: public task { |
143 | typedef sum_node<Range,Body> sum_node_type; |
144 | typedef final_sum<Range,Body> final_sum_type; |
145 | final_sum_type** const my_sum; |
146 | sum_node_type*& my_return_slot; |
147 | public: |
148 | final_sum_type* my_right_zombie; |
149 | sum_node_type& my_result; |
150 | |
151 | task* execute() __TBB_override { |
152 | __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL ); |
153 | if( my_result.my_left ) |
154 | my_result.my_left_is_final = false; |
155 | if( my_right_zombie && my_sum ) |
156 | ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body); |
157 | __TBB_ASSERT( !my_return_slot, NULL ); |
158 | if( my_right_zombie || my_result.my_right ) { |
159 | my_return_slot = &my_result; |
160 | } else { |
161 | destroy( my_result ); |
162 | } |
163 | if( my_right_zombie && !my_sum && !my_result.my_right ) { |
164 | destroy(*my_right_zombie); |
165 | my_right_zombie = NULL; |
166 | } |
167 | return NULL; |
168 | } |
169 | |
170 | finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : |
171 | my_sum(sum_), |
172 | my_return_slot(return_slot_), |
173 | my_right_zombie(NULL), |
174 | my_result(result_) |
175 | { |
176 | __TBB_ASSERT( !my_return_slot, NULL ); |
177 | } |
178 | }; |
179 | |
180 | //! Initial task to split the work |
181 | /** @ingroup algorithms */ |
182 | template<typename Range, typename Body, typename Partitioner=simple_partitioner> |
183 | class start_scan: public task { |
184 | typedef sum_node<Range,Body> sum_node_type; |
185 | typedef final_sum<Range,Body> final_sum_type; |
186 | final_sum_type* my_body; |
187 | /** Non-null if caller is requesting total. */ |
188 | final_sum_type** my_sum; |
189 | sum_node_type** my_return_slot; |
190 | /** Null if computing root. */ |
191 | sum_node_type* my_parent_sum; |
192 | bool my_is_final; |
193 | bool my_is_right_child; |
194 | Range my_range; |
195 | typename Partitioner::partition_type my_partition; |
196 | task* execute() __TBB_override ; |
197 | public: |
198 | start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) : |
199 | my_body(parent_.my_body), |
200 | my_sum(parent_.my_sum), |
201 | my_return_slot(&return_slot_), |
202 | my_parent_sum(parent_sum_), |
203 | my_is_final(parent_.my_is_final), |
204 | my_is_right_child(false), |
205 | my_range(parent_.my_range,split()), |
206 | my_partition(parent_.my_partition,split()) |
207 | { |
208 | __TBB_ASSERT( !*my_return_slot, NULL ); |
209 | } |
210 | |
211 | start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) : |
212 | my_body(&body_), |
213 | my_sum(NULL), |
214 | my_return_slot(&return_slot_), |
215 | my_parent_sum(NULL), |
216 | my_is_final(true), |
217 | my_is_right_child(false), |
218 | my_range(range_), |
219 | my_partition(partitioner_) |
220 | { |
221 | __TBB_ASSERT( !*my_return_slot, NULL ); |
222 | } |
223 | |
224 | static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) { |
225 | if( !range_.empty() ) { |
226 | typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type; |
227 | internal::sum_node<Range,Body>* root = NULL; |
228 | final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ ); |
229 | start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type( |
230 | /*my_return_slot=*/root, |
231 | range_, |
232 | *temp_body, |
233 | partitioner_ ); |
234 | temp_body->my_body.reverse_join(body_); |
235 | task::spawn_root_and_wait( pass1 ); |
236 | if( root ) { |
237 | root->my_body = temp_body; |
238 | root->my_incoming = NULL; |
239 | root->my_stuff_last = &body_; |
240 | task::spawn_root_and_wait( *root ); |
241 | } else { |
242 | body_.assign(temp_body->my_body); |
243 | temp_body->finish_construction( range_, NULL ); |
244 | temp_body->destroy(*temp_body); |
245 | } |
246 | } |
247 | } |
248 | }; |
249 | |
250 | template<typename Range, typename Body, typename Partitioner> |
251 | task* start_scan<Range,Body,Partitioner>::execute() { |
252 | typedef internal::finish_scan<Range,Body> finish_pass1_type; |
253 | finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL; |
254 | // Inspecting p->result.left_sum would ordinarily be a race condition. |
255 | // But we inspect it only if we are not a stolen task, in which case we |
256 | // know that task assigning to p->result.left_sum has completed. |
257 | bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum); |
258 | if( treat_as_stolen ) { |
259 | // Invocation is for right child that has been really stolen or needs to be virtually stolen |
260 | p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body); |
261 | my_is_final = false; |
262 | } |
263 | task* next_task = NULL; |
264 | if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) { |
265 | if( my_is_final ) |
266 | (my_body->my_body)( my_range, final_scan_tag() ); |
267 | else if( my_sum ) |
268 | (my_body->my_body)( my_range, pre_scan_tag() ); |
269 | if( my_sum ) |
270 | *my_sum = my_body; |
271 | __TBB_ASSERT( !*my_return_slot, NULL ); |
272 | } else { |
273 | sum_node_type* result; |
274 | if( my_parent_sum ) |
275 | result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final); |
276 | else |
277 | result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final); |
278 | finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result); |
279 | // Split off right child |
280 | start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result ); |
281 | b.my_is_right_child = true; |
282 | // Left child is recycling of *this. Must recycle this before spawning b, |
283 | // otherwise b might complete and decrement c.ref_count() to zero, which |
284 | // would cause c.execute() to run prematurely. |
285 | recycle_as_child_of(c); |
286 | c.set_ref_count(2); |
287 | c.spawn(b); |
288 | my_sum = &result->my_left_sum; |
289 | my_return_slot = &result->my_left; |
290 | my_is_right_child = false; |
291 | next_task = this; |
292 | my_parent_sum = result; |
293 | __TBB_ASSERT( !*my_return_slot, NULL ); |
294 | } |
295 | return next_task; |
296 | } |
297 | |
298 | template<typename Range, typename Value, typename Scan, typename ReverseJoin> |
299 | class lambda_scan_body : no_assign { |
300 | Value my_sum; |
301 | const Value& identity_element; |
302 | const Scan& my_scan; |
303 | const ReverseJoin& my_reverse_join; |
304 | public: |
305 | lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join) |
306 | : my_sum(identity) |
307 | , identity_element(identity) |
308 | , my_scan(scan) |
309 | , my_reverse_join(rev_join) {} |
310 | |
311 | lambda_scan_body( lambda_scan_body& b, split ) |
312 | : my_sum(b.identity_element) |
313 | , identity_element(b.identity_element) |
314 | , my_scan(b.my_scan) |
315 | , my_reverse_join(b.my_reverse_join) {} |
316 | |
317 | template<typename Tag> |
318 | void operator()( const Range& r, Tag tag ) { |
319 | my_sum = my_scan(r, my_sum, tag); |
320 | } |
321 | |
322 | void reverse_join( lambda_scan_body& a ) { |
323 | my_sum = my_reverse_join(a.my_sum, my_sum); |
324 | } |
325 | |
326 | void assign( lambda_scan_body& b ) { |
327 | my_sum = b.my_sum; |
328 | } |
329 | |
330 | Value result() const { |
331 | return my_sum; |
332 | } |
333 | }; |
334 | } // namespace internal |
335 | //! @endcond |
336 | |
337 | // Requirements on Range concept are documented in blocked_range.h |
338 | |
339 | /** \page parallel_scan_body_req Requirements on parallel_scan body |
340 | Class \c Body implementing the concept of parallel_scan body must define: |
341 | - \code Body::Body( Body&, split ); \endcode Splitting constructor. |
342 | Split \c b so that \c this and \c b can accumulate separately |
343 | - \code Body::~Body(); \endcode Destructor |
344 | - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode |
345 | Preprocess iterations for range \c r |
346 | - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode |
347 | Do final processing for iterations of range \c r |
348 | - \code void Body::reverse_join( Body& a ); \endcode |
349 | Merge preprocessing state of \c a into \c this, where \c a was |
350 | created earlier from \c b by b's splitting constructor |
351 | **/ |
352 | |
353 | /** \name parallel_scan |
354 | See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ |
355 | //@{ |
356 | |
357 | //! Parallel prefix with default partitioner |
358 | /** @ingroup algorithms **/ |
359 | template<typename Range, typename Body> |
360 | void parallel_scan( const Range& range, Body& body ) { |
361 | internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); |
362 | } |
363 | |
364 | //! Parallel prefix with simple_partitioner |
365 | /** @ingroup algorithms **/ |
366 | template<typename Range, typename Body> |
367 | void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { |
368 | internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner); |
369 | } |
370 | |
371 | //! Parallel prefix with auto_partitioner |
372 | /** @ingroup algorithms **/ |
373 | template<typename Range, typename Body> |
374 | void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { |
375 | internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner); |
376 | } |
377 | |
378 | //! Parallel prefix with default partitioner |
379 | /** @ingroup algorithms **/ |
380 | template<typename Range, typename Value, typename Scan, typename ReverseJoin> |
381 | Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { |
382 | internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); |
383 | tbb::parallel_scan(range,body,__TBB_DEFAULT_PARTITIONER()); |
384 | return body.result(); |
385 | } |
386 | |
387 | //! Parallel prefix with simple_partitioner |
388 | /** @ingroup algorithms **/ |
389 | template<typename Range, typename Value, typename Scan, typename ReverseJoin> |
390 | Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) { |
391 | internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); |
392 | tbb::parallel_scan(range,body,partitioner); |
393 | return body.result(); |
394 | } |
395 | |
396 | //! Parallel prefix with auto_partitioner |
397 | /** @ingroup algorithms **/ |
398 | template<typename Range, typename Value, typename Scan, typename ReverseJoin> |
399 | Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) { |
400 | internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); |
401 | tbb::parallel_scan(range,body,partitioner); |
402 | return body.result(); |
403 | } |
404 | |
405 | //@} |
406 | |
407 | } // namespace tbb |
408 | |
409 | #endif /* __TBB_parallel_scan_H */ |
410 | |
411 | |