1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_parallel_for_H |
18 | #define __TBB_parallel_for_H |
19 | |
20 | #include <new> |
21 | #include "task.h" |
22 | #include "partitioner.h" |
23 | #include "blocked_range.h" |
24 | #include "tbb_exception.h" |
25 | #include "internal/_tbb_trace_impl.h" |
26 | |
27 | namespace tbb { |
28 | |
29 | namespace interface9 { |
30 | //! @cond INTERNAL |
31 | namespace internal { |
32 | |
//! allocate right task with new parent
/** Forward declaration so that start_for::offer_work() can call it; the inline
    definition follows the start_for class.
    @param start_for_task  task that is being split; it is re-parented to a newly
                           allocated continuation task.
    @param bytes           size of the storage requested for the sibling task.
    @return raw storage, allocated as a child of the new continuation, into which
            the caller placement-constructs the sibling task. */
void* allocate_sibling(task* start_for_task, size_t bytes);
35 | |
36 | //! Task type used in parallel_for |
37 | /** @ingroup algorithms */ |
38 | template<typename Range, typename Body, typename Partitioner> |
39 | class start_for: public task { |
40 | Range my_range; |
41 | const Body my_body; |
42 | typename Partitioner::task_partition_type my_partition; |
43 | task* execute() __TBB_override; |
44 | |
45 | //! Update affinity info, if any. |
46 | void note_affinity( affinity_id id ) __TBB_override { |
47 | my_partition.note_affinity( id ); |
48 | } |
49 | |
50 | public: |
51 | //! Constructor for root task. |
52 | start_for( const Range& range, const Body& body, Partitioner& partitioner ) : |
53 | my_range(range), |
54 | my_body(body), |
55 | my_partition(partitioner) |
56 | { |
57 | tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL); |
58 | } |
59 | //! Splitting constructor used to generate children. |
60 | /** parent_ becomes left child. Newly constructed object is right child. */ |
61 | start_for( start_for& parent_, typename Partitioner::split_type& split_obj) : |
62 | my_range(parent_.my_range, split_obj), |
63 | my_body(parent_.my_body), |
64 | my_partition(parent_.my_partition, split_obj) |
65 | { |
66 | my_partition.set_affinity(*this); |
67 | tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_); |
68 | } |
69 | //! Construct right child from the given range as response to the demand. |
70 | /** parent_ remains left child. Newly constructed object is right child. */ |
71 | start_for( start_for& parent_, const Range& r, depth_t d ) : |
72 | my_range(r), |
73 | my_body(parent_.my_body), |
74 | my_partition(parent_.my_partition, split()) |
75 | { |
76 | my_partition.set_affinity(*this); |
77 | my_partition.align_depth( d ); |
78 | tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_); |
79 | } |
80 | static void run( const Range& range, const Body& body, Partitioner& partitioner ) { |
81 | if( !range.empty() ) { |
82 | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP |
83 | start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner); |
84 | #else |
85 | // Bound context prevents exceptions from body to affect nesting or sibling algorithms, |
86 | // and allows users to handle exceptions safely by wrapping parallel_for in the try-block. |
87 | task_group_context context(PARALLEL_FOR); |
88 | start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner); |
89 | #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ |
90 | // REGION BEGIN |
91 | fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context ); |
92 | task::spawn_root_and_wait(a); |
93 | fgt_end_algorithm( (void*)&context ); |
94 | // REGION END |
95 | } |
96 | } |
97 | #if __TBB_TASK_GROUP_CONTEXT |
98 | static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) { |
99 | if( !range.empty() ) { |
100 | start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner); |
101 | // REGION BEGIN |
102 | fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context ); |
103 | task::spawn_root_and_wait(a); |
104 | fgt_end_algorithm( (void*)&context ); |
105 | // END REGION |
106 | } |
107 | } |
108 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
109 | //! Run body for range, serves as callback for partitioner |
110 | void run_body( Range &r ) { |
111 | fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this ); |
112 | my_body( r ); |
113 | fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) ); |
114 | } |
115 | |
116 | //! spawn right task, serves as callback for partitioner |
117 | void offer_work(typename Partitioner::split_type& split_obj) { |
118 | spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) ); |
119 | } |
120 | //! spawn right task, serves as callback for partitioner |
121 | void offer_work(const Range& r, depth_t d = 0) { |
122 | spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) ); |
123 | } |
124 | }; |
125 | |
126 | //! allocate right task with new parent |
127 | // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined |
128 | inline void* allocate_sibling(task* start_for_task, size_t bytes) { |
129 | task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task(); |
130 | start_for_task->set_parent(parent_ptr); |
131 | parent_ptr->set_ref_count(2); |
132 | return &parent_ptr->allocate_child().allocate(bytes); |
133 | } |
134 | |
135 | //! execute task for parallel_for |
136 | template<typename Range, typename Body, typename Partitioner> |
137 | task* start_for<Range,Body,Partitioner>::execute() { |
138 | my_partition.check_being_stolen( *this ); |
139 | my_partition.execute(*this, my_range); |
140 | return NULL; |
141 | } |
142 | } // namespace internal |
143 | //! @endcond |
144 | } // namespace interfaceX |
145 | |
146 | //! @cond INTERNAL |
147 | namespace internal { |
148 | using interface9::internal::start_for; |
149 | |
//! Calls the function with values from range [begin, end) with a step provided
/** Adapts a per-index function to the Range-based body interface: for each
    position i of the blocked_range it receives, it calls my_func with
    my_begin + i*my_step. */
template<typename Function, typename Index>
class parallel_for_body : internal::no_assign {
    // Held by reference, not copied, so the function object must stay alive
    // for as long as this body is used.
    const Function &my_func;
    // Value passed to my_func for position 0 of the range.
    const Index my_begin;
    // Increment between the values passed for consecutive positions.
    const Index my_step;
public:
    //! Stores a reference to \c _func and copies of \c _begin and \c _step.
    parallel_for_body( const Function& _func, Index& _begin, Index& _step )
        : my_func(_func), my_begin(_begin), my_step(_step) {}

    //! Invokes the function once per position in \c r, in increasing order.
    void operator()( const tbb::blocked_range<Index>& r ) const {
        // A set of local variables to help the compiler with vectorization of the following loop.
        Index b = r.begin();
        Index e = r.end();
        Index ms = my_step;
        // Value for the first position of this sub-range; advanced by ms per iteration
        // instead of being recomputed with a multiplication each time.
        Index k = my_begin + b*ms;

#if __INTEL_COMPILER
#pragma ivdep
#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
#pragma vector always assert
#endif
#endif
        for ( Index i = b; i < e; ++i, k += ms ) {
            my_func( k );
        }
    }
};
178 | } // namespace internal |
179 | //! @endcond |
180 | |
181 | // Requirements on Range concept are documented in blocked_range.h |
182 | |
183 | /** \page parallel_for_body_req Requirements on parallel_for body |
184 | Class \c Body implementing the concept of parallel_for body must define: |
185 | - \code Body::Body( const Body& ); \endcode Copy constructor |
186 | - \code Body::~Body(); \endcode Destructor |
187 | - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. |
188 | **/ |
189 | |
190 | /** \name parallel_for |
191 | See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ |
192 | //@{ |
193 | |
194 | //! Parallel iteration over range with default partitioner. |
195 | /** @ingroup algorithms **/ |
196 | template<typename Range, typename Body> |
197 | void parallel_for( const Range& range, const Body& body ) { |
198 | internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); |
199 | } |
200 | |
201 | //! Parallel iteration over range with simple partitioner. |
202 | /** @ingroup algorithms **/ |
203 | template<typename Range, typename Body> |
204 | void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { |
205 | internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); |
206 | } |
207 | |
208 | //! Parallel iteration over range with auto_partitioner. |
209 | /** @ingroup algorithms **/ |
210 | template<typename Range, typename Body> |
211 | void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { |
212 | internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); |
213 | } |
214 | |
215 | //! Parallel iteration over range with static_partitioner. |
216 | /** @ingroup algorithms **/ |
217 | template<typename Range, typename Body> |
218 | void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { |
219 | internal::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); |
220 | } |
221 | |
222 | //! Parallel iteration over range with affinity_partitioner. |
223 | /** @ingroup algorithms **/ |
224 | template<typename Range, typename Body> |
225 | void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { |
226 | internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); |
227 | } |
228 | |
229 | #if __TBB_TASK_GROUP_CONTEXT |
230 | //! Parallel iteration over range with default partitioner and user-supplied context. |
231 | /** @ingroup algorithms **/ |
232 | template<typename Range, typename Body> |
233 | void parallel_for( const Range& range, const Body& body, task_group_context& context ) { |
234 | internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); |
235 | } |
236 | |
237 | //! Parallel iteration over range with simple partitioner and user-supplied context. |
238 | /** @ingroup algorithms **/ |
239 | template<typename Range, typename Body> |
240 | void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { |
241 | internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); |
242 | } |
243 | |
244 | //! Parallel iteration over range with auto_partitioner and user-supplied context. |
245 | /** @ingroup algorithms **/ |
246 | template<typename Range, typename Body> |
247 | void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { |
248 | internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); |
249 | } |
250 | |
251 | //! Parallel iteration over range with static_partitioner and user-supplied context. |
252 | /** @ingroup algorithms **/ |
253 | template<typename Range, typename Body> |
254 | void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { |
255 | internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); |
256 | } |
257 | |
258 | //! Parallel iteration over range with affinity_partitioner and user-supplied context. |
259 | /** @ingroup algorithms **/ |
260 | template<typename Range, typename Body> |
261 | void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { |
262 | internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); |
263 | } |
264 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
265 | //@} |
266 | |
267 | namespace strict_ppl { |
268 | |
269 | //@{ |
270 | //! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner |
271 | template <typename Index, typename Function, typename Partitioner> |
272 | void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { |
273 | if (step <= 0 ) |
274 | internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument |
275 | else if (last > first) { |
276 | // Above "else" avoids "potential divide by zero" warning on some platforms |
277 | Index end = (last - first - Index(1)) / step + Index(1); |
278 | tbb::blocked_range<Index> range(static_cast<Index>(0), end); |
279 | internal::parallel_for_body<Function, Index> body(f, first, step); |
280 | tbb::parallel_for(range, body, partitioner); |
281 | } |
282 | } |
283 | |
284 | //! Parallel iteration over a range of integers with a step provided and default partitioner |
285 | template <typename Index, typename Function> |
286 | void parallel_for(Index first, Index last, Index step, const Function& f) { |
287 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); |
288 | } |
289 | //! Parallel iteration over a range of integers with a step provided and simple partitioner |
290 | template <typename Index, typename Function> |
291 | void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { |
292 | parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); |
293 | } |
294 | //! Parallel iteration over a range of integers with a step provided and auto partitioner |
295 | template <typename Index, typename Function> |
296 | void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { |
297 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); |
298 | } |
299 | //! Parallel iteration over a range of integers with a step provided and static partitioner |
300 | template <typename Index, typename Function> |
301 | void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { |
302 | parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); |
303 | } |
304 | //! Parallel iteration over a range of integers with a step provided and affinity partitioner |
305 | template <typename Index, typename Function> |
306 | void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { |
307 | parallel_for_impl(first, last, step, f, partitioner); |
308 | } |
309 | |
310 | //! Parallel iteration over a range of integers with a default step value and default partitioner |
311 | template <typename Index, typename Function> |
312 | void parallel_for(Index first, Index last, const Function& f) { |
313 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); |
314 | } |
315 | //! Parallel iteration over a range of integers with a default step value and simple partitioner |
316 | template <typename Index, typename Function> |
317 | void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { |
318 | parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); |
319 | } |
320 | //! Parallel iteration over a range of integers with a default step value and auto partitioner |
321 | template <typename Index, typename Function> |
322 | void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { |
323 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); |
324 | } |
325 | //! Parallel iteration over a range of integers with a default step value and static partitioner |
326 | template <typename Index, typename Function> |
327 | void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { |
328 | parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); |
329 | } |
330 | //! Parallel iteration over a range of integers with a default step value and affinity partitioner |
331 | template <typename Index, typename Function> |
332 | void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { |
333 | parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); |
334 | } |
335 | |
336 | #if __TBB_TASK_GROUP_CONTEXT |
337 | //! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner |
338 | template <typename Index, typename Function, typename Partitioner> |
339 | void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) { |
340 | if (step <= 0 ) |
341 | internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument |
342 | else if (last > first) { |
343 | // Above "else" avoids "potential divide by zero" warning on some platforms |
344 | Index end = (last - first - Index(1)) / step + Index(1); |
345 | tbb::blocked_range<Index> range(static_cast<Index>(0), end); |
346 | internal::parallel_for_body<Function, Index> body(f, first, step); |
347 | tbb::parallel_for(range, body, partitioner, context); |
348 | } |
349 | } |
350 | |
351 | //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner |
352 | template <typename Index, typename Function> |
353 | void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) { |
354 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); |
355 | } |
356 | //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner |
357 | template <typename Index, typename Function> |
358 | void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) { |
359 | parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); |
360 | } |
361 | //! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner |
362 | template <typename Index, typename Function> |
363 | void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) { |
364 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); |
365 | } |
366 | //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner |
367 | template <typename Index, typename Function> |
368 | void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) { |
369 | parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); |
370 | } |
371 | //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner |
372 | template <typename Index, typename Function> |
373 | void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) { |
374 | parallel_for_impl(first, last, step, f, partitioner, context); |
375 | } |
376 | |
377 | |
378 | //! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner |
379 | template <typename Index, typename Function> |
380 | void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) { |
381 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); |
382 | } |
383 | //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner |
384 | template <typename Index, typename Function> |
385 | void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) { |
386 | parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); |
387 | } |
388 | //! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner |
389 | template <typename Index, typename Function> |
390 | void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) { |
391 | parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); |
392 | } |
393 | //! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner |
394 | template <typename Index, typename Function> |
395 | void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) { |
396 | parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); |
397 | } |
398 | //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner |
399 | template <typename Index, typename Function> |
400 | void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) { |
401 | parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); |
402 | } |
403 | |
404 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
405 | //@} |
406 | |
407 | } // namespace strict_ppl |
408 | |
409 | using strict_ppl::parallel_for; |
410 | |
411 | } // namespace tbb |
412 | |
413 | #if TBB_PREVIEW_SERIAL_SUBSET |
414 | #define __TBB_NORMAL_EXECUTION |
415 | #include "../serial/tbb/parallel_for.h" |
416 | #undef __TBB_NORMAL_EXECUTION |
417 | #endif |
418 | |
419 | #endif /* __TBB_parallel_for_H */ |
420 | |