1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
17#ifndef __TBB_parallel_for_H
18#define __TBB_parallel_for_H
19
20#include <new>
21#include "task.h"
22#include "partitioner.h"
23#include "blocked_range.h"
24#include "tbb_exception.h"
25#include "internal/_tbb_trace_impl.h"
26
27namespace tbb {
28
29namespace interface9 {
30//! @cond INTERNAL
31namespace internal {
32
33 //! allocate right task with new parent
34 void* allocate_sibling(task* start_for_task, size_t bytes);
35
36 //! Task type used in parallel_for
37 /** @ingroup algorithms */
38 template<typename Range, typename Body, typename Partitioner>
39 class start_for: public task {
40 Range my_range;
41 const Body my_body;
42 typename Partitioner::task_partition_type my_partition;
43 task* execute() __TBB_override;
44
45 //! Update affinity info, if any.
46 void note_affinity( affinity_id id ) __TBB_override {
47 my_partition.note_affinity( id );
48 }
49
50 public:
51 //! Constructor for root task.
52 start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
53 my_range(range),
54 my_body(body),
55 my_partition(partitioner)
56 {
57 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
58 }
59 //! Splitting constructor used to generate children.
60 /** parent_ becomes left child. Newly constructed object is right child. */
61 start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
62 my_range(parent_.my_range, split_obj),
63 my_body(parent_.my_body),
64 my_partition(parent_.my_partition, split_obj)
65 {
66 my_partition.set_affinity(*this);
67 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
68 }
69 //! Construct right child from the given range as response to the demand.
70 /** parent_ remains left child. Newly constructed object is right child. */
71 start_for( start_for& parent_, const Range& r, depth_t d ) :
72 my_range(r),
73 my_body(parent_.my_body),
74 my_partition(parent_.my_partition, split())
75 {
76 my_partition.set_affinity(*this);
77 my_partition.align_depth( d );
78 tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
79 }
80 static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
81 if( !range.empty() ) {
82#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
83 start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
84#else
85 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
86 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
87 task_group_context context(PARALLEL_FOR);
88 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
89#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
90 // REGION BEGIN
91 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
92 task::spawn_root_and_wait(a);
93 fgt_end_algorithm( (void*)&context );
94 // REGION END
95 }
96 }
97#if __TBB_TASK_GROUP_CONTEXT
98 static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
99 if( !range.empty() ) {
100 start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
101 // REGION BEGIN
102 fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
103 task::spawn_root_and_wait(a);
104 fgt_end_algorithm( (void*)&context );
105 // END REGION
106 }
107 }
108#endif /* __TBB_TASK_GROUP_CONTEXT */
109 //! Run body for range, serves as callback for partitioner
110 void run_body( Range &r ) {
111 fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
112 my_body( r );
113 fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
114 }
115
116 //! spawn right task, serves as callback for partitioner
117 void offer_work(typename Partitioner::split_type& split_obj) {
118 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
119 }
120 //! spawn right task, serves as callback for partitioner
121 void offer_work(const Range& r, depth_t d = 0) {
122 spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
123 }
124 };
125
126 //! allocate right task with new parent
127 // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
128 inline void* allocate_sibling(task* start_for_task, size_t bytes) {
129 task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
130 start_for_task->set_parent(parent_ptr);
131 parent_ptr->set_ref_count(2);
132 return &parent_ptr->allocate_child().allocate(bytes);
133 }
134
135 //! execute task for parallel_for
136 template<typename Range, typename Body, typename Partitioner>
137 task* start_for<Range,Body,Partitioner>::execute() {
138 my_partition.check_being_stolen( *this );
139 my_partition.execute(*this, my_range);
140 return NULL;
141 }
142} // namespace internal
143//! @endcond
144} // namespace interfaceX
145
146//! @cond INTERNAL
147namespace internal {
148 using interface9::internal::start_for;
149
150 //! Calls the function with values from range [begin, end) with a step provided
151 template<typename Function, typename Index>
152 class parallel_for_body : internal::no_assign {
153 const Function &my_func;
154 const Index my_begin;
155 const Index my_step;
156 public:
157 parallel_for_body( const Function& _func, Index& _begin, Index& _step )
158 : my_func(_func), my_begin(_begin), my_step(_step) {}
159
160 void operator()( const tbb::blocked_range<Index>& r ) const {
161 // A set of local variables to help the compiler with vectorization of the following loop.
162 Index b = r.begin();
163 Index e = r.end();
164 Index ms = my_step;
165 Index k = my_begin + b*ms;
166
167#if __INTEL_COMPILER
168#pragma ivdep
169#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
170#pragma vector always assert
171#endif
172#endif
173 for ( Index i = b; i < e; ++i, k += ms ) {
174 my_func( k );
175 }
176 }
177 };
178} // namespace internal
179//! @endcond
180
181// Requirements on Range concept are documented in blocked_range.h
182
183/** \page parallel_for_body_req Requirements on parallel_for body
184 Class \c Body implementing the concept of parallel_for body must define:
185 - \code Body::Body( const Body& ); \endcode Copy constructor
186 - \code Body::~Body(); \endcode Destructor
187 - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r.
188**/
189
190/** \name parallel_for
191 See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
192//@{
193
194//! Parallel iteration over range with default partitioner.
195/** @ingroup algorithms **/
196template<typename Range, typename Body>
197void parallel_for( const Range& range, const Body& body ) {
198 internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
199}
200
201//! Parallel iteration over range with simple partitioner.
202/** @ingroup algorithms **/
203template<typename Range, typename Body>
204void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
205 internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
206}
207
208//! Parallel iteration over range with auto_partitioner.
209/** @ingroup algorithms **/
210template<typename Range, typename Body>
211void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
212 internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
213}
214
215//! Parallel iteration over range with static_partitioner.
216/** @ingroup algorithms **/
217template<typename Range, typename Body>
218void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
219 internal::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
220}
221
222//! Parallel iteration over range with affinity_partitioner.
223/** @ingroup algorithms **/
224template<typename Range, typename Body>
225void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
226 internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
227}
228
229#if __TBB_TASK_GROUP_CONTEXT
230//! Parallel iteration over range with default partitioner and user-supplied context.
231/** @ingroup algorithms **/
232template<typename Range, typename Body>
233void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
234 internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
235}
236
237//! Parallel iteration over range with simple partitioner and user-supplied context.
238/** @ingroup algorithms **/
239template<typename Range, typename Body>
240void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
241 internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
242}
243
244//! Parallel iteration over range with auto_partitioner and user-supplied context.
245/** @ingroup algorithms **/
246template<typename Range, typename Body>
247void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
248 internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
249}
250
251//! Parallel iteration over range with static_partitioner and user-supplied context.
252/** @ingroup algorithms **/
253template<typename Range, typename Body>
254void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
255 internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
256}
257
258//! Parallel iteration over range with affinity_partitioner and user-supplied context.
259/** @ingroup algorithms **/
260template<typename Range, typename Body>
261void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
262 internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
263}
264#endif /* __TBB_TASK_GROUP_CONTEXT */
265//@}
266
267namespace strict_ppl {
268
269//@{
270//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner
271template <typename Index, typename Function, typename Partitioner>
272void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
273 if (step <= 0 )
274 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
275 else if (last > first) {
276 // Above "else" avoids "potential divide by zero" warning on some platforms
277 Index end = (last - first - Index(1)) / step + Index(1);
278 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
279 internal::parallel_for_body<Function, Index> body(f, first, step);
280 tbb::parallel_for(range, body, partitioner);
281 }
282}
283
284//! Parallel iteration over a range of integers with a step provided and default partitioner
285template <typename Index, typename Function>
286void parallel_for(Index first, Index last, Index step, const Function& f) {
287 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
288}
289//! Parallel iteration over a range of integers with a step provided and simple partitioner
290template <typename Index, typename Function>
291void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
292 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
293}
294//! Parallel iteration over a range of integers with a step provided and auto partitioner
295template <typename Index, typename Function>
296void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
297 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
298}
299//! Parallel iteration over a range of integers with a step provided and static partitioner
300template <typename Index, typename Function>
301void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
302 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
303}
304//! Parallel iteration over a range of integers with a step provided and affinity partitioner
305template <typename Index, typename Function>
306void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
307 parallel_for_impl(first, last, step, f, partitioner);
308}
309
310//! Parallel iteration over a range of integers with a default step value and default partitioner
311template <typename Index, typename Function>
312void parallel_for(Index first, Index last, const Function& f) {
313 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
314}
315//! Parallel iteration over a range of integers with a default step value and simple partitioner
316template <typename Index, typename Function>
317void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
318 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
319}
320//! Parallel iteration over a range of integers with a default step value and auto partitioner
321template <typename Index, typename Function>
322void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
323 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
324}
325//! Parallel iteration over a range of integers with a default step value and static partitioner
326template <typename Index, typename Function>
327void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
328 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
329}
330//! Parallel iteration over a range of integers with a default step value and affinity partitioner
331template <typename Index, typename Function>
332void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
333 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
334}
335
336#if __TBB_TASK_GROUP_CONTEXT
337//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner
338template <typename Index, typename Function, typename Partitioner>
339void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
340 if (step <= 0 )
341 internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
342 else if (last > first) {
343 // Above "else" avoids "potential divide by zero" warning on some platforms
344 Index end = (last - first - Index(1)) / step + Index(1);
345 tbb::blocked_range<Index> range(static_cast<Index>(0), end);
346 internal::parallel_for_body<Function, Index> body(f, first, step);
347 tbb::parallel_for(range, body, partitioner, context);
348 }
349}
350
351//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner
352template <typename Index, typename Function>
353void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
354 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
355}
356//! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
357 template <typename Index, typename Function>
358void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
359 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
360}
361//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
362 template <typename Index, typename Function>
363void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
364 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
365}
366//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner
367template <typename Index, typename Function>
368void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
369 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
370}
371//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner
372 template <typename Index, typename Function>
373void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
374 parallel_for_impl(first, last, step, f, partitioner, context);
375}
376
377
378//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner
379template <typename Index, typename Function>
380void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
381 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
382}
383//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
384 template <typename Index, typename Function>
385void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
386 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
387}
388//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
389 template <typename Index, typename Function>
390void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
391 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
392}
393//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner
394template <typename Index, typename Function>
395void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
396 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
397}
398//! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner
399 template <typename Index, typename Function>
400void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
401 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
402}
403
404#endif /* __TBB_TASK_GROUP_CONTEXT */
405//@}
406
407} // namespace strict_ppl
408
409using strict_ppl::parallel_for;
410
411} // namespace tbb
412
413#if TBB_PREVIEW_SERIAL_SUBSET
414#define __TBB_NORMAL_EXECUTION
415#include "../serial/tbb/parallel_for.h"
416#undef __TBB_NORMAL_EXECUTION
417#endif
418
419#endif /* __TBB_parallel_for_H */
420