1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #ifndef __TBB_partitioner_H |
18 | #define __TBB_partitioner_H |
19 | |
20 | #ifndef __TBB_INITIAL_CHUNKS |
21 | // initial task divisions per thread |
22 | #define __TBB_INITIAL_CHUNKS 2 |
23 | #endif |
24 | #ifndef __TBB_RANGE_POOL_CAPACITY |
25 | // maximum number of elements in range pool |
26 | #define __TBB_RANGE_POOL_CAPACITY 8 |
27 | #endif |
28 | #ifndef __TBB_INIT_DEPTH |
29 | // initial value for depth of range pool |
30 | #define __TBB_INIT_DEPTH 5 |
31 | #endif |
32 | #ifndef __TBB_DEMAND_DEPTH_ADD |
// when imbalance is detected, the range is allowed to split this many more times
34 | #define __TBB_DEMAND_DEPTH_ADD 1 |
35 | #endif |
36 | #ifndef __TBB_STATIC_THRESHOLD |
37 | // necessary number of clocks for the work to be distributed among all tasks |
38 | #define __TBB_STATIC_THRESHOLD 40000 |
39 | #endif |
40 | #if __TBB_DEFINE_MIC |
41 | #define __TBB_NONUNIFORM_TASK_CREATION 1 |
42 | #ifdef __TBB_time_stamp |
43 | #define __TBB_USE_MACHINE_TIME_STAMPS 1 |
44 | #define __TBB_task_duration() __TBB_STATIC_THRESHOLD |
#endif // __TBB_time_stamp
46 | #endif // __TBB_DEFINE_MIC |
47 | |
48 | #include "task.h" |
49 | #include "task_arena.h" |
50 | #include "aligned_space.h" |
51 | #include "atomic.h" |
52 | #include "internal/_template_helpers.h" |
53 | |
54 | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) |
55 | // Workaround for overzealous compiler warnings |
56 | #pragma warning (push) |
57 | #pragma warning (disable: 4244) |
58 | #endif |
59 | |
60 | namespace tbb { |
61 | |
62 | class auto_partitioner; |
63 | class simple_partitioner; |
64 | class static_partitioner; |
65 | class affinity_partitioner; |
66 | |
67 | namespace interface9 { |
68 | namespace internal { |
69 | class affinity_partition_type; |
70 | } |
71 | } |
72 | |
73 | namespace internal { //< @cond INTERNAL |
74 | size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor(); |
75 | |
76 | //! Defines entry point for affinity partitioner into tbb run-time library. |
77 | class affinity_partitioner_base_v3: no_copy { |
78 | friend class tbb::affinity_partitioner; |
79 | friend class tbb::interface9::internal::affinity_partition_type; |
80 | //! Array that remembers affinities of tree positions to affinity_id. |
81 | /** NULL if my_size==0. */ |
82 | affinity_id* my_array; |
83 | //! Number of elements in my_array. |
84 | size_t my_size; |
85 | //! Zeros the fields. |
86 | affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {} |
87 | //! Deallocates my_array. |
88 | ~affinity_partitioner_base_v3() {resize(0);} |
89 | //! Resize my_array. |
90 | /** Retains values if resulting size is the same. */ |
91 | void __TBB_EXPORTED_METHOD resize( unsigned factor ); |
92 | }; |
93 | |
94 | //! Provides backward-compatible methods for partition objects without affinity. |
95 | class partition_type_base { |
96 | public: |
97 | void set_affinity( task & ) {} |
98 | void note_affinity( task::affinity_id ) {} |
99 | task* continue_after_execute_range() {return NULL;} |
100 | bool decide_whether_to_delay() {return false;} |
101 | void spawn_or_delay( bool, task& b ) { |
102 | task::spawn(b); |
103 | } |
104 | }; |
105 | |
106 | template<typename Range, typename Body, typename Partitioner> class start_scan; |
107 | |
108 | } //< namespace internal @endcond |
109 | |
110 | namespace serial { |
111 | namespace interface9 { |
112 | template<typename Range, typename Body, typename Partitioner> class start_for; |
113 | } |
114 | } |
115 | |
116 | namespace interface9 { |
117 | //! @cond INTERNAL |
118 | namespace internal { |
119 | using namespace tbb::internal; |
120 | template<typename Range, typename Body, typename Partitioner> class start_for; |
121 | template<typename Range, typename Body, typename Partitioner> class start_reduce; |
122 | template<typename Range, typename Body, typename Partitioner> class start_deterministic_reduce; |
123 | |
124 | //! Join task node that contains shared flag for stealing feedback |
125 | class flag_task: public task { |
126 | public: |
127 | tbb::atomic<bool> my_child_stolen; |
128 | flag_task() { my_child_stolen = false; } |
129 | task* execute() __TBB_override { return NULL; } |
130 | static void mark_task_stolen(task &t) { |
131 | tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen; |
132 | #if TBB_USE_THREADING_TOOLS |
        // Threading tools respect the lock prefix but report a false-positive data race on a plain store
134 | flag.fetch_and_store<release>(true); |
135 | #else |
136 | flag = true; |
137 | #endif //TBB_USE_THREADING_TOOLS |
138 | } |
139 | static bool is_peer_stolen(task &t) { |
140 | return static_cast<flag_task*>(t.parent())->my_child_stolen; |
141 | } |
142 | }; |
143 | |
//! Depth is the relative depth of recursive division inside a range pool. Relative depth allows
//! an effectively infinite absolute depth of recursion for heavily unbalanced workloads whose
//! range is represented by a number that cannot fit into a machine word.
147 | typedef unsigned char depth_t; |
148 | |
149 | //! Range pool stores ranges of type T in a circular buffer with MaxCapacity |
150 | template <typename T, depth_t MaxCapacity> |
151 | class range_vector { |
152 | depth_t my_head; |
153 | depth_t my_tail; |
154 | depth_t my_size; |
155 | depth_t my_depth[MaxCapacity]; // relative depths of stored ranges |
156 | tbb::aligned_space<T, MaxCapacity> my_pool; |
157 | |
158 | public: |
159 | //! initialize via first range in pool |
160 | range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { |
161 | my_depth[0] = 0; |
162 | new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? |
163 | } |
164 | ~range_vector() { |
165 | while( !empty() ) pop_back(); |
166 | } |
167 | bool empty() const { return my_size == 0; } |
168 | depth_t size() const { return my_size; } |
    //! Populates the range pool by splitting ranges, up to max depth and while divisible
    //! max_depth starts from 0, e.g. a value of 2 yields 3 ranges in the pool, including two 1/4 pieces
171 | void split_to_fill(depth_t max_depth) { |
172 | while( my_size < MaxCapacity && is_divisible(max_depth) ) { |
173 | depth_t prev = my_head; |
174 | my_head = (my_head + 1) % MaxCapacity; |
175 | new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? |
176 | my_pool.begin()[prev].~T(); // instead of assignment |
177 | new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split |
178 | my_depth[my_head] = ++my_depth[prev]; |
179 | my_size++; |
180 | } |
181 | } |
182 | void pop_back() { |
183 | __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size" ); |
184 | my_pool.begin()[my_head].~T(); |
185 | my_size--; |
186 | my_head = (my_head + MaxCapacity - 1) % MaxCapacity; |
187 | } |
188 | void pop_front() { |
189 | __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size" ); |
190 | my_pool.begin()[my_tail].~T(); |
191 | my_size--; |
192 | my_tail = (my_tail + 1) % MaxCapacity; |
193 | } |
194 | T& back() { |
195 | __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size" ); |
196 | return my_pool.begin()[my_head]; |
197 | } |
198 | T& front() { |
199 | __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size" ); |
200 | return my_pool.begin()[my_tail]; |
201 | } |
    //! similar to front(): returns the depth of the first range in the pool
203 | depth_t front_depth() { |
204 | __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size" ); |
205 | return my_depth[my_tail]; |
206 | } |
207 | depth_t back_depth() { |
208 | __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size" ); |
209 | return my_depth[my_head]; |
210 | } |
211 | bool is_divisible(depth_t max_depth) { |
212 | return back_depth() < max_depth && back().is_divisible(); |
213 | } |
214 | }; |
215 | |
216 | //! Provides default methods for partition objects and common algorithm blocks. |
217 | template <typename Partition> |
218 | struct partition_type_base { |
219 | typedef split split_type; |
220 | // decision makers |
221 | void set_affinity( task & ) {} |
222 | void note_affinity( task::affinity_id ) {} |
223 | bool check_being_stolen(task &) { return false; } // part of old should_execute_range() |
224 | bool check_for_demand(task &) { return false; } |
225 | bool is_divisible() { return true; } // part of old should_execute_range() |
226 | depth_t max_depth() { return 0; } |
227 | void align_depth(depth_t) { } |
228 | template <typename Range> split_type get_split() { return split(); } |
229 | Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper |
230 | |
231 | template<typename StartType, typename Range> |
232 | void work_balance(StartType &start, Range &range) { |
        start.run_body( range ); // the simple partitioner always takes this path
234 | } |
235 | |
236 | template<typename StartType, typename Range> |
237 | void execute(StartType &start, Range &range) { |
238 | // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): |
239 | // [If this task is stolen, adjust depth and divisions if necessary, set flag]. |
240 | // If range is divisible { |
241 | // Spread the work while [initial divisions left]; |
242 | // Create trap task [if necessary]; |
243 | // } |
244 | // If not divisible or [max depth is reached], execute, else do the range pool part |
245 | if ( range.is_divisible() ) { |
246 | if ( self().is_divisible() ) { |
                do { // split while both the range and the partitioner are divisible
248 | typename Partition::split_type split_obj = self().template get_split<Range>(); |
249 | start.offer_work( split_obj ); |
250 | } while ( range.is_divisible() && self().is_divisible() ); |
251 | } |
252 | } |
253 | self().work_balance(start, range); |
254 | } |
255 | }; |
256 | |
257 | //! Provides default splitting strategy for partition objects. |
258 | template <typename Partition> |
259 | struct adaptive_mode : partition_type_base<Partition> { |
260 | typedef Partition my_partition; |
261 | size_t my_divisor; |
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task that has only one index must produce the right split without a reserved index, so that
    // the index is not overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
266 | static const unsigned factor = 1; |
267 | adaptive_mode() : my_divisor(tbb::internal::get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} |
268 | adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} |
269 | /*! Override do_split methods in order to specify splitting strategy */ |
270 | size_t do_split(adaptive_mode &src, split) { |
271 | return src.my_divisor /= 2u; |
272 | } |
273 | }; |
274 | |
275 | //! A helper class to create a proportional_split object for a given type of Range. |
/** If the Range has a static boolean constant 'is_splittable_in_proportion' set to 'true',
    the created object splits a provided value in an implementation-defined proportion;
    otherwise it represents an equal-size split. */
279 | // TODO: check if this helper can be a nested class of proportional_mode. |
280 | template <typename Range, typename = void> |
281 | struct proportion_helper { |
282 | static proportional_split get_split(size_t) { return proportional_split(1,1); } |
283 | }; |
284 | template <typename Range> |
285 | struct proportion_helper<Range, typename enable_if<Range::is_splittable_in_proportion, void>::type> { |
286 | static proportional_split get_split(size_t n) { |
287 | #if __TBB_NONUNIFORM_TASK_CREATION |
288 | size_t right = (n + 2) / 3; |
289 | #else |
290 | size_t right = n / 2; |
291 | #endif |
292 | size_t left = n - right; |
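        // E.g., n == 8: with __TBB_NONUNIFORM_TASK_CREATION, right == (8+2)/3 == 3
        // and left == 5; otherwise right == 8/2 == 4 and left == 4.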
293 | return proportional_split(left, right); |
294 | } |
295 | }; |
296 | |
297 | //! Provides proportional splitting strategy for partition objects |
298 | template <typename Partition> |
299 | struct proportional_mode : adaptive_mode<Partition> { |
300 | typedef Partition my_partition; |
    using partition_type_base<Partition>::self; // CRTP helper giving access to the derived class
302 | |
303 | proportional_mode() : adaptive_mode<Partition>() {} |
304 | proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} |
305 | proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } |
306 | size_t do_split(proportional_mode &src, const proportional_split& split_obj) { |
307 | #if __TBB_ENABLE_RANGE_FEEDBACK |
308 | size_t portion = size_t(float(src.my_divisor) * float(split_obj.right()) |
309 | / float(split_obj.left() + split_obj.right()) + 0.5f); |
310 | #else |
311 | size_t portion = split_obj.right() * my_partition::factor; |
312 | #endif |
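        // Round portion to a multiple of factor; factor is a power of two, so
        // (0ul - factor) is a mask that clears the low-order bits. E.g., with
        // factor == 16, portion == 23 becomes (23 + 8) & ~15u == 16.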
313 | portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); |
314 | #if __TBB_ENABLE_RANGE_FEEDBACK |
315 | /** Corner case handling */ |
316 | if (!portion) |
317 | portion = my_partition::factor; |
318 | else if (portion == src.my_divisor) |
319 | portion = src.my_divisor - my_partition::factor; |
320 | #endif |
321 | src.my_divisor -= portion; |
322 | return portion; |
323 | } |
324 | bool is_divisible() { // part of old should_execute_range() |
325 | return self().my_divisor > my_partition::factor; |
326 | } |
327 | template <typename Range> |
328 | proportional_split get_split() { |
329 | // Create a proportion for the number of threads expected to handle "this" subrange |
330 | return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); |
331 | } |
332 | }; |
333 | |
334 | static size_t get_initial_partition_head() { |
335 | int current_index = tbb::this_task_arena::current_thread_index(); |
336 | if (current_index == tbb::task_arena::not_initialized) |
337 | current_index = 0; |
338 | return size_t(current_index); |
339 | } |
340 | |
341 | //! Provides default linear indexing of partitioner's sequence |
342 | template <typename Partition> |
343 | struct linear_affinity_mode : proportional_mode<Partition> { |
344 | size_t my_head; |
345 | size_t my_max_affinity; |
346 | using proportional_mode<Partition>::self; |
347 | linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), |
348 | my_max_affinity(self().my_divisor) {} |
349 | linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) |
350 | , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} |
351 | linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) |
352 | , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} |
353 | void set_affinity( task &t ) { |
354 | if( self().my_divisor ) |
355 | t.set_affinity( affinity_id(my_head) + 1 ); |
356 | } |
357 | }; |
358 | |
/*! Determines the work-balance phase, implementing splitting & stealing actions */
360 | template<class Mode> |
361 | struct dynamic_grainsize_mode : Mode { |
362 | using Mode::self; |
363 | #ifdef __TBB_USE_MACHINE_TIME_STAMPS |
364 | tbb::internal::machine_tsc_t my_dst_tsc; |
365 | #endif |
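    //! Stages of the demand-check delay: 'begin' before the first check, 'run' while
    //! task duration is measured via time stamps (if available), 'pass' once
    //! stealing-demand checks are active; see check_for_demand().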
366 | enum { |
367 | begin = 0, |
368 | run, |
369 | pass |
370 | } my_delay; |
371 | depth_t my_max_depth; |
372 | static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; |
373 | dynamic_grainsize_mode(): Mode() |
374 | #ifdef __TBB_USE_MACHINE_TIME_STAMPS |
375 | , my_dst_tsc(0) |
376 | #endif |
377 | , my_delay(begin) |
378 | , my_max_depth(__TBB_INIT_DEPTH) {} |
379 | dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) |
380 | : Mode(p, split()) |
381 | #ifdef __TBB_USE_MACHINE_TIME_STAMPS |
382 | , my_dst_tsc(0) |
383 | #endif |
384 | , my_delay(pass) |
385 | , my_max_depth(p.my_max_depth) {} |
386 | dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) |
387 | : Mode(p, split_obj) |
388 | #ifdef __TBB_USE_MACHINE_TIME_STAMPS |
389 | , my_dst_tsc(0) |
390 | #endif |
391 | , my_delay(begin) |
392 | , my_max_depth(p.my_max_depth) {} |
393 | bool check_being_stolen(task &t) { // part of old should_execute_range() |
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not one of the top P tasks of the binary tree
395 | self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? |
396 | if( t.is_stolen_task() && t.parent()->ref_count() >= 2 ) { // runs concurrently with the left task |
397 | #if __TBB_USE_OPTIONAL_RTTI |
398 | // RTTI is available, check whether the cast is valid |
399 | __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0); |
400 | // correctness of the cast relies on avoiding the root task for which: |
401 | // - initial value of my_divisor != 0 (protected by separate assertion) |
402 | // - is_stolen_task() always returns false for the root task. |
403 | #endif |
404 | flag_task::mark_task_stolen(t); |
405 | if( !my_max_depth ) my_max_depth++; |
406 | my_max_depth += __TBB_DEMAND_DEPTH_ADD; |
407 | return true; |
408 | } |
409 | } |
410 | return false; |
411 | } |
412 | depth_t max_depth() { return my_max_depth; } |
413 | void align_depth(depth_t base) { |
414 | __TBB_ASSERT(base <= my_max_depth, 0); |
415 | my_max_depth -= base; |
416 | } |
417 | template<typename StartType, typename Range> |
418 | void work_balance(StartType &start, Range &range) { |
419 | if( !range.is_divisible() || !self().max_depth() ) { |
            start.run_body( range ); // the simple partitioner always takes this path
421 | } |
422 | else { // do range pool |
423 | internal::range_vector<Range, range_pool_size> range_pool(range); |
424 | do { |
425 | range_pool.split_to_fill(self().max_depth()); // fill range pool |
426 | if( self().check_for_demand( start ) ) { |
427 | if( range_pool.size() > 1 ) { |
428 | start.offer_work( range_pool.front(), range_pool.front_depth() ); |
429 | range_pool.pop_front(); |
430 | continue; |
431 | } |
432 | if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task |
433 | continue; // note: next split_to_fill() should split range at least once |
434 | } |
435 | start.run_body( range_pool.back() ); |
436 | range_pool.pop_back(); |
437 | } while( !range_pool.empty() && !start.is_cancelled() ); |
438 | } |
439 | } |
440 | bool check_for_demand( task &t ) { |
441 | if( pass == my_delay ) { |
            if( self().my_divisor > 1 ) // produce affinitized tasks while slots remain in the array
443 | return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more |
444 | else if( self().my_divisor && my_max_depth ) { // make balancing task |
445 | self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() |
446 | return true; |
447 | } |
448 | else if( flag_task::is_peer_stolen(t) ) { |
449 | my_max_depth += __TBB_DEMAND_DEPTH_ADD; |
450 | return true; |
451 | } |
452 | } else if( begin == my_delay ) { |
453 | #ifndef __TBB_USE_MACHINE_TIME_STAMPS |
454 | my_delay = pass; |
455 | #else |
456 | my_dst_tsc = __TBB_time_stamp() + __TBB_task_duration(); |
457 | my_delay = run; |
458 | } else if( run == my_delay ) { |
459 | if( __TBB_time_stamp() < my_dst_tsc ) { |
460 | __TBB_ASSERT(my_max_depth > 0, NULL); |
                my_max_depth--; // increase granularity since tasks seem to have too little work
462 | return false; |
463 | } |
464 | my_delay = pass; |
465 | return true; |
466 | #endif // __TBB_USE_MACHINE_TIME_STAMPS |
467 | } |
468 | return false; |
469 | } |
470 | }; |
471 | |
472 | class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { |
473 | public: |
474 | auto_partition_type( const auto_partitioner& ) |
475 | : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { |
476 | my_divisor *= __TBB_INITIAL_CHUNKS; |
477 | } |
478 | auto_partition_type( auto_partition_type& src, split) |
479 | : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} |
480 | bool is_divisible() { // part of old should_execute_range() |
481 | if( my_divisor > 1 ) return true; |
482 | if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead |
483 | // keep same fragmentation while splitting for the local task pool |
484 | my_max_depth--; |
485 | my_divisor = 0; // decrease max_depth once per task |
486 | return true; |
487 | } else return false; |
488 | } |
489 | bool check_for_demand(task &t) { |
490 | if( flag_task::is_peer_stolen(t) ) { |
491 | my_max_depth += __TBB_DEMAND_DEPTH_ADD; |
492 | return true; |
493 | } else return false; |
494 | } |
495 | }; |
496 | |
497 | class simple_partition_type: public partition_type_base<simple_partition_type> { |
498 | public: |
499 | simple_partition_type( const simple_partitioner& ) {} |
500 | simple_partition_type( const simple_partition_type&, split ) {} |
501 | //! simplified algorithm |
502 | template<typename StartType, typename Range> |
503 | void execute(StartType &start, Range &range) { |
504 | split_type split_obj = split(); // start.offer_work accepts split_type as reference |
505 | while( range.is_divisible() ) |
506 | start.offer_work( split_obj ); |
507 | start.run_body( range ); |
508 | } |
509 | }; |
510 | |
511 | class static_partition_type : public linear_affinity_mode<static_partition_type> { |
512 | public: |
513 | typedef proportional_split split_type; |
514 | static_partition_type( const static_partitioner& ) |
515 | : linear_affinity_mode<static_partition_type>() {} |
516 | static_partition_type( static_partition_type& p, split ) |
517 | : linear_affinity_mode<static_partition_type>(p, split()) {} |
518 | static_partition_type( static_partition_type& p, const proportional_split& split_obj ) |
519 | : linear_affinity_mode<static_partition_type>(p, split_obj) {} |
520 | }; |
521 | |
522 | class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { |
523 | static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units |
524 | tbb::internal::affinity_id* my_array; |
525 | public: |
526 | static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task |
527 | typedef proportional_split split_type; |
528 | affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) |
529 | : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { |
530 | __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); |
531 | ap.resize(factor); |
532 | my_array = ap.my_array; |
533 | my_max_depth = factor_power + 1; |
534 | __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); |
535 | } |
536 | affinity_partition_type(affinity_partition_type& p, split) |
537 | : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) |
538 | , my_array(p.my_array) {} |
539 | affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) |
540 | : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) |
541 | , my_array(p.my_array) {} |
542 | void set_affinity( task &t ) { |
543 | if( my_divisor ) { |
544 | if( !my_array[my_head] ) |
                // TODO: consider new ideas with my_array for both affinity and static partitioners, then code reuse
546 | t.set_affinity( affinity_id(my_head / factor + 1) ); |
547 | else |
548 | t.set_affinity( my_array[my_head] ); |
549 | } |
550 | } |
551 | void note_affinity( task::affinity_id id ) { |
552 | if( my_divisor ) |
553 | my_array[my_head] = id; |
554 | } |
555 | }; |
556 | |
557 | //! Backward-compatible partition for auto and affinity partition objects. |
558 | class old_auto_partition_type: public tbb::internal::partition_type_base { |
559 | size_t num_chunks; |
560 | static const size_t VICTIM_CHUNKS = 4; |
561 | public: |
562 | bool should_execute_range(const task &t) { |
563 | if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() ) |
564 | num_chunks = VICTIM_CHUNKS; |
565 | return num_chunks==1; |
566 | } |
567 | old_auto_partition_type( const auto_partitioner& ) |
568 | : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} |
569 | old_auto_partition_type( const affinity_partitioner& ) |
570 | : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} |
571 | old_auto_partition_type( old_auto_partition_type& pt, split ) { |
572 | num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; |
573 | } |
574 | }; |
575 | |
} // namespace interface9::internal
//! @endcond
} // namespace interface9
579 | |
580 | //! A simple partitioner |
/** Divides the range recursively until it is no longer divisible.
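    A usage sketch (illustrative; n, grainsize and body are caller-supplied):
    @code
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n, grainsize),
                       body, tbb::simple_partitioner() );
    @endcode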
582 | @ingroup algorithms */ |
583 | class simple_partitioner { |
584 | public: |
585 | simple_partitioner() {} |
586 | private: |
587 | template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for; |
588 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for; |
589 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce; |
590 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_deterministic_reduce; |
591 | template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan; |
592 | // backward compatibility |
593 | class partition_type: public internal::partition_type_base { |
594 | public: |
595 | bool should_execute_range(const task& ) {return false;} |
596 | partition_type( const simple_partitioner& ) {} |
597 | partition_type( const partition_type&, split ) {} |
598 | }; |
599 | // new implementation just extends existing interface |
600 | typedef interface9::internal::simple_partition_type task_partition_type; |
601 | |
    // TODO: consider making split_type public
603 | typedef interface9::internal::simple_partition_type::split_type split_type; |
604 | }; |
605 | |
606 | //! An auto partitioner |
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
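    A usage sketch (illustrative; auto_partitioner is also the default when no
    partitioner argument is passed):
    @code
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body, tbb::auto_partitioner() );
    @endcode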
609 | @ingroup algorithms */ |
610 | class auto_partitioner { |
611 | public: |
612 | auto_partitioner() {} |
613 | |
614 | private: |
615 | template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for; |
616 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for; |
617 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce; |
618 | template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan; |
619 | // backward compatibility |
620 | typedef interface9::internal::old_auto_partition_type partition_type; |
621 | // new implementation just extends existing interface |
622 | typedef interface9::internal::auto_partition_type task_partition_type; |
623 | |
    // TODO: consider making split_type public
625 | typedef interface9::internal::auto_partition_type::split_type split_type; |
626 | }; |
627 | |
628 | //! A static partitioner |
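/** Distributes the range across threads in nearly equal chunks, without further
    dynamic load balancing. A usage sketch (illustrative):
    @code
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body, tbb::static_partitioner() );
    @endcode
    @ingroup algorithms */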
629 | class static_partitioner { |
630 | public: |
631 | static_partitioner() {} |
632 | private: |
633 | template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for; |
634 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for; |
635 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce; |
636 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_deterministic_reduce; |
637 | template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan; |
638 | // backward compatibility |
639 | typedef interface9::internal::old_auto_partition_type partition_type; |
640 | // new implementation just extends existing interface |
641 | typedef interface9::internal::static_partition_type task_partition_type; |
642 | |
643 | // TODO: consider to make split_type public |
644 | typedef interface9::internal::static_partition_type::split_type split_type; |
645 | }; |
646 | |
647 | //! An affinity partitioner |
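/** Remembers which thread executed each chunk and biases subsequent invocations
    toward the same mapping, which can improve cache reuse when the same object is
    passed to repeated loops over the same data. A usage sketch (illustrative):
    @code
    static tbb::affinity_partitioner ap; // must be reused across calls to take effect
    for( int step = 0; step < n_steps; ++step )
        tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body, ap );
    @endcode
    @ingroup algorithms */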
648 | class affinity_partitioner: internal::affinity_partitioner_base_v3 { |
649 | public: |
650 | affinity_partitioner() {} |
651 | |
652 | private: |
653 | template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for; |
654 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for; |
655 | template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce; |
656 | template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan; |
657 | // backward compatibility - for parallel_scan only |
658 | typedef interface9::internal::old_auto_partition_type partition_type; |
659 | // new implementation just extends existing interface |
660 | typedef interface9::internal::affinity_partition_type task_partition_type; |
661 | |
    // TODO: consider making split_type public
663 | typedef interface9::internal::affinity_partition_type::split_type split_type; |
664 | }; |
665 | |
666 | } // namespace tbb |
667 | |
668 | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) |
669 | #pragma warning (pop) |
670 | #endif // warning 4244 is back |
671 | #undef __TBB_INITIAL_CHUNKS |
672 | #undef __TBB_RANGE_POOL_CAPACITY |
673 | #undef __TBB_INIT_DEPTH |
674 | #endif /* __TBB_partitioner_H */ |
675 | |