/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

// Test for function template parallel_for.h
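// Coverage: the Range/Body form of parallel_for with several partitioners, the
// index/step overloads for various integer types, exception propagation and
// cancellation, bodies using SIMD types, reproducibility of simple_partitioner
// splits, and interaction of custom Range implementations with partitioners
// and task_arena.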

// These features are pure additions and thus can be always "on" in the test
#define TBB_PREVIEW_SERIAL_SUBSET 1
#include "harness_defs.h"

#if _MSC_VER
#pragma warning (push)
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
    // Suppress pointless "unreachable code" warning.
    #pragma warning (disable: 4702)
#endif
#if defined(_Wp64)
    // Workaround for overzealous compiler warnings in /Wp64 mode
    #pragma warning (disable: 4267)
#endif

#define _SCL_SECURE_NO_WARNINGS
#endif //#if _MSC_VER

#include "harness_defs.h"
#include "tbb/parallel_for.h"
#include "tbb/atomic.h"
#include "harness_assert.h"
#include "harness.h"

static tbb::atomic<int> FooBodyCount;

//! A range object whose only public members are those required by the Range concept.
template<size_t Pad>
class FooRange {
    //! Start of range
    int start;

    //! Size of range
    int size;
    FooRange( int start_, int size_ ) : start(start_), size(size_) {
        zero_fill<char>(pad, Pad);
        pad[Pad-1] = 'x';
    }
    template<typename Flavor_, size_t Pad_> friend void Flog( int nthread );
    template<size_t Pad_> friend class FooBody;
    void operator&();

    char pad[Pad];
public:
    bool empty() const {return size==0;}
    bool is_divisible() const {return size>1;}
    FooRange( FooRange& original, tbb::split ) : size(original.size/2) {
        original.size -= size;
        start = original.start+original.size;
        ASSERT( original.pad[Pad-1]=='x', NULL );
        pad[Pad-1] = 'x';
    }
};

//! A body object whose only public members are those required by the parallel_for body concept.
template<size_t Pad>
class FooBody {
    static const int LIVE = 0x1234;
    tbb::atomic<int>* array;
    int state;
    friend class FooRange<Pad>;
    template<typename Flavor_, size_t Pad_> friend void Flog( int nthread );
    FooBody( tbb::atomic<int>* array_ ) : array(array_), state(LIVE) {}
public:
    ~FooBody() {
        --FooBodyCount;
        for( size_t i=0; i<sizeof(*this); ++i )
            reinterpret_cast<char*>(this)[i] = -1;
    }
    //! Copy constructor
    FooBody( const FooBody& other ) : array(other.array), state(other.state) {
        ++FooBodyCount;
        ASSERT( state==LIVE, NULL );
    }
    void operator()( FooRange<Pad>& r ) const {
        for( int k=0; k<r.size; ++k ) {
            const int i = array[r.start+k]++;
            ASSERT( i==0, NULL );
        }
    }
};

#include "tbb/tick_count.h"

static const int N = 500;
static tbb::atomic<int> Array[N];

struct serial_tag {};
struct parallel_tag {};
struct empty_partitioner_tag {};

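// Invoker/InvokerStep dispatch to either tbb::parallel_for or the
// tbb::serial::parallel_for subset, with or without an explicit partitioner,
// so the same test code can exercise both flavors.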
template <typename Flavor, typename Partitioner, typename Range, typename Body>
struct Invoker;

#if TBB_PREVIEW_SERIAL_SUBSET
template <typename Range, typename Body>
struct Invoker<serial_tag, empty_partitioner_tag, Range, Body> {
    void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
        tbb::serial:: parallel_for( r, body );
    }
};
template <typename Partitioner, typename Range, typename Body>
struct Invoker<serial_tag, Partitioner, Range, Body> {
    void operator()( const Range& r, const Body& body, Partitioner& p ) {
        tbb::serial:: parallel_for( r, body, p );
    }
};
#endif

template <typename Range, typename Body>
struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> {
    void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
        tbb:: parallel_for( r, body );
    }
};

template <typename Partitioner, typename Range, typename Body>
struct Invoker<parallel_tag, Partitioner, Range, Body> {
    void operator()( const Range& r, const Body& body, Partitioner& p ) {
        tbb:: parallel_for( r, body, p );
    }
};

template <typename Flavor, typename Partitioner, typename T, typename Body>
struct InvokerStep;

#if TBB_PREVIEW_SERIAL_SUBSET
template <typename T, typename Body>
struct InvokerStep<serial_tag, empty_partitioner_tag, T, Body> {
    void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
        tbb::serial:: parallel_for( first, last, f );
    }
    void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
        tbb::serial:: parallel_for( first, last, step, f );
    }
};

template <typename Partitioner, typename T, typename Body>
struct InvokerStep<serial_tag, Partitioner, T, Body> {
    void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
        tbb::serial:: parallel_for( first, last, f, p );
    }
    void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
        tbb::serial:: parallel_for( first, last, step, f, p );
    }
};
#endif

template <typename T, typename Body>
struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> {
    void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
        tbb:: parallel_for( first, last, f );
    }
    void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
        tbb:: parallel_for( first, last, step, f );
    }
};

template <typename Partitioner, typename T, typename Body>
struct InvokerStep<parallel_tag, Partitioner, T, Body> {
    void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
        tbb:: parallel_for( first, last, f, p );
    }
    void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
        tbb:: parallel_for( first, last, step, f, p );
    }
};

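// Flog runs parallel_for over a FooRange of every size in [0, N) with each of the
// four partitioner modes (default, simple, auto, affinity), then checks that every
// element in [0, i) was processed exactly once, nothing beyond it was touched, and
// no FooBody copies leaked.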
template<typename Flavor, size_t Pad>
void Flog( int nthread ) {
    tbb::tick_count T0 = tbb::tick_count::now();
    for( int i=0; i<N; ++i ) {
        for ( int mode = 0; mode < 4; ++mode) {
            FooRange<Pad> r( 0, i );
            const FooRange<Pad> rc = r;
            FooBody<Pad> f( Array );
            const FooBody<Pad> fc = f;
            memset( Array, 0, sizeof(Array) );
            FooBodyCount = 1;
            switch (mode) {
            case 0: {
                    empty_partitioner_tag p;
                    Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for;
                    invoke_for( rc, fc, p );
                }
                break;
            case 1: {
                    Invoker< Flavor, const tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
                    invoke_for( rc, fc, tbb::simple_partitioner() );
                }
                break;
            case 2: {
                    Invoker< Flavor, const tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
                    invoke_for( rc, fc, tbb::auto_partitioner() );
                }
                break;
            case 3: {
                    static tbb::affinity_partitioner affinity;
                    Invoker< Flavor, tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
                    invoke_for( rc, fc, affinity );
                }
                break;
            }
            for( int j=0; j<i; ++j )
                ASSERT( Array[j]==1, NULL );
            for( int j=i; j<N; ++j )
                ASSERT( Array[j]==0, NULL );
            ASSERT( FooBodyCount==1, NULL );
        }
    }
    tbb::tick_count T1 = tbb::tick_count::now();
    REMARK("time=%g\tnthread=%d\tpad=%d\n",(T1-T0).seconds(),nthread,int(Pad));
}

// Testing parallel_for with step support
const size_t PFOR_BUFFER_TEST_SIZE = 1024;
// pfor_buffer has some extra items beyond its right bound
const size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];

template<typename T>
class TestFunctor{
public:
    void operator ()(T index) const {
        pfor_buffer[index]++;
    }
};

#include <stdexcept> // std::invalid_argument

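// For each (begin, step) combination the helper below zeroes pfor_buffer, runs
// parallel_for over [begin, PFOR_BUFFER_TEST_SIZE) with the given step, and then
// verifies that exactly the expected indices were incremented once and that no
// element beyond the right bound was touched.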
template <typename Flavor, typename T, typename Partitioner>
void TestParallelForWithStepSupportHelper(Partitioner& p)
{
    const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE);
    const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE);
    // Testing parallel_for with different step values
    InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for;
    for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) {
        T step;
        for (step = 1; step < pfor_buffer_test_size; step++) {
            memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(size_t));
            if (step == 1){
                invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p);
            } else {
                invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p);
            }
            // Verifying that parallel_for processed all items it should
            for (T i = begin; i < pfor_buffer_test_size; i = i + step) {
                ASSERT(pfor_buffer[i] == 1, "parallel_for didn't process all required elements");
                pfor_buffer[i] = 0;
            }
            // Verifying that no extra items were processed and the right bound of the array wasn't crossed
            for (T i = 0; i < pfor_buffer_actual_size; i++) {
                ASSERT(pfor_buffer[i] == 0, "parallel_for processed an extra element");
            }
        }
    }
}

template <typename Flavor, typename T>
void TestParallelForWithStepSupport()
{
    static tbb::affinity_partitioner affinity_p;
    tbb::auto_partitioner auto_p;
    tbb::simple_partitioner simple_p;
    empty_partitioner_tag p;

    // Try out all partitioner combinations
    TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p);
    TestParallelForWithStepSupportHelper< Flavor,T,const tbb::auto_partitioner >(auto_p);
    TestParallelForWithStepSupportHelper< Flavor,T,const tbb::simple_partitioner >(simple_p);
    TestParallelForWithStepSupportHelper< Flavor,T,tbb::affinity_partitioner >(affinity_p);

    // Testing some corner cases
    tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>());
#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
    try{
        tbb::parallel_for(static_cast<T>(1), static_cast<T>(100), static_cast<T>(0), TestFunctor<T>()); // should cause std::invalid_argument
    }catch(std::invalid_argument&){
        return;
    }
    catch ( ... ) {
        ASSERT ( __TBB_EXCEPTION_TYPE_INFO_BROKEN, "Unrecognized exception. std::invalid_argument is expected" );
    }
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */
}

#if __TBB_TASK_GROUP_CONTEXT
// Exception support test
#define HARNESS_EH_SIMPLE_MODE 1
#include "tbb/tbb_exception.h"
#include "harness_eh.h"

#if TBB_USE_EXCEPTIONS
class test_functor_with_exception {
public:
    void operator ()(size_t) const { ThrowTestException(); }
};

void TestExceptionsSupport() {
    REMARK (__FUNCTION__);
    { // Tests version with a step provided
        ResetEhGlobals();
        TRY();
            tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, (size_t)1, test_functor_with_exception());
        CATCH_AND_ASSERT();
    }
    { // Tests version without a step
        ResetEhGlobals();
        TRY();
            tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, test_functor_with_exception());
        CATCH_AND_ASSERT();
    }
}
#endif /* TBB_USE_EXCEPTIONS */

// Cancellation support test
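// The functor below bumps the global execution counter and then blocks until the
// cancellator task is ready, so cancellation is requested while parallel_for
// tasks are still in flight.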
class functor_to_cancel {
public:
    void operator()(size_t) const {
        ++g_CurExecuted;
        CancellatorTask::WaitUntilReady();
    }
};

size_t g_worker_task_step = 0;

class my_worker_pfor_step_task : public tbb::task
{
    tbb::task_group_context &my_ctx;

    tbb::task* execute () __TBB_override {
        if (g_worker_task_step == 0){
            tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, functor_to_cancel(), my_ctx);
        }else{
            tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, g_worker_task_step, functor_to_cancel(), my_ctx);
        }
        return NULL;
    }
public:
    my_worker_pfor_step_task ( tbb::task_group_context &context_) : my_ctx(context_) { }
};

void TestCancellation()
{
    // tests version without a step
    g_worker_task_step = 0;
    ResetEhGlobals();
    RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>();

    // tests version with a step
    g_worker_task_step = 1;
    ResetEhGlobals();
    RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

#include "harness_m128.h"

#if (HAVE_m128 || HAVE_m256) && !__TBB_SSE_STACK_ALIGNMENT_BROKEN
template<typename ClassWithVectorType>
struct SSE_Functor {
    ClassWithVectorType* Src, * Dst;
    SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}

    void operator()( tbb::blocked_range<int>& r ) const {
        for( int i=r.begin(); i!=r.end(); ++i )
            Dst[i] = Src[i];
    }
};

//! Test that parallel_for works with stack-allocated SIMD types (__m128/__m256)
template<typename ClassWithVectorType>
void TestVectorTypes() {
    const int aSize = 300;
    ClassWithVectorType Array1[aSize], Array2[aSize];
    for( int i=0; i<aSize; ++i ) {
        // VC8 does not properly align a temporary value; to work around, use explicit variable
        ClassWithVectorType foo(i);
        Array1[i] = foo;
    }
    tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
    for( int i=0; i<aSize; ++i ) {
        ClassWithVectorType foo(i);
        ASSERT( Array2[i]==foo, NULL ) ;
    }
}
#endif /* (HAVE_m128 || HAVE_m256) && !__TBB_SSE_STACK_ALIGNMENT_BROKEN */

#include <vector>
#include <sstream>
#include <tbb/blocked_range.h>

struct TestSimplePartitionerStabilityFunctor : NoAssign {
    std::vector<int> & ranges;
    TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges) : ranges(theRanges) {}
    void operator()(tbb::blocked_range<size_t>& r) const {
        ranges.at(r.begin()) = true;
    }
};
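// simple_partitioner splitting must be deterministic: two parallel_for runs over
// the same blocked_range and grainsize must mark exactly the same set of
// subrange begin positions.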
void TestSimplePartitionerStability(){
    const std::size_t repeat_count = 10;
    const std::size_t rangeToSplitSize = 1000000;
    const std::size_t grainsizeStep = rangeToSplitSize / repeat_count;
    typedef TestSimplePartitionerStabilityFunctor FunctorType;

    for (std::size_t i=0, grainsize=grainsizeStep; i<repeat_count; i++, grainsize+=grainsizeStep){
        std::vector<int> firstSeries(rangeToSplitSize,0);
        std::vector<int> secondSeries(rangeToSplitSize,0);

        tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner());
        tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner());
        std::stringstream str; str<<i;
        ASSERT(firstSeries==secondSeries,("splitting range with tbb::simple_partitioner must be reproducible; i=" + str.str()).c_str() );
    }
}
#include <cstdio>
#include "tbb/task_scheduler_init.h"
#include "harness_cpu.h"
#include "harness_barrier.h"
#include "test_partitioner.h"

namespace interaction_with_range_and_partitioner {

// Test checks compatibility of parallel_for algorithm with various range implementations

void test() {
    using namespace test_partitioner_utils::interaction_with_range_and_partitioner;

    test_partitioner_utils::SimpleBody b;
    tbb::affinity_partitioner ap;

    parallel_for(Range1(true, false), b, ap);
    parallel_for(Range2(true, false), b, ap);
    parallel_for(Range3(true, false), b, ap);
    parallel_for(Range4(false, true), b, ap);
    parallel_for(Range5(false, true), b, ap);
    parallel_for(Range6(false, true), b, ap);

    parallel_for(Range1(false, true), b, tbb::simple_partitioner());
    parallel_for(Range2(false, true), b, tbb::simple_partitioner());
    parallel_for(Range3(false, true), b, tbb::simple_partitioner());
    parallel_for(Range4(false, true), b, tbb::simple_partitioner());
    parallel_for(Range5(false, true), b, tbb::simple_partitioner());
    parallel_for(Range6(false, true), b, tbb::simple_partitioner());

    parallel_for(Range1(false, true), b, tbb::auto_partitioner());
    parallel_for(Range2(false, true), b, tbb::auto_partitioner());
    parallel_for(Range3(false, true), b, tbb::auto_partitioner());
    parallel_for(Range4(false, true), b, tbb::auto_partitioner());
    parallel_for(Range5(false, true), b, tbb::auto_partitioner());
    parallel_for(Range6(false, true), b, tbb::auto_partitioner());
}

} // namespace interaction_with_range_and_partitioner

namespace various_range_implementations {

using namespace test_partitioner_utils;
using namespace test_partitioner_utils::TestRanges;

// Body ensures that initial work distribution is done uniformly through the affinity mechanism
// and not through work stealing
class Body {
    Harness::SpinBarrier &m_sb;
public:
    Body(Harness::SpinBarrier& sb) : m_sb(sb) { }
    Body(Body& b, tbb::split) : m_sb(b.m_sb) { }
    Body& operator =(const Body&) { return *this; }
    template <typename Range>
    void operator()(Range& r) const {
        REMARK("Executing range [%lu, %lu)\n", r.begin(), r.end());
        m_sb.timed_wait(10); // waiting for all threads
    }
};

namespace correctness {

/* Testing only correctness (i.e. that parallel_for does not hang) */
template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness>
void test() {
    static const int thread_num = tbb::task_scheduler_init::default_num_threads();
    RangeType range( 0, thread_num, NULL, false, ensure_non_emptiness );
    tbb::affinity_partitioner ap;
    tbb::parallel_for( range, SimpleBody(), ap );
}

} // namespace correctness

namespace uniform_distribution {

/* The body of the parallel_for algorithm would hang if work were distributed non-uniformly */
template <typename RangeType, bool feedback, bool ensure_non_emptiness>
void test() {
    static const int thread_num = tbb::task_scheduler_init::default_num_threads();
    Harness::SpinBarrier sb( thread_num );
    RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness);
    const Body sync_body( sb );
    tbb::affinity_partitioner ap;
    tbb::parallel_for( range, sync_body, ap );
    tbb::parallel_for( range, sync_body, tbb::static_partitioner() );
}

} // namespace uniform_distribution

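// When __TBB_ENABLE_RANGE_FEEDBACK is on, the ranges report the proportion in
// which they were actually split back to the partitioner, so uniform work
// distribution can be verified; otherwise only correctness (no hang) is checked.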
void test() {
    const bool provide_feedback = __TBB_ENABLE_RANGE_FEEDBACK;
    const bool ensure_non_empty_range = true;

    // BlockedRange does not take into account feedback and non-emptiness settings but uses the
    // tbb::blocked_range implementation
    uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();

#if __TBB_ENABLE_RANGE_FEEDBACK
    using uniform_distribution::test; // if feedback is enabled, ensure uniform work distribution
#else
    using correctness::test;
#endif

    {
        test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
        test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
#if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE
        // due to fast division algorithm on MIC
        test<RoundedDownRange, !provide_feedback, ensure_non_empty_range>();
        test<RoundedDownRange, !provide_feedback, !ensure_non_empty_range>();
#endif
    }

    {
        test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
        test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
#if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE
        // due to fast division algorithm on MIC
        test<RoundedUpRange, !provide_feedback, ensure_non_empty_range>();
        test<RoundedUpRange, !provide_feedback, !ensure_non_empty_range>();
#endif
    }

    // Testing that the parallel_for algorithm works with such weird ranges as well
    correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>();
    correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>();
    correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>();

    // The following ranges do not comply with the proportion suggested by the partitioner. Therefore
    // they have to report the proportion in which they were actually split back to the partitioner
    // and ensure their own non-emptiness
    test<Range1_2, provide_feedback, ensure_non_empty_range>();
    test<Range1_999, provide_feedback, ensure_non_empty_range>();
    test<Range999_1, provide_feedback, ensure_non_empty_range>();
}

} // namespace various_range_implementations

#include <map>
#include <utility>
#include "tbb/task_arena.h"
#include "tbb/enumerable_thread_specific.h"

namespace parallel_for_within_task_arena {

using namespace test_partitioner_utils::TestRanges;
using tbb::split;
using tbb::proportional_split;

class BlockedRangeWhitebox;

typedef std::pair<size_t, size_t> range_borders;
typedef std::multimap<BlockedRangeWhitebox*, range_borders> MapType;
typedef tbb::enumerable_thread_specific<MapType> ETSType;
ETSType ets;
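// Each thread records, for every live BlockedRangeWhitebox object, the borders of
// the subrange it currently covers; update_ets() keeps those borders up to date
// across proportional splits so the test can later reconstruct the whole range.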

class BlockedRangeWhitebox : public BlockedRange {
public:
    static const bool is_splittable_in_proportion = true;
    BlockedRangeWhitebox(size_t _begin, size_t _end)
        : BlockedRange(_begin, _end, NULL, false, false) { }

    BlockedRangeWhitebox(BlockedRangeWhitebox& r, proportional_split& p)
        : BlockedRange(r, p) {
        update_ets(r);
        update_ets(*this);
    }

    BlockedRangeWhitebox(BlockedRangeWhitebox& r, split)
        : BlockedRange(r, split()) { }

    void update_ets(BlockedRangeWhitebox& range) {
        std::pair<MapType::iterator, MapType::iterator> equal_range = ets.local().equal_range(&range);
        for (MapType::iterator it = equal_range.first; it != equal_range.second; ++it) {
            if (it->second.first <= range.begin() && range.end() <= it->second.second) {
                ASSERT(!(it->second.first == range.begin() && it->second.second == range.end()), "Only one border of the range should be equal to the original");
                it->second.first = range.begin();
                it->second.second = range.end();
                return;
            }
        }
        ets.local().insert(std::make_pair<BlockedRangeWhitebox*, range_borders>(&range, range_borders(range.begin(), range.end())));
    }
};

template <typename Partitioner>
struct ArenaBody {
    size_t range_begin;
    size_t range_end;

    ArenaBody(size_t _range_begin, size_t _range_end)
        : range_begin(_range_begin), range_end(_range_end) { }

    void operator()() const {
        Partitioner my_partitioner;
        tbb::parallel_for(BlockedRangeWhitebox(range_begin, range_end), test_partitioner_utils::SimpleBody(), my_partitioner);
    }
};

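// Combines the per-thread maps and drops every entry whose borders fully contain
// another entry, so only the leaf (final) subranges survive.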
struct CombineBody {
    MapType operator()(MapType x, const MapType& y) const {
        x.insert(y.begin(), y.end());
        for (MapType::iterator it1 = x.begin(); it1 != x.end(); ++it1) {
            for (MapType::iterator it2 = x.begin(); it2 != x.end(); ++it2) {
                if (it1 == it2) continue;
                bool is_1_subrange_of_2 =
                    it2->second.first <= it1->second.first && it1->second.second <= it2->second.second;
                if (is_1_subrange_of_2) {
                    x.erase(it2);
                    break;
                }
            }
        }
        return x;
    }
};

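// Folds all recorded borders into a single (min begin, max end) pair, i.e. the
// bounding borders of everything the leaf subranges covered.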
range_borders combine_range(const MapType& map) {
    range_borders result_range = map.begin()->second;
    for (MapType::const_iterator it = map.begin(); it != map.end(); it++)
        result_range = range_borders(
            (std::min)(result_range.first, it->second.first),
            (std::max)(result_range.second, it->second.second)
        );
    return result_range;
}

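// Runs parallel_for over a BlockedRangeWhitebox inside task_arenas of varying size
// and checks, from the combined per-thread data, that the reconstructed borders
// match the original range, that the number of leaf subranges equals the number of
// arena slots, and that the leaf subrange sizes differ by at most one.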
template <typename Partitioner>
void test_body() {
    unsigned hw_concurrency = tbb::tbb_thread::hardware_concurrency();
    for (unsigned int num_threads = hw_concurrency / 4 + 1; num_threads < hw_concurrency; num_threads *= 2) {
        REMARK(" num_threads=%u\n", num_threads);
        for (size_t range_begin = 0, range_end = num_threads * 10 - 1, i = 0; i < 3;
             range_begin += num_threads, range_end += num_threads + 1, ++i) {
            REMARK(" processing range [%lu, %lu)\n", range_begin, range_end);
            ets = ETSType(MapType());
            tbb::task_arena limited(num_threads); // at least two slots in arena.
            limited.execute(ArenaBody<Partitioner>(range_begin, range_end));
            MapType combined_map = ets.combine(CombineBody());
            range_borders result_borders = combine_range(combined_map);
            ASSERT(result_borders.first == range_begin, "Restored range begin does not match initial one");
            ASSERT(result_borders.second == range_end, "Restored range end does not match initial one");
            size_t map_size = combined_map.size();
            // In a single-thread arena, partitioners still do one split of a range.
            size_t range_partitions = num_threads > 1 ? num_threads : 2;
            ASSERT((map_size == range_partitions), "Incorrect number of post-proportional split ranges");
            size_t expected_size = (range_end - range_begin) / range_partitions;
            for (MapType::iterator it = combined_map.begin(); it != combined_map.end(); ++it) {
                size_t size = it->second.second - it->second.first;
                ASSERT((size == expected_size || size == expected_size + 1), "Incorrect post-proportional range size");
            }
        }
    }
}

void test() {
    REMARK("parallel_for with affinity partitioner within task_arena\n");
    test_body<tbb::affinity_partitioner>();
    REMARK("parallel_for with static partitioner within task_arena\n");
    test_body<tbb::static_partitioner>();
}

} // namespace parallel_for_within_task_arena

int TestMain () {
    if( MinThread<1 ) {
        REPORT("number of threads must be positive\n");
        exit(1);
    }
    for( int p=MinThread; p<=MaxThread; ++p ) {
        if( p>0 ) {
            tbb::task_scheduler_init init( p );
            Flog<parallel_tag,1>(p);
            Flog<parallel_tag,10>(p);
            Flog<parallel_tag,100>(p);
            Flog<parallel_tag,1000>(p);
            Flog<parallel_tag,10000>(p);

            // Testing with different integer types
            TestParallelForWithStepSupport<parallel_tag,short>();
            TestParallelForWithStepSupport<parallel_tag,unsigned short>();
            TestParallelForWithStepSupport<parallel_tag,int>();
            TestParallelForWithStepSupport<parallel_tag,unsigned int>();
            TestParallelForWithStepSupport<parallel_tag,long>();
            TestParallelForWithStepSupport<parallel_tag,unsigned long>();
            TestParallelForWithStepSupport<parallel_tag,long long>();
            TestParallelForWithStepSupport<parallel_tag,unsigned long long>();
            TestParallelForWithStepSupport<parallel_tag,size_t>();

#if TBB_PREVIEW_SERIAL_SUBSET
            // This is for testing the serial implementation.
            if( p == MaxThread ) {
                Flog<serial_tag,1>(p);
                Flog<serial_tag,10>(p);
                Flog<serial_tag,100>(p);
                TestParallelForWithStepSupport<serial_tag,short>();
                TestParallelForWithStepSupport<serial_tag,unsigned short>();
                TestParallelForWithStepSupport<serial_tag,int>();
                TestParallelForWithStepSupport<serial_tag,unsigned int>();
                TestParallelForWithStepSupport<serial_tag,long>();
                TestParallelForWithStepSupport<serial_tag,unsigned long>();
                TestParallelForWithStepSupport<serial_tag,long long>();
                TestParallelForWithStepSupport<serial_tag,unsigned long long>();
                TestParallelForWithStepSupport<serial_tag,size_t>();
            }
#endif

#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
            TestExceptionsSupport();
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */
#if __TBB_TASK_GROUP_CONTEXT
            if ( p > 1 )
                TestCancellation();
#endif /* __TBB_TASK_GROUP_CONTEXT */
#if !__TBB_SSE_STACK_ALIGNMENT_BROKEN
    #if HAVE_m128
            TestVectorTypes<ClassWithSSE>();
    #endif
    #if HAVE_m256
            if (have_AVX()) TestVectorTypes<ClassWithAVX>();
    #endif
#endif /*!__TBB_SSE_STACK_ALIGNMENT_BROKEN*/
            // Test that all workers sleep when there is no work
            TestCPUUserTime(p);
            TestSimplePartitionerStability();
        }
    }
#if __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
    REPORT("Known issue: exception handling tests are skipped.\n");
#endif
#if (HAVE_m128 || HAVE_m256) && __TBB_SSE_STACK_ALIGNMENT_BROKEN
    REPORT("Known issue: stack alignment for SIMD instructions not tested.\n");
#endif

    various_range_implementations::test();
    interaction_with_range_and_partitioner::test();
    parallel_for_within_task_arena::test();
    return Harness::Done;
}

#if _MSC_VER
#pragma warning (pop)
#endif