1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | // Test for function template parallel_for.h |
18 | |
19 | // These features are pure additions and thus can be always "on" in the test |
20 | #define TBB_PREVIEW_SERIAL_SUBSET 1 |
21 | #include "harness_defs.h" |
22 | |
23 | #if _MSC_VER |
24 | #pragma warning (push) |
25 | #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED |
26 | // Suppress pointless "unreachable code" warning. |
27 | #pragma warning (disable: 4702) |
28 | #endif |
29 | #if defined(_Wp64) |
30 | // Workaround for overzealous compiler warnings in /Wp64 mode |
31 | #pragma warning (disable: 4267) |
32 | #endif |
33 | |
34 | #define _SCL_SECURE_NO_WARNINGS |
35 | #endif //#if _MSC_VER |
36 | |
37 | #include "harness_defs.h" |
38 | #include "tbb/parallel_for.h" |
39 | #include "tbb/atomic.h" |
40 | #include "harness_assert.h" |
41 | #include "harness.h" |
42 | |
43 | static tbb::atomic<int> FooBodyCount; |
44 | |
45 | //! A range object whose only public members are those required by the Range concept. |
46 | template<size_t Pad> |
47 | class FooRange { |
48 | //! Start of range |
49 | int start; |
50 | |
51 | //! Size of range |
52 | int size; |
53 | FooRange( int start_, int size_ ) : start(start_), size(size_) { |
54 | zero_fill<char>(pad, Pad); |
55 | pad[Pad-1] = 'x'; |
56 | } |
57 | template<typename Flavor_, size_t Pad_> friend void Flog( int nthread ); |
58 | template<size_t Pad_> friend class FooBody; |
59 | void operator&(); |
60 | |
61 | char pad[Pad]; |
62 | public: |
63 | bool empty() const {return size==0;} |
64 | bool is_divisible() const {return size>1;} |
65 | FooRange( FooRange& original, tbb::split ) : size(original.size/2) { |
66 | original.size -= size; |
67 | start = original.start+original.size; |
68 | ASSERT( original.pad[Pad-1]=='x', NULL ); |
69 | pad[Pad-1] = 'x'; |
70 | } |
71 | }; |
72 | |
73 | //! A range object whose only public members are those required by the parallel_for.h body concept. |
74 | template<size_t Pad> |
75 | class FooBody { |
76 | static const int LIVE = 0x1234; |
77 | tbb::atomic<int>* array; |
78 | int state; |
79 | friend class FooRange<Pad>; |
80 | template<typename Flavor_, size_t Pad_> friend void Flog( int nthread ); |
81 | FooBody( tbb::atomic<int>* array_ ) : array(array_), state(LIVE) {} |
82 | public: |
83 | ~FooBody() { |
84 | --FooBodyCount; |
85 | for( size_t i=0; i<sizeof(*this); ++i ) |
86 | reinterpret_cast<char*>(this)[i] = -1; |
87 | } |
88 | //! Copy constructor |
89 | FooBody( const FooBody& other ) : array(other.array), state(other.state) { |
90 | ++FooBodyCount; |
91 | ASSERT( state==LIVE, NULL ); |
92 | } |
93 | void operator()( FooRange<Pad>& r ) const { |
94 | for( int k=0; k<r.size; ++k ) { |
95 | const int i = array[r.start+k]++; |
96 | ASSERT( i==0, NULL ); |
97 | } |
98 | } |
99 | }; |
100 | |
101 | #include "tbb/tick_count.h" |
102 | |
103 | static const int N = 500; |
104 | static tbb::atomic<int> Array[N]; |
105 | |
106 | struct serial_tag {}; |
107 | struct parallel_tag {}; |
108 | struct empty_partitioner_tag {}; |
109 | |
110 | template <typename Flavor, typename Partitioner, typename Range, typename Body> |
111 | struct Invoker; |
112 | |
113 | #if TBB_PREVIEW_SERIAL_SUBSET |
114 | template <typename Range, typename Body> |
115 | struct Invoker<serial_tag, empty_partitioner_tag, Range, Body> { |
116 | void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) { |
117 | tbb::serial:: parallel_for( r, body ); |
118 | } |
119 | }; |
120 | template <typename Partitioner, typename Range, typename Body> |
121 | struct Invoker<serial_tag, Partitioner, Range, Body> { |
122 | void operator()( const Range& r, const Body& body, Partitioner& p ) { |
123 | tbb::serial:: parallel_for( r, body, p ); |
124 | } |
125 | }; |
126 | #endif |
127 | |
128 | template <typename Range, typename Body> |
129 | struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> { |
130 | void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) { |
131 | tbb:: parallel_for( r, body ); |
132 | } |
133 | }; |
134 | |
135 | template <typename Partitioner, typename Range, typename Body> |
136 | struct Invoker<parallel_tag, Partitioner, Range, Body> { |
137 | void operator()( const Range& r, const Body& body, Partitioner& p ) { |
138 | tbb:: parallel_for( r, body, p ); |
139 | } |
140 | }; |
141 | |
142 | template <typename Flavor, typename Partitioner, typename T, typename Body> |
143 | struct InvokerStep; |
144 | |
145 | #if TBB_PREVIEW_SERIAL_SUBSET |
146 | template <typename T, typename Body> |
147 | struct InvokerStep<serial_tag, empty_partitioner_tag, T, Body> { |
148 | void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) { |
149 | tbb::serial:: parallel_for( first, last, f ); |
150 | } |
151 | void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) { |
152 | tbb::serial:: parallel_for( first, last, step, f ); |
153 | } |
154 | }; |
155 | |
156 | template <typename Partitioner, typename T, typename Body> |
157 | struct InvokerStep<serial_tag, Partitioner, T, Body> { |
158 | void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) { |
159 | tbb::serial:: parallel_for( first, last, f, p); |
160 | } |
161 | void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) { |
162 | tbb::serial:: parallel_for( first, last, step, f, p ); |
163 | } |
164 | }; |
165 | #endif |
166 | |
167 | template <typename T, typename Body> |
168 | struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> { |
169 | void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) { |
170 | tbb:: parallel_for( first, last, f ); |
171 | } |
172 | void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) { |
173 | tbb:: parallel_for( first, last, step, f ); |
174 | } |
175 | }; |
176 | |
177 | template <typename Partitioner, typename T, typename Body> |
178 | struct InvokerStep<parallel_tag, Partitioner, T, Body> { |
179 | void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) { |
180 | tbb:: parallel_for( first, last, f, p ); |
181 | } |
182 | void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) { |
183 | tbb:: parallel_for( first, last, step, f, p ); |
184 | } |
185 | }; |
186 | |
187 | template<typename Flavor, size_t Pad> |
188 | void Flog( int nthread ) { |
189 | tbb::tick_count T0 = tbb::tick_count::now(); |
190 | for( int i=0; i<N; ++i ) { |
191 | for ( int mode = 0; mode < 4; ++mode) { |
192 | FooRange<Pad> r( 0, i ); |
193 | const FooRange<Pad> rc = r; |
194 | FooBody<Pad> f( Array ); |
195 | const FooBody<Pad> fc = f; |
196 | memset( Array, 0, sizeof(Array) ); |
197 | FooBodyCount = 1; |
198 | switch (mode) { |
199 | case 0: { |
200 | empty_partitioner_tag p; |
201 | Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for; |
202 | invoke_for( rc, fc, p ); |
203 | } |
204 | break; |
205 | case 1: { |
206 | Invoker< Flavor, const tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; |
207 | invoke_for( rc, fc, tbb::simple_partitioner() ); |
208 | } |
209 | break; |
210 | case 2: { |
211 | Invoker< Flavor, const tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; |
212 | invoke_for( rc, fc, tbb::auto_partitioner() ); |
213 | } |
214 | break; |
215 | case 3: { |
216 | static tbb::affinity_partitioner affinity; |
217 | Invoker< Flavor, tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; |
218 | invoke_for( rc, fc, affinity ); |
219 | } |
220 | break; |
221 | } |
222 | for( int j=0; j<i; ++j ) |
223 | ASSERT( Array[j]==1, NULL ); |
224 | for( int j=i; j<N; ++j ) |
225 | ASSERT( Array[j]==0, NULL ); |
226 | ASSERT( FooBodyCount==1, NULL ); |
227 | } |
228 | } |
229 | tbb::tick_count T1 = tbb::tick_count::now(); |
230 | REMARK("time=%g\tnthread=%d\tpad=%d\n" ,(T1-T0).seconds(),nthread,int(Pad)); |
231 | } |
232 | |
// Support for testing the index-based parallel_for overloads (with and without a step)

// Number of leading buffer elements the tests are allowed to touch
const size_t PFOR_BUFFER_TEST_SIZE = 1024;
// The buffer has extra elements beyond the tested bound so that overruns can be detected
const size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];

//! Functor for index-based parallel_for: counts a visit of `index` in pfor_buffer.
template<typename T>
class TestFunctor {
public:
    void operator ()(T index) const {
        ++pfor_buffer[index];
    }
};
246 | |
247 | #include <stdexcept> // std::invalid_argument |
248 | |
249 | template <typename Flavor, typename T, typename Partitioner> |
250 | void TestParallelForWithStepSupportHelper(Partitioner& p) |
251 | { |
252 | const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE); |
253 | const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE); |
254 | // Testing parallel_for with different step values |
255 | InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for; |
256 | for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) { |
257 | T step; |
258 | for (step = 1; step < pfor_buffer_test_size; step++) { |
259 | memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(size_t)); |
260 | if (step == 1){ |
261 | invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p); |
262 | } else { |
263 | invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p); |
264 | } |
265 | // Verifying that parallel_for processed all items it should |
266 | for (T i = begin; i < pfor_buffer_test_size; i = i + step) { |
267 | ASSERT(pfor_buffer[i] == 1, "parallel_for didn't process all required elements" ); |
268 | pfor_buffer[i] = 0; |
269 | } |
270 | // Verifying that no extra items were processed and right bound of array wasn't crossed |
271 | for (T i = 0; i < pfor_buffer_actual_size; i++) { |
272 | ASSERT(pfor_buffer[i] == 0, "parallel_for processed an extra element" ); |
273 | } |
274 | } |
275 | } |
276 | } |
277 | |
278 | template <typename Flavor, typename T> |
279 | void TestParallelForWithStepSupport() |
280 | { |
281 | static tbb::affinity_partitioner affinity_p; |
282 | tbb::auto_partitioner auto_p; |
283 | tbb::simple_partitioner simple_p; |
284 | empty_partitioner_tag p; |
285 | |
286 | // Try out all partitioner combinations |
287 | TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p); |
288 | TestParallelForWithStepSupportHelper< Flavor,T,const tbb::auto_partitioner >(auto_p); |
289 | TestParallelForWithStepSupportHelper< Flavor,T,const tbb::simple_partitioner >(simple_p); |
290 | TestParallelForWithStepSupportHelper< Flavor,T,tbb::affinity_partitioner >(affinity_p); |
291 | |
292 | // Testing some corner cases |
293 | tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>()); |
294 | #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN |
295 | try{ |
296 | tbb::parallel_for(static_cast<T>(1), static_cast<T>(100), static_cast<T>(0), TestFunctor<T>()); // should cause std::invalid_argument |
297 | }catch(std::invalid_argument&){ |
298 | return; |
299 | } |
300 | catch ( ... ) { |
301 | ASSERT ( __TBB_EXCEPTION_TYPE_INFO_BROKEN, "Unrecognized exception. std::invalid_argument is expected" ); |
302 | } |
303 | #endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */ |
304 | } |
305 | |
306 | #if __TBB_TASK_GROUP_CONTEXT |
307 | // Exception support test |
308 | #define HARNESS_EH_SIMPLE_MODE 1 |
309 | #include "tbb/tbb_exception.h" |
310 | #include "harness_eh.h" |
311 | |
312 | #if TBB_USE_EXCEPTIONS |
313 | class test_functor_with_exception { |
314 | public: |
315 | void operator ()(size_t) const { ThrowTestException(); } |
316 | }; |
317 | |
318 | void TestExceptionsSupport() { |
319 | REMARK (__FUNCTION__); |
320 | { // Tests version with a step provided |
321 | ResetEhGlobals(); |
322 | TRY(); |
323 | tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, (size_t)1, test_functor_with_exception()); |
324 | CATCH_AND_ASSERT(); |
325 | } |
326 | { // Tests version without a step |
327 | ResetEhGlobals(); |
328 | TRY(); |
329 | tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, test_functor_with_exception()); |
330 | CATCH_AND_ASSERT(); |
331 | } |
332 | } |
333 | #endif /* TBB_USE_EXCEPTIONS */ |
334 | |
335 | // Cancellation support test |
336 | class functor_to_cancel { |
337 | public: |
338 | void operator()(size_t) const { |
339 | ++g_CurExecuted; |
340 | CancellatorTask::WaitUntilReady(); |
341 | } |
342 | }; |
343 | |
344 | size_t g_worker_task_step = 0; |
345 | |
346 | class my_worker_pfor_step_task : public tbb::task |
347 | { |
348 | tbb::task_group_context &my_ctx; |
349 | |
350 | tbb::task* execute () __TBB_override { |
351 | if (g_worker_task_step == 0){ |
352 | tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, functor_to_cancel(), my_ctx); |
353 | }else{ |
354 | tbb::parallel_for((size_t)0, (size_t)PFOR_BUFFER_TEST_SIZE, g_worker_task_step, functor_to_cancel(), my_ctx); |
355 | } |
356 | return NULL; |
357 | } |
358 | public: |
359 | my_worker_pfor_step_task ( tbb::task_group_context &context_) : my_ctx(context_) { } |
360 | }; |
361 | |
362 | void TestCancellation() |
363 | { |
364 | // tests version without a step |
365 | g_worker_task_step = 0; |
366 | ResetEhGlobals(); |
367 | RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>(); |
368 | |
369 | // tests version with step |
370 | g_worker_task_step = 1; |
371 | ResetEhGlobals(); |
372 | RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>(); |
373 | } |
374 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
375 | |
376 | #include "harness_m128.h" |
377 | |
378 | #if (HAVE_m128 || HAVE_m256) && !__TBB_SSE_STACK_ALIGNMENT_BROKEN |
379 | template<typename ClassWithVectorType> |
380 | struct SSE_Functor { |
381 | ClassWithVectorType* Src, * Dst; |
382 | SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {} |
383 | |
384 | void operator()( tbb::blocked_range<int>& r ) const { |
385 | for( int i=r.begin(); i!=r.end(); ++i ) |
386 | Dst[i] = Src[i]; |
387 | } |
388 | }; |
389 | |
390 | //! Test that parallel_for works with stack-allocated __m128 |
391 | template<typename ClassWithVectorType> |
392 | void TestVectorTypes() { |
393 | const int aSize = 300; |
394 | ClassWithVectorType Array1[aSize], Array2[aSize]; |
395 | for( int i=0; i<aSize; ++i ) { |
396 | // VC8 does not properly align a temporary value; to work around, use explicit variable |
397 | ClassWithVectorType foo(i); |
398 | Array1[i] = foo; |
399 | } |
400 | tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) ); |
401 | for( int i=0; i<aSize; ++i ) { |
402 | ClassWithVectorType foo(i); |
403 | ASSERT( Array2[i]==foo, NULL ) ; |
404 | } |
405 | } |
406 | #endif /* HAVE_m128 || HAVE_m256 */ |
407 | |
408 | #include <vector> |
409 | #include <sstream> |
410 | #include <tbb/blocked_range.h> |
411 | |
412 | struct TestSimplePartitionerStabilityFunctor:NoAssign{ |
413 | std::vector<int> & ranges; |
414 | TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){} |
415 | void operator()(tbb::blocked_range<size_t>& r)const{ |
416 | ranges.at(r.begin())=true; |
417 | } |
418 | }; |
419 | void TestSimplePartitionerStability(){ |
420 | const std::size_t repeat_count= 10; |
421 | const std::size_t rangeToSplitSize=1000000; |
422 | const std::size_t grainsizeStep=rangeToSplitSize/repeat_count; |
423 | typedef TestSimplePartitionerStabilityFunctor FunctorType; |
424 | |
425 | for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){ |
426 | std::vector<int> firstSeries(rangeToSplitSize,0); |
427 | std::vector<int> secondSeries(rangeToSplitSize,0); |
428 | |
429 | tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner()); |
430 | tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner()); |
431 | std::stringstream str; str<<i; |
432 | ASSERT(firstSeries==secondSeries,("splitting range with tbb::simple_partitioner must be reproducible; i=" +str.str()).c_str() ); |
433 | } |
434 | } |
435 | #include <cstdio> |
436 | #include "tbb/task_scheduler_init.h" |
437 | #include "harness_cpu.h" |
438 | #include "harness_barrier.h" |
439 | #include "test_partitioner.h" |
440 | |
441 | namespace interaction_with_range_and_partitioner { |
442 | |
443 | // Test checks compatibility of parallel_for algorithm with various range implementations |
444 | |
445 | void test() { |
446 | using namespace test_partitioner_utils::interaction_with_range_and_partitioner; |
447 | |
448 | test_partitioner_utils::SimpleBody b; |
449 | tbb::affinity_partitioner ap; |
450 | |
451 | parallel_for(Range1(true, false), b, ap); |
452 | parallel_for(Range2(true, false), b, ap); |
453 | parallel_for(Range3(true, false), b, ap); |
454 | parallel_for(Range4(false, true), b, ap); |
455 | parallel_for(Range5(false, true), b, ap); |
456 | parallel_for(Range6(false, true), b, ap); |
457 | |
458 | parallel_for(Range1(false, true), b, tbb::simple_partitioner()); |
459 | parallel_for(Range2(false, true), b, tbb::simple_partitioner()); |
460 | parallel_for(Range3(false, true), b, tbb::simple_partitioner()); |
461 | parallel_for(Range4(false, true), b, tbb::simple_partitioner()); |
462 | parallel_for(Range5(false, true), b, tbb::simple_partitioner()); |
463 | parallel_for(Range6(false, true), b, tbb::simple_partitioner()); |
464 | |
465 | parallel_for(Range1(false, true), b, tbb::auto_partitioner()); |
466 | parallel_for(Range2(false, true), b, tbb::auto_partitioner()); |
467 | parallel_for(Range3(false, true), b, tbb::auto_partitioner()); |
468 | parallel_for(Range4(false, true), b, tbb::auto_partitioner()); |
469 | parallel_for(Range5(false, true), b, tbb::auto_partitioner()); |
470 | parallel_for(Range6(false, true), b, tbb::auto_partitioner()); |
471 | } |
472 | |
473 | } // namespace interaction_with_range_and_partitioner |
474 | |
475 | namespace various_range_implementations { |
476 | |
477 | using namespace test_partitioner_utils; |
478 | using namespace test_partitioner_utils::TestRanges; |
479 | |
480 | // Body ensures that initial work distribution is done uniformly through affinity mechanism and not |
481 | // through work stealing |
482 | class Body { |
483 | Harness::SpinBarrier &m_sb; |
484 | public: |
485 | Body(Harness::SpinBarrier& sb) : m_sb(sb) { } |
486 | Body(Body& b, tbb::split) : m_sb(b.m_sb) { } |
487 | Body& operator =(const Body&) { return *this; } |
488 | template <typename Range> |
489 | void operator()(Range& r) const { |
490 | REMARK("Executing range [%lu, %lu)\n" , r.begin(), r.end()); |
491 | m_sb.timed_wait(10); // waiting for all threads |
492 | } |
493 | }; |
494 | |
495 | namespace correctness { |
496 | |
497 | /* Testing only correctness (that is parallel_for does not hang) */ |
498 | template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness> |
499 | void test() { |
500 | static const int thread_num = tbb::task_scheduler_init::default_num_threads(); |
501 | RangeType range( 0, thread_num, NULL, false, ensure_non_emptiness ); |
502 | tbb::affinity_partitioner ap; |
503 | tbb::parallel_for( range, SimpleBody(), ap ); |
504 | } |
505 | |
506 | } // namespace correctness |
507 | |
508 | namespace uniform_distribution { |
509 | |
510 | /* Body of parallel_for algorithm would hang if non-uniform work distribution happened */ |
511 | template <typename RangeType, bool feedback, bool ensure_non_emptiness> |
512 | void test() { |
513 | static const int thread_num = tbb::task_scheduler_init::default_num_threads(); |
514 | Harness::SpinBarrier sb( thread_num ); |
515 | RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness); |
516 | const Body sync_body( sb ); |
517 | tbb::affinity_partitioner ap; |
518 | tbb::parallel_for( range, sync_body, ap ); |
519 | tbb::parallel_for( range, sync_body, tbb::static_partitioner() ); |
520 | } |
521 | |
522 | } // namespace uniform_distribution |
523 | |
524 | void test() { |
525 | const bool provide_feedback = __TBB_ENABLE_RANGE_FEEDBACK; |
526 | const bool ensure_non_empty_range = true; |
527 | |
528 | // BlockedRange does not take into account feedback and non-emptiness settings but uses the |
529 | // tbb::blocked_range implementation |
530 | uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>(); |
531 | |
532 | #if __TBB_ENABLE_RANGE_FEEDBACK |
533 | using uniform_distribution::test; // if feedback is enabled ensure uniform work distribution |
534 | #else |
535 | using correctness::test; |
536 | #endif |
537 | |
538 | { |
539 | test<RoundedDownRange, provide_feedback, ensure_non_empty_range>(); |
540 | test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>(); |
541 | #if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE |
542 | // due to fast division algorithm on MIC |
543 | test<RoundedDownRange, !provide_feedback, ensure_non_empty_range>(); |
544 | test<RoundedDownRange, !provide_feedback, !ensure_non_empty_range>(); |
545 | #endif |
546 | } |
547 | |
548 | { |
549 | test<RoundedUpRange, provide_feedback, ensure_non_empty_range>(); |
550 | test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>(); |
551 | #if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE |
552 | // due to fast division algorithm on MIC |
553 | test<RoundedUpRange, !provide_feedback, ensure_non_empty_range>(); |
554 | test<RoundedUpRange, !provide_feedback, !ensure_non_empty_range>(); |
555 | #endif |
556 | } |
557 | |
558 | // Testing that parallel_for algorithm works with such weird ranges |
559 | correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>(); |
560 | correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>(); |
561 | correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>(); |
562 | |
563 | // The following ranges do not comply with the proportion suggested by partitioner. Therefore |
564 | // they have to provide the proportion in which they were actually split back to partitioner and |
565 | // ensure theirs non-emptiness |
566 | test<Range1_2, provide_feedback, ensure_non_empty_range>(); |
567 | test<Range1_999, provide_feedback, ensure_non_empty_range>(); |
568 | test<Range999_1, provide_feedback, ensure_non_empty_range>(); |
569 | } |
570 | |
571 | } // namespace various_range_implementations |
572 | |
573 | #include <map> |
574 | #include <utility> |
575 | #include "tbb/task_arena.h" |
576 | #include "tbb/enumerable_thread_specific.h" |
577 | |
578 | namespace parallel_for_within_task_arena { |
579 | |
580 | using namespace test_partitioner_utils::TestRanges; |
581 | using tbb::split; |
582 | using tbb::proportional_split; |
583 | |
584 | class BlockedRangeWhitebox; |
585 | |
586 | typedef std::pair<size_t, size_t> range_borders; |
587 | typedef std::multimap<BlockedRangeWhitebox*, range_borders> MapType; |
588 | typedef tbb::enumerable_thread_specific<MapType> ETSType; |
589 | ETSType ets; |
590 | |
591 | class BlockedRangeWhitebox : public BlockedRange { |
592 | public: |
593 | static const bool is_splittable_in_proportion = true; |
594 | BlockedRangeWhitebox(size_t _begin, size_t _end) |
595 | : BlockedRange(_begin, _end, NULL, false, false) { } |
596 | |
597 | BlockedRangeWhitebox(BlockedRangeWhitebox& r, proportional_split& p) |
598 | :BlockedRange(r, p) { |
599 | update_ets(r); |
600 | update_ets(*this); |
601 | } |
602 | |
603 | BlockedRangeWhitebox(BlockedRangeWhitebox& r, split) |
604 | :BlockedRange(r, split()) { } |
605 | |
606 | void update_ets(BlockedRangeWhitebox& range) { |
607 | std::pair<MapType::iterator, MapType::iterator> equal_range = ets.local().equal_range(&range); |
608 | for (MapType::iterator it = equal_range.first; it != equal_range.second;++it) { |
609 | if (it->second.first <= range.begin() && range.end() <= it->second.second) { |
610 | ASSERT(!(it->second.first == range.begin() && it->second.second == range.end()), "Only one border of the range should be equal to the original" ); |
611 | it->second.first = range.begin(); |
612 | it->second.second = range.end(); |
613 | return; |
614 | } |
615 | } |
616 | ets.local().insert(std::make_pair<BlockedRangeWhitebox*, range_borders>(&range, range_borders(range.begin(), range.end()))); |
617 | } |
618 | }; |
619 | |
620 | template <typename Partitioner> |
621 | struct ArenaBody { |
622 | size_t range_begin; |
623 | size_t range_end; |
624 | |
625 | ArenaBody(size_t _range_begin, size_t _range_end) |
626 | :range_begin(_range_begin), range_end(_range_end) { } |
627 | |
628 | void operator()() const { |
629 | Partitioner my_partitioner; |
630 | tbb::parallel_for(BlockedRangeWhitebox(range_begin, range_end), test_partitioner_utils::SimpleBody(), my_partitioner); |
631 | } |
632 | }; |
633 | |
634 | struct CombineBody { |
635 | MapType operator()(MapType x, const MapType& y) const { |
636 | x.insert(y.begin(), y.end()); |
637 | for (MapType::iterator it1 = x.begin(); it1 != x.end(); ++it1) { |
638 | for (MapType::iterator it2 = x.begin(); it2 != x.end(); ++it2) { |
639 | if (it1 == it2) continue; |
640 | bool is_1_subrange_of_2 = |
641 | it2->second.first <= it1->second.first && it1->second.second <= it2->second.second; |
642 | if (is_1_subrange_of_2) { |
643 | x.erase(it2); |
644 | break; |
645 | } |
646 | } |
647 | } |
648 | return x; |
649 | } |
650 | }; |
651 | |
652 | range_borders combine_range(const MapType& map) { |
653 | range_borders result_range = map.begin()->second; |
654 | for (MapType::const_iterator it = map.begin(); it != map.end(); it++) |
655 | result_range = range_borders( |
656 | (std::min)(result_range.first, it->second.first), |
657 | (std::max)(result_range.second, it->second.second) |
658 | ); |
659 | return result_range; |
660 | } |
661 | |
662 | template <typename Partitioner> |
663 | void test_body() { |
664 | unsigned hw_concurrency = tbb::tbb_thread::hardware_concurrency(); |
665 | for (unsigned int num_threads = hw_concurrency / 4 + 1; num_threads < hw_concurrency; num_threads *= 2) { |
666 | REMARK(" num_threads=%lu\n" , num_threads); |
667 | for (size_t range_begin = 0, range_end = num_threads * 10 - 1, i = 0; i < 3; |
668 | range_begin += num_threads, range_end += num_threads + 1, ++i) { |
669 | REMARK(" processing range [%lu, %lu)\n" , range_begin, range_end); |
670 | ets = ETSType(MapType()); |
671 | tbb::task_arena limited(num_threads); // at least two slots in arena. |
672 | limited.execute(ArenaBody<Partitioner>(range_begin, range_end)); |
673 | MapType combined_map = ets.combine(CombineBody()); |
674 | range_borders result_borders = combine_range(combined_map); |
675 | ASSERT(result_borders.first == range_begin, "Restored range begin does not match initial one" ); |
676 | ASSERT(result_borders.second == range_end, "Restored range end does not match initial one" ); |
677 | size_t map_size = combined_map.size(); |
678 | // In a single-thread arena, partitioners still do one split of a range. |
679 | size_t range_partitions = num_threads > 1 ? num_threads : 2; |
680 | ASSERT((map_size == range_partitions), "Incorrect number or post-proportional split ranges" ); |
681 | size_t expected_size = (range_end - range_begin) / range_partitions; |
682 | for (MapType::iterator it = combined_map.begin(); it != combined_map.end(); ++it) { |
683 | size_t size = it->second.second - it->second.first; |
684 | ASSERT((size == expected_size || size == expected_size + 1), "Incorrect post-proportional range size" ); |
685 | } |
686 | } |
687 | } |
688 | } |
689 | |
690 | void test() { |
691 | REMARK("parallel_for with affinity partitioner within task_arena\n" ); |
692 | test_body<tbb::affinity_partitioner>(); |
693 | REMARK("parallel_for with static partitioner within task_arena\n" ); |
694 | test_body<tbb::static_partitioner>(); |
695 | } |
696 | |
697 | } // namespace parallel_for_within_task_arena |
698 | |
699 | int TestMain () { |
700 | if( MinThread<1 ) { |
701 | REPORT("number of threads must be positive\n" ); |
702 | exit(1); |
703 | } |
704 | for( int p=MinThread; p<=MaxThread; ++p ) { |
705 | if( p>0 ) { |
706 | tbb::task_scheduler_init init( p ); |
707 | Flog<parallel_tag,1>(p); |
708 | Flog<parallel_tag,10>(p); |
709 | Flog<parallel_tag,100>(p); |
710 | Flog<parallel_tag,1000>(p); |
711 | Flog<parallel_tag,10000>(p); |
712 | |
713 | // Testing with different integer types |
714 | TestParallelForWithStepSupport<parallel_tag,short>(); |
715 | TestParallelForWithStepSupport<parallel_tag,unsigned short>(); |
716 | TestParallelForWithStepSupport<parallel_tag,int>(); |
717 | TestParallelForWithStepSupport<parallel_tag,unsigned int>(); |
718 | TestParallelForWithStepSupport<parallel_tag,long>(); |
719 | TestParallelForWithStepSupport<parallel_tag,unsigned long>(); |
720 | TestParallelForWithStepSupport<parallel_tag,long long>(); |
721 | TestParallelForWithStepSupport<parallel_tag,unsigned long long>(); |
722 | TestParallelForWithStepSupport<parallel_tag,size_t>(); |
723 | |
724 | #if TBB_PREVIEW_SERIAL_SUBSET |
725 | // This is for testing serial implementation. |
726 | if( p == MaxThread ) { |
727 | Flog<serial_tag,1>(p); |
728 | Flog<serial_tag,10>(p); |
729 | Flog<serial_tag,100>(p); |
730 | TestParallelForWithStepSupport<serial_tag,short>(); |
731 | TestParallelForWithStepSupport<serial_tag,unsigned short>(); |
732 | TestParallelForWithStepSupport<serial_tag,int>(); |
733 | TestParallelForWithStepSupport<serial_tag,unsigned int>(); |
734 | TestParallelForWithStepSupport<serial_tag,long>(); |
735 | TestParallelForWithStepSupport<serial_tag,unsigned long>(); |
736 | TestParallelForWithStepSupport<serial_tag,long long>(); |
737 | TestParallelForWithStepSupport<serial_tag,unsigned long long>(); |
738 | TestParallelForWithStepSupport<serial_tag,size_t>(); |
739 | } |
740 | #endif |
741 | |
742 | #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN |
743 | TestExceptionsSupport(); |
744 | #endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */ |
745 | #if __TBB_TASK_GROUP_CONTEXT |
746 | if ( p > 1 ) |
747 | TestCancellation(); |
748 | #endif /* __TBB_TASK_GROUP_CONTEXT */ |
749 | #if !__TBB_SSE_STACK_ALIGNMENT_BROKEN |
750 | #if HAVE_m128 |
751 | TestVectorTypes<ClassWithSSE>(); |
752 | #endif |
753 | #if HAVE_m256 |
754 | if (have_AVX()) TestVectorTypes<ClassWithAVX>(); |
755 | #endif |
756 | #endif /*!__TBB_SSE_STACK_ALIGNMENT_BROKEN*/ |
757 | // Test that all workers sleep when no work |
758 | TestCPUUserTime(p); |
759 | TestSimplePartitionerStability(); |
760 | } |
761 | } |
762 | #if __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN |
763 | REPORT("Known issue: exception handling tests are skipped.\n" ); |
764 | #endif |
765 | #if (HAVE_m128 || HAVE_m256) && __TBB_SSE_STACK_ALIGNMENT_BROKEN |
766 | REPORT("Known issue: stack alignment for SIMD instructions not tested.\n" ); |
767 | #endif |
768 | |
769 | various_range_implementations::test(); |
770 | interaction_with_range_and_partitioner::test(); |
771 | parallel_for_within_task_arena::test(); |
772 | return Harness::Done; |
773 | } |
774 | |
775 | #if _MSC_VER |
776 | #pragma warning (pop) |
777 | #endif |
778 | |