test_parallel_for.cpp source code [ThreadBB/src/test/test_parallel_for.cpp]

1	/*
2	Copyright (c) 2005-2019 Intel Corporation
3
4	Licensed under the Apache License, Version 2.0 (the "License");
5	you may not use this file except in compliance with the License.
6	You may obtain a copy of the License at
7
8	http://www.apache.org/licenses/LICENSE-2.0
9
10	Unless required by applicable law or agreed to in writing, software
11	distributed under the License is distributed on an "AS IS" BASIS,
12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	See the License for the specific language governing permissions and
14	limitations under the License.
15	*/
16
17	// Test for function template parallel_for.h
18
19	// These features are pure additions and thus can be always "on" in the test
20	#define TBB_PREVIEW_SERIAL_SUBSET 1
21	#include "harness_defs.h"
22
23	#if _MSC_VER
24	#pragma warning (push)
25	#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
26	// Suppress pointless "unreachable code" warning.
27	#pragma warning (disable: 4702)
28	#endif
29	#if defined(_Wp64)
30	// Workaround for overzealous compiler warnings in /Wp64 mode
31	#pragma warning (disable: 4267)
32	#endif
33
34	#define _SCL_SECURE_NO_WARNINGS
35	#endif //#if _MSC_VER
36
37	#include "harness_defs.h"
38	#include "tbb/parallel_for.h"
39	#include "tbb/atomic.h"
40	#include "harness_assert.h"
41	#include "harness.h"
42
//! Bookkeeping counter for FooBody copies.
/** Incremented by FooBody's copy constructor and decremented by its destructor.
    Flog sets it to 1 before each parallel_for call and asserts it is 1 again
    afterwards. */
static tbb::atomic<int> FooBodyCount;
44
45	//! A range object whose only public members are those required by the Range concept.
46	template<size_t Pad>
47	class FooRange {
48	//! Start of range
49	int start;
50
51	//! Size of range
52	int size;
53	FooRange( int start_, int size_ ) : start(start_), size(size_) {
54	zero_fill<char>(pad, Pad);
55	pad[Pad-`1`] = `'x'`;
56	}
57	template<typename Flavor_, size_t Pad_> friend void Flog( int nthread );
58	template<size_t Pad_> friend class FooBody;
59	void operator&();
60
61	char pad[Pad];
62	public:
63	bool empty() const {return size==`0`;}
64	bool is_divisible() const {return size>`1`;}
65	FooRange( FooRange& original, tbb::split ) : size(original.size/`2`) {
66	original.size -= size;
67	start = original.start+original.size;
68	ASSERT( original.pad[Pad-`1`]==`'x'`, NULL );
69	pad[Pad-`1`] = `'x'`;
70	}
71	};
72
73	//! A range object whose only public members are those required by the parallel_for.h body concept.
74	template<size_t Pad>
75	class FooBody {
76	static const int LIVE = `0x1234`;
77	tbb::atomic<int>* array;
78	int state;
79	friend class FooRange<Pad>;
80	template<typename Flavor_, size_t Pad_> friend void Flog( int nthread );
81	FooBody( tbb::atomic<int>* array_ ) : array(array_), state(LIVE) {}
82	public:
83	~FooBody() {
84	--FooBodyCount;
85	for( size_t i=`0`; i<sizeof(*this); ++i )
86	reinterpret_cast<char>(this*)[i] = -`1`;
87	}
88	//! Copy constructor
89	FooBody( const FooBody& other ) : array(other.array), state(other.state) {
90	++FooBodyCount;
91	ASSERT( state==LIVE, NULL );
92	}
93	void operator()( FooRange<Pad>& r ) const {
94	for( int k=`0`; k<r.size; ++k ) {
95	const int i = array[r.start+k]++;
96	ASSERT( i==`0`, NULL );
97	}
98	}
99	};
100
101	#include "tbb/tick_count.h"
102
103	static const int N = `500`;
104	static tbb::atomic<int> Array[N];
105
106	struct serial_tag {};
107	struct parallel_tag {};
108	struct empty_partitioner_tag {};
109
110	template <typename Flavor, typename Partitioner, typename Range, typename Body>
111	struct Invoker;
112
113	#if TBB_PREVIEW_SERIAL_SUBSET
114	template <typename Range, typename Body>
115	struct Invoker<serial_tag, empty_partitioner_tag, Range, Body> {
116	void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
117	tbb::serial:: parallel_for( r, body );
118	}
119	};
120	template <typename Partitioner, typename Range, typename Body>
121	struct Invoker<serial_tag, Partitioner, Range, Body> {
122	void operator()( const Range& r, const Body& body, Partitioner& p ) {
123	tbb::serial:: parallel_for( r, body, p );
124	}
125	};
126	#endif
127
128	template <typename Range, typename Body>
129	struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> {
130	void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
131	tbb:: parallel_for( r, body );
132	}
133	};
134
135	template <typename Partitioner, typename Range, typename Body>
136	struct Invoker<parallel_tag, Partitioner, Range, Body> {
137	void operator()( const Range& r, const Body& body, Partitioner& p ) {
138	tbb:: parallel_for( r, body, p );
139	}
140	};
141
142	template <typename Flavor, typename Partitioner, typename T, typename Body>
143	struct InvokerStep;
144
145	#if TBB_PREVIEW_SERIAL_SUBSET
146	template <typename T, typename Body>
147	struct InvokerStep<serial_tag, empty_partitioner_tag, T, Body> {
148	void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
149	tbb::serial:: parallel_for( first, last, f );
150	}
151	void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
152	tbb::serial:: parallel_for( first, last, step, f );
153	}
154	};
155
156	template <typename Partitioner, typename T, typename Body>
157	struct InvokerStep<serial_tag, Partitioner, T, Body> {
158	void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
159	tbb::serial:: parallel_for( first, last, f, p);
160	}
161	void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
162	tbb::serial:: parallel_for( first, last, step, f, p );
163	}
164	};
165	#endif
166
167	template <typename T, typename Body>
168	struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> {
169	void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
170	tbb:: parallel_for( first, last, f );
171	}
172	void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
173	tbb:: parallel_for( first, last, step, f );
174	}
175	};
176
177	template <typename Partitioner, typename T, typename Body>
178	struct InvokerStep<parallel_tag, Partitioner, T, Body> {
179	void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
180	tbb:: parallel_for( first, last, f, p );
181	}
182	void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
183	tbb:: parallel_for( first, last, step, f, p );
184	}
185	};
186
187	template<typename Flavor, size_t Pad>
188	void Flog( int nthread ) {
189	tbb::tick_count T0 = tbb::tick_count::now();
190	for( int i=`0`; i<N; ++i ) {
191	for ( int mode = `0`; mode < `4`; ++mode) {
192	FooRange<Pad> r( `0`, i );
193	const FooRange<Pad> rc = r;
194	FooBody<Pad> f( Array );
195	const FooBody<Pad> fc = f;
196	memset( Array, `0`, sizeof(Array) );
197	FooBodyCount = `1`;
198	switch (mode) {
199	case `0`: {
200	empty_partitioner_tag p;
201	Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for;
202	invoke_for( rc, fc, p );
203	}
204	break;
205	case `1`: {
206	Invoker< Flavor, const tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
207	invoke_for( rc, fc, tbb::simple_partitioner () );
208	}
209	break;
210	case `2`: {
211	Invoker< Flavor, const tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
212	invoke_for( rc, fc, tbb::auto_partitioner () );
213	}
214	break;
215	case `3`: {
216	static tbb::affinity_partitioner affinity;
217	Invoker< Flavor, tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
218	invoke_for( rc, fc, affinity );
219	}
220	break;
221	}
222	for( int j=`0`; j<i; ++j )
223	ASSERT( Array[j]==`1`, NULL );
224	for( int j=i; j<N; ++j )
225	ASSERT( Array[j]==`0`, NULL );
226	ASSERT( FooBodyCount==`1`, NULL );
227	}
228	}
229	tbb::tick_count T1 = tbb::tick_count::now();
230	REMARK("time=%g\tnthread=%d\tpad=%d\n",(T1 -T0).seconds(),nthread,int(Pad));
231	}
232
// Testing parallel_for with step support
//! Number of buffer elements that the step tests are allowed to touch.
const size_t PFOR_BUFFER_TEST_SIZE = 1024;
// test_buffer has some extra items beyond its right bound
//! Real buffer length; the tail past PFOR_BUFFER_TEST_SIZE is checked to stay untouched.
const size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
//! Hit counters, one per index, incremented by TestFunctor.
size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];

//! Index functor that records each visited index by incrementing pfor_buffer[index].
template<typename T>
class TestFunctor{
public:
    void operator ()(T index) const {
        pfor_buffer[index]++;
    }
};
246
247	#include <stdexcept> // std::invalid_argument
248
249	template <typename Flavor, typename T, typename Partitioner>
250	void TestParallelForWithStepSupportHelper(Partitioner& p)
251	{
252	const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE);
253	const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE);
254	// Testing parallel_for with different step values
255	InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for;
256	for (T begin = `0`; begin < pfor_buffer_test_size - `1`; begin += pfor_buffer_test_size / `10` + `1`) {
257	T step;
258	for (step = `1`; step < pfor_buffer_test_size; step++) {
259	memset(pfor_buffer, `0`, pfor_buffer_actual_size * sizeof(size_t));
260	if (step == `1`){
261	invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p);
262	} else {
263	invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p);
264	}
265	// Verifying that parallel_for processed all items it should
266	for (T i = begin; i < pfor_buffer_test_size; i = i + step) {
267	ASSERT(pfor_buffer[i] == `1`, "parallel_for didn't process all required elements");
268	pfor_buffer[i] = `0`;
269	}
270	// Verifying that no extra items were processed and right bound of array wasn't crossed
271	for (T i = `0`; i < pfor_buffer_actual_size; i++) {
272	ASSERT(pfor_buffer[i] == `0`, "parallel_for processed an extra element");
273	}
274	}
275	}
276	}
277
278	template <typename Flavor, typename T>
279	void TestParallelForWithStepSupport()
280	{
281	static tbb::affinity_partitioner affinity_p;
282	tbb::auto_partitioner auto_p;
283	tbb::simple_partitioner simple_p;
284	empty_partitioner_tag p;
285
286	// Try out all partitioner combinations
287	TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p);
288	TestParallelForWithStepSupportHelper< Flavor,T,const tbb::auto_partitioner >(auto_p);
289	TestParallelForWithStepSupportHelper< Flavor,T,const tbb::simple_partitioner >(simple_p);
290	TestParallelForWithStepSupportHelper< Flavor,T,tbb::affinity_partitioner >(affinity_p);
291
292	// Testing some corner cases
293	tbb::parallel_for(static_cast<T>(`2`), static_cast<T>(`1`), static_cast<T>(`1`), TestFunctor<T>());
294	#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
295	try{
296	tbb::parallel_for(static_cast<T>(`1`), static_cast<T>(`100`), static_cast<T>(`0`), TestFunctor<T>()); // should cause std::invalid_argument
297	}catch(std::invalid_argument&){
298	return;
299	}
300	catch ( ... ) {
301	ASSERT ( __TBB_EXCEPTION_TYPE_INFO_BROKEN, "Unrecognized exception. std::invalid_argument is expected" );
302	}
303	#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */
304	}
305
306	#if __TBB_TASK_GROUP_CONTEXT
307	// Exception support test
308	#define HARNESS_EH_SIMPLE_MODE 1
309	#include "tbb/tbb_exception.h"
310	#include "harness_eh.h"
311
312	#if TBB_USE_EXCEPTIONS
313	class test_functor_with_exception {
314	public:
315	void operator ()(size_t) const { ThrowTestException(); }
316	};
317
318	void TestExceptionsSupport() {
319	REMARK (__FUNCTION__);
320	{ // Tests version with a step provided
321	ResetEhGlobals();
322	TRY();
323	tbb::parallel_for((size_t)`0`, (size_t)PFOR_BUFFER_TEST_SIZE, (size_t)`1`, test_functor_with_exception ());
324	CATCH_AND_ASSERT();
325	}
326	{ // Tests version without a step
327	ResetEhGlobals();
328	TRY();
329	tbb::parallel_for((size_t)`0`, (size_t)PFOR_BUFFER_TEST_SIZE, test_functor_with_exception ());
330	CATCH_AND_ASSERT();
331	}
332	}
333	#endif /* TBB_USE_EXCEPTIONS */
334
335	// Cancellation support test
336	class functor_to_cancel {
337	public:
338	void operator()(size_t) const {
339	++g_CurExecuted;
340	CancellatorTask::WaitUntilReady();
341	}
342	};
343
344	size_t g_worker_task_step = `0`;
345
346	class my_worker_pfor_step_task : public tbb::task
347	{
348	tbb::task_group_context &my_ctx;
349
350	tbb::task* execute () __TBB_override {
351	if (g_worker_task_step == `0`){
352	tbb::parallel_for((size_t)`0`, (size_t)PFOR_BUFFER_TEST_SIZE, functor_to_cancel (), my_ctx);
353	}else{
354	tbb::parallel_for((size_t)`0`, (size_t)PFOR_BUFFER_TEST_SIZE, g_worker_task_step, functor_to_cancel (), my_ctx);
355	}
356	return NULL;
357	}
358	public:
359	my_worker_pfor_step_task ( tbb::task_group_context &context_) : my_ctx(context_) { }
360	};
361
362	void TestCancellation()
363	{
364	// tests version without a step
365	g_worker_task_step = `0`;
366	ResetEhGlobals();
367	RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>();
368
369	// tests version with step
370	g_worker_task_step = `1`;
371	ResetEhGlobals();
372	RunCancellationTest<my_worker_pfor_step_task, CancellatorTask>();
373	}
374	#endif /* __TBB_TASK_GROUP_CONTEXT */
375
376	#include "harness_m128.h"
377
#if (HAVE_m128 || HAVE_m256) && !__TBB_SSE_STACK_ALIGNMENT_BROKEN
//! Range body that copies Src[i] to Dst[i] for every index of the range.
template<typename ClassWithVectorType>
struct SSE_Functor {
    ClassWithVectorType* Src, * Dst;
    SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}

    void operator()( tbb::blocked_range<int>& r ) const {
        for( int i=r.begin(); i!=r.end(); ++i )
            Dst[i] = Src[i];
    }
};

//! Test that parallel_for works with stack-allocated __m128
/** Fills one stack array with values built from the index, copies it to a
    second stack array through parallel_for, and compares the copy element by
    element with freshly built values. */
template<typename ClassWithVectorType>
void TestVectorTypes() {
    const int aSize = 300;
    ClassWithVectorType Array1[aSize], Array2[aSize];
    for( int i=0; i<aSize; ++i ) {
        // VC8 does not properly align a temporary value; to work around, use explicit variable
        ClassWithVectorType foo(i);
        Array1[i] = foo;
    }
    tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
    for( int i=0; i<aSize; ++i ) {
        ClassWithVectorType foo(i);
        ASSERT( Array2[i]==foo, NULL ) ;
    }
}
#endif /* HAVE_m128 || HAVE_m256 */
407
408	#include <vector>
409	#include <sstream>
410	#include <tbb/blocked_range.h>
411
412	struct TestSimplePartitionerStabilityFunctor:NoAssign{
413	std::vector<int> & ranges;
414	TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){}
415	void operator()(tbb::blocked_range<size_t>& r)const{
416	ranges.at(r.begin())=true;
417	}
418	};
419	void TestSimplePartitionerStability(){
420	const std::size_t repeat_count= `10`;
421	const std::size_t rangeToSplitSize=`1000000`;
422	const std::size_t grainsizeStep=rangeToSplitSize/repeat_count;
423	typedef TestSimplePartitionerStabilityFunctor FunctorType;
424
425	for (std::size_t i=`0` , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){
426	std::vector<int> firstSeries(rangeToSplitSize,`0`);
427	std::vector<int> secondSeries(rangeToSplitSize,`0`);
428
429	tbb::parallel_for(tbb::blocked_range<size_t>(`0`,rangeToSplitSize,grainsize),FunctorType (firstSeries),tbb::simple_partitioner ());
430	tbb::parallel_for(tbb::blocked_range<size_t>(`0`,rangeToSplitSize,grainsize),FunctorType (secondSeries),tbb::simple_partitioner ());
431	std::stringstream str; str <<i;
432	ASSERT(firstSeries ==secondSeries,("splitting range with tbb::simple_partitioner must be reproducible; i=" +str.str()).c_str() );
433	}
434	}
435	#include <cstdio>
436	#include "tbb/task_scheduler_init.h"
437	#include "harness_cpu.h"
438	#include "harness_barrier.h"
439	#include "test_partitioner.h"
440
441	namespace interaction_with_range_and_partitioner {
442
443	// Test checks compatibility of parallel_for algorithm with various range implementations
444
445	void test() {
446	using namespace test_partitioner_utils::interaction_with_range_and_partitioner;
447
448	test_partitioner_utils::SimpleBody b;
449	tbb::affinity_partitioner ap;
450
451	parallel_for(Range1 (true, false), b, ap);
452	parallel_for(Range2 (true, false), b, ap);
453	parallel_for(Range3 (true, false), b, ap);
454	parallel_for(Range4 (false, true), b, ap);
455	parallel_for(Range5 (false, true), b, ap);
456	parallel_for(Range6 (false, true), b, ap);
457
458	parallel_for(Range1 (false, true), b, tbb::simple_partitioner ());
459	parallel_for(Range2 (false, true), b, tbb::simple_partitioner ());
460	parallel_for(Range3 (false, true), b, tbb::simple_partitioner ());
461	parallel_for(Range4 (false, true), b, tbb::simple_partitioner ());
462	parallel_for(Range5 (false, true), b, tbb::simple_partitioner ());
463	parallel_for(Range6 (false, true), b, tbb::simple_partitioner ());
464
465	parallel_for(Range1 (false, true), b, tbb::auto_partitioner ());
466	parallel_for(Range2 (false, true), b, tbb::auto_partitioner ());
467	parallel_for(Range3 (false, true), b, tbb::auto_partitioner ());
468	parallel_for(Range4 (false, true), b, tbb::auto_partitioner ());
469	parallel_for(Range5 (false, true), b, tbb::auto_partitioner ());
470	parallel_for(Range6 (false, true), b, tbb::auto_partitioner ());
471	}
472
473	} // namespace interaction_with_range_and_partitioner
474
475	namespace various_range_implementations {
476
477	using namespace test_partitioner_utils;
478	using namespace test_partitioner_utils::TestRanges;
479
480	// Body ensures that initial work distribution is done uniformly through affinity mechanism and not
481	// through work stealing
482	class Body {
483	Harness::SpinBarrier &m_sb;
484	public:
485	Body(Harness::SpinBarrier& sb) : m_sb(sb) { }
486	Body(Body& b, tbb::split) : m_sb(b.m_sb) { }
487	Body& operator =(const Body&) { return *this; }
488	template <typename Range>
489	void operator()(Range& r) const {
490	REMARK("Executing range [%lu, %lu)\n", r.begin(), r.end());
491	m_sb.timed_wait(`10`); // waiting for all threads
492	}
493	};
494
495	namespace correctness {
496
497	/ Testing only correctness (that is parallel_for does not hang) /
498	template <typename RangeType, bool / feedback /, bool ensure_non_emptiness>
499	void test() {
500	static const int thread_num = tbb::task_scheduler_init::default_num_threads();
501	RangeType range( `0`, thread_num, NULL, false, ensure_non_emptiness );
502	tbb::affinity_partitioner ap;
503	tbb::parallel_for( range, SimpleBody (), ap );
504	}
505
506	} // namespace correctness
507
508	namespace uniform_distribution {
509
510	/ Body of parallel_for algorithm would hang if non-uniform work distribution happened /
511	template <typename RangeType, bool feedback, bool ensure_non_emptiness>
512	void test() {
513	static const int thread_num = tbb::task_scheduler_init::default_num_threads();
514	Harness::SpinBarrier sb( thread_num );
515	RangeType range(`0`, thread_num, NULL, feedback, ensure_non_emptiness);
516	const Body sync_body( sb );
517	tbb::affinity_partitioner ap;
518	tbb::parallel_for( range, sync_body, ap );
519	tbb::parallel_for( range, sync_body, tbb::static_partitioner () );
520	}
521
522	} // namespace uniform_distribution
523
524	void test() {
525	const bool provide_feedback = __TBB_ENABLE_RANGE_FEEDBACK;
526	const bool ensure_non_empty_range = true;
527
528	// BlockedRange does not take into account feedback and non-emptiness settings but uses the
529	// tbb::blocked_range implementation
530	uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();
531
532	#if __TBB_ENABLE_RANGE_FEEDBACK
533	using uniform_distribution::test; // if feedback is enabled ensure uniform work distribution
534	#else
535	using correctness::test;
536	#endif
537
538	{
539	test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
540	test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
541	#if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE
542	// due to fast division algorithm on MIC
543	test<RoundedDownRange, !provide_feedback, ensure_non_empty_range>();
544	test<RoundedDownRange, !provide_feedback, !ensure_non_empty_range>();
545	#endif
546	}
547
548	{
549	test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
550	test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
551	#if __TBB_ENABLE_RANGE_FEEDBACK && !__TBB_MIC_NATIVE
552	// due to fast division algorithm on MIC
553	test<RoundedUpRange, !provide_feedback, ensure_non_empty_range>();
554	test<RoundedUpRange, !provide_feedback, !ensure_non_empty_range>();
555	#endif
556	}
557
558	// Testing that parallel_for algorithm works with such weird ranges
559	correctness::test<Range1_2, / provide_feedback= / false, !ensure_non_empty_range>();
560	correctness::test<Range1_999, / provide_feedback= / false, !ensure_non_empty_range>();
561	correctness::test<Range999_1, / provide_feedback= / false, !ensure_non_empty_range>();
562
563	// The following ranges do not comply with the proportion suggested by partitioner. Therefore
564	// they have to provide the proportion in which they were actually split back to partitioner and
565	// ensure theirs non-emptiness
566	test<Range1_2, provide_feedback, ensure_non_empty_range>();
567	test<Range1_999, provide_feedback, ensure_non_empty_range>();
568	test<Range999_1, provide_feedback, ensure_non_empty_range>();
569	}
570
571	} // namespace various_range_implementations
572
573	#include <map>
574	#include <utility>
575	#include "tbb/task_arena.h"
576	#include "tbb/enumerable_thread_specific.h"
577
578	namespace parallel_for_within_task_arena {
579
580	using namespace test_partitioner_utils::TestRanges;
581	using tbb::split;
582	using tbb::proportional_split;
583
584	class BlockedRangeWhitebox;
585
586	typedef std::pair<size_t, size_t> range_borders;
587	typedef std::multimap<BlockedRangeWhitebox*, range_borders> MapType;
588	typedef tbb::enumerable_thread_specific<MapType> ETSType;
589	ETSType ets;
590
591	class BlockedRangeWhitebox : public BlockedRange {
592	public:
593	static const bool is_splittable_in_proportion = true;
594	BlockedRangeWhitebox(size_t _begin, size_t _end)
595	: BlockedRange (_begin, _end, NULL, false, false) { }
596
597	BlockedRangeWhitebox(BlockedRangeWhitebox& r, proportional_split& p)
598	:BlockedRange (r, p) {
599	update_ets(r);
600	update_ets(*this);
601	}
602
603	BlockedRangeWhitebox(BlockedRangeWhitebox& r, split)
604	:BlockedRange (r, split ()) { }
605
606	void update_ets(BlockedRangeWhitebox& range) {
607	std::pair<MapType::iterator, MapType::iterator> equal_range = ets.local().equal_range(&range);
608	for (MapType::iterator it = equal_range.first; it != equal_range.second;++it) {
609	if (it ->second.first <= range.begin() && range.end() <= it ->second.second) {
610	ASSERT(!(it ->second.first == range.begin() && it ->second.second == range.end()), "Only one border of the range should be equal to the original");
611	it ->second.first = range.begin();
612	it ->second.second = range.end();
613	return;
614	}
615	}
616	ets.local().insert(std::make_pair<BlockedRangeWhitebox*, range_borders>(&range, range_borders (range.begin(), range.end())));
617	}
618	};
619
620	template <typename Partitioner>
621	struct ArenaBody {
622	size_t range_begin;
623	size_t range_end;
624
625	ArenaBody(size_t _range_begin, size_t _range_end)
626	:range_begin(_range_begin), range_end(_range_end) { }
627
628	void operator()() const {
629	Partitioner my_partitioner;
630	tbb::parallel_for(BlockedRangeWhitebox(range_begin, range_end), test_partitioner_utils::SimpleBody (), my_partitioner);
631	}
632	};
633
634	struct CombineBody {
635	MapType operator()(MapType x, const MapType& y) const {
636	x.insert(y.begin(), y.end());
637	for (MapType::iterator it1 = x.begin(); it1 != x.end(); ++it1) {
638	for (MapType::iterator it2 = x.begin(); it2 != x.end(); ++it2) {
639	if (it1 == it2) continue;
640	bool is_1_subrange_of_2 =
641	it2 ->second.first <= it1 ->second.first && it1 ->second.second <= it2 ->second.second;
642	if (is_1_subrange_of_2) {
643	x.erase(it2);
644	break;
645	}
646	}
647	}
648	return x;
649	}
650	};
651
652	range_borders combine_range(const MapType& map) {
653	range_borders result_range = map.begin()->second;
654	for (MapType::const_iterator it = map.begin(); it != map.end(); it ++)
655	result_range = range_borders (
656	(std::min)(result_range.first, it ->second.first),
657	(std::max)(result_range.second, it ->second.second)
658	);
659	return result_range;
660	}
661
662	template <typename Partitioner>
663	void test_body() {
664	unsigned hw_concurrency = tbb::tbb_thread::hardware_concurrency();
665	for (unsigned int num_threads = hw_concurrency / `4` + `1`; num_threads < hw_concurrency; num_threads *= `2`) {
666	REMARK(" num_threads=%lu\n", num_threads);
667	for (size_t range_begin = `0`, range_end = num_threads * `10` - `1`, i = `0`; i < `3`;
668	range_begin += num_threads, range_end += num_threads + `1`, ++i) {
669	REMARK(" processing range [%lu, %lu)\n", range_begin, range_end);
670	ets = ETSType (MapType ());
671	tbb::task_arena limited(num_threads); // at least two slots in arena.
672	limited.execute(ArenaBody<Partitioner>(range_begin, range_end));
673	MapType combined_map = ets.combine(CombineBody ());
674	range_borders result_borders = combine_range(combined_map);
675	ASSERT(result_borders.first == range_begin, "Restored range begin does not match initial one");
676	ASSERT(result_borders.second == range_end, "Restored range end does not match initial one");
677	size_t map_size = combined_map.size();
678	// In a single-thread arena, partitioners still do one split of a range.
679	size_t range_partitions = num_threads > `1` ? num_threads : `2`;
680	ASSERT((map_size == range_partitions), "Incorrect number or post-proportional split ranges");
681	size_t expected_size = (range_end - range_begin) / range_partitions;
682	for (MapType::iterator it = combined_map.begin(); it != combined_map.end(); ++it) {
683	size_t size = it ->second.second - it ->second.first;
684	ASSERT((size == expected_size \|\| size == expected_size + `1`), "Incorrect post-proportional range size");
685	}
686	}
687	}
688	}
689
690	void test() {
691	REMARK("parallel_for with affinity partitioner within task_arena\n");
692	test_body<tbb::affinity_partitioner>();
693	REMARK("parallel_for with static partitioner within task_arena\n");
694	test_body<tbb::static_partitioner>();
695	}
696
697	} // namespace parallel_for_within_task_arena
698
699	int TestMain () {
700	if( MinThread<`1` ) {
701	REPORT("number of threads must be positive\n");
702	exit(`1`);
703	}
704	for( int p=MinThread; p<=MaxThread; ++p ) {
705	if( p>`0` ) {
706	tbb::task_scheduler_init init( p );
707	Flog<parallel_tag,`1`>(p);
708	Flog<parallel_tag,`10`>(p);
709	Flog<parallel_tag,`100`>(p);
710	Flog<parallel_tag,`1000`>(p);
711	Flog<parallel_tag,`10000`>(p);
712
713	// Testing with different integer types
714	TestParallelForWithStepSupport<parallel_tag,short>();
715	TestParallelForWithStepSupport<parallel_tag,unsigned short>();
716	TestParallelForWithStepSupport<parallel_tag,int>();
717	TestParallelForWithStepSupport<parallel_tag,unsigned int>();
718	TestParallelForWithStepSupport<parallel_tag,long>();
719	TestParallelForWithStepSupport<parallel_tag,unsigned long>();
720	TestParallelForWithStepSupport<parallel_tag,long long>();
721	TestParallelForWithStepSupport<parallel_tag,unsigned long long>();
722	TestParallelForWithStepSupport<parallel_tag,size_t>();
723
724	#if TBB_PREVIEW_SERIAL_SUBSET
725	// This is for testing serial implementation.
726	if( p == MaxThread ) {
727	Flog<serial_tag,`1`>(p);
728	Flog<serial_tag,`10`>(p);
729	Flog<serial_tag,`100`>(p);
730	TestParallelForWithStepSupport<serial_tag,short>();
731	TestParallelForWithStepSupport<serial_tag,unsigned short>();
732	TestParallelForWithStepSupport<serial_tag,int>();
733	TestParallelForWithStepSupport<serial_tag,unsigned int>();
734	TestParallelForWithStepSupport<serial_tag,long>();
735	TestParallelForWithStepSupport<serial_tag,unsigned long>();
736	TestParallelForWithStepSupport<serial_tag,long long>();
737	TestParallelForWithStepSupport<serial_tag,unsigned long long>();
738	TestParallelForWithStepSupport<serial_tag,size_t>();
739	}
740	#endif
741
742	#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
743	TestExceptionsSupport();
744	#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */
745	#if __TBB_TASK_GROUP_CONTEXT
746	if ( p > `1` )
747	TestCancellation();
748	#endif /* __TBB_TASK_GROUP_CONTEXT */
749	#if !__TBB_SSE_STACK_ALIGNMENT_BROKEN
750	#if HAVE_m128
751	TestVectorTypes<ClassWithSSE>();
752	#endif
753	#if HAVE_m256
754	if (have_AVX()) TestVectorTypes<ClassWithAVX>();
755	#endif
756	#endif /!__TBB_SSE_STACK_ALIGNMENT_BROKEN/
757	// Test that all workers sleep when no work
758	TestCPUUserTime(p);
759	TestSimplePartitionerStability();
760	}
761	}
762	#if __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN
763	REPORT("Known issue: exception handling tests are skipped.\n");
764	#endif
765	#if (HAVE_m128 \|\| HAVE_m256) && __TBB_SSE_STACK_ALIGNMENT_BROKEN
766	REPORT("Known issue: stack alignment for SIMD instructions not tested.\n");
767	#endif
768
769	various_range_implementations::test();
770	interaction_with_range_and_partitioner::test();
771	parallel_for_within_task_arena::test();
772	return Harness::Done;
773	}
774
775	#if _MSC_VER
776	#pragma warning (pop)
777	#endif
778

Browse the source code of ThreadBB/src/test/test_parallel_for.cpp