/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/* Common part for the partitioner whitebox tests */

#include <typeinfo>

#include "tbb/tbb_thread.h"
#include "tbb/enumerable_thread_specific.h"

#include <string.h>  // for memset
#include <algorithm> // for std::min and std::count
#include <iterator>  // for std::iterator_traits
#include <numeric>   // for std::accumulate
#include <vector>

#include "harness_assert.h"
#include "test_partitioner.h"

#if TBB_USE_DEBUG
// fewer simulations in debug builds to avoid test timeouts
const size_t max_simulated_threads = 256;
#else
const size_t max_simulated_threads = 640;
#endif

typedef tbb::enumerable_thread_specific<size_t> ThreadNumsType;
size_t g_threadNumInitialValue = 10;
ThreadNumsType g_threadNums(g_threadNumInitialValue);

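// Globals driving the simulation: the index of the thread currently being
// simulated (reported via this_task_arena::current_thread_index below) and a
// reference splitting tree used to compare affinity_partitioner runs.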
namespace whitebox_simulation {
size_t whitebox_thread_index = 0;
test_partitioner_utils::BinaryTree reference_tree;
}

// simulate a subset of task.h
namespace tbb {
namespace internal {
typedef unsigned short affinity_id;
}
class fake_task {
public:
    typedef internal::affinity_id affinity_id;
    void set_affinity(affinity_id a) { my_affinity = a; }
    affinity_id affinity() const { return my_affinity; }
    void set_parent(fake_task* p) { my_parent = p; }
    fake_task *parent() const { return my_parent; }
    bool is_stolen_task() const { return false; }
    intptr_t ref_count() const { return 1; }
    bool is_cancelled() const { return false; }
    static void spawn(fake_task &) {} // for legacy code in partitioner.h
    virtual fake_task* execute() = 0; // makes the class polymorphic, enabling dynamic_cast

    fake_task() : my_parent(0), my_affinity(0) {}
    virtual ~fake_task() {}
private:
    fake_task *my_parent;
    affinity_id my_affinity;
};
namespace task_arena {
static const int not_initialized = -2; // should match the corresponding value in task_arena.h
} //namespace task_arena
namespace this_task_arena {
inline int current_thread_index() { return (int)whitebox_simulation::whitebox_thread_index; }
}
} //namespace tbb

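// Pull partitioner.h in on top of the simulation: pre-defining the include
// guards keeps the real task.h/task_arena.h out, while the macros below rename
// the symbols the partitioners rely on (the divisor query, the affinity
// partitioner base class, and the task type) to the local fakes defined above.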
#define __TBB_task_H
#define __TBB_task_arena_H
#define get_initial_auto_partitioner_divisor my_get_initial_auto_partitioner_divisor
#define affinity_partitioner_base_v3 my_affinity_partitioner_base_v3
#define task fake_task
#define __TBB_STATIC_THRESHOLD 0
#include "tbb/partitioner.h"
#undef __TBB_STATIC_THRESHOLD
#undef task
#undef affinity_partitioner_base_v3
#undef get_initial_auto_partitioner_divisor

// replace library functions to simulate concurrency
namespace tbb {
namespace internal {
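// Stand-in for the library's initial auto_partitioner divisor query: it keeps
// the same 4 * P shape, but P is the simulated thread count rather than the
// real arena concurrency.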
size_t my_get_initial_auto_partitioner_divisor() {
    const size_t X_FACTOR = 4;
    return X_FACTOR * g_threadNums.local();
}

void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
void __TBB_EXPORTED_FUNC NFS_Free( void* );

void my_affinity_partitioner_base_v3::resize( unsigned factor ) {
    // Check factor to avoid asking for the number of workers while there might be no arena.
    size_t new_size = factor ? factor * g_threadNums.local() : 0;
    if (new_size != my_size) {
        if (my_array) {
            NFS_Free(my_array);
            // The following two assignments must be done here for the sake of exception safety.
            my_array = NULL;
            my_size = 0;
        }
        if (new_size) {
            my_array = static_cast<affinity_id*>(NFS_Allocate(new_size, sizeof(affinity_id), NULL));
            memset(my_array, 0, sizeof(affinity_id) * new_size);
            my_size = new_size;
        }
    }
}

} //namespace internal
// simulate a subset of parallel_for
namespace interface9 {
namespace internal {

// parallel_for algorithm that executes sequentially
template<typename Range, typename Body, typename Partitioner>
class start_for : public fake_task {
    Range my_range;
    Body my_body;
    typename Partitioner::task_partition_type my_partition;
    size_t m_executedBegin, m_executedEnd;
    bool m_firstTimeRun;
    size_t m_joinedBegin, m_joinedEnd;
    test_partitioner_utils::BinaryTree* m_tree;
public:
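    //! Constructor for the root task covering the whole range.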
    start_for( const Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree ) :
        my_range(range), my_body(body), my_partition(partitioner),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ range.end()), m_joinedEnd(range.end()), m_tree(tree)
    {
        if (m_tree) {
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(), my_range.end(), affinity()) );
        }
    }
    //! Splitting constructor used to generate children.
    /** parent_ becomes the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, typename Partitioner::split_type& split_obj ) :
        my_range(parent_.my_range, split_obj),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ my_range.end()), m_joinedEnd(my_range.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);

        if (m_tree) {
            // collecting splitting statistics
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(),
                                                                 my_range.end(),
                                                                 affinity()) );
            m_tree->push_node( test_partitioner_utils::make_node(parent_.my_range.begin(),
                                                                 parent_.my_range.end(),
                                                                 parent_.affinity()) );
        }
    }
    //! Construct the right child from the given range in response to a demand.
    /** parent_ remains the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, const Range& r, depth_t d ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, tbb::split()),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ r.end()), m_joinedEnd(r.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);
        my_partition.align_depth( d );
    }
    fake_task* execute() __TBB_override {
        my_partition.check_being_stolen( *this );
        size_t origBegin = my_range.begin();
        size_t origEnd = my_range.end();

        my_partition.execute(*this, my_range);

        ASSERT(m_executedEnd == m_joinedBegin, "Non-continuous execution");
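        // absorb the iterations completed by right siblings (recorded by join())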
        m_executedEnd = m_joinedEnd;

        ASSERT(origBegin == m_executedBegin && origEnd == m_executedEnd,
               "Not all iterations were processed");
        return NULL;
    }
    //! Run the body for a range; serves as a callback for the partitioner
    void run_body( Range &r ) {
        if( r.is_ensure_non_emptiness() )
            ASSERT( !r.empty(), "Empty ranges are not allowed" );
        my_body(r);
        if (m_firstTimeRun) {
            m_firstTimeRun = false;
            m_executedBegin = m_executedEnd = r.begin();
        }
        ASSERT(m_executedBegin <= r.begin() && m_executedEnd <= r.end(),
               "Non-continuous execution");
        m_executedEnd = r.end();
    }
    //! "Spawn" the right task by executing it in place; serves as a callback for the partitioner
    void offer_work(typename Partitioner::split_type& split_obj) {
        start_for sibling(*this, split_obj);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
    //! "Spawn" the right task by executing it in place; serves as a callback for the partitioner
    void offer_work(const Range& r, depth_t d = 0) {
        start_for sibling(*this, r, d);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
    void join(size_t siblingExecutedBegin, size_t siblingExecutedEnd) {
        ASSERT(siblingExecutedEnd == m_joinedBegin, "Joined ranges are not contiguous");
        m_joinedBegin = siblingExecutedBegin;
    }
};

} //namespace internal
} //namespace interface9
} //namespace tbb

namespace whitebox_simulation {
using namespace tbb::interface9::internal;
template<typename Range, typename Body, typename Partitioner>
void parallel_for( const Range& range, const Body& body, Partitioner& partitioner,
                   test_partitioner_utils::BinaryTree* tree = NULL) {
    if (!range.empty()) {
        tbb::internal::flag_task parent; // flag_task from partitioner.h, built over fake_task here
        start_for<Range, Body, Partitioner> start(range, body, partitioner, tree);
        start.set_parent(&parent);
        start.execute();
    }
}

} //namespace whitebox_simulation

template <typename Range, typename Body, typename Partitioner>
void test_case(Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree = NULL) {
    whitebox_simulation::parallel_for(range, body, partitioner, tree);
}
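
// Illustrative use (not part of the test suite): drive a blocked range through
// the simulated parallel_for with a simple partitioner:
//   tbb::simple_partitioner p;
//   test_partitioner_utils::TestRanges::BlockedRange
//       r(/*begin*/0, /*end*/100, /*statData*/NULL,
//         /*provide_feedback*/false, /*ensure_non_empty_size*/false);
//   test_case(r, test_partitioner_utils::SimpleBody(), p);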

// Functions that generate sizes for the range objects used in the tests
template <typename T>
size_t default_range_size_generator(T* factor, unsigned index, size_t thread_num) {
    return size_t(factor[index] * thread_num);
}

size_t shifted_left_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num - 1;
}

size_t shifted_right_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num + 1;
}

size_t max_range_size_generator(size_t*, unsigned, size_t) {
    return size_t(-1);
}

size_t simple_size_generator(size_t*, unsigned index, size_t) {
    return index;
}
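
// For example, with factor[index] == 4 and thread_num == 8, the generators above
// yield: default -> 32, shifted_left -> 31, shifted_right -> 33,
// max -> size_t(-1), and simple -> index.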

namespace uniform_iterations_distribution {

/*
 * These tests check that a range's iterations are distributed uniformly among
 * all tasks right after the work distribution phase completes and before the
 * work balancing phase starts.
 */

using namespace test_partitioner_utils;

class ParallelTestBody {
public:
    struct use_case_settings_t;

    typedef void (*CheckerFuncType)(const char*, size_t, const use_case_settings_t*, const RangeStatisticData&);

    struct use_case_settings_t {
        size_t thread_num;           // number of threads used during the current use case
        unsigned factors_array_len;  // size of the 'factors' array
        size_t range_begin;          // beginning of the range iterations
        bool provide_feedback;       // 'true' if the range should give feedback
        bool ensure_non_empty_size;  // don't allow ranges of empty size

        size_t above_threads_size_tolerance;     // allowed difference between the number of created
                                                 // ranges and the number of threads when the initial
                                                 // range size was greater than or equal to the
                                                 // number of threads

        size_t below_threads_size_tolerance;     // allowed difference between the number of created
                                                 // ranges and the range size when the initial range
                                                 // size was less than the number of threads

        size_t between_min_max_ranges_tolerance; // allowed difference in iteration counts between
                                                 // the largest and the smallest ranges

        CheckerFuncType checker;                 // checker function for a particular test case
    };
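
    // Hypothetical example of filling the settings (values are illustrative only):
    //   use_case_settings_t s = { /*thread_num*/ 4, /*factors_array_len*/ 1, /*range_begin*/ 0,
    //                             /*provide_feedback*/ true, /*ensure_non_empty_size*/ true,
    //                             /*above_threads_size_tolerance*/ 0,
    //                             /*below_threads_size_tolerance*/ 0,
    //                             /*between_min_max_ranges_tolerance*/ 1,
    //                             /*checker*/ &uniform_distribution_checker };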
    ParallelTestBody(size_t parallel_group_thread_starting_index)
        : m_parallel_group_thread_starting_index(parallel_group_thread_starting_index) { }

    void operator()(size_t) const { ASSERT( false, "Empty ParallelTestBody called" ); }

    static void uniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                             const RangeStatisticData& stat)
    {
        // Checking that all threads were given a task
        if (rangeSize >= settings->thread_num) {
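            // max - min rather than plain subtraction: both operands are unsigned,
            // so this computes |#ranges - #threads| without risk of underflow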
            uint64_t disparity =
                max(stat.m_rangeNum, settings->thread_num) - min(stat.m_rangeNum, settings->thread_num);
            if (disparity > settings->above_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges(%llu)-#threads(%llu)|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num), disparity,
                       uint64_t(settings->above_threads_size_tolerance));
                ASSERT(disparity <= settings->above_threads_size_tolerance, "Incorrect number of range "
                       "objects was created before the work balancing phase started");
            }
        } else if (settings->ensure_non_empty_size && rangeSize != 0) {
            uint64_t disparity = max(stat.m_rangeNum, rangeSize) - min(stat.m_rangeNum, rangeSize);
            if (disparity > settings->below_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges-range size|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       disparity, uint64_t(settings->below_threads_size_tolerance));
                ASSERT(disparity <= settings->below_threads_size_tolerance, "Incorrect number of range objects"
                       " was created before the work balancing phase started");
            }
        }
        // Checking the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > settings->between_min_max_ranges_tolerance) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=tolerance\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(settings->between_min_max_ranges_tolerance));
            ASSERT(diff <= settings->between_min_max_ranges_tolerance, "Uniform iteration distribution error");
        }
    }
    // Checker for test cases where ranges don't provide feedback on proportional splits to the
    // partitioner and differ from the tbb::blocked_range implementation in their splitting algorithm
    static void nonuniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                                const RangeStatisticData& stat)
    {
        if (stat.m_rangeNum > settings->thread_num) {
            REPORT("ERROR: '%s (f=%d|e=%d)': %llu=#ranges > #threads=%llu\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num));
            ASSERT(stat.m_rangeNum <= settings->thread_num,
                   "Incorrect number of range objects was created before the work balancing phase started");
        }
        // Checking the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > rangeSize) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=initial range size\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(rangeSize));
            ASSERT(diff <= rangeSize, "Iteration distribution error");
        }
    }

protected:
    size_t m_parallel_group_thread_starting_index; // starting index of the simulated thread group

    template <typename Range, typename Partitioner, typename T>
    void test(use_case_settings_t& settings, T factors[], size_t (*rsgFunc)(T*, unsigned, size_t)
              = &default_range_size_generator<T>) const
    {
        for (unsigned i = 0; i < settings.factors_array_len; ++i) {
            size_t range_end = rsgFunc(factors, i, settings.thread_num);
            RangeStatisticData stat = { /*range num=*/ 0, /*min range size=*/ 0,
                /*max range size=*/ 0, /*was the min range size written=*/ false };
            Range range = Range(settings.range_begin, range_end, &stat, settings.provide_feedback,
                                settings.ensure_non_empty_size);
            Partitioner my_partitioner;
            test_case(range, SimpleBody(), my_partitioner, NULL);
            size_t range_size = range_end - settings.range_begin;
            const char* rangeName = typeid(range).name();
            settings.checker(rangeName, range_size, &settings, stat);
        }
    }
};

template <typename ParallelTestBody>
void test() {
    size_t hw_threads_num = tbb::tbb_thread::hardware_concurrency();
    size_t threadsToRunOn = std::min<size_t>(max_simulated_threads, hw_threads_num);

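    // Cover the simulated thread counts in groups of 'threadsToRunOn' native
    // threads, handing each group its starting index; a final call covers the
    // remainder that does not fill a whole group.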
    size_t parallel_group_thread_starting_index = 1;
    while( parallel_group_thread_starting_index <= max_simulated_threads - threadsToRunOn ) {
        NativeParallelFor(threadsToRunOn, ParallelTestBody(parallel_group_thread_starting_index));
        parallel_group_thread_starting_index += threadsToRunOn;
    }
    NativeParallelFor(max_simulated_threads - parallel_group_thread_starting_index,
                      ParallelTestBody(parallel_group_thread_starting_index));
}

namespace task_affinity_whitebox {
size_t range_begin = 0;
size_t range_end = 20;
}

template<typename Partitioner>
void check_tree(const test_partitioner_utils::BinaryTree&);

template<>
void check_tree<tbb::affinity_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    ASSERT(tree == whitebox_simulation::reference_tree,
           "affinity_partitioner distributes tasks differently from run to run");
}

template<>
void check_tree<tbb::static_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    std::vector<test_partitioner_utils::TreeNode*> tree_leafs;
    tree.fill_leafs(tree_leafs);
    typedef std::vector<size_t> Slots;
    Slots affinity_slots(tree_leafs.size() + 1, 0);

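    // affinity_slots[a] counts the leaf ranges whose task carries affinity id 'a';
    // slot 0 corresponds to tasks that never had an affinity set.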
    for (std::vector<test_partitioner_utils::TreeNode*>::iterator i = tree_leafs.begin(); i != tree_leafs.end(); ++i) {
        affinity_slots[(*i)->m_affinity]++;
        if ((*i)->m_affinity == 0)
            ASSERT((*i)->m_range_begin == task_affinity_whitebox::range_begin,
                   "Task with affinity 0 was executed with a wrong range");
    }

    typedef std::iterator_traits<Slots::iterator>::difference_type slots_difference_type;
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == slots_difference_type(1),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(1)) == slots_difference_type(g_threadNums.local()),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(affinity_slots[tbb::this_task_arena::current_thread_index() + 1] == 0,
           "static_partitioner incorrectly assigns the task with 0 affinity");
    ASSERT(std::accumulate(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == g_threadNums.local(),
           "static_partitioner has created more tasks than the number of threads");
}

template<typename Partitioner>
void test_task_affinity() {
    using namespace task_affinity_whitebox;
    test_partitioner_utils::SimpleBody body;
    for (size_t p = 1; p <= 50; ++p) {
        g_threadNums.local() = p;
        whitebox_simulation::whitebox_thread_index = 0;
        test_partitioner_utils::TestRanges::BlockedRange range(range_begin, range_end, /*statData*/NULL,
                                                               /*provide_feedback*/false, /*ensure_non_empty_size*/false);
        Partitioner partitioner;
        whitebox_simulation::reference_tree = test_partitioner_utils::BinaryTree();
        whitebox_simulation::parallel_for(range, body, partitioner, &(whitebox_simulation::reference_tree));
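        // Replay the run from every simulated thread index and verify each
        // resulting splitting tree via check_tree.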
        while (whitebox_simulation::whitebox_thread_index < p) {
            test_partitioner_utils::BinaryTree tree;
            whitebox_simulation::parallel_for(range, body, partitioner, &tree);
            check_tree<Partitioner>(tree);
            whitebox_simulation::whitebox_thread_index++;
        }
        range_begin++;
        range_end += 2;
    }
}
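
// Typical instantiations, matching the check_tree specializations above:
//   test_task_affinity<tbb::affinity_partitioner>();
//   test_task_affinity<tbb::static_partitioner>();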

} /* namespace uniform_iterations_distribution */
