/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/* Common part for the partitioner whitebox tests */

#include <typeinfo>

#include "tbb/tbb_thread.h"
#include "tbb/enumerable_thread_specific.h"

#include <string.h>  // for memset
#include <algorithm> // for std::min and std::count
#include <iterator>  // for std::iterator_traits
#include <numeric>   // for std::accumulate
#include <vector>

#include "harness_assert.h"
#include "test_partitioner.h"

#if TBB_USE_DEBUG
// fewer simulations in debug builds to avoid test timeouts
const size_t max_simulated_threads = 256;
#else
const size_t max_simulated_threads = 640;
#endif

typedef tbb::enumerable_thread_specific<size_t> ThreadNumsType;
size_t g_threadNumInitialValue = 10;
ThreadNumsType g_threadNums(g_threadNumInitialValue);

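// Globals driving the simulation: the index of the thread currently being
// simulated (reported via this_task_arena::current_thread_index below) and a
// reference splitting tree used to compare affinity_partitioner runs.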
namespace whitebox_simulation {
size_t whitebox_thread_index = 0;
test_partitioner_utils::BinaryTree reference_tree;
}

// simulate a subset of task.h
namespace tbb {
namespace internal {
typedef unsigned short affinity_id;
}
class fake_task {
public:
    typedef internal::affinity_id affinity_id;
    void set_affinity(affinity_id a) { my_affinity = a; }
    affinity_id affinity() const { return my_affinity; }
    void set_parent(fake_task* p) { my_parent = p; }
    fake_task *parent() const { return my_parent; }
    bool is_stolen_task() const { return false; }
    intptr_t ref_count() const { return 1; }
    bool is_cancelled() const { return false; }
    static void spawn(fake_task &) {} // for legacy code in partitioner.h
    virtual fake_task* execute() = 0; // makes the class polymorphic, enabling dynamic_cast

    fake_task() : my_parent(0), my_affinity(0) {}
    virtual ~fake_task() {}
private:
    fake_task *my_parent;
    affinity_id my_affinity;
};
namespace task_arena {
static const int not_initialized = -2; // should match the corresponding value in task_arena.h
} //namespace task_arena
namespace this_task_arena {
inline int current_thread_index() { return (int)whitebox_simulation::whitebox_thread_index; }
}
} //namespace tbb

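// Pull partitioner.h in on top of the simulation: pre-defining the include
// guards keeps the real task.h/task_arena.h out, while the macros below rename
// the symbols the partitioners rely on (the divisor query, the affinity
// partitioner base class, and the task type) to the local fakes defined above.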
#define __TBB_task_H
#define __TBB_task_arena_H
#define get_initial_auto_partitioner_divisor my_get_initial_auto_partitioner_divisor
#define affinity_partitioner_base_v3 my_affinity_partitioner_base_v3
#define task fake_task
#define __TBB_STATIC_THRESHOLD 0
#include "tbb/partitioner.h"
#undef __TBB_STATIC_THRESHOLD
#undef task
#undef affinity_partitioner_base_v3
#undef get_initial_auto_partitioner_divisor

// replace library functions to simulate concurrency
namespace tbb {
namespace internal {
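// Stand-in for the library's initial auto_partitioner divisor query: it keeps
// the same 4 * P shape, but P is the simulated thread count rather than the
// real arena concurrency.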
size_t my_get_initial_auto_partitioner_divisor() {
    const size_t X_FACTOR = 4;
    return X_FACTOR * g_threadNums.local();
}

void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
void __TBB_EXPORTED_FUNC NFS_Free( void* );

void my_affinity_partitioner_base_v3::resize( unsigned factor ) {
    // Check factor to avoid asking for the number of workers while there might be no arena.
    size_t new_size = factor ? factor * g_threadNums.local() : 0;
    if (new_size != my_size) {
        if (my_array) {
            NFS_Free(my_array);
            // The following two assignments must be done here for the sake of exception safety.
            my_array = NULL;
            my_size = 0;
        }
        if (new_size) {
            my_array = static_cast<affinity_id*>(NFS_Allocate(new_size, sizeof(affinity_id), NULL));
            memset(my_array, 0, sizeof(affinity_id) * new_size);
            my_size = new_size;
        }
    }
}

} //namespace internal
// simulate a subset of parallel_for
namespace interface9 {
namespace internal {

// parallel_for algorithm that executes sequentially
template<typename Range, typename Body, typename Partitioner>
class start_for : public fake_task {
    Range my_range;
    Body my_body;
    typename Partitioner::task_partition_type my_partition;
    size_t m_executedBegin, m_executedEnd;
    bool m_firstTimeRun;
    size_t m_joinedBegin, m_joinedEnd;
    test_partitioner_utils::BinaryTree* m_tree;
public:
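    //! Constructor for the root task covering the whole range.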
    start_for( const Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree ) :
        my_range(range), my_body(body), my_partition(partitioner),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ range.end()), m_joinedEnd(range.end()), m_tree(tree)
    {
        if (m_tree) {
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(), my_range.end(), affinity()) );
        }
    }
    //! Splitting constructor used to generate children.
    /** parent_ becomes the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, typename Partitioner::split_type& split_obj ) :
        my_range(parent_.my_range, split_obj),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, split_obj),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ my_range.end()), m_joinedEnd(my_range.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);

        if (m_tree) {
            // collecting splitting statistics
            m_tree->push_node( test_partitioner_utils::make_node(my_range.begin(),
                                                                 my_range.end(),
                                                                 affinity()) );
            m_tree->push_node( test_partitioner_utils::make_node(parent_.my_range.begin(),
                                                                 parent_.my_range.end(),
                                                                 parent_.affinity()) );
        }
    }
    //! Construct the right child from the given range in response to a demand.
    /** parent_ remains the left child. The newly constructed object is the right child. */
    start_for( start_for& parent_, const Range& r, depth_t d ) :
        my_range(r),
        my_body(parent_.my_body),
        my_partition(parent_.my_partition, tbb::split()),
        m_executedBegin(0), m_executedEnd(0), m_firstTimeRun(true),
        m_joinedBegin(/* grows left */ r.end()), m_joinedEnd(r.end()),
        m_tree(parent_.m_tree)
    {
        set_parent(parent_.parent());
        my_partition.set_affinity(*this);
        my_partition.align_depth( d );
    }
    fake_task* execute() __TBB_override {
        my_partition.check_being_stolen( *this );
        size_t origBegin = my_range.begin();
        size_t origEnd = my_range.end();

        my_partition.execute(*this, my_range);

        ASSERT(m_executedEnd == m_joinedBegin, "Non-continuous execution");
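        // absorb the iterations completed by right siblings (recorded by join())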
        m_executedEnd = m_joinedEnd;

        ASSERT(origBegin == m_executedBegin && origEnd == m_executedEnd,
               "Not all iterations were processed");
        return NULL;
    }
    //! Run the body for a range; serves as a callback for the partitioner
    void run_body( Range &r ) {
        if( r.is_ensure_non_emptiness() )
            ASSERT( !r.empty(), "Empty ranges are not allowed" );
        my_body(r);
        if (m_firstTimeRun) {
            m_firstTimeRun = false;
            m_executedBegin = m_executedEnd = r.begin();
        }
        ASSERT(m_executedBegin <= r.begin() && m_executedEnd <= r.end(),
               "Non-continuous execution");
        m_executedEnd = r.end();
    }
    //! "Spawn" the right task by executing it in place; serves as a callback for the partitioner
    void offer_work(typename Partitioner::split_type& split_obj) {
        start_for sibling(*this, split_obj);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
    //! "Spawn" the right task by executing it in place; serves as a callback for the partitioner
    void offer_work(const Range& r, depth_t d = 0) {
        start_for sibling(*this, r, d);
        sibling.execute();
        join(sibling.m_executedBegin, sibling.m_executedEnd);
    }
    void join(size_t siblingExecutedBegin, size_t siblingExecutedEnd) {
        ASSERT(siblingExecutedEnd == m_joinedBegin, "Joined ranges are not contiguous");
        m_joinedBegin = siblingExecutedBegin;
    }
};

} //namespace internal
} //namespace interface9
} //namespace tbb

namespace whitebox_simulation {
using namespace tbb::interface9::internal;
template<typename Range, typename Body, typename Partitioner>
void parallel_for( const Range& range, const Body& body, Partitioner& partitioner,
                   test_partitioner_utils::BinaryTree* tree = NULL) {
    if (!range.empty()) {
        tbb::internal::flag_task parent; // flag_task from partitioner.h, built over fake_task here
        start_for<Range, Body, Partitioner> start(range, body, partitioner, tree);
        start.set_parent(&parent);
        start.execute();
    }
}

} //namespace whitebox_simulation

template <typename Range, typename Body, typename Partitioner>
void test_case(Range& range, const Body& body, Partitioner& partitioner,
               test_partitioner_utils::BinaryTree* tree = NULL) {
    whitebox_simulation::parallel_for(range, body, partitioner, tree);
}
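
// Illustrative use (not part of the test suite): drive a blocked range through
// the simulated parallel_for with a simple partitioner:
//   tbb::simple_partitioner p;
//   test_partitioner_utils::TestRanges::BlockedRange
//       r(/*begin*/0, /*end*/100, /*statData*/NULL,
//         /*provide_feedback*/false, /*ensure_non_empty_size*/false);
//   test_case(r, test_partitioner_utils::SimpleBody(), p);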

// Functions that generate sizes for the range objects used in the tests
template <typename T>
size_t default_range_size_generator(T* factor, unsigned index, size_t thread_num) {
    return size_t(factor[index] * thread_num);
}

size_t shifted_left_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num - 1;
}

size_t shifted_right_range_size_generator(size_t* factor, unsigned index, size_t thread_num) {
    return factor[index] * thread_num + 1;
}

size_t max_range_size_generator(size_t*, unsigned, size_t) {
    return size_t(-1);
}

size_t simple_size_generator(size_t*, unsigned index, size_t) {
    return index;
}
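
// For example, with factor[index] == 4 and thread_num == 8, the generators above
// yield: default -> 32, shifted_left -> 31, shifted_right -> 33,
// max -> size_t(-1), and simple -> index.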

namespace uniform_iterations_distribution {

/*
 * These tests check that a range's iterations are distributed uniformly among
 * all tasks right after the work distribution phase completes and before the
 * work balancing phase starts.
 */

using namespace test_partitioner_utils;

class ParallelTestBody {
public:
    struct use_case_settings_t;

    typedef void (*CheckerFuncType)(const char*, size_t, const use_case_settings_t*, const RangeStatisticData&);

    struct use_case_settings_t {
        size_t thread_num;           // number of threads used during the current use case
        unsigned factors_array_len;  // size of the 'factors' array
        size_t range_begin;          // beginning of the range iterations
        bool provide_feedback;       // 'true' if the range should give feedback
        bool ensure_non_empty_size;  // don't allow ranges of empty size

        size_t above_threads_size_tolerance;     // allowed difference between the number of created
                                                 // ranges and the number of threads when the initial
                                                 // range size was greater than or equal to the
                                                 // number of threads

        size_t below_threads_size_tolerance;     // allowed difference between the number of created
                                                 // ranges and the range size when the initial range
                                                 // size was less than the number of threads

        size_t between_min_max_ranges_tolerance; // allowed difference in iteration counts between
                                                 // the largest and the smallest ranges

        CheckerFuncType checker;                 // checker function for a particular test case
    };
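
    // Hypothetical example of filling the settings (values are illustrative only):
    //   use_case_settings_t s = { /*thread_num*/ 4, /*factors_array_len*/ 1, /*range_begin*/ 0,
    //                             /*provide_feedback*/ true, /*ensure_non_empty_size*/ true,
    //                             /*above_threads_size_tolerance*/ 0,
    //                             /*below_threads_size_tolerance*/ 0,
    //                             /*between_min_max_ranges_tolerance*/ 1,
    //                             /*checker*/ &uniform_distribution_checker };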
    ParallelTestBody(size_t parallel_group_thread_starting_index)
        : m_parallel_group_thread_starting_index(parallel_group_thread_starting_index) { }

    void operator()(size_t) const { ASSERT( false, "Empty ParallelTestBody called" ); }

    static void uniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                             const RangeStatisticData& stat)
    {
        // Checking that all threads were given a task
        if (rangeSize >= settings->thread_num) {
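            // max - min rather than plain subtraction: both operands are unsigned,
            // so this computes |#ranges - #threads| without risk of underflow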
            uint64_t disparity =
                max(stat.m_rangeNum, settings->thread_num) - min(stat.m_rangeNum, settings->thread_num);
            if (disparity > settings->above_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges(%llu)-#threads(%llu)|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num), disparity,
                       uint64_t(settings->above_threads_size_tolerance));
                ASSERT(disparity <= settings->above_threads_size_tolerance, "Incorrect number of range "
                       "objects was created before the work balancing phase started");
            }
        } else if (settings->ensure_non_empty_size && rangeSize != 0) {
            uint64_t disparity = max(stat.m_rangeNum, rangeSize) - min(stat.m_rangeNum, rangeSize);
            if (disparity > settings->below_threads_size_tolerance) {
                REPORT("ERROR: '%s (f=%d|e=%d)': |#ranges-range size|=%llu > %llu=tolerance\n",
                       rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                       disparity, uint64_t(settings->below_threads_size_tolerance));
                ASSERT(disparity <= settings->below_threads_size_tolerance, "Incorrect number of range objects"
                       " was created before the work balancing phase started");
            }
        }
        // Checking the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > settings->between_min_max_ranges_tolerance) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=tolerance\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(settings->between_min_max_ranges_tolerance));
            ASSERT(diff <= settings->between_min_max_ranges_tolerance, "Uniform iteration distribution error");
        }
    }
    // Checker for test cases where ranges don't provide feedback on proportional splits to the
    // partitioner and differ from the tbb::blocked_range implementation in their splitting algorithm
    static void nonuniform_distribution_checker(const char* rangeName, size_t rangeSize, const use_case_settings_t* settings,
                                                const RangeStatisticData& stat)
    {
        if (stat.m_rangeNum > settings->thread_num) {
            REPORT("ERROR: '%s (f=%d|e=%d)': %llu=#ranges > #threads=%llu\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(stat.m_rangeNum), uint64_t(settings->thread_num));
            ASSERT(stat.m_rangeNum <= settings->thread_num,
                   "Incorrect number of range objects was created before the work balancing phase started");
        }
        // Checking the difference between the min and max number of range iterations
        size_t diff = stat.m_maxRangeSize - stat.m_minRangeSize;
        if (diff > rangeSize) {
            REPORT("ERROR: '%s (f=%d|e=%d)': range size difference=%llu > %llu=initial range size\n",
                   rangeName, int(settings->provide_feedback), int(settings->ensure_non_empty_size),
                   uint64_t(diff), uint64_t(rangeSize));
            ASSERT(diff <= rangeSize, "Iteration distribution error");
        }
    }

protected:
    size_t m_parallel_group_thread_starting_index; // starting index of the simulated thread group

    template <typename Range, typename Partitioner, typename T>
    void test(use_case_settings_t& settings, T factors[], size_t (*rsgFunc)(T*, unsigned, size_t)
              = &default_range_size_generator<T>) const
    {
        for (unsigned i = 0; i < settings.factors_array_len; ++i) {
            size_t range_end = rsgFunc(factors, i, settings.thread_num);
            RangeStatisticData stat = { /*range num=*/ 0, /*min range size=*/ 0,
                /*max range size=*/ 0, /*was the min range size written=*/ false };
            Range range = Range(settings.range_begin, range_end, &stat, settings.provide_feedback,
                                settings.ensure_non_empty_size);
            Partitioner my_partitioner;
            test_case(range, SimpleBody(), my_partitioner, NULL);
            size_t range_size = range_end - settings.range_begin;
            const char* rangeName = typeid(range).name();
            settings.checker(rangeName, range_size, &settings, stat);
        }
    }
};

template <typename ParallelTestBody>
void test() {
    size_t hw_threads_num = tbb::tbb_thread::hardware_concurrency();
    size_t threadsToRunOn = std::min<size_t>(max_simulated_threads, hw_threads_num);

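    // Cover the simulated thread counts in groups of 'threadsToRunOn' native
    // threads, handing each group its starting index; a final call covers the
    // remainder that does not fill a whole group.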
    size_t parallel_group_thread_starting_index = 1;
    while( parallel_group_thread_starting_index <= max_simulated_threads - threadsToRunOn ) {
        NativeParallelFor(threadsToRunOn, ParallelTestBody(parallel_group_thread_starting_index));
        parallel_group_thread_starting_index += threadsToRunOn;
    }
    NativeParallelFor(max_simulated_threads - parallel_group_thread_starting_index,
                      ParallelTestBody(parallel_group_thread_starting_index));
}

namespace task_affinity_whitebox {
size_t range_begin = 0;
size_t range_end = 20;
}

template<typename Partitioner>
void check_tree(const test_partitioner_utils::BinaryTree&);

template<>
void check_tree<tbb::affinity_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    ASSERT(tree == whitebox_simulation::reference_tree,
           "affinity_partitioner distributes tasks differently from run to run");
}

template<>
void check_tree<tbb::static_partitioner>(const test_partitioner_utils::BinaryTree& tree) {
    std::vector<test_partitioner_utils::TreeNode*> tree_leafs;
    tree.fill_leafs(tree_leafs);
    typedef std::vector<size_t> Slots;
    Slots affinity_slots(tree_leafs.size() + 1, 0);

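    // affinity_slots[a] counts the leaf ranges whose task carries affinity id 'a';
    // slot 0 corresponds to tasks that never had an affinity set.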
    for (std::vector<test_partitioner_utils::TreeNode*>::iterator i = tree_leafs.begin(); i != tree_leafs.end(); ++i) {
        affinity_slots[(*i)->m_affinity]++;
        if ((*i)->m_affinity == 0)
            ASSERT((*i)->m_range_begin == task_affinity_whitebox::range_begin,
                   "Task with affinity 0 was executed with a wrong range");
    }

    typedef std::iterator_traits<Slots::iterator>::difference_type slots_difference_type;
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == slots_difference_type(1),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(std::count(affinity_slots.begin(), affinity_slots.end(), size_t(1)) == slots_difference_type(g_threadNums.local()),
           "static_partitioner incorrectly distributed tasks across threads");
    ASSERT(affinity_slots[tbb::this_task_arena::current_thread_index() + 1] == 0,
           "static_partitioner incorrectly assigns the task with 0 affinity");
    ASSERT(std::accumulate(affinity_slots.begin(), affinity_slots.end(), size_t(0)) == g_threadNums.local(),
           "static_partitioner has created more tasks than the number of threads");
}

template<typename Partitioner>
void test_task_affinity() {
    using namespace task_affinity_whitebox;
    test_partitioner_utils::SimpleBody body;
    for (size_t p = 1; p <= 50; ++p) {
        g_threadNums.local() = p;
        whitebox_simulation::whitebox_thread_index = 0;
        test_partitioner_utils::TestRanges::BlockedRange range(range_begin, range_end, /*statData*/NULL,
                                                               /*provide_feedback*/false, /*ensure_non_empty_size*/false);
        Partitioner partitioner;
        whitebox_simulation::reference_tree = test_partitioner_utils::BinaryTree();
        whitebox_simulation::parallel_for(range, body, partitioner, &(whitebox_simulation::reference_tree));
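        // Replay the run from every simulated thread index and verify each
        // resulting splitting tree via check_tree.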
        while (whitebox_simulation::whitebox_thread_index < p) {
            test_partitioner_utils::BinaryTree tree;
            whitebox_simulation::parallel_for(range, body, partitioner, &tree);
            check_tree<Partitioner>(tree);
            whitebox_simulation::whitebox_thread_index++;
        }
        range_begin++;
        range_end += 2;
    }
}
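
// Typical instantiations, matching the check_tree specializations above:
//   test_task_affinity<tbb::affinity_partitioner>();
//   test_task_affinity<tbb::static_partitioner>();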

} /* namespace uniform_iterations_distribution */
