| 1 | /* | 
|---|
| 2 | Copyright (c) 2005-2019 Intel Corporation | 
|---|
| 3 |  | 
|---|
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 5 | you may not use this file except in compliance with the License. | 
|---|
| 6 | You may obtain a copy of the License at | 
|---|
| 7 |  | 
|---|
| 8 | http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 9 |  | 
|---|
| 10 | Unless required by applicable law or agreed to in writing, software | 
|---|
| 11 | distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 13 | See the License for the specific language governing permissions and | 
|---|
| 14 | limitations under the License. | 
|---|
| 15 | */ | 
|---|
| 16 |  | 
|---|
| 17 | /* Example program that computes Fibonacci numbers in different ways. | 
|---|
| 18 | Arguments are: [ Number [Threads [Repeats]]] | 
|---|
| 19 | The defaults are Number=500 Threads=1:4 Repeats=1. | 
|---|
| 20 |  | 
|---|
| 21 | The point of this program is to check that the library is working properly. | 
|---|
| 22 | Most of the computations are deliberately silly and not expected to | 
|---|
| 23 | show any speedup on multiprocessors. | 
|---|
| 24 | */ | 
|---|
| 25 |  | 
|---|
| 26 | // enable assertions | 
|---|
| 27 | #ifdef NDEBUG | 
|---|
| 28 | #undef NDEBUG | 
|---|
| 29 | #endif | 
|---|
| 30 |  | 
|---|
| 31 | #include <cstdio> | 
|---|
| 32 | #include <cstdlib> | 
|---|
| 33 | #include <cassert> | 
|---|
| 34 | #include <utility> | 
|---|
| 35 | #include "tbb/task.h" | 
|---|
| 36 | #include "tbb/task_scheduler_init.h" | 
|---|
| 37 | #include "tbb/tick_count.h" | 
|---|
| 38 | #include "tbb/blocked_range.h" | 
|---|
| 39 | #include "tbb/concurrent_vector.h" | 
|---|
| 40 | #include "tbb/concurrent_queue.h" | 
|---|
| 41 | #include "tbb/concurrent_hash_map.h" | 
|---|
| 42 | #include "tbb/parallel_while.h" | 
|---|
| 43 | #include "tbb/parallel_for.h" | 
|---|
| 44 | #include "tbb/parallel_reduce.h" | 
|---|
| 45 | #include "tbb/parallel_scan.h" | 
|---|
| 46 | #include "tbb/pipeline.h" | 
|---|
| 47 | #include "tbb/atomic.h" | 
|---|
| 48 | #include "tbb/mutex.h" | 
|---|
| 49 | #include "tbb/spin_mutex.h" | 
|---|
| 50 | #include "tbb/queuing_mutex.h" | 
|---|
| 51 | #include "tbb/tbb_thread.h" | 
|---|
| 52 |  | 
|---|
| 53 | using namespace std; | 
|---|
| 54 | using namespace tbb; | 
|---|
| 55 |  | 
|---|
| 56 | //! type used for Fibonacci number computations | 
|---|
| 57 | typedef long long value; | 
|---|
| 58 |  | 
|---|
| 59 | //! Matrix 2x2 class | 
|---|
| 60 | struct Matrix2x2 | 
|---|
| 61 | { | 
|---|
| 62 | //! Array of values | 
|---|
| 63 | value v[2][2]; | 
|---|
| 64 | Matrix2x2() {} | 
|---|
| 65 | Matrix2x2(value v00, value v01, value v10, value v11) { | 
|---|
| 66 | v[0][0] = v00; v[0][1] = v01; v[1][0] = v10; v[1][1] = v11; | 
|---|
| 67 | } | 
|---|
| 68 | Matrix2x2 operator * (const Matrix2x2 &to) const; //< Multiply two Matrices | 
|---|
| 69 | }; | 
|---|
| 70 | //! Identity matrix | 
|---|
| 71 | static const Matrix2x2 MatrixIdentity(1, 0, 0, 1); | 
|---|
| 72 | //! Default matrix to multiply | 
|---|
| 73 | static const Matrix2x2 Matrix1110(1, 1, 1, 0); | 
|---|
| 74 | //! Raw arrays matrices multiply | 
|---|
| 75 | void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]); | 
|---|
| 76 |  | 
|---|
| 77 | /////////////////////// Serial methods //////////////////////// | 
|---|
| 78 |  | 
|---|
| 79 | //! Plain serial sum | 
|---|
| 80 | value SerialFib(int n) | 
|---|
| 81 | { | 
|---|
| 82 | if(n < 2) | 
|---|
| 83 | return n; | 
|---|
| 84 | value a = 0, b = 1, sum; int i; | 
|---|
| 85 | for( i = 2; i <= n; i++ ) | 
|---|
| 86 | {   // n is really index of Fibonacci number | 
|---|
| 87 | sum = a + b; a = b; b = sum; | 
|---|
| 88 | } | 
|---|
| 89 | return sum; | 
|---|
| 90 | } | 
|---|
| 91 | //! Serial n-1 matrices multiplication | 
|---|
| 92 | value SerialMatrixFib(int n) | 
|---|
| 93 | { | 
|---|
| 94 | value c[2][2], a[2][2] = {{1, 1}, {1, 0}}, b[2][2] = {{1, 1}, {1, 0}}; int i; | 
|---|
| 95 | for(i = 2; i < n; i++) | 
|---|
| 96 | {   // Using condition to prevent copying of values | 
|---|
| 97 | if(i & 1) Matrix2x2Multiply(a, c, b); | 
|---|
| 98 | else      Matrix2x2Multiply(a, b, c); | 
|---|
| 99 | } | 
|---|
| 100 | return (i & 1) ? c[0][0] : b[0][0]; // get result from upper left cell | 
|---|
| 101 | } | 
|---|
| 102 | //! Recursive summing. Just for complete list of serial algorithms, not used | 
|---|
| 103 | value SerialRecursiveFib(int n) | 
|---|
| 104 | { | 
|---|
| 105 | value result; | 
|---|
| 106 | if(n < 2) | 
|---|
| 107 | result = n; | 
|---|
| 108 | else | 
|---|
| 109 | result = SerialRecursiveFib(n - 1) + SerialRecursiveFib(n - 2); | 
|---|
| 110 | return result; | 
|---|
| 111 | } | 
|---|
| 112 | //! Introducing of queue method in serial | 
|---|
| 113 | value SerialQueueFib(int n) | 
|---|
| 114 | { | 
|---|
| 115 | concurrent_queue<Matrix2x2> Q; | 
|---|
| 116 | for(int i = 1; i < n; i++) | 
|---|
| 117 | Q.push(Matrix1110); | 
|---|
| 118 | Matrix2x2 A, B; | 
|---|
| 119 | while(true) { | 
|---|
| 120 | while( !Q.try_pop(A) ) this_tbb_thread::yield(); | 
|---|
| 121 | if(Q.empty()) break; | 
|---|
| 122 | while( !Q.try_pop(B) ) this_tbb_thread::yield(); | 
|---|
| 123 | Q.push(A * B); | 
|---|
| 124 | } | 
|---|
| 125 | return A.v[0][0]; | 
|---|
| 126 | } | 
|---|
| 127 | //! Trying to use concurrent_vector | 
|---|
| 128 | value SerialVectorFib(int n) | 
|---|
| 129 | { | 
|---|
| 130 | concurrent_vector<value> A; | 
|---|
| 131 | A.grow_by(2); | 
|---|
| 132 | A[0] = 0; A[1] = 1; | 
|---|
| 133 | for( int i = 2; i <= n; i++) | 
|---|
| 134 | { | 
|---|
| 135 | A.grow_to_at_least(i+1); | 
|---|
| 136 | A[i] = A[i-1] + A[i-2]; | 
|---|
| 137 | } | 
|---|
| 138 | return A[n]; | 
|---|
| 139 | } | 
|---|
| 140 |  | 
|---|
| 141 | ///////////////////// Parallel methods //////////////////////// | 
|---|
| 142 |  | 
|---|
| 143 | // *** Serial shared by mutexes *** // | 
|---|
| 144 |  | 
|---|
| 145 | //! Shared glabals | 
|---|
| 146 | value SharedA = 0, SharedB = 1; int SharedI = 1, SharedN; | 
|---|
| 147 |  | 
|---|
| 148 | //! Template task class which computes Fibonacci numbers with shared globals | 
|---|
| 149 | template<typename M> | 
|---|
| 150 | class SharedSerialFibBody { | 
|---|
| 151 | M &mutex; | 
|---|
| 152 | public: | 
|---|
| 153 | SharedSerialFibBody( M &m ) : mutex( m ) {} | 
|---|
| 154 | //! main loop | 
|---|
| 155 | void operator()( const blocked_range<int>& range ) const { | 
|---|
| 156 | for(;;) { | 
|---|
| 157 | typename M::scoped_lock lock( mutex ); | 
|---|
| 158 | if(SharedI >= SharedN) break; | 
|---|
| 159 | value sum = SharedA + SharedB; | 
|---|
| 160 | SharedA = SharedB; SharedB = sum; | 
|---|
| 161 | ++SharedI; | 
|---|
| 162 | } | 
|---|
| 163 | } | 
|---|
| 164 | }; | 
|---|
| 165 |  | 
|---|
| 166 | //! Root function | 
|---|
| 167 | template<class M> | 
|---|
| 168 | value SharedSerialFib(int n) | 
|---|
| 169 | { | 
|---|
| 170 | SharedA = 0; SharedB = 1; SharedI = 1; SharedN = n; M mutex; | 
|---|
| 171 | parallel_for( blocked_range<int>(0,4,1), SharedSerialFibBody<M>( mutex ) ); | 
|---|
| 172 | return SharedB; | 
|---|
| 173 | } | 
|---|
| 174 |  | 
|---|
| 175 | // *** Serial shared by concurrent hash map *** // | 
|---|
| 176 |  | 
|---|
| 177 | //! Hash comparer | 
|---|
| 178 | struct IntHashCompare { | 
|---|
| 179 | bool equal( const int j, const int k ) const { return j == k; } | 
|---|
| 180 | unsigned long hash( const int k ) const { return (unsigned long)k; } | 
|---|
| 181 | }; | 
|---|
| 182 | //! NumbersTable type based on concurrent_hash_map | 
|---|
| 183 | typedef concurrent_hash_map<int, value, IntHashCompare> NumbersTable; | 
|---|
| 184 | //! task for serial method using shared concurrent_hash_map | 
|---|
| 185 | class ConcurrentHashSerialFibTask: public task { | 
|---|
| 186 | NumbersTable &Fib; | 
|---|
| 187 | int my_n; | 
|---|
| 188 | public: | 
|---|
| 189 | //! constructor | 
|---|
| 190 | ConcurrentHashSerialFibTask( NumbersTable &cht, int n ) : Fib(cht), my_n(n) { } | 
|---|
| 191 | //! executing task | 
|---|
| 192 | task* execute() /*override*/ { | 
|---|
| 193 | for( int i = 2; i <= my_n; ++i ) { // there is no difference in to recycle or to make loop | 
|---|
| 194 | NumbersTable::const_accessor f1, f2; // same as iterators | 
|---|
| 195 | if( !Fib.find(f1, i-1) || !Fib.find(f2, i-2) ) { | 
|---|
| 196 | // Something is seriously wrong, because i-1 and i-2 must have been inserted | 
|---|
| 197 | // earlier by this thread or another thread. | 
|---|
| 198 | assert(0); | 
|---|
| 199 | } | 
|---|
| 200 | value sum = f1->second + f2->second; | 
|---|
| 201 | NumbersTable::const_accessor fsum; | 
|---|
| 202 | Fib.insert(fsum, make_pair(i, sum)); // inserting | 
|---|
| 203 | assert( fsum->second == sum ); // check value | 
|---|
| 204 | } | 
|---|
| 205 | return 0; | 
|---|
| 206 | } | 
|---|
| 207 | }; | 
|---|
| 208 |  | 
|---|
| 209 | //! Root function | 
|---|
| 210 | value ConcurrentHashSerialFib(int n) | 
|---|
| 211 | { | 
|---|
| 212 | NumbersTable Fib; | 
|---|
| 213 | bool okay; | 
|---|
| 214 | okay = Fib.insert( make_pair(0, 0) ); assert(okay); // assign initial values | 
|---|
| 215 | okay = Fib.insert( make_pair(1, 1) ); assert(okay); | 
|---|
| 216 |  | 
|---|
| 217 | task_list list; | 
|---|
| 218 | // allocate tasks | 
|---|
| 219 | list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask(Fib, n)); | 
|---|
| 220 | list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask(Fib, n)); | 
|---|
| 221 | task::spawn_root_and_wait(list); | 
|---|
| 222 | NumbersTable::const_accessor fresult; | 
|---|
| 223 | okay = Fib.find( fresult, n ); | 
|---|
| 224 | assert(okay); | 
|---|
| 225 | return fresult->second; | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
| 228 | // *** Queue with parallel_for and parallel_while *** // | 
|---|
| 229 |  | 
|---|
| 230 | //! Stream of matrices | 
|---|
| 231 | struct QueueStream { | 
|---|
| 232 | volatile bool producer_is_done; | 
|---|
| 233 | concurrent_queue<Matrix2x2> Queue; | 
|---|
| 234 | //! Get pair of matricies if present | 
|---|
| 235 | bool pop_if_present( pair<Matrix2x2, Matrix2x2> &mm ) { | 
|---|
| 236 | // get first matrix if present | 
|---|
| 237 | if(!Queue.try_pop(mm.first)) return false; | 
|---|
| 238 | // get second matrix if present | 
|---|
| 239 | if(!Queue.try_pop(mm.second)) { | 
|---|
| 240 | // if not, then push back first matrix | 
|---|
| 241 | Queue.push(mm.first); return false; | 
|---|
| 242 | } | 
|---|
| 243 | return true; | 
|---|
| 244 | } | 
|---|
| 245 | }; | 
|---|
| 246 |  | 
|---|
| 247 | //! Functor for parallel_for which fills the queue | 
|---|
| 248 | struct parallel_forFibBody { | 
|---|
| 249 | QueueStream &my_stream; | 
|---|
| 250 | //! fill functor arguments | 
|---|
| 251 | parallel_forFibBody(QueueStream &s) : my_stream(s) { } | 
|---|
| 252 | //! iterate thorough range | 
|---|
| 253 | void operator()( const blocked_range<int> &range ) const { | 
|---|
| 254 | int i_end = range.end(); | 
|---|
| 255 | for( int i = range.begin(); i != i_end; ++i ) { | 
|---|
| 256 | my_stream.Queue.push( Matrix1110 ); // push initial matrix | 
|---|
| 257 | } | 
|---|
| 258 | } | 
|---|
| 259 | }; | 
|---|
| 260 | //! Functor for parallel_while which process the queue | 
|---|
| 261 | class parallel_whileFibBody | 
|---|
| 262 | { | 
|---|
| 263 | QueueStream &my_stream; | 
|---|
| 264 | parallel_while<parallel_whileFibBody> &my_while; | 
|---|
| 265 | public: | 
|---|
| 266 | typedef pair<Matrix2x2, Matrix2x2> argument_type; | 
|---|
| 267 | //! fill functor arguments | 
|---|
| 268 | parallel_whileFibBody(parallel_while<parallel_whileFibBody> &w, QueueStream &s) | 
|---|
| 269 | : my_while(w), my_stream(s) { } | 
|---|
| 270 | //! process pair of matrices | 
|---|
| 271 | void operator() (argument_type mm) const { | 
|---|
| 272 | mm.first = mm.first * mm.second; | 
|---|
| 273 | // note: it can run concurrently with QueueStream::pop_if_present() | 
|---|
| 274 | if(my_stream.Queue.try_pop(mm.second)) | 
|---|
| 275 | my_while.add( mm ); // now, two matrices available. Add next iteration. | 
|---|
| 276 | else my_stream.Queue.push( mm.first ); // or push back calculated value if queue is empty | 
|---|
| 277 | } | 
|---|
| 278 | }; | 
|---|
| 279 |  | 
|---|
| 280 | //! Parallel queue's filling task | 
|---|
| 281 | struct QueueInsertTask: public task { | 
|---|
| 282 | QueueStream &my_stream; | 
|---|
| 283 | int my_n; | 
|---|
| 284 | //! fill task arguments | 
|---|
| 285 | QueueInsertTask( int n, QueueStream &s ) : my_n(n), my_stream(s) { } | 
|---|
| 286 | //! executing task | 
|---|
| 287 | task* execute() /*override*/ { | 
|---|
| 288 | // Execute of parallel pushing of n-1 initial matrices | 
|---|
| 289 | parallel_for( blocked_range<int>( 1, my_n, 10 ), parallel_forFibBody(my_stream) ); | 
|---|
| 290 | my_stream.producer_is_done = true; | 
|---|
| 291 | return 0; | 
|---|
| 292 | } | 
|---|
| 293 | }; | 
|---|
| 294 | //! Parallel queue's processing task | 
|---|
| 295 | struct QueueProcessTask: public task { | 
|---|
| 296 | QueueStream &my_stream; | 
|---|
| 297 | //! fill task argument | 
|---|
| 298 | QueueProcessTask( QueueStream &s ) : my_stream(s) { } | 
|---|
| 299 | //! executing task | 
|---|
| 300 | task* execute() /*override*/ { | 
|---|
| 301 | while( !my_stream.producer_is_done || my_stream.Queue.unsafe_size()>1 ) { | 
|---|
| 302 | parallel_while<parallel_whileFibBody> w; // run while loop in parallel | 
|---|
| 303 | w.run( my_stream, parallel_whileFibBody( w, my_stream ) ); | 
|---|
| 304 | } | 
|---|
| 305 | return 0; | 
|---|
| 306 | } | 
|---|
| 307 | }; | 
|---|
| 308 | //! Root function | 
|---|
| 309 | value ParallelQueueFib(int n) | 
|---|
| 310 | { | 
|---|
| 311 | QueueStream stream; | 
|---|
| 312 | stream.producer_is_done = false; | 
|---|
| 313 | task_list list; | 
|---|
| 314 | list.push_back(*new(task::allocate_root()) QueueInsertTask( n, stream )); | 
|---|
| 315 | list.push_back(*new(task::allocate_root()) QueueProcessTask( stream )); | 
|---|
| 316 | // If there is only a single thread, the first task in the list runs to completion | 
|---|
| 317 | // before the second task in the list starts. | 
|---|
| 318 | task::spawn_root_and_wait(list); | 
|---|
| 319 | assert(stream.Queue.unsafe_size() == 1); // it is easy to lose some work | 
|---|
| 320 | Matrix2x2 M; | 
|---|
| 321 | bool result = stream.Queue.try_pop( M ); // get last matrix | 
|---|
| 322 | assert( result ); | 
|---|
| 323 | return M.v[0][0]; // and result number | 
|---|
| 324 | } | 
|---|
| 325 |  | 
|---|
| 326 | // *** Queue with pipeline *** // | 
|---|
| 327 |  | 
|---|
| 328 | //! filter to fills queue | 
|---|
| 329 | class InputFilter: public filter { | 
|---|
| 330 | tbb::atomic<int> N; //< index of Fibonacci number minus 1 | 
|---|
| 331 | public: | 
|---|
| 332 | concurrent_queue<Matrix2x2> Queue; | 
|---|
| 333 | //! fill filter arguments | 
|---|
| 334 | InputFilter( int n ) : filter(false /*is not serial*/) { N = n; } | 
|---|
| 335 | //! executing filter | 
|---|
| 336 | void* operator()(void*) /*override*/ { | 
|---|
| 337 | int n = --N; | 
|---|
| 338 | if(n <= 0) return 0; | 
|---|
| 339 | Queue.push( Matrix1110 ); | 
|---|
| 340 | return &Queue; | 
|---|
| 341 | } | 
|---|
| 342 | }; | 
|---|
| 343 | //! filter to process queue | 
|---|
| 344 | class MultiplyFilter: public filter { | 
|---|
| 345 | public: | 
|---|
| 346 | MultiplyFilter( ) : filter(false /*is not serial*/) { } | 
|---|
| 347 | //! executing filter | 
|---|
| 348 | void* operator()(void*p) /*override*/ { | 
|---|
| 349 | concurrent_queue<Matrix2x2> &Queue = *static_cast<concurrent_queue<Matrix2x2> *>(p); | 
|---|
| 350 | Matrix2x2 m1, m2; | 
|---|
| 351 | // get two elements | 
|---|
| 352 | while( !Queue.try_pop( m1 ) ) this_tbb_thread::yield(); | 
|---|
| 353 | while( !Queue.try_pop( m2 ) ) this_tbb_thread::yield(); | 
|---|
| 354 | m1 = m1 * m2; // process them | 
|---|
| 355 | Queue.push( m1 ); // and push back | 
|---|
| 356 | return this; // just nothing | 
|---|
| 357 | } | 
|---|
| 358 | }; | 
|---|
| 359 | //! Root function | 
|---|
| 360 | value ParallelPipeFib(int n) | 
|---|
| 361 | { | 
|---|
| 362 | InputFilter input( n-1 ); | 
|---|
| 363 | MultiplyFilter process; | 
|---|
| 364 | // Create the pipeline | 
|---|
| 365 | pipeline pipeline; | 
|---|
| 366 | // add filters | 
|---|
| 367 | pipeline.add_filter( input ); // first | 
|---|
| 368 | pipeline.add_filter( process ); // second | 
|---|
| 369 |  | 
|---|
| 370 | input.Queue.push( Matrix1110 ); | 
|---|
| 371 | // Run the pipeline | 
|---|
| 372 | pipeline.run( n ); // must be larger then max threads number | 
|---|
| 373 | pipeline.clear(); // do not forget clear the pipeline | 
|---|
| 374 |  | 
|---|
| 375 | assert( input.Queue.unsafe_size()==1 ); | 
|---|
| 376 | Matrix2x2 M; | 
|---|
| 377 | bool result = input.Queue.try_pop( M ); // get last element | 
|---|
| 378 | assert( result ); | 
|---|
| 379 | return M.v[0][0]; // get value | 
|---|
| 380 | } | 
|---|
| 381 |  | 
|---|
| 382 | // *** parallel_reduce *** // | 
|---|
| 383 |  | 
|---|
| 384 | //! Functor for parallel_reduce | 
|---|
| 385 | struct parallel_reduceFibBody { | 
|---|
| 386 | Matrix2x2 sum; | 
|---|
| 387 | int split_flag;  //< flag to make one less operation for split bodies | 
|---|
| 388 | //! Constructor fills sum with initial matrix | 
|---|
| 389 | parallel_reduceFibBody() : sum( Matrix1110 ), split_flag(0) { } | 
|---|
| 390 | //! Splitting constructor | 
|---|
| 391 | parallel_reduceFibBody( parallel_reduceFibBody& other, split ) : sum( Matrix1110 ), split_flag(1/*note that it is split*/) {} | 
|---|
| 392 | //! Join point | 
|---|
| 393 | void join( parallel_reduceFibBody &s ) { | 
|---|
| 394 | sum = sum * s.sum; | 
|---|
| 395 | } | 
|---|
| 396 | //! Process multiplications | 
|---|
| 397 | void operator()( const blocked_range<int> &r ) { | 
|---|
| 398 | for( int k = r.begin() + split_flag; k < r.end(); ++k ) | 
|---|
| 399 | sum = sum * Matrix1110; | 
|---|
| 400 | split_flag = 0; // reset flag, because this method can be reused for next range | 
|---|
| 401 | } | 
|---|
| 402 | }; | 
|---|
| 403 | //! Root function | 
|---|
| 404 | value parallel_reduceFib(int n) | 
|---|
| 405 | { | 
|---|
| 406 | parallel_reduceFibBody b; | 
|---|
| 407 | parallel_reduce(blocked_range<int>(2, n, 3), b); // do parallel reduce on range [2, n) for b | 
|---|
| 408 | return b.sum.v[0][0]; | 
|---|
| 409 | } | 
|---|
| 410 |  | 
|---|
| 411 | // *** parallel_scan *** // | 
|---|
| 412 |  | 
|---|
| 413 | //! Functor for parallel_scan | 
|---|
| 414 | struct parallel_scanFibBody { | 
|---|
| 415 | /** Though parallel_scan is usually used to accumulate running sums, | 
|---|
| 416 | it can be used to accumulate running products too. */ | 
|---|
| 417 | Matrix2x2 product; | 
|---|
| 418 | /** Pointer to output sequence */ | 
|---|
| 419 | value* const output; | 
|---|
| 420 | //! Constructor sets product to identity matrix | 
|---|
| 421 | parallel_scanFibBody(value* output_) : product( MatrixIdentity ), output(output_) {} | 
|---|
| 422 | //! Splitting constructor | 
|---|
| 423 | parallel_scanFibBody( parallel_scanFibBody &b, split) : product( MatrixIdentity ), output(b.output) {} | 
|---|
| 424 | //! Method for merging summary information from a, which was split off from *this, into *this. | 
|---|
| 425 | void reverse_join( parallel_scanFibBody &a ) { | 
|---|
| 426 | // When using non-commutative reduction operation, reverse_join | 
|---|
| 427 | // should put argument "a" on the left side of the operation. | 
|---|
| 428 | // The reversal from the argument order is why the method is | 
|---|
| 429 | // called "reverse_join" instead of "join". | 
|---|
| 430 | product = a.product * product; | 
|---|
| 431 | } | 
|---|
| 432 | //! Method for assigning final result back to original body. | 
|---|
| 433 | void assign( parallel_scanFibBody &b ) { | 
|---|
| 434 | product = b.product; | 
|---|
| 435 | } | 
|---|
| 436 | //! Compute matrix running product. | 
|---|
| 437 | /** Tag indicates whether is is the final scan over the range, or | 
|---|
| 438 | just a helper "prescan" that is computing a partial reduction. */ | 
|---|
| 439 | template<typename Tag> | 
|---|
| 440 | void operator()( const blocked_range<int> &r, Tag tag) { | 
|---|
| 441 | for( int k = r.begin(); k < r.end(); ++k ) { | 
|---|
| 442 | // Code performs an "exclusive" scan, which outputs a value *before* updating the product. | 
|---|
| 443 | // For an "inclusive" scan, output the value after the update. | 
|---|
| 444 | if( tag.is_final_scan() ) | 
|---|
| 445 | output[k] = product.v[0][1]; | 
|---|
| 446 | product = product * Matrix1110; | 
|---|
| 447 | } | 
|---|
| 448 | } | 
|---|
| 449 | }; | 
|---|
| 450 | //! Root function | 
|---|
| 451 | value parallel_scanFib(int n) | 
|---|
| 452 | { | 
|---|
| 453 | value* output = new value[n]; | 
|---|
| 454 | parallel_scanFibBody b(output); | 
|---|
| 455 | parallel_scan(blocked_range<int>(0, n, 3), b); | 
|---|
| 456 | // output[0..n-1] now contains the Fibonacci sequence (modulo integer wrap-around). | 
|---|
| 457 | // Check the last two values for correctness. | 
|---|
| 458 | assert( n<2 || output[n-2]+output[n-1]==b.product.v[0][1] ); | 
|---|
| 459 | delete[] output; | 
|---|
| 460 | return b.product.v[0][1]; | 
|---|
| 461 | } | 
|---|
| 462 |  | 
|---|
| 463 | // *** Raw tasks *** // | 
|---|
| 464 |  | 
|---|
| 465 | //! task class which computes Fibonacci numbers by Lucas formula | 
|---|
| 466 | struct FibTask: public task { | 
|---|
| 467 | const int n; | 
|---|
| 468 | value& sum; | 
|---|
| 469 | value x, y; | 
|---|
| 470 | bool second_phase; //< flag of continuation | 
|---|
| 471 | // task arguments | 
|---|
| 472 | FibTask( int n_, value& sum_ ) : | 
|---|
| 473 | n(n_), sum(sum_), second_phase(false) | 
|---|
| 474 | {} | 
|---|
| 475 | //! Execute task | 
|---|
| 476 | task* execute() /*override*/ { | 
|---|
| 477 | // Using Lucas' formula here | 
|---|
| 478 | if( second_phase ) { // children finished | 
|---|
| 479 | sum = n&1 ? x*x + y*y : x*x - y*y; | 
|---|
| 480 | return NULL; | 
|---|
| 481 | } | 
|---|
| 482 | if( n <= 2 ) { | 
|---|
| 483 | sum = n!=0; | 
|---|
| 484 | return NULL; | 
|---|
| 485 | } else { | 
|---|
| 486 | recycle_as_continuation();  // repeat this task when children finish | 
|---|
| 487 | second_phase = true; // mark second phase | 
|---|
| 488 | FibTask& a = *new( allocate_child() ) FibTask( n/2 + 1, x ); | 
|---|
| 489 | FibTask& b = *new( allocate_child() ) FibTask( n/2 - 1 + (n&1), y ); | 
|---|
| 490 | set_ref_count(2); | 
|---|
| 491 | spawn( a ); | 
|---|
| 492 | return &b; | 
|---|
| 493 | } | 
|---|
| 494 | } | 
|---|
| 495 | }; | 
|---|
| 496 | //! Root function | 
|---|
| 497 | value ParallelTaskFib(int n) { | 
|---|
| 498 | value sum; | 
|---|
| 499 | FibTask& a = *new(task::allocate_root()) FibTask(n, sum); | 
|---|
| 500 | task::spawn_root_and_wait(a); | 
|---|
| 501 | return sum; | 
|---|
| 502 | } | 
|---|
| 503 |  | 
|---|
| 504 | /////////////////////////// Main //////////////////////////////////////////////////// | 
|---|
| 505 |  | 
|---|
| 506 | //! A closed range of int. | 
|---|
| 507 | struct IntRange { | 
|---|
| 508 | int low; | 
|---|
| 509 | int high; | 
|---|
| 510 | void set_from_string( const char* s ); | 
|---|
| 511 | IntRange( int low_, int high_ ) : low(low_), high(high_) {} | 
|---|
| 512 | }; | 
|---|
| 513 |  | 
|---|
| 514 | void IntRange::set_from_string( const char* s ) { | 
|---|
| 515 | char* end; | 
|---|
| 516 | high = low = strtol(s,&end,0); | 
|---|
| 517 | switch( *end ) { | 
|---|
| 518 | case ':': | 
|---|
| 519 | high = strtol(end+1,0,0); | 
|---|
| 520 | break; | 
|---|
| 521 | case '\0': | 
|---|
| 522 | break; | 
|---|
| 523 | default: | 
|---|
| 524 | printf( "unexpected character = %c\n",*end); | 
|---|
| 525 | } | 
|---|
| 526 | } | 
|---|
| 527 |  | 
|---|
| 528 | //! Tick count for start | 
|---|
| 529 | static tick_count t0; | 
|---|
| 530 |  | 
|---|
| 531 | //! Verbose output flag | 
|---|
| 532 | static bool Verbose = false; | 
|---|
| 533 |  | 
|---|
| 534 | typedef value (*MeasureFunc)(int); | 
|---|
| 535 | //! Measure ticks count in loop [2..n] | 
|---|
| 536 | value Measure(const char *name, MeasureFunc func, int n) | 
|---|
| 537 | { | 
|---|
| 538 | value result; | 
|---|
| 539 | if(Verbose) printf( "%s",name); | 
|---|
| 540 | t0 = tick_count::now(); | 
|---|
| 541 | for(int number = 2; number <= n; number++) | 
|---|
| 542 | result = func(number); | 
|---|
| 543 | if(Verbose) printf( "\t- in %f msec\n", (tick_count::now() - t0).seconds()*1000); | 
|---|
| 544 | return result; | 
|---|
| 545 | } | 
|---|
| 546 |  | 
|---|
| 547 | //! program entry | 
|---|
| 548 | int main(int argc, char* argv[]) | 
|---|
| 549 | { | 
|---|
| 550 | if(argc>1) Verbose = true; | 
|---|
| 551 | int NumbersCount = argc>1 ? strtol(argv[1],0,0) : 500; | 
|---|
| 552 | IntRange NThread(1,4);// Number of threads to use. | 
|---|
| 553 | if(argc>2) NThread.set_from_string(argv[2]); | 
|---|
| 554 | unsigned long ntrial = argc>3? (unsigned long)strtoul(argv[3],0,0) : 1; | 
|---|
| 555 | value result, sum; | 
|---|
| 556 |  | 
|---|
| 557 | if(Verbose) printf( "Fibonacci numbers example. Generating %d numbers..\n",  NumbersCount); | 
|---|
| 558 |  | 
|---|
| 559 | result = Measure( "Serial loop", SerialFib, NumbersCount); | 
|---|
| 560 | sum = Measure( "Serial matrix", SerialMatrixFib, NumbersCount); assert(result == sum); | 
|---|
| 561 | sum = Measure( "Serial vector", SerialVectorFib, NumbersCount); assert(result == sum); | 
|---|
| 562 | sum = Measure( "Serial queue", SerialQueueFib, NumbersCount); assert(result == sum); | 
|---|
| 563 | // now in parallel | 
|---|
| 564 | for( unsigned long i=0; i<ntrial; ++i ) { | 
|---|
| 565 | for(int threads = NThread.low; threads <= NThread.high; threads *= 2) | 
|---|
| 566 | { | 
|---|
| 567 | task_scheduler_init scheduler_init(threads); | 
|---|
| 568 | if(Verbose) printf( "\nThreads number is %d\n", threads); | 
|---|
| 569 |  | 
|---|
| 570 | sum = Measure( "Shared serial (mutex)\t", SharedSerialFib<mutex>, NumbersCount); assert(result == sum); | 
|---|
| 571 | sum = Measure( "Shared serial (spin_mutex)", SharedSerialFib<spin_mutex>, NumbersCount); assert(result == sum); | 
|---|
| 572 | sum = Measure( "Shared serial (queuing_mutex)", SharedSerialFib<queuing_mutex>, NumbersCount); assert(result == sum); | 
|---|
| 573 | sum = Measure( "Shared serial (Conc.HashTable)", ConcurrentHashSerialFib, NumbersCount); assert(result == sum); | 
|---|
| 574 | sum = Measure( "Parallel while+for/queue", ParallelQueueFib, NumbersCount); assert(result == sum); | 
|---|
| 575 | sum = Measure( "Parallel pipe/queue\t", ParallelPipeFib, NumbersCount); assert(result == sum); | 
|---|
| 576 | sum = Measure( "Parallel reduce\t\t", parallel_reduceFib, NumbersCount); assert(result == sum); | 
|---|
| 577 | sum = Measure( "Parallel scan\t\t", parallel_scanFib, NumbersCount); assert(result == sum); | 
|---|
| 578 | sum = Measure( "Parallel tasks\t\t", ParallelTaskFib, NumbersCount); assert(result == sum); | 
|---|
| 579 | } | 
|---|
| 580 |  | 
|---|
| 581 | #ifdef __GNUC__ | 
|---|
| 582 | if(Verbose) printf( "Fibonacci number #%d modulo 2^64 is %lld\n\n", NumbersCount, result); | 
|---|
| 583 | #else | 
|---|
| 584 | if(Verbose) printf( "Fibonacci number #%d modulo 2^64 is %I64d\n\n", NumbersCount, result); | 
|---|
| 585 | #endif | 
|---|
| 586 | } | 
|---|
| 587 | if(!Verbose) printf( "TEST PASSED\n"); | 
|---|
| 588 | return 0; | 
|---|
| 589 | } | 
|---|
| 590 |  | 
|---|
| 591 | // Utils | 
|---|
| 592 |  | 
|---|
| 593 | void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]) | 
|---|
| 594 | { | 
|---|
| 595 | for( int i = 0; i <= 1; i++) | 
|---|
| 596 | for( int j = 0; j <= 1; j++) | 
|---|
| 597 | c[i][j] = a[i][0]*b[0][j] + a[i][1]*b[1][j]; | 
|---|
| 598 | } | 
|---|
| 599 |  | 
|---|
| 600 | Matrix2x2 Matrix2x2::operator *(const Matrix2x2 &to) const | 
|---|
| 601 | { | 
|---|
| 602 | Matrix2x2 result; | 
|---|
| 603 | Matrix2x2Multiply(v, to.v, result.v); | 
|---|
| 604 | return result; | 
|---|
| 605 | } | 
|---|
| 606 |  | 
|---|