1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | /* Example program that computes Fibonacci numbers in different ways. |
18 | Arguments are: [ Number [Threads [Repeats]]] |
19 | The defaults are Number=500 Threads=1:4 Repeats=1. |
20 | |
21 | The point of this program is to check that the library is working properly. |
22 | Most of the computations are deliberately silly and not expected to |
23 | show any speedup on multiprocessors. |
24 | */ |
25 | |
26 | // enable assertions |
27 | #ifdef NDEBUG |
28 | #undef NDEBUG |
29 | #endif |
30 | |
31 | #include <cstdio> |
32 | #include <cstdlib> |
33 | #include <cassert> |
34 | #include <utility> |
35 | #include "tbb/task.h" |
36 | #include "tbb/task_scheduler_init.h" |
37 | #include "tbb/tick_count.h" |
38 | #include "tbb/blocked_range.h" |
39 | #include "tbb/concurrent_vector.h" |
40 | #include "tbb/concurrent_queue.h" |
41 | #include "tbb/concurrent_hash_map.h" |
42 | #include "tbb/parallel_while.h" |
43 | #include "tbb/parallel_for.h" |
44 | #include "tbb/parallel_reduce.h" |
45 | #include "tbb/parallel_scan.h" |
46 | #include "tbb/pipeline.h" |
47 | #include "tbb/atomic.h" |
48 | #include "tbb/mutex.h" |
49 | #include "tbb/spin_mutex.h" |
50 | #include "tbb/queuing_mutex.h" |
51 | #include "tbb/tbb_thread.h" |
52 | |
53 | using namespace std; |
54 | using namespace tbb; |
55 | |
56 | //! type used for Fibonacci number computations |
57 | typedef long long value; |
58 | |
59 | //! Matrix 2x2 class |
60 | struct Matrix2x2 |
61 | { |
62 | //! Array of values |
63 | value v[2][2]; |
64 | Matrix2x2() {} |
65 | Matrix2x2(value v00, value v01, value v10, value v11) { |
66 | v[0][0] = v00; v[0][1] = v01; v[1][0] = v10; v[1][1] = v11; |
67 | } |
Matrix2x2 operator * (const Matrix2x2 &to) const; //!< Multiply two matrices
69 | }; |
70 | //! Identity matrix |
71 | static const Matrix2x2 MatrixIdentity(1, 0, 0, 1); |
72 | //! Default matrix to multiply |
73 | static const Matrix2x2 Matrix1110(1, 1, 1, 0); |
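// Most methods below rely on the identity
//     Matrix1110^n = [[F(n+1), F(n)], [F(n), F(n-1)]],
// so the product of n-1 copies of Matrix1110 has F(n) in its upper left cell.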
//! Multiply 2x2 matrices stored as raw arrays
75 | void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]); |
76 | |
77 | /////////////////////// Serial methods //////////////////////// |
78 | |
79 | //! Plain serial sum |
80 | value SerialFib(int n) |
81 | { |
82 | if(n < 2) |
83 | return n; |
84 | value a = 0, b = 1, sum; int i; |
85 | for( i = 2; i <= n; i++ ) |
86 | { // n is really index of Fibonacci number |
87 | sum = a + b; a = b; b = sum; |
88 | } |
89 | return sum; |
90 | } |
//! Serial multiplication of n-1 matrices
92 | value SerialMatrixFib(int n) |
93 | { |
94 | value c[2][2], a[2][2] = {{1, 1}, {1, 0}}, b[2][2] = {{1, 1}, {1, 0}}; int i; |
95 | for(i = 2; i < n; i++) |
{ // alternate source and destination buffers to avoid copying the result
97 | if(i & 1) Matrix2x2Multiply(a, c, b); |
98 | else Matrix2x2Multiply(a, b, c); |
99 | } |
100 | return (i & 1) ? c[0][0] : b[0][0]; // get result from upper left cell |
101 | } |
//! Recursive summation; included only to complete the list of serial algorithms, not used
103 | value SerialRecursiveFib(int n) |
104 | { |
105 | value result; |
106 | if(n < 2) |
107 | result = n; |
108 | else |
109 | result = SerialRecursiveFib(n - 1) + SerialRecursiveFib(n - 2); |
110 | return result; |
111 | } |
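// The queue-based method below repeatedly pops two matrices and pushes their
// product back. Every element of the queue is a power of Matrix1110, and powers
// of a single matrix commute, so the order of the pairwise multiplications does
// not affect the final product, Matrix1110^(n-1).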
//! Serial method based on a concurrent_queue
113 | value SerialQueueFib(int n) |
114 | { |
115 | concurrent_queue<Matrix2x2> Q; |
116 | for(int i = 1; i < n; i++) |
117 | Q.push(Matrix1110); |
118 | Matrix2x2 A, B; |
119 | while(true) { |
120 | while( !Q.try_pop(A) ) this_tbb_thread::yield(); |
121 | if(Q.empty()) break; |
122 | while( !Q.try_pop(B) ) this_tbb_thread::yield(); |
123 | Q.push(A * B); |
124 | } |
125 | return A.v[0][0]; |
126 | } |
//! Serial method using a concurrent_vector
128 | value SerialVectorFib(int n) |
129 | { |
130 | concurrent_vector<value> A; |
131 | A.grow_by(2); |
132 | A[0] = 0; A[1] = 1; |
133 | for( int i = 2; i <= n; i++) |
134 | { |
135 | A.grow_to_at_least(i+1); |
136 | A[i] = A[i-1] + A[i-2]; |
137 | } |
138 | return A[n]; |
139 | } |
140 | |
141 | ///////////////////// Parallel methods //////////////////////// |
142 | |
143 | // *** Serial shared by mutexes *** // |
144 | |
//! Shared globals
146 | value SharedA = 0, SharedB = 1; int SharedI = 1, SharedN; |
147 | |
//! Template body class which computes Fibonacci numbers using shared globals protected by a mutex
149 | template<typename M> |
150 | class SharedSerialFibBody { |
151 | M &mutex; |
152 | public: |
153 | SharedSerialFibBody( M &m ) : mutex( m ) {} |
154 | //! main loop |
155 | void operator()( const blocked_range<int>& range ) const { |
156 | for(;;) { |
157 | typename M::scoped_lock lock( mutex ); |
158 | if(SharedI >= SharedN) break; |
159 | value sum = SharedA + SharedB; |
160 | SharedA = SharedB; SharedB = sum; |
161 | ++SharedI; |
162 | } |
163 | } |
164 | }; |
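// Note: the parallel_for in SharedSerialFib uses blocked_range<int>(0,4,1), which
// splits into at most four unit subranges, so up to four workers execute the body
// concurrently; each one advances the shared state by one step per lock acquisition
// until SharedI reaches SharedN.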
165 | |
166 | //! Root function |
167 | template<class M> |
168 | value SharedSerialFib(int n) |
169 | { |
170 | SharedA = 0; SharedB = 1; SharedI = 1; SharedN = n; M mutex; |
171 | parallel_for( blocked_range<int>(0,4,1), SharedSerialFibBody<M>( mutex ) ); |
172 | return SharedB; |
173 | } |
174 | |
175 | // *** Serial shared by concurrent hash map *** // |
176 | |
//! Hash compare class for int keys
178 | struct IntHashCompare { |
179 | bool equal( const int j, const int k ) const { return j == k; } |
180 | unsigned long hash( const int k ) const { return (unsigned long)k; } |
181 | }; |
182 | //! NumbersTable type based on concurrent_hash_map |
183 | typedef concurrent_hash_map<int, value, IntHashCompare> NumbersTable; |
184 | //! task for serial method using shared concurrent_hash_map |
185 | class ConcurrentHashSerialFibTask: public task { |
186 | NumbersTable &Fib; |
187 | int my_n; |
188 | public: |
189 | //! constructor |
190 | ConcurrentHashSerialFibTask( NumbersTable &cht, int n ) : Fib(cht), my_n(n) { } |
191 | //! executing task |
192 | task* execute() /*override*/ { |
for( int i = 2; i <= my_n; ++i ) { // recycling the task or looping here makes no difference
194 | NumbersTable::const_accessor f1, f2; // same as iterators |
195 | if( !Fib.find(f1, i-1) || !Fib.find(f2, i-2) ) { |
196 | // Something is seriously wrong, because i-1 and i-2 must have been inserted |
197 | // earlier by this thread or another thread. |
198 | assert(0); |
199 | } |
200 | value sum = f1->second + f2->second; |
201 | NumbersTable::const_accessor fsum; |
202 | Fib.insert(fsum, make_pair(i, sum)); // inserting |
203 | assert( fsum->second == sum ); // check value |
204 | } |
205 | return 0; |
206 | } |
207 | }; |
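// Note: two identical tasks run the loop above concurrently. concurrent_hash_map
// makes the racing inserts safe: if the key is already present, insert() simply
// returns an accessor to the existing element, so both tasks see a consistent table.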
208 | |
209 | //! Root function |
210 | value ConcurrentHashSerialFib(int n) |
211 | { |
212 | NumbersTable Fib; |
213 | bool okay; |
214 | okay = Fib.insert( make_pair(0, 0) ); assert(okay); // assign initial values |
215 | okay = Fib.insert( make_pair(1, 1) ); assert(okay); |
216 | |
217 | task_list list; |
218 | // allocate tasks |
219 | list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask(Fib, n)); |
220 | list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask(Fib, n)); |
221 | task::spawn_root_and_wait(list); |
222 | NumbersTable::const_accessor fresult; |
223 | okay = Fib.find( fresult, n ); |
224 | assert(okay); |
225 | return fresult->second; |
226 | } |
227 | |
228 | // *** Queue with parallel_for and parallel_while *** // |
229 | |
230 | //! Stream of matrices |
231 | struct QueueStream { |
232 | volatile bool producer_is_done; |
233 | concurrent_queue<Matrix2x2> Queue; |
//! Get a pair of matrices if both are present
235 | bool pop_if_present( pair<Matrix2x2, Matrix2x2> &mm ) { |
236 | // get first matrix if present |
237 | if(!Queue.try_pop(mm.first)) return false; |
238 | // get second matrix if present |
239 | if(!Queue.try_pop(mm.second)) { |
240 | // if not, then push back first matrix |
241 | Queue.push(mm.first); return false; |
242 | } |
243 | return true; |
244 | } |
245 | }; |
246 | |
247 | //! Functor for parallel_for which fills the queue |
248 | struct parallel_forFibBody { |
249 | QueueStream &my_stream; |
250 | //! fill functor arguments |
251 | parallel_forFibBody(QueueStream &s) : my_stream(s) { } |
//! iterate through the range
253 | void operator()( const blocked_range<int> &range ) const { |
254 | int i_end = range.end(); |
255 | for( int i = range.begin(); i != i_end; ++i ) { |
256 | my_stream.Queue.push( Matrix1110 ); // push initial matrix |
257 | } |
258 | } |
259 | }; |
//! Functor for parallel_while which processes the queue
261 | class parallel_whileFibBody |
262 | { |
263 | QueueStream &my_stream; |
264 | parallel_while<parallel_whileFibBody> &my_while; |
265 | public: |
266 | typedef pair<Matrix2x2, Matrix2x2> argument_type; |
267 | //! fill functor arguments |
268 | parallel_whileFibBody(parallel_while<parallel_whileFibBody> &w, QueueStream &s) |
269 | : my_while(w), my_stream(s) { } |
270 | //! process pair of matrices |
271 | void operator() (argument_type mm) const { |
272 | mm.first = mm.first * mm.second; |
273 | // note: it can run concurrently with QueueStream::pop_if_present() |
274 | if(my_stream.Queue.try_pop(mm.second)) |
275 | my_while.add( mm ); // now, two matrices available. Add next iteration. |
276 | else my_stream.Queue.push( mm.first ); // or push back calculated value if queue is empty |
277 | } |
278 | }; |
279 | |
//! Task that fills the queue in parallel
281 | struct QueueInsertTask: public task { |
282 | QueueStream &my_stream; |
283 | int my_n; |
284 | //! fill task arguments |
285 | QueueInsertTask( int n, QueueStream &s ) : my_n(n), my_stream(s) { } |
286 | //! executing task |
287 | task* execute() /*override*/ { |
// Push n-1 initial matrices in parallel
289 | parallel_for( blocked_range<int>( 1, my_n, 10 ), parallel_forFibBody(my_stream) ); |
290 | my_stream.producer_is_done = true; |
291 | return 0; |
292 | } |
293 | }; |
//! Task that processes the queue in parallel
295 | struct QueueProcessTask: public task { |
296 | QueueStream &my_stream; |
297 | //! fill task argument |
298 | QueueProcessTask( QueueStream &s ) : my_stream(s) { } |
299 | //! executing task |
300 | task* execute() /*override*/ { |
301 | while( !my_stream.producer_is_done || my_stream.Queue.unsafe_size()>1 ) { |
302 | parallel_while<parallel_whileFibBody> w; // run while loop in parallel |
303 | w.run( my_stream, parallel_whileFibBody( w, my_stream ) ); |
304 | } |
305 | return 0; |
306 | } |
307 | }; |
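// Note: QueueInsertTask and QueueProcessTask run concurrently. The outer while loop
// in QueueProcessTask keeps restarting parallel_while until the producer has finished
// and at most one matrix, the final product, remains in the queue.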
308 | //! Root function |
309 | value ParallelQueueFib(int n) |
310 | { |
311 | QueueStream stream; |
312 | stream.producer_is_done = false; |
313 | task_list list; |
314 | list.push_back(*new(task::allocate_root()) QueueInsertTask( n, stream )); |
315 | list.push_back(*new(task::allocate_root()) QueueProcessTask( stream )); |
316 | // If there is only a single thread, the first task in the list runs to completion |
317 | // before the second task in the list starts. |
318 | task::spawn_root_and_wait(list); |
assert(stream.Queue.unsafe_size() == 1); // check that no work item was lost
320 | Matrix2x2 M; |
321 | bool result = stream.Queue.try_pop( M ); // get last matrix |
322 | assert( result ); |
323 | return M.v[0][0]; // and result number |
324 | } |
325 | |
326 | // *** Queue with pipeline *** // |
327 | |
//! filter that fills the queue
329 | class InputFilter: public filter { |
tbb::atomic<int> N; //!< index of Fibonacci number minus 1
331 | public: |
332 | concurrent_queue<Matrix2x2> Queue; |
333 | //! fill filter arguments |
334 | InputFilter( int n ) : filter(false /*is not serial*/) { N = n; } |
335 | //! executing filter |
336 | void* operator()(void*) /*override*/ { |
337 | int n = --N; |
338 | if(n <= 0) return 0; |
339 | Queue.push( Matrix1110 ); |
340 | return &Queue; |
341 | } |
342 | }; |
//! filter that processes the queue
344 | class MultiplyFilter: public filter { |
345 | public: |
346 | MultiplyFilter( ) : filter(false /*is not serial*/) { } |
347 | //! executing filter |
348 | void* operator()(void*p) /*override*/ { |
349 | concurrent_queue<Matrix2x2> &Queue = *static_cast<concurrent_queue<Matrix2x2> *>(p); |
350 | Matrix2x2 m1, m2; |
351 | // get two elements |
352 | while( !Queue.try_pop( m1 ) ) this_tbb_thread::yield(); |
353 | while( !Queue.try_pop( m2 ) ) this_tbb_thread::yield(); |
354 | m1 = m1 * m2; // process them |
355 | Queue.push( m1 ); // and push back |
return this; // the return value of the last filter is not used
357 | } |
358 | }; |
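// Note on counting: in ParallelPipeFib below, InputFilter(n-1) produces n-2 tokens and
// pushes one matrix per token; together with the matrix pushed before pipeline.run()
// the queue receives n-1 matrices. Each MultiplyFilter invocation removes one matrix
// net (pops two, pushes one), so exactly one matrix, Matrix1110^(n-1), remains.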
359 | //! Root function |
360 | value ParallelPipeFib(int n) |
361 | { |
362 | InputFilter input( n-1 ); |
363 | MultiplyFilter process; |
364 | // Create the pipeline |
365 | pipeline pipeline; |
366 | // add filters |
367 | pipeline.add_filter( input ); // first |
368 | pipeline.add_filter( process ); // second |
369 | |
370 | input.Queue.push( Matrix1110 ); |
371 | // Run the pipeline |
pipeline.run( n ); // the number of tokens must be larger than the maximum number of threads
pipeline.clear(); // do not forget to clear the pipeline
374 | |
375 | assert( input.Queue.unsafe_size()==1 ); |
376 | Matrix2x2 M; |
377 | bool result = input.Queue.try_pop( M ); // get last element |
378 | assert( result ); |
379 | return M.v[0][0]; // get value |
380 | } |
381 | |
382 | // *** parallel_reduce *** // |
383 | |
384 | //! Functor for parallel_reduce |
385 | struct parallel_reduceFibBody { |
386 | Matrix2x2 sum; |
int split_flag; //!< flag to skip one multiplication in split bodies
388 | //! Constructor fills sum with initial matrix |
389 | parallel_reduceFibBody() : sum( Matrix1110 ), split_flag(0) { } |
390 | //! Splitting constructor |
391 | parallel_reduceFibBody( parallel_reduceFibBody& other, split ) : sum( Matrix1110 ), split_flag(1/*note that it is split*/) {} |
392 | //! Join point |
393 | void join( parallel_reduceFibBody &s ) { |
394 | sum = sum * s.sum; |
395 | } |
396 | //! Process multiplications |
397 | void operator()( const blocked_range<int> &r ) { |
398 | for( int k = r.begin() + split_flag; k < r.end(); ++k ) |
399 | sum = sum * Matrix1110; |
400 | split_flag = 0; // reset flag, because this method can be reused for next range |
401 | } |
402 | }; |
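// Note: every body, original or split, starts with one Matrix1110 factor in sum.
// A split body therefore skips the first index of its first range (split_flag),
// which keeps the total number of factors over [2, n) equal to n-1, so the reduced
// sum is Matrix1110^(n-1) and its upper left cell is F(n).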
403 | //! Root function |
404 | value parallel_reduceFib(int n) |
405 | { |
406 | parallel_reduceFibBody b; |
407 | parallel_reduce(blocked_range<int>(2, n, 3), b); // do parallel reduce on range [2, n) for b |
408 | return b.sum.v[0][0]; |
409 | } |
410 | |
411 | // *** parallel_scan *** // |
412 | |
413 | //! Functor for parallel_scan |
414 | struct parallel_scanFibBody { |
415 | /** Though parallel_scan is usually used to accumulate running sums, |
416 | it can be used to accumulate running products too. */ |
417 | Matrix2x2 product; |
418 | /** Pointer to output sequence */ |
419 | value* const output; |
420 | //! Constructor sets product to identity matrix |
421 | parallel_scanFibBody(value* output_) : product( MatrixIdentity ), output(output_) {} |
422 | //! Splitting constructor |
423 | parallel_scanFibBody( parallel_scanFibBody &b, split) : product( MatrixIdentity ), output(b.output) {} |
424 | //! Method for merging summary information from a, which was split off from *this, into *this. |
425 | void reverse_join( parallel_scanFibBody &a ) { |
426 | // When using non-commutative reduction operation, reverse_join |
427 | // should put argument "a" on the left side of the operation. |
428 | // The reversal from the argument order is why the method is |
429 | // called "reverse_join" instead of "join". |
430 | product = a.product * product; |
431 | } |
432 | //! Method for assigning final result back to original body. |
433 | void assign( parallel_scanFibBody &b ) { |
434 | product = b.product; |
435 | } |
436 | //! Compute matrix running product. |
/** Tag indicates whether this is the final scan over the range, or
438 | just a helper "prescan" that is computing a partial reduction. */ |
439 | template<typename Tag> |
440 | void operator()( const blocked_range<int> &r, Tag tag) { |
441 | for( int k = r.begin(); k < r.end(); ++k ) { |
442 | // Code performs an "exclusive" scan, which outputs a value *before* updating the product. |
443 | // For an "inclusive" scan, output the value after the update. |
444 | if( tag.is_final_scan() ) |
445 | output[k] = product.v[0][1]; |
446 | product = product * Matrix1110; |
447 | } |
448 | } |
449 | }; |
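// Note: before index k is processed the running product equals Matrix1110^k
// (Matrix1110^0 is the identity), whose [0][1] element is F(k); the exclusive
// scan therefore writes F(k) to output[k], and after the whole range [0, n)
// the final product is Matrix1110^n with F(n) in its [0][1] cell.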
450 | //! Root function |
451 | value parallel_scanFib(int n) |
452 | { |
453 | value* output = new value[n]; |
454 | parallel_scanFibBody b(output); |
455 | parallel_scan(blocked_range<int>(0, n, 3), b); |
456 | // output[0..n-1] now contains the Fibonacci sequence (modulo integer wrap-around). |
457 | // Check the last two values for correctness. |
458 | assert( n<2 || output[n-2]+output[n-1]==b.product.v[0][1] ); |
459 | delete[] output; |
460 | return b.product.v[0][1]; |
461 | } |
462 | |
463 | // *** Raw tasks *** // |
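// FibTask uses the identities F(2k) = F(k+1)^2 - F(k-1)^2 and
// F(2k+1) = F(k+1)^2 + F(k)^2, so each task only needs the two
// Fibonacci numbers computed by its children.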
464 | |
//! task class which computes Fibonacci numbers using Lucas' formula
466 | struct FibTask: public task { |
467 | const int n; |
468 | value& sum; |
469 | value x, y; |
bool second_phase; //!< continuation flag
471 | // task arguments |
472 | FibTask( int n_, value& sum_ ) : |
473 | n(n_), sum(sum_), second_phase(false) |
474 | {} |
475 | //! Execute task |
476 | task* execute() /*override*/ { |
477 | // Using Lucas' formula here |
478 | if( second_phase ) { // children finished |
479 | sum = n&1 ? x*x + y*y : x*x - y*y; |
480 | return NULL; |
481 | } |
482 | if( n <= 2 ) { |
483 | sum = n!=0; |
484 | return NULL; |
485 | } else { |
486 | recycle_as_continuation(); // repeat this task when children finish |
487 | second_phase = true; // mark second phase |
488 | FibTask& a = *new( allocate_child() ) FibTask( n/2 + 1, x ); |
489 | FibTask& b = *new( allocate_child() ) FibTask( n/2 - 1 + (n&1), y ); |
490 | set_ref_count(2); |
491 | spawn( a ); |
492 | return &b; |
493 | } |
494 | } |
495 | }; |
496 | //! Root function |
497 | value ParallelTaskFib(int n) { |
498 | value sum; |
499 | FibTask& a = *new(task::allocate_root()) FibTask(n, sum); |
500 | task::spawn_root_and_wait(a); |
501 | return sum; |
502 | } |
503 | |
504 | /////////////////////////// Main //////////////////////////////////////////////////// |
505 | |
506 | //! A closed range of int. |
507 | struct IntRange { |
508 | int low; |
509 | int high; |
510 | void set_from_string( const char* s ); |
511 | IntRange( int low_, int high_ ) : low(low_), high(high_) {} |
512 | }; |
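// set_from_string parses either a single number "N" or a range "low:high";
// any other trailing character only triggers a warning.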
513 | |
514 | void IntRange::set_from_string( const char* s ) { |
515 | char* end; |
516 | high = low = strtol(s,&end,0); |
517 | switch( *end ) { |
518 | case ':': |
519 | high = strtol(end+1,0,0); |
520 | break; |
521 | case '\0': |
522 | break; |
523 | default: |
524 | printf("unexpected character = %c\n" ,*end); |
525 | } |
526 | } |
527 | |
528 | //! Tick count for start |
529 | static tick_count t0; |
530 | |
531 | //! Verbose output flag |
532 | static bool Verbose = false; |
533 | |
534 | typedef value (*MeasureFunc)(int); |
//! Measure the time of computing Fibonacci numbers for every index in [2..n]
536 | value Measure(const char *name, MeasureFunc func, int n) |
537 | { |
538 | value result; |
539 | if(Verbose) printf("%s" ,name); |
540 | t0 = tick_count::now(); |
541 | for(int number = 2; number <= n; number++) |
542 | result = func(number); |
543 | if(Verbose) printf("\t- in %f msec\n" , (tick_count::now() - t0).seconds()*1000); |
544 | return result; |
545 | } |
546 | |
547 | //! program entry |
548 | int main(int argc, char* argv[]) |
549 | { |
550 | if(argc>1) Verbose = true; |
551 | int NumbersCount = argc>1 ? strtol(argv[1],0,0) : 500; |
552 | IntRange NThread(1,4);// Number of threads to use. |
553 | if(argc>2) NThread.set_from_string(argv[2]); |
554 | unsigned long ntrial = argc>3? (unsigned long)strtoul(argv[3],0,0) : 1; |
555 | value result, sum; |
556 | |
557 | if(Verbose) printf("Fibonacci numbers example. Generating %d numbers..\n" , NumbersCount); |
558 | |
559 | result = Measure("Serial loop" , SerialFib, NumbersCount); |
560 | sum = Measure("Serial matrix" , SerialMatrixFib, NumbersCount); assert(result == sum); |
561 | sum = Measure("Serial vector" , SerialVectorFib, NumbersCount); assert(result == sum); |
562 | sum = Measure("Serial queue" , SerialQueueFib, NumbersCount); assert(result == sum); |
563 | // now in parallel |
564 | for( unsigned long i=0; i<ntrial; ++i ) { |
565 | for(int threads = NThread.low; threads <= NThread.high; threads *= 2) |
566 | { |
567 | task_scheduler_init scheduler_init(threads); |
568 | if(Verbose) printf("\nThreads number is %d\n" , threads); |
569 | |
570 | sum = Measure("Shared serial (mutex)\t" , SharedSerialFib<mutex>, NumbersCount); assert(result == sum); |
571 | sum = Measure("Shared serial (spin_mutex)" , SharedSerialFib<spin_mutex>, NumbersCount); assert(result == sum); |
572 | sum = Measure("Shared serial (queuing_mutex)" , SharedSerialFib<queuing_mutex>, NumbersCount); assert(result == sum); |
573 | sum = Measure("Shared serial (Conc.HashTable)" , ConcurrentHashSerialFib, NumbersCount); assert(result == sum); |
574 | sum = Measure("Parallel while+for/queue" , ParallelQueueFib, NumbersCount); assert(result == sum); |
575 | sum = Measure("Parallel pipe/queue\t" , ParallelPipeFib, NumbersCount); assert(result == sum); |
576 | sum = Measure("Parallel reduce\t\t" , parallel_reduceFib, NumbersCount); assert(result == sum); |
577 | sum = Measure("Parallel scan\t\t" , parallel_scanFib, NumbersCount); assert(result == sum); |
578 | sum = Measure("Parallel tasks\t\t" , ParallelTaskFib, NumbersCount); assert(result == sum); |
579 | } |
580 | |
581 | #ifdef __GNUC__ |
582 | if(Verbose) printf("Fibonacci number #%d modulo 2^64 is %lld\n\n" , NumbersCount, result); |
583 | #else |
584 | if(Verbose) printf("Fibonacci number #%d modulo 2^64 is %I64d\n\n" , NumbersCount, result); |
585 | #endif |
586 | } |
587 | if(!Verbose) printf("TEST PASSED\n" ); |
588 | return 0; |
589 | } |
590 | |
591 | // Utils |
592 | |
593 | void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]) |
594 | { |
595 | for( int i = 0; i <= 1; i++) |
596 | for( int j = 0; j <= 1; j++) |
597 | c[i][j] = a[i][0]*b[0][j] + a[i][1]*b[1][j]; |
598 | } |
599 | |
600 | Matrix2x2 Matrix2x2::operator *(const Matrix2x2 &to) const |
601 | { |
602 | Matrix2x2 result; |
603 | Matrix2x2Multiply(v, to.v, result.v); |
604 | return result; |
605 | } |
606 | |