Fibonacci.cpp source code [ThreadBB/examples/test_all/fibonacci/Fibonacci.cpp]

1	/*
2	Copyright (c) 2005-2019 Intel Corporation
3
4	Licensed under the Apache License, Version 2.0 (the "License");
5	you may not use this file except in compliance with the License.
6	You may obtain a copy of the License at
7
8	http://www.apache.org/licenses/LICENSE-2.0
9
10	Unless required by applicable law or agreed to in writing, software
11	distributed under the License is distributed on an "AS IS" BASIS,
12	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	See the License for the specific language governing permissions and
14	limitations under the License.
15	*/
16
17	/ Example program that computes Fibonacci numbers in different ways.*
18	Arguments are: [ Number [Threads [Repeats]]]
19	The defaults are Number=500 Threads=1:4 Repeats=1.
20
21	The point of this program is to check that the library is working properly.
22	Most of the computations are deliberately silly and not expected to
23	show any speedup on multiprocessors.
24	*/
25
26	// enable assertions
27	#ifdef NDEBUG
28	#undef NDEBUG
29	#endif
30
31	#include <cstdio>
32	#include <cstdlib>
33	#include <cassert>
34	#include <utility>
35	#include "tbb/task.h"
36	#include "tbb/task_scheduler_init.h"
37	#include "tbb/tick_count.h"
38	#include "tbb/blocked_range.h"
39	#include "tbb/concurrent_vector.h"
40	#include "tbb/concurrent_queue.h"
41	#include "tbb/concurrent_hash_map.h"
42	#include "tbb/parallel_while.h"
43	#include "tbb/parallel_for.h"
44	#include "tbb/parallel_reduce.h"
45	#include "tbb/parallel_scan.h"
46	#include "tbb/pipeline.h"
47	#include "tbb/atomic.h"
48	#include "tbb/mutex.h"
49	#include "tbb/spin_mutex.h"
50	#include "tbb/queuing_mutex.h"
51	#include "tbb/tbb_thread.h"
52
53	using namespace std;
54	using namespace tbb;
55
56	//! type used for Fibonacci number computations
57	typedef long long value;
58
59	//! Matrix 2x2 class
60	struct Matrix2x2
61	{
62	//! Array of values
63	value v[`2`][`2`];
64	Matrix2x2() {}
65	Matrix2x2(value v00, value v01, value v10, value v11) {
66	v[`0`][`0`] = v00; v[`0`][`1`] = v01; v[`1`][`0`] = v10; v[`1`][`1`] = v11;
67	}
68	Matrix2x2 operator * (const Matrix2x2 &to) const; //< Multiply two Matrices
69	};
70	//! Identity matrix
71	static const Matrix2x2 MatrixIdentity(`1`, `0`, `0`, `1`);
72	//! Default matrix to multiply
73	static const Matrix2x2 Matrix1110(`1`, `1`, `1`, `0`);
74	//! Raw arrays matrices multiply
75	void Matrix2x2Multiply(const value a[`2`][`2`], const value b[`2`][`2`], value c[`2`][`2`]);
76
77	/////////////////////// Serial methods ////////////////////////
78
79	//! Plain serial sum
80	value SerialFib(int n)
81	{
82	if(n < `2`)
83	return n;
84	value a = `0`, b = `1`, sum; int i;
85	for( i = `2`; i <= n; i++ )
86	{ // n is really index of Fibonacci number
87	sum = a + b; a = b; b = sum;
88	}
89	return sum;
90	}
91	//! Serial n-1 matrices multiplication
92	value SerialMatrixFib(int n)
93	{
94	value c[`2`][`2`], a[`2`][`2`] = {{`1`, `1`}, {`1`, `0`}}, b[`2`][`2`] = {{`1`, `1`}, {`1`, `0`}}; int i;
95	for(i = `2`; i < n; i++)
96	{ // Using condition to prevent copying of values
97	if(i & `1`) Matrix2x2Multiply(a, c, b);
98	else Matrix2x2Multiply(a, b, c);
99	}
100	return (i & `1`) ? c[`0`][`0`] : b[`0`][`0`]; // get result from upper left cell
101	}
102	//! Recursive summing. Just for complete list of serial algorithms, not used
103	value SerialRecursiveFib(int n)
104	{
105	value result;
106	if(n < `2`)
107	result = n;
108	else
109	result = SerialRecursiveFib(n - `1`) + SerialRecursiveFib(n - `2`);
110	return result;
111	}
112	//! Introducing of queue method in serial
113	value SerialQueueFib(int n)
114	{
115	concurrent_queue<Matrix2x2> Q;
116	for(int i = `1`; i < n; i++)
117	Q.push(Matrix1110);
118	Matrix2x2 A, B;
119	while(true) {
120	while( !Q.try_pop(A) ) this_tbb_thread::yield();
121	if(Q.empty()) break;
122	while( !Q.try_pop(B) ) this_tbb_thread::yield();
123	Q.push(A * B);
124	}
125	return A.v[`0`][`0`];
126	}
127	//! Trying to use concurrent_vector
128	value SerialVectorFib(int n)
129	{
130	concurrent_vector<value> A;
131	A.grow_by(`2`);
132	A [`0`] = `0`; A [`1`] = `1`;
133	for( int i = `2`; i <= n; i++)
134	{
135	A.grow_to_at_least(i+`1`);
136	A [i] = A [i-`1`] + A [i-`2`];
137	}
138	return A [n];
139	}
140
141	///////////////////// Parallel methods ////////////////////////
142
143	// Serial shared by mutexes * //*
144
145	//! Shared glabals
146	value SharedA = `0`, SharedB = `1`; int SharedI = `1`, SharedN;
147
148	//! Template task class which computes Fibonacci numbers with shared globals
149	template<typename M>
150	class SharedSerialFibBody {
151	M &mutex;
152	public:
153	SharedSerialFibBody( M &m ) : mutex( m ) {}
154	//! main loop
155	void operator()( const blocked_range<int>& range ) const {
156	for(;;) {
157	typename M::scoped_lock lock( mutex );
158	if(SharedI >= SharedN) break;
159	value sum = SharedA + SharedB;
160	SharedA = SharedB; SharedB = sum;
161	++SharedI;
162	}
163	}
164	};
165
166	//! Root function
167	template<class M>
168	value SharedSerialFib(int n)
169	{
170	SharedA = `0`; SharedB = `1`; SharedI = `1`; SharedN = n; M mutex;
171	parallel_for( blocked_range<int>(`0`,`4`,`1`), SharedSerialFibBody<M>( mutex ) );
172	return SharedB;
173	}
174
175	// Serial shared by concurrent hash map * //*
176
177	//! Hash comparer
178	struct IntHashCompare {
179	bool equal( const int j, const int k ) const { return j == k; }
180	unsigned long hash( const int k ) const { return (unsigned long)k; }
181	};
182	//! NumbersTable type based on concurrent_hash_map
183	typedef concurrent_hash_map<int, value, IntHashCompare> NumbersTable;
184	//! task for serial method using shared concurrent_hash_map
185	class ConcurrentHashSerialFibTask: public task {
186	NumbersTable &Fib;
187	int my_n;
188	public:
189	//! constructor
190	ConcurrentHashSerialFibTask( NumbersTable &cht, int n ) : Fib(cht), my_n(n) { }
191	//! executing task
192	task* execute() /override/ {
193	for( int i = `2`; i <= my_n; ++i ) { // there is no difference in to recycle or to make loop
194	NumbersTable::const_accessor f1, f2; // same as iterators
195	if( !Fib.find(f1, i-`1`) \|\| !Fib.find(f2, i-`2`) ) {
196	// Something is seriously wrong, because i-1 and i-2 must have been inserted
197	// earlier by this thread or another thread.
198	assert(`0`);
199	}
200	value sum = f1 ->second + f2 ->second;
201	NumbersTable::const_accessor fsum;
202	Fib.insert(fsum, make_pair(i, sum)); // inserting
203	assert( fsum ->second == sum ); // check value
204	}
205	return `0`;
206	}
207	};
208
209	//! Root function
210	value ConcurrentHashSerialFib(int n)
211	{
212	NumbersTable Fib;
213	bool okay;
214	okay = Fib.insert( make_pair(`0`, `0`) ); assert(okay); // assign initial values
215	okay = Fib.insert( make_pair(`1`, `1`) ); assert(okay);
216
217	task_list list;
218	// allocate tasks
219	list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask (Fib, n));
220	list.push_back(*new(task::allocate_root()) ConcurrentHashSerialFibTask (Fib, n));
221	task::spawn_root_and_wait(list);
222	NumbersTable::const_accessor fresult;
223	okay = Fib.find( fresult, n );
224	assert(okay);
225	return fresult ->second;
226	}
227
228	// Queue with parallel_for and parallel_while * //*
229
230	//! Stream of matrices
231	struct QueueStream {
232	volatile bool producer_is_done;
233	concurrent_queue<Matrix2x2> Queue;
234	//! Get pair of matricies if present
235	bool pop_if_present( pair<Matrix2x2, Matrix2x2> &mm ) {
236	// get first matrix if present
237	if(!Queue.try_pop(mm.first)) return false;
238	// get second matrix if present
239	if(!Queue.try_pop(mm.second)) {
240	// if not, then push back first matrix
241	Queue.push(mm.first); return false;
242	}
243	return true;
244	}
245	};
246
247	//! Functor for parallel_for which fills the queue
248	struct parallel_forFibBody {
249	QueueStream &my_stream;
250	//! fill functor arguments
251	parallel_forFibBody(QueueStream &s) : my_stream(s) { }
252	//! iterate thorough range
253	void operator()( const blocked_range<int> &range ) const {
254	int i_end = range.end();
255	for( int i = range.begin(); i != i_end; ++i ) {
256	my_stream.Queue.push( Matrix1110 ); // push initial matrix
257	}
258	}
259	};
260	//! Functor for parallel_while which process the queue
261	class parallel_whileFibBody
262	{
263	QueueStream &my_stream;
264	parallel_while<parallel_whileFibBody> &my_while;
265	public:
266	typedef pair<Matrix2x2, Matrix2x2> argument_type;
267	//! fill functor arguments
268	parallel_whileFibBody(parallel_while<parallel_whileFibBody> &w, QueueStream &s)
269	: my_while(w), my_stream(s) { }
270	//! process pair of matrices
271	void operator() (argument_type mm) const {
272	mm.first = mm.first * mm.second;
273	// note: it can run concurrently with QueueStream::pop_if_present()
274	if(my_stream.Queue.try_pop(mm.second))
275	my_while.add( mm ); // now, two matrices available. Add next iteration.
276	else my_stream.Queue.push( mm.first ); // or push back calculated value if queue is empty
277	}
278	};
279
280	//! Parallel queue's filling task
281	struct QueueInsertTask: public task {
282	QueueStream &my_stream;
283	int my_n;
284	//! fill task arguments
285	QueueInsertTask( int n, QueueStream &s ) : my_n(n), my_stream(s) { }
286	//! executing task
287	task* execute() /override/ {
288	// Execute of parallel pushing of n-1 initial matrices
289	parallel_for( blocked_range<int>( `1`, my_n, `10` ), parallel_forFibBody (my_stream) );
290	my_stream.producer_is_done = true;
291	return `0`;
292	}
293	};
294	//! Parallel queue's processing task
295	struct QueueProcessTask: public task {
296	QueueStream &my_stream;
297	//! fill task argument
298	QueueProcessTask( QueueStream &s ) : my_stream(s) { }
299	//! executing task
300	task* execute() /override/ {
301	while( !my_stream.producer_is_done \|\| my_stream.Queue.unsafe_size()>`1` ) {
302	parallel_while<parallel_whileFibBody> w; // run while loop in parallel
303	w.run( my_stream, parallel_whileFibBody ( w, my_stream ) );
304	}
305	return `0`;
306	}
307	};
308	//! Root function
309	value ParallelQueueFib(int n)
310	{
311	QueueStream stream;
312	stream.producer_is_done = false;
313	task_list list;
314	list.push_back(*new(task::allocate_root()) QueueInsertTask ( n, stream ));
315	list.push_back(*new(task::allocate_root()) QueueProcessTask ( stream ));
316	// If there is only a single thread, the first task in the list runs to completion
317	// before the second task in the list starts.
318	task::spawn_root_and_wait(list);
319	assert(stream.Queue.unsafe_size() == `1`); // it is easy to lose some work
320	Matrix2x2 M;
321	bool result = stream.Queue.try_pop( M ); // get last matrix
322	assert( result );
323	return M.v[`0`][`0`]; // and result number
324	}
325
326	// Queue with pipeline * //*
327
328	//! filter to fills queue
329	class InputFilter: public filter {
330	tbb::atomic<int> N; //< index of Fibonacci number minus 1
331	public:
332	concurrent_queue<Matrix2x2> Queue;
333	//! fill filter arguments
334	InputFilter( int n ) : filter (false /is not serial/) { N = n; }
335	//! executing filter
336	void* operator()(void) /override/* {
337	int n = --N;
338	if(n <= `0`) return `0`;
339	Queue.push( Matrix1110 );
340	return &Queue;
341	}
342	};
343	//! filter to process queue
344	class MultiplyFilter: public filter {
345	public:
346	MultiplyFilter( ) : filter (false /is not serial/) { }
347	//! executing filter
348	void* operator()(voidp) /override/* {
349	concurrent_queue<Matrix2x2> &Queue = *static_cast<concurrent_queue<Matrix2x2> *>(p);
350	Matrix2x2 m1, m2;
351	// get two elements
352	while( !Queue.try_pop( m1 ) ) this_tbb_thread::yield();
353	while( !Queue.try_pop( m2 ) ) this_tbb_thread::yield();
354	m1 = m1 * m2; // process them
355	Queue.push( m1 ); // and push back
356	return this; // just nothing
357	}
358	};
359	//! Root function
360	value ParallelPipeFib(int n)
361	{
362	InputFilter input( n-`1` );
363	MultiplyFilter process;
364	// Create the pipeline
365	pipeline pipeline;
366	// add filters
367	pipeline.add_filter( input ); // first
368	pipeline.add_filter( process ); // second
369
370	input.Queue.push( Matrix1110 );
371	// Run the pipeline
372	pipeline.run( n ); // must be larger then max threads number
373	pipeline.clear(); // do not forget clear the pipeline
374
375	assert( input.Queue.unsafe_size()==`1` );
376	Matrix2x2 M;
377	bool result = input.Queue.try_pop( M ); // get last element
378	assert( result );
379	return M.v[`0`][`0`]; // get value
380	}
381
382	// parallel_reduce * //*
383
384	//! Functor for parallel_reduce
385	struct parallel_reduceFibBody {
386	Matrix2x2 sum;
387	int split_flag; //< flag to make one less operation for split bodies
388	//! Constructor fills sum with initial matrix
389	parallel_reduceFibBody() : sum ( Matrix1110 ), split_flag(`0`) { }
390	//! Splitting constructor
391	parallel_reduceFibBody( parallel_reduceFibBody& other, split ) : sum ( Matrix1110 ), split_flag(`1`/note that it is split/) {}
392	//! Join point
393	void join( parallel_reduceFibBody &s ) {
394	sum = sum * s.sum;
395	}
396	//! Process multiplications
397	void operator()( const blocked_range<int> &r ) {
398	for( int k = r.begin() + split_flag; k < r.end(); ++k )
399	sum = sum * Matrix1110;
400	split_flag = `0`; // reset flag, because this method can be reused for next range
401	}
402	};
403	//! Root function
404	value parallel_reduceFib(int n)
405	{
406	parallel_reduceFibBody b;
407	parallel_reduce(blocked_range<int>(`2`, n, `3`), b); // do parallel reduce on range [2, n) for b
408	return b.sum.v[`0`][`0`];
409	}
410
411	// parallel_scan * //*
412
413	//! Functor for parallel_scan
414	struct parallel_scanFibBody {
415	/* Though parallel_scan is usually used to accumulate running sums,*
416	it can be used to accumulate running products too. /*
417	Matrix2x2 product;
418	/* Pointer to output sequence /
419	value* const output;
420	//! Constructor sets product to identity matrix
421	parallel_scanFibBody(value* output_) : product ( MatrixIdentity ), output(output_) {}
422	//! Splitting constructor
423	parallel_scanFibBody( parallel_scanFibBody &b, split) : product ( MatrixIdentity ), output(b.output) {}
424	//! Method for merging summary information from a, which was split off from this, into this.
425	void reverse_join( parallel_scanFibBody &a ) {
426	// When using non-commutative reduction operation, reverse_join
427	// should put argument "a" on the left side of the operation.
428	// The reversal from the argument order is why the method is
429	// called "reverse_join" instead of "join".
430	product = a.product * product;
431	}
432	//! Method for assigning final result back to original body.
433	void assign( parallel_scanFibBody &b ) {
434	product = b.product;
435	}
436	//! Compute matrix running product.
437	/* Tag indicates whether is is the final scan over the range, or*
438	just a helper "prescan" that is computing a partial reduction. /*
439	template<typename Tag>
440	void operator()( const blocked_range<int> &r, Tag tag) {
441	for( int k = r.begin(); k < r.end(); ++k ) {
442	// Code performs an "exclusive" scan, which outputs a value before* updating the product.*
443	// For an "inclusive" scan, output the value after the update.
444	if( tag.is_final_scan() )
445	output[k] = product.v[`0`][`1`];
446	product = product * Matrix1110;
447	}
448	}
449	};
450	//! Root function
451	value parallel_scanFib(int n)
452	{
453	value* output = new value[n];
454	parallel_scanFibBody b(output);
455	parallel_scan(blocked_range<int>(`0`, n, `3`), b);
456	// output[0..n-1] now contains the Fibonacci sequence (modulo integer wrap-around).
457	// Check the last two values for correctness.
458	assert( n<`2` \|\| output[n-`2`]+output[n-`1`]==b.product.v[`0`][`1`] );
459	delete[] output;
460	return b.product.v[`0`][`1`];
461	}
462
463	// Raw tasks * //*
464
465	//! task class which computes Fibonacci numbers by Lucas formula
466	struct FibTask: public task {
467	const int n;
468	value& sum;
469	value x, y;
470	bool second_phase; //< flag of continuation
471	// task arguments
472	FibTask( int n_, value& sum_ ) :
473	n(n_), sum(sum_), second_phase(false)
474	{}
475	//! Execute task
476	task* execute() /override/ {
477	// Using Lucas' formula here
478	if( second_phase ) { // children finished
479	sum = n&`1` ? xx + yy : xx - yy;
480	return NULL;
481	}
482	if( n <= `2` ) {
483	sum = n!=`0`;
484	return NULL;
485	} else {
486	recycle_as_continuation(); // repeat this task when children finish
487	second_phase = true; // mark second phase
488	FibTask& a = *new( allocate_child() ) FibTask ( n/`2` + `1`, x );
489	FibTask& b = *new( allocate_child() ) FibTask ( n/`2` - `1` + (n&`1`), y );
490	set_ref_count(`2`);
491	spawn( a );
492	return &b;
493	}
494	}
495	};
496	//! Root function
497	value ParallelTaskFib(int n) {
498	value sum;
499	FibTask& a = *new(task::allocate_root()) FibTask (n, sum);
500	task::spawn_root_and_wait(a);
501	return sum;
502	}
503
504	/////////////////////////// Main ////////////////////////////////////////////////////
505
506	//! A closed range of int.
507	struct IntRange {
508	int low;
509	int high;
510	void set_from_string( const char* s );
511	IntRange( int low_, int high_ ) : low(low_), high(high_) {}
512	};
513
514	void IntRange::set_from_string( const char* s ) {
515	char* end;
516	high = low = strtol(s,&end,`0`);
517	switch( *end ) {
518	case `':'`:
519	high = strtol(end+`1`,`0`,`0`);
520	break;
521	case `'\0'`:
522	break;
523	default:
524	printf("unexpected character = %c\n",*end);
525	}
526	}
527
528	//! Tick count for start
529	static tick_count t0;
530
531	//! Verbose output flag
532	static bool Verbose = false;
533
534	typedef value (MeasureFunc)(int*);
535	//! Measure ticks count in loop [2..n]
536	value Measure(const char name, MeasureFunc func, int* n)
537	{
538	value result;
539	if(Verbose) printf("%s",name);
540	t0 = tick_count::now();
541	for(int number = `2`; number <= n; number++)
542	result = func(number);
543	if(Verbose) printf("\t- in %f msec\n", (tick_count::now() - t0).seconds()*`1000`);
544	return result;
545	}
546
547	//! program entry
548	int main(int argc, char* argv[])
549	{
550	if(argc>`1`) Verbose = true;
551	int NumbersCount = argc>`1` ? strtol(argv[`1`],`0`,`0`) : `500`;
552	IntRange NThread(`1`,`4`);// Number of threads to use.
553	if(argc>`2`) NThread.set_from_string(argv[`2`]);
554	unsigned long ntrial = argc>`3`? (unsigned long)strtoul(argv[`3`],`0`,`0`) : `1`;
555	value result, sum;
556
557	if(Verbose) printf("Fibonacci numbers example. Generating %d numbers..\n", NumbersCount);
558
559	result = Measure("Serial loop", SerialFib, NumbersCount);
560	sum = Measure("Serial matrix", SerialMatrixFib, NumbersCount); assert(result == sum);
561	sum = Measure("Serial vector", SerialVectorFib, NumbersCount); assert(result == sum);
562	sum = Measure("Serial queue", SerialQueueFib, NumbersCount); assert(result == sum);
563	// now in parallel
564	for( unsigned long i=`0`; i<ntrial; ++i ) {
565	for(int threads = NThread.low; threads <= NThread.high; threads *= `2`)
566	{
567	task_scheduler_init scheduler_init(threads);
568	if(Verbose) printf("\nThreads number is %d\n", threads);
569
570	sum = Measure("Shared serial (mutex)\t", SharedSerialFib<mutex>, NumbersCount); assert(result == sum);
571	sum = Measure("Shared serial (spin_mutex)", SharedSerialFib<spin_mutex>, NumbersCount); assert(result == sum);
572	sum = Measure("Shared serial (queuing_mutex)", SharedSerialFib<queuing_mutex>, NumbersCount); assert(result == sum);
573	sum = Measure("Shared serial (Conc.HashTable)", ConcurrentHashSerialFib, NumbersCount); assert(result == sum);
574	sum = Measure("Parallel while+for/queue", ParallelQueueFib, NumbersCount); assert(result == sum);
575	sum = Measure("Parallel pipe/queue\t", ParallelPipeFib, NumbersCount); assert(result == sum);
576	sum = Measure("Parallel reduce\t\t", parallel_reduceFib, NumbersCount); assert(result == sum);
577	sum = Measure("Parallel scan\t\t", parallel_scanFib, NumbersCount); assert(result == sum);
578	sum = Measure("Parallel tasks\t\t", ParallelTaskFib, NumbersCount); assert(result == sum);
579	}
580
581	#ifdef __GNUC__
582	if(Verbose) printf("Fibonacci number #%d modulo 2^64 is %lld\n\n", NumbersCount, result);
583	#else
584	if(Verbose) printf("Fibonacci number #%d modulo 2^64 is %I64d\n\n", NumbersCount, result);
585	#endif
586	}
587	if(!Verbose) printf("TEST PASSED\n");
588	return `0`;
589	}
590
591	// Utils
592
593	void Matrix2x2Multiply(const value a[`2`][`2`], const value b[`2`][`2`], value c[`2`][`2`])
594	{
595	for( int i = `0`; i <= `1`; i++)
596	for( int j = `0`; j <= `1`; j++)
597	c[i][j] = a[i][`0`]b[`0`][j] + a[i][`1`]b[`1`][j];
598	}
599
600	Matrix2x2 Matrix2x2::operator (const* Matrix2x2 &to) const
601	{
602	Matrix2x2 result;
603	Matrix2x2Multiply(v, to.v, result.v);
604	return result;
605	}
606

Browse the source code of ThreadBB/examples/test_all/fibonacci/Fibonacci.cpp