| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | /** This test checks the automatic propagation of master thread FPU settings |
| 18 | into the worker threads. **/ |
| 19 | |
| 20 | #include "harness_fp.h" |
| 21 | #include "harness.h" |
| 22 | #define private public |
| 23 | #include "tbb/task.h" |
| 24 | #undef private |
| 25 | #include "tbb/parallel_for.h" |
| 26 | #include "tbb/task_scheduler_init.h" |
| 27 | |
// Iteration count for the propagation-checking parallel_for loops: large
// enough that worker threads actually participate and get checked.
const int N = 500000;
| 29 | |
| 30 | #if ( __TBB_x86_32 || __TBB_x86_64 ) && __TBB_CPU_CTL_ENV_PRESENT && !defined(__TBB_WIN32_USE_CL_BUILTINS) |
| 31 | #include "harness_barrier.h" |
| 32 | |
// parallel_for body run once per thread; verifies that the SSE *status*
// flags raised by the master in CheckNoSseStatusPropagation were NOT
// copied into this thread's MXCSR.
class CheckNoSseStatusPropagationBody : public NoAssign {
    Harness::SpinBarrier &barrier;
public:
    CheckNoSseStatusPropagationBody( Harness::SpinBarrier &_barrier ) : barrier(_barrier) {}
    void operator()( const tbb::blocked_range<int>& ) const {
        // Rendezvous so each of the num_threads range chunks is being executed
        // by a distinct thread before the check is made.
        barrier.wait();
        // cpu_ctl_env internals are reachable because of the
        // '#define private public' hack around the tbb/task.h include above.
        tbb::internal::cpu_ctl_env ctl;
        ctl.get_env();
        ASSERT( (ctl.mxcsr & SSE_STATUS_MASK) == 0, "FPU control status bits have been propagated." );
    }
};
| 44 | |
// Raises all SSE status flags in the master thread's MXCSR, creates a
// scheduler, and checks inside a parallel region that workers did not
// inherit them: TBB must propagate only *control* settings (rounding mode,
// exception masks), never the sticky *status* bits.
void CheckNoSseStatusPropagation() {
    tbb::internal::cpu_ctl_env ctl;
    ctl.get_env();
    ctl.mxcsr |= SSE_STATUS_MASK;   // set every SSE status flag in this thread
    ctl.set_env();
    const int num_threads = tbb::task_scheduler_init::default_num_threads();
    Harness::SpinBarrier barrier(num_threads);
    // The scheduler is initialized after the status bits were raised, so the
    // FPU state it captures includes them; workers still must not see them.
    tbb::task_scheduler_init init(num_threads);
    tbb::parallel_for( tbb::blocked_range<int>(0, num_threads), CheckNoSseStatusPropagationBody(barrier) );
    // Clear the status flags again so later tests start from a clean state.
    ctl.mxcsr &= ~SSE_STATUS_MASK;
    ctl.set_env();
}
#else /* Other archs */
// MXCSR status bits only exist on x86/x86-64 with a CPU control environment;
// elsewhere the check is a no-op.
void CheckNoSseStatusPropagation() {}
#endif /* Other archs */
| 60 | |
| 61 | class RoundingModeCheckBody { |
| 62 | int m_mode; |
| 63 | int m_sseMode; |
| 64 | public: |
| 65 | void operator() ( int /*iter*/ ) const { |
| 66 | ASSERT( GetRoundingMode() == m_mode, "FPU control state has not been propagated." ); |
| 67 | ASSERT( GetSseMode() == m_sseMode, "SSE control state has not been propagated." ); |
| 68 | } |
| 69 | |
| 70 | RoundingModeCheckBody ( int mode, int sseMode ) : m_mode(mode), m_sseMode(sseMode) {} |
| 71 | }; |
| 72 | |
// Checks, for every combination of SSE and rounding modes, that the FPU
// control state active in the master thread when a scheduler instance is
// created gets propagated to the workers executing its tasks.
void TestArenaFpuEnvPropagation( int id ) {
    // TBB scheduler instance in a master thread captures the FPU control state
    // at the moment of its initialization and passes it to the workers toiling
    // on its behalf.
    for( int k = 0; k < NumSseModes; ++k ) {
        // Offsetting by the thread id makes concurrently running threads
        // exercise different mode combinations at the same time.
        int sse_mode = SseModes[(k + id) % NumSseModes];
        SetSseMode( sse_mode );
        for( int i = 0; i < NumRoundingModes; ++i ) {
            int mode = RoundingModes[(i + id) % NumRoundingModes];
            SetRoundingMode( mode );
            // New mode must be set before TBB scheduler is initialized
            tbb::task_scheduler_init init;
            tbb::parallel_for( 0, N, 1, RoundingModeCheckBody(mode, sse_mode) );
            // The master's own rounding mode must survive the parallel region.
            ASSERT( GetRoundingMode() == mode, NULL );
        }
    }
}
| 90 | |
| 91 | #if __TBB_FP_CONTEXT |
// Checks that the FP settings captured at (implicit) scheduler creation stick
// for the scheduler's whole lifetime, regardless of later mode changes in the
// master thread.
void TestArenaFpuEnvPersistence( int id ) {
    // Since the following loop uses auto-initialization, the scheduler instance
    // implicitly created by the first parallel_for invocation will persist
    // until the thread ends, and thus workers will use the mode set by the
    // first iteration.
    int captured_mode = RoundingModes[id % NumRoundingModes];
    int captured_sse_mode = SseModes[id % NumSseModes];
    for( int k = 0; k < NumSseModes; ++k ) {
        int sse_mode = SseModes[(k + id) % NumSseModes];
        SetSseMode( sse_mode );
        for( int i = 0; i < NumRoundingModes; ++i ) {
            int mode = RoundingModes[(i + id) % NumRoundingModes];
            SetRoundingMode( mode );
            // Workers must still observe the modes of the first iteration
            // (captured_*), not the master's current ones.
            tbb::parallel_for( 0, N, 1, RoundingModeCheckBody(captured_mode, captured_sse_mode) );
            ASSERT( GetRoundingMode() == mode, NULL );
        }
    }
}
| 110 | #endif |
| 111 | |
// Per-thread body for NativeParallelFor: runs the propagation test (and,
// where FP contexts are supported, the persistence test), using the thread
// index to vary the mode combinations across threads.
class LauncherBody {
public:
    void operator() ( int id ) const {
        TestArenaFpuEnvPropagation( id );
#if __TBB_FP_CONTEXT
        TestArenaFpuEnvPersistence( id );
#endif
    }
};
| 121 | |
// Launches the per-arena FPU propagation tests from many native threads so
// that arenas compete for workers (oversubscription).
void TestFpuEnvPropagation () {
    const int p = tbb::task_scheduler_init::default_num_threads();
    // The test should be run in an oversubscription mode. So create 4*p threads but
    // limit the oversubscription for big machines (p>32) with 4*32+(p-32) threads.
    // (The factor 4 corresponds to NumRoundingModes.)
    const int num_threads = p + (NumRoundingModes-1)*min(p,32);
    NativeParallelFor ( num_threads, LauncherBody() );
}
| 129 | |
| 130 | void TestCpuCtlEnvApi () { |
| 131 | for( int k = 0; k < NumSseModes; ++k ) { |
| 132 | SetSseMode( SseModes[k] ); |
| 133 | for( int i = 0; i < NumRoundingModes; ++i ) { |
| 134 | SetRoundingMode( RoundingModes[i] ); |
| 135 | ASSERT( GetRoundingMode() == RoundingModes[i], NULL ); |
| 136 | ASSERT( GetSseMode() == SseModes[k], NULL ); |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | #if __TBB_FP_CONTEXT |
// One isolated context per distinct (rounding mode, SSE mode) combination.
const int numModes = NumRoundingModes*NumSseModes;
const int numArenas = 4;
tbb::task_group_context *contexts[numModes];
// Mode tables: entries [0, numModes) describe the FP settings captured by
// contexts[]; the extra numArenas entries hold each arena's default
// (master-thread) mode, filled in by TestContextFpuEnv.
int roundingModes[numModes+numArenas];
int sseModes[numModes+numArenas];
| 148 | |
// Body of the parallel loops used in the context test; recursively spawns
// root tasks under various contexts. Defined after TestContextFpuEnvTask,
// which it references.
class TestContextFpuEnvBody {
    int arenaNum;   // index of the native thread/arena this body runs in
    int mode;       // mode-table index this body expects to run with
    int depth;      // recursion depth, bounded by TestContextFpuEnvTask::MAX_DEPTH
public:
    TestContextFpuEnvBody( int _arenaNum, int _mode, int _depth = 0 ) : arenaNum(_arenaNum), mode(_mode), depth(_depth) {}
    void operator()( const tbb::blocked_range<int> &r ) const;
};
| 157 | |
| 158 | inline void SetMode( int mode ) { |
| 159 | SetRoundingMode( roundingModes[mode] ); |
| 160 | SetSseMode( sseModes[mode] ); |
| 161 | } |
| 162 | |
| 163 | inline void AssertMode( int mode ) { |
| 164 | ASSERT( GetRoundingMode() == roundingModes[mode], "FPU control state has not been set correctly." ); |
| 165 | ASSERT( GetSseMode() == sseModes[mode], "SSE control state has not been set correctly." ); |
| 166 | } |
| 167 | |
| 168 | inline int SetNextMode( int mode, int step ) { |
| 169 | const int nextMode = (mode+step)%numModes; |
| 170 | SetMode( nextMode ); |
| 171 | return nextMode; |
| 172 | } |
| 173 | |
// Root task exercising every way a task_group_context can acquire FP
// settings: the implicit default context, a user-created context, explicit
// capture_fp_settings() (including re-capture), and an isolated context.
// Recurses through TestContextFpuEnvBody up to MAX_DEPTH.
class TestContextFpuEnvTask : public tbb::task {
    int arenaNum;   // index of the native thread/arena running this task tree
    int mode;       // mode-table index this task must execute with
    int depth;      // current recursion depth
#if __TBB_CPU_CTL_ENV_PRESENT
    static const int MAX_DEPTH = 3;
#else
    static const int MAX_DEPTH = 4;
#endif
public:
    TestContextFpuEnvTask( int _arenaNum, int _mode, int _depth = 0 ) : arenaNum(_arenaNum), mode(_mode), depth(_depth) {}
    tbb::task* execute() __TBB_override {
        AssertMode( mode );
        if ( depth < MAX_DEPTH ) {
            // The mode is changed before each parallel_for below so the
            // AssertMode after the call verifies that the algorithm restored
            // the caller's settings on return.

            // Test default context.
            const int newMode1 = SetNextMode( mode, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, mode, depth+1 ) );
            AssertMode( newMode1 );

            // Test user default context.
            const int newMode2 = SetNextMode( newMode1, depth+1 );
            tbb::task_group_context ctx1;
            const int newMode3 = SetNextMode( newMode2, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, mode, depth+1 ), ctx1 );
            AssertMode( newMode3 );

            // Test user context which captured FPU control settings.
            const int newMode4 = SetNextMode( newMode3, depth+1 );
            // Capture newMode4
            ctx1.capture_fp_settings();
            const int newMode5 = SetNextMode( newMode4, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, newMode4, depth+1 ), ctx1 );
            AssertMode( newMode5 );

            // And again test user context which captured FPU control settings to check multiple captures.
            const int newMode6 = SetNextMode( newMode5, depth+1 );
            // Capture newMode6
            ctx1.capture_fp_settings();
            const int newMode7 = SetNextMode( newMode6, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, newMode6, depth+1 ), ctx1 );
            AssertMode( newMode7 );

            // Test an isolated context. The isolated context should use default FPU control settings
            // (the arena's default mode, table slot numModes+arenaNum).
            const int newMode8 = SetNextMode( newMode7, depth+1 );
            tbb::task_group_context ctx2( tbb::task_group_context::isolated );
            const int newMode9 = SetNextMode( newMode8, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, numModes+arenaNum, depth+1 ), ctx2 );
            AssertMode( newMode9 );

            // The binding should not overwrite captured FPU control settings.
            const int newMode10 = SetNextMode( newMode9, depth+1 );
            tbb::task_group_context ctx3;
            ctx3.capture_fp_settings();
            const int newMode11 = SetNextMode( newMode10, depth+1 );
            tbb::parallel_for( tbb::blocked_range<int>(0, numModes+1), TestContextFpuEnvBody( arenaNum, newMode10, depth+1 ), ctx3 );
            AssertMode( newMode11 );

            // Restore initial mode since user code in tbb::task::execute should not change FPU settings.
            SetMode( mode );
        }

        return NULL;
    }
};
| 238 | |
// Spawns one root task per mode index in the given subrange: the sentinel
// index numModes (last chunk only) is run under a default context and must
// therefore see this body's entry mode; all other indices use the matching
// pre-captured isolated context from contexts[].
void TestContextFpuEnvBody::operator()( const tbb::blocked_range<int> &r ) const {
    AssertMode( mode );

    // Switch modes before spawning so the AssertMode after each wait confirms
    // that this thread's settings survive the child task trees.
    const int newMode = SetNextMode( mode, depth+2 );

    int end = r.end();
    if ( end-1 == numModes ) {
        // For a default context our mode should be inherited.
        tbb::task::spawn_root_and_wait(
            *new( tbb::task::allocate_root() ) TestContextFpuEnvTask( arenaNum, mode, depth ) );
        AssertMode( newMode );
        end--;
    }
    for ( int i=r.begin(); i<end; ++i ) {
        // contexts[i] carries the FP settings of mode-table entry i.
        tbb::task::spawn_root_and_wait(
            *new( tbb::task::allocate_root(*contexts[i]) ) TestContextFpuEnvTask( arenaNum, i, depth ) );
        AssertMode( newMode );
    }

    // Restore initial mode since user code in tbb::task::execute should not change FPU settings.
    SetMode( mode );
}
| 261 | |
// Per-arena driver: installs the arena's default FP mode, creates the
// scheduler while that mode is active (so it becomes the arena's default),
// and runs the recursive context test tree.
class TestContextFpuEnvNativeLoopBody {
public:
    void operator() ( int arenaNum ) const {
        // numModes+arenaNum is the mode-table slot reserved for this arena.
        SetMode(numModes+arenaNum);
        tbb::task_scheduler_init init;
        tbb::task::spawn_root_and_wait( *new (tbb::task::allocate_root() ) TestContextFpuEnvTask( arenaNum, numModes+arenaNum ) );
    }
};
| 270 | |
| 271 | #if TBB_USE_EXCEPTIONS |
const int NUM_ITERS = 1000;
// Verifies that the caller's FP settings are restored when a parallel_for
// exits via an exception instead of completing normally.
class TestContextFpuEnvEhBody {
    int mode;      // mode-table index this body expects on entry
    int eh_iter;   // iteration index designated to (re)throw
    int depth;     // 0 for the outer loop, 1 for the nested (leaf) loop
public:
    TestContextFpuEnvEhBody( int _mode, int _eh_iter, int _depth = 0 ) : mode(_mode), eh_iter(_eh_iter), depth(_depth) {}
    void operator()( const tbb::blocked_range<int> &r ) const {
        AssertMode( mode );
        if ( depth < 1 ) {
            const int newMode1 = SetNextMode( mode, 1 );
            tbb::task_group_context ctx;
            // The nested parallel_for runs with newMode1, captured here.
            ctx.capture_fp_settings();
            const int newMode2 = SetNextMode( newMode1, 1 );
            try {
                tbb::parallel_for( tbb::blocked_range<int>(0, NUM_ITERS), TestContextFpuEnvEhBody(newMode1,rand()%NUM_ITERS,1), tbb::simple_partitioner(), ctx );
            } catch (...) {
                // Even on the exceptional path this thread's mode must be intact.
                AssertMode( newMode2 );
                // Only the designated iteration propagates the exception upward.
                if ( r.begin() == eh_iter ) throw;
            }
            AssertMode( newMode2 );
            SetMode( mode );
        } else if ( r.begin() == eh_iter ) throw 0;   // leaf level: throw from the chosen iteration
    }
};
| 297 | |
// Per-thread driver for the exception test: runs a parallel_for that is
// expected to throw (its body rethrows from a designated iteration) under a
// context with different FP settings, and checks the thread's own settings
// are intact afterwards.
class TestContextFpuEnvEhNativeLoopBody {
public:
    void operator() ( int arenaNum ) const {
        SetMode( arenaNum%numModes );
        try {
            tbb::parallel_for( tbb::blocked_range<int>(0, NUM_ITERS), TestContextFpuEnvEhBody((arenaNum+1)%numModes,rand()%NUM_ITERS),
                               tbb::simple_partitioner(), *contexts[(arenaNum+1)%numModes] );
            ASSERT( false, "parallel_for has not thrown an exception." );
        } catch (...) {
            // This thread's FP settings must be restored after the exception.
            AssertMode( arenaNum%numModes );
        }
    }
};
| 311 | #endif /* TBB_USE_EXCEPTIONS */ |
| 312 | |
// Builds an isolated, FP-settings-capturing context for every combination of
// rounding and SSE modes, fills the mode tables (including one default-mode
// slot per arena), then runs the context propagation test -- and, where
// exceptions are enabled, the exception-path test -- from numArenas threads.
void TestContextFpuEnv() {
    // Prepare contexts' fp modes.
    for ( int i = 0, modeNum = 0; i < NumRoundingModes; ++i ) {
        const int roundingMode = RoundingModes[i];
        SetRoundingMode( roundingMode );
        for( int j = 0; j < NumSseModes; ++j, ++modeNum ) {
            const int sseMode = SseModes[j];
            SetSseMode( sseMode );

            // The fp_settings trait makes the context capture the FP settings
            // that are active at its construction.
            contexts[modeNum] = new tbb::task_group_context( tbb::task_group_context::isolated,
                    tbb::task_group_context::default_traits | tbb::task_group_context::fp_settings );
            roundingModes[modeNum] = roundingMode;
            sseModes[modeNum] = sseMode;
        }
    }
    // Prepare arenas' fp modes.
    for ( int arenaNum = 0; arenaNum < numArenas; ++arenaNum ) {
        roundingModes[numModes+arenaNum] = roundingModes[arenaNum%numModes];
        sseModes[numModes+arenaNum] = sseModes[arenaNum%numModes];
    }
    NativeParallelFor( numArenas, TestContextFpuEnvNativeLoopBody() );
#if TBB_USE_EXCEPTIONS
    NativeParallelFor( numArenas, TestContextFpuEnvEhNativeLoopBody() );
#endif
    for ( int modeNum = 0; modeNum < numModes; ++modeNum )
        delete contexts[modeNum];
}
| 340 | |
// File-scope isolated context shared by all threads of the global-context test.
tbb::task_group_context glbIsolatedCtx( tbb::task_group_context::isolated );
// Id of the thread whose FP mode the context is expected to have latched;
// -1 until the first thread allocates a root task under the context.
int glbIsolatedCtxMode = -1;
| 343 | |
// Task bound to glbIsolatedCtx: regardless of which thread spawned it, it
// must observe the FP mode of the first thread that attached a root to the
// shared isolated context.
struct TestGlobalIsolatedContextTask : public tbb::task {
    tbb::task* execute() __TBB_override {
        AssertFPMode( glbIsolatedCtxMode );
        return NULL;
    }
};
| 350 | |
| 351 | #include "tbb/mutex.h" |
| 352 | |
struct TestGlobalIsolatedContextNativeLoopBody {
    void operator()( int threadId ) const {
        // Install a per-thread FP mode for the duration of this scope.
        FPModeContext fpGuard( threadId );
        static tbb::mutex rootAllocMutex;
        // Serialize root allocation so recording the winning thread id and
        // binding the context to that thread's FP mode happen atomically.
        rootAllocMutex.lock();
        if ( glbIsolatedCtxMode == -1 )
            glbIsolatedCtxMode = threadId;
        tbb::task &root = *new (tbb::task::allocate_root( glbIsolatedCtx )) TestGlobalIsolatedContextTask();
        rootAllocMutex.unlock();
        tbb::task::spawn_root_and_wait( root );
    }
};
| 365 | |
| 366 | void TestGlobalIsolatedContext() { |
| 367 | ASSERT( numArenas > 1, NULL ); |
| 368 | NativeParallelFor( numArenas, TestGlobalIsolatedContextNativeLoopBody() ); |
| 369 | } |
| 370 | #endif /* __TBB_FP_CONTEXT */ |
| 371 | |
// Test driver: exercises the mode helpers, master-to-worker FPU propagation,
// SSE status-bit isolation, and (when supported) FP settings carried by
// task_group_contexts.
int TestMain () {
    TestCpuCtlEnvApi();
    TestFpuEnvPropagation();
    CheckNoSseStatusPropagation();
#if __TBB_FP_CONTEXT
    TestContextFpuEnv();
    TestGlobalIsolatedContext();
#endif
    return Harness::Done;
}
| 382 | |