1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | // Test mixing OpenMP and TBB |
18 | |
/* SCR #471
   Workaround for compiling this test with the Microsoft compiler inside an
   Intel Compiler environment. In that setup the wrong "omp.h" is picked up
   and the OpenMP manifest section is missing from the .exe file, so both the
   OpenMP runtime library and the manifest dependency are requested here.

   Restricted to _MSC_VER in [1400, 1600): as of Visual Studio 2010,
   crtassem.h is no longer shipped.
*/
#ifndef __INTEL_COMPILER
#if _MSC_VER >= 1400 && _MSC_VER < 1600
    #include <crtassem.h>

    // Pretend OpenMP is enabled and link the matching vcomp runtime.
    #ifndef _OPENMP
        #define _OPENMP
        #ifdef _DEBUG
            #pragma comment(lib, "vcompd")
        #else
            #pragma comment(lib, "vcomp")
        #endif
    #endif // _OPENMP

    // Manifest dependency on the OpenMP assembly, one variant per target
    // architecture and per debug/release flavour.
    #if defined(_M_IX86)
        #ifdef _DEBUG
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='x86' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #else
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='x86' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #endif
    #elif defined(_M_X64)
        #ifdef _DEBUG
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='amd64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #else
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='amd64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #endif
    #elif defined(_M_IA64)
        #ifdef _DEBUG
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='ia64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #else
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='ia64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #endif
    #endif

    #define _OPENMP_NOFORCE_MANIFEST
#endif // _MSC_VER in [1400, 1600)
#endif // !__INTEL_COMPILER
80 | |
81 | #include <omp.h> |
82 | |
83 | |
//! Element type used for all convolution inputs and outputs.
typedef short T;

//! Single-threaded reference convolution.
/** Writes the m+n-1 samples of the convolution of a[0..m) with b[0..n) to c.
    Each running sum is kept in a T, so it is narrowed to T after every
    accumulated product. */
void SerialConvolve( T c[], const T a[], int m, const T b[], int n ) {
    const int out_len = m+n-1;
    for( int k=0; k<out_len; ++k ) {
        // Restrict j so that both a[j] and b[k-j] are valid elements.
        int lo = 0;
        if( k>=n )
            lo = k-n+1;
        int hi = m;
        if( k<m )
            hi = k+1;

        T acc = 0;
        int j = lo;
        while( j<hi ) {
            acc += a[j]*b[k-j];
            ++j;
        }
        c[k] = acc;
    }
}
96 | |
97 | #define OPENMP_ASYNC_SHUTDOWN_BROKEN (__INTEL_COMPILER<=1400 && __linux__) |
98 | #define TBB_PREVIEW_WAITING_FOR_WORKERS 1 |
99 | |
100 | #include "tbb/blocked_range.h" |
101 | #include "tbb/parallel_for.h" |
102 | #include "tbb/parallel_reduce.h" |
103 | #include "tbb/task_scheduler_init.h" |
104 | #include "harness.h" |
105 | |
106 | using namespace tbb; |
107 | |
108 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
109 | // Suppress overzealous warning about short+=short |
110 | #pragma warning( push ) |
111 | #pragma warning( disable: 4244 ) |
112 | #endif |
113 | |
114 | class InnerBody: NoAssign { |
115 | const T* my_a; |
116 | const T* my_b; |
117 | const int i; |
118 | public: |
119 | T sum; |
120 | InnerBody( T /*c*/[], const T a[], const T b[], int ii ) : |
121 | my_a(a), my_b(b), i(ii), sum(0) |
122 | {} |
123 | InnerBody( InnerBody& x, split ) : |
124 | my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) |
125 | { |
126 | } |
127 | void join( InnerBody& x ) {sum += x.sum;} |
128 | void operator()( const blocked_range<int>& range ) { |
129 | for( int j=range.begin(); j!=range.end(); ++j ) |
130 | sum += my_a[j]*my_b[i-j]; |
131 | } |
132 | }; |
133 | |
134 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
135 | #pragma warning( pop ) |
136 | #endif |
137 | |
138 | //! Test OpenMMP loop around TBB loop |
139 | void OpenMP_TBB_Convolve( T c[], const T a[], int m, const T b[], int n ) { |
140 | REMARK("testing OpenMP loop around TBB loop\n" ); |
141 | #pragma omp parallel |
142 | { |
143 | task_scheduler_init init; |
144 | #pragma omp for |
145 | for( int i=0; i<m+n-1; ++i ) { |
146 | int start = i<n ? 0 : i-n+1; |
147 | int finish = i<m ? i+1 : m; |
148 | InnerBody body(c,a,b,i); |
149 | parallel_reduce( blocked_range<int>(start,finish,10), body ); |
150 | c[i] = body.sum; |
151 | } |
152 | } |
153 | } |
154 | |
155 | class OuterBody: NoAssign { |
156 | const T* my_a; |
157 | const T* my_b; |
158 | T* my_c; |
159 | const int m; |
160 | const int n; |
161 | public: |
162 | OuterBody( T c[], const T a[], int m_, const T b[], int n_ ) : |
163 | my_a(a), my_b(b), my_c(c), m(m_), n(n_) |
164 | {} |
165 | void operator()( const blocked_range<int>& range ) const { |
166 | for( int i=range.begin(); i!=range.end(); ++i ) { |
167 | int start = i<n ? 0 : i-n+1; |
168 | int finish = i<m ? i+1 : m; |
169 | T sum = 0; |
170 | #pragma omp parallel for reduction(+:sum) |
171 | for( int j=start; j<finish; ++j ) |
172 | sum += my_a[j]*my_b[i-j]; |
173 | my_c[i] = sum; |
174 | } |
175 | } |
176 | }; |
177 | |
178 | //! Test TBB loop around OpenMP loop |
179 | void TBB_OpenMP_Convolve( T c[], const T a[], int m, const T b[], int n ) { |
180 | REMARK("testing TBB loop around OpenMP loop\n" ); |
181 | parallel_for( blocked_range<int>(0,m+n-1,10), OuterBody( c, a, m, b, n ) ); |
182 | } |
183 | |
#if __INTEL_COMPILER
// Regression test: OpenMP affinity settings must not affect TBB.
// Built only with __INTEL_COMPILER because we do not provide interoperability with other OpenMP implementations.

// Nonzero when KMP_VERSION_BUILD is older than 20150922; in that case the
// comparison below is replaced by a known-issue report.
#define __TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP (KMP_VERSION_BUILD<20150922)

//! Check that TBB's default thread count equals OpenMP's after OpenMP has been used first.
void TestNumThreads() {
#if !__TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP
    REMARK("Compare default number of threads for OpenMP and TBB\n" );
    Harness::SetEnv("KMP_AFFINITY" ,"compact" );

    // Make OpenMP calls (a query and an empty parallel region) before initializing TBB.
    const int omp_nthreads = omp_get_max_threads();
    #pragma omp parallel
    {}

    const int tbb_nthreads = task_scheduler_init::default_num_threads();
    REMARK("# of threads (OpenMP): %d\n" , omp_nthreads);
    REMARK("# of threads (TBB): %d\n" , tbb_nthreads);

    // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
    // If that does not hold on some platform, the test will need to be adjusted.
    ASSERT( tbb_nthreads==omp_nthreads, "Initialization of TBB is possibly affected by OpenMP" );
#else
    REPORT("Known issue: the default number of threads is affected by OpenMP affinity\n" );
#endif
}
#endif // __INTEL_COMPILER
209 | |
210 | const int M = 17*17; |
211 | const int N = 13*13; |
212 | T A[M], B[N]; |
213 | T expected[M+N], actual[M+N]; |
214 | |
215 | template <class Func> |
216 | void RunTest( Func F, int m, int n, int p, bool wait_workers = false ) { |
217 | task_scheduler_init init( p ); |
218 | memset( actual, -1, (m+n)*sizeof(T) ); |
219 | F( actual, A, m, B, n ); |
220 | ASSERT( memcmp(actual, expected, (m+n-1)*sizeof(T))==0, NULL ); |
221 | if (wait_workers) init.blocking_terminate(std::nothrow); |
222 | else init.terminate(); |
223 | } |
224 | |
225 | int TestMain () { |
226 | #if __INTEL_COMPILER |
227 | TestNumThreads(); // Testing initialization-related behavior; must be the first |
228 | #endif // __INTEL_COMPILER |
229 | MinThread = 1; |
230 | for( int p=MinThread; p<=MaxThread; ++p ) { |
231 | for( int m=1; m<=M; m*=17 ) { |
232 | for( int n=1; n<=N; n*=13 ) { |
233 | for( int i=0; i<m; ++i ) A[i] = T(1+i/5); |
234 | for( int i=0; i<n; ++i ) B[i] = T(1+i/7); |
235 | SerialConvolve( expected, A, m, B, n ); |
236 | RunTest( OpenMP_TBB_Convolve, m, n, p ); |
237 | RunTest( TBB_OpenMP_Convolve, m, n, p |
238 | #if OPENMP_ASYNC_SHUTDOWN_BROKEN |
239 | ,true |
240 | #endif |
241 | ); |
242 | } |
243 | } |
244 | } |
245 | return Harness::Done; |
246 | } |
247 | |