/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

// Test mixing OpenMP and TBB

/* SCR #471
   Workaround for compiling this test with the Microsoft compiler inside an
   Intel Compiler environment. In that setup the wrong "omp.h" file is
   included and the manifest section is missing from the .exe file, so it is
   restored here.

   As of Visual Studio 2010, crtassem.h is no longer shipped.
 */
// Active only for the Microsoft compiler (not Intel) with
// 1400 <= _MSC_VER < 1600, i.e. the versions that still ship crtassem.h.
#if !defined(__INTEL_COMPILER) && _MSC_VER >= 1400 && _MSC_VER < 1600
    #include <crtassem.h>
    // If the compiler was not told to enable OpenMP, declare it enabled and
    // link the matching (debug or release) OpenMP runtime library explicitly.
    #if !defined(_OPENMP)
        #define _OPENMP
        #if defined(_DEBUG)
            #pragma comment(lib, "vcompd")
        #else   // _DEBUG
            #pragma comment(lib, "vcomp")
        #endif  // _DEBUG
    #endif // _OPENMP

    // Emit the manifest dependency on the OpenMP assembly by hand, choosing the
    // assembly name by build flavor and the processorArchitecture by target.
    #if defined(_DEBUG)
        #if defined(_M_IX86)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='x86' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #elif defined(_M_X64)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='amd64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #elif defined(_M_IA64)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='ia64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #endif
    #else   // _DEBUG
        #if defined(_M_IX86)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='x86' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #elif defined(_M_X64)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='amd64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #elif defined(_M_IA64)
            #pragma comment(linker,"/manifestdependency:\"type='win32' " \
                "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \
                "version='" _CRT_ASSEMBLY_VERSION "' " \
                "processorArchitecture='ia64' " \
                "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"")
        #endif
    #endif  // _DEBUG
    // NOTE(review): presumably tells <omp.h> not to emit its own manifest
    // dependency, since one was written above - confirm against the MSVC header.
    #define _OPENMP_NOFORCE_MANIFEST
#endif

// Must come after the workaround above, which sets macros this header may test.
#include <omp.h>


//! Element type used by every convolution in this test.
typedef short T;

//! Reference (serial) convolution of a[0..m) with b[0..n).
/** Writes m+n-1 results into c: c[i] is the sum of a[j]*b[i-j] over every j
    for which both indices are in range. The sum is accumulated in T, so each
    addition is narrowed back to T exactly as in the parallel variants. */
void SerialConvolve( T c[], const T a[], int m, const T b[], int n ) {
    for( int i=0; i<m+n-1; ++i ) {
        // Restrict j so that 0 <= j < m and 0 <= i-j < n.
        int start = i<n ? 0 : i-n+1;
        int finish = i<m ? i+1 : m;
        T sum = 0;
        for( int j=start; j<finish; ++j )
            sum += a[j]*b[i-j];
        c[i] = sum;
    }
}

97#define OPENMP_ASYNC_SHUTDOWN_BROKEN (__INTEL_COMPILER<=1400 && __linux__)
98#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
99
100#include "tbb/blocked_range.h"
101#include "tbb/parallel_for.h"
102#include "tbb/parallel_reduce.h"
103#include "tbb/task_scheduler_init.h"
104#include "harness.h"
105
106using namespace tbb;
107
108#if _MSC_VER && !defined(__INTEL_COMPILER)
109 // Suppress overzealous warning about short+=short
110 #pragma warning( push )
111 #pragma warning( disable: 4244 )
112#endif
113
114class InnerBody: NoAssign {
115 const T* my_a;
116 const T* my_b;
117 const int i;
118public:
119 T sum;
120 InnerBody( T /*c*/[], const T a[], const T b[], int ii ) :
121 my_a(a), my_b(b), i(ii), sum(0)
122 {}
123 InnerBody( InnerBody& x, split ) :
124 my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0)
125 {
126 }
127 void join( InnerBody& x ) {sum += x.sum;}
128 void operator()( const blocked_range<int>& range ) {
129 for( int j=range.begin(); j!=range.end(); ++j )
130 sum += my_a[j]*my_b[i-j];
131 }
132};
133
134#if _MSC_VER && !defined(__INTEL_COMPILER)
135 #pragma warning( pop )
136#endif
137
138//! Test OpenMMP loop around TBB loop
139void OpenMP_TBB_Convolve( T c[], const T a[], int m, const T b[], int n ) {
140 REMARK("testing OpenMP loop around TBB loop\n");
141#pragma omp parallel
142 {
143 task_scheduler_init init;
144#pragma omp for
145 for( int i=0; i<m+n-1; ++i ) {
146 int start = i<n ? 0 : i-n+1;
147 int finish = i<m ? i+1 : m;
148 InnerBody body(c,a,b,i);
149 parallel_reduce( blocked_range<int>(start,finish,10), body );
150 c[i] = body.sum;
151 }
152 }
153}
154
155class OuterBody: NoAssign {
156 const T* my_a;
157 const T* my_b;
158 T* my_c;
159 const int m;
160 const int n;
161public:
162 OuterBody( T c[], const T a[], int m_, const T b[], int n_ ) :
163 my_a(a), my_b(b), my_c(c), m(m_), n(n_)
164 {}
165 void operator()( const blocked_range<int>& range ) const {
166 for( int i=range.begin(); i!=range.end(); ++i ) {
167 int start = i<n ? 0 : i-n+1;
168 int finish = i<m ? i+1 : m;
169 T sum = 0;
170#pragma omp parallel for reduction(+:sum)
171 for( int j=start; j<finish; ++j )
172 sum += my_a[j]*my_b[i-j];
173 my_c[i] = sum;
174 }
175 }
176};
177
178//! Test TBB loop around OpenMP loop
179void TBB_OpenMP_Convolve( T c[], const T a[], int m, const T b[], int n ) {
180 REMARK("testing TBB loop around OpenMP loop\n");
181 parallel_for( blocked_range<int>(0,m+n-1,10), OuterBody( c, a, m, b, n ) );
182}
183
#if __INTEL_COMPILER
// A regression test on OpenMP affinity settings affecting TBB.
// Testing only with __INTEL_COMPILER because we do not provide interoperability with other OpenMP implementations.

// Nonzero for OpenMP runtime builds older than 20150922, for which the check
// below is replaced by a "known issue" report.
#define __TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP (KMP_VERSION_BUILD<20150922)

//! Check that TBB's default thread count matches OpenMP's when KMP_AFFINITY is set.
void TestNumThreads() {
#if __TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP
    REPORT("Known issue: the default number of threads is affected by OpenMP affinity\n");
#else
    REMARK("Compare default number of threads for OpenMP and TBB\n");
    Harness::SetEnv("KMP_AFFINITY","compact");
    // Make an OpenMP call before initializing TBB
    int omp_nthreads = omp_get_max_threads();
    // Run an empty parallel region before TBB is queried for its default.
    #pragma omp parallel
    {}
    int tbb_nthreads = tbb::task_scheduler_init::default_num_threads();
    REMARK("# of threads (OpenMP): %d\n", omp_nthreads);
    REMARK("# of threads (TBB): %d\n", tbb_nthreads);
    // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
    // If it's not true on some platforms, the test will need to be adjusted.
    ASSERT( tbb_nthreads==omp_nthreads, "Initialization of TBB is possibly affected by OpenMP");
#endif
}
#endif // __INTEL_COMPILER

210const int M = 17*17;
211const int N = 13*13;
212T A[M], B[N];
213T expected[M+N], actual[M+N];
214
215template <class Func>
216void RunTest( Func F, int m, int n, int p, bool wait_workers = false ) {
217 task_scheduler_init init( p );
218 memset( actual, -1, (m+n)*sizeof(T) );
219 F( actual, A, m, B, n );
220 ASSERT( memcmp(actual, expected, (m+n-1)*sizeof(T))==0, NULL );
221 if (wait_workers) init.blocking_terminate(std::nothrow);
222 else init.terminate();
223}
224
225int TestMain () {
226#if __INTEL_COMPILER
227 TestNumThreads(); // Testing initialization-related behavior; must be the first
228#endif // __INTEL_COMPILER
229 MinThread = 1;
230 for( int p=MinThread; p<=MaxThread; ++p ) {
231 for( int m=1; m<=M; m*=17 ) {
232 for( int n=1; n<=N; n*=13 ) {
233 for( int i=0; i<m; ++i ) A[i] = T(1+i/5);
234 for( int i=0; i<n; ++i ) B[i] = T(1+i/7);
235 SerialConvolve( expected, A, m, B, n );
236 RunTest( OpenMP_TBB_Convolve, m, n, p );
237 RunTest( TBB_OpenMP_Convolve, m, n, p
238#if OPENMP_ASYNC_SHUTDOWN_BROKEN
239 ,true
240#endif
241 );
242 }
243 }
244 }
245 return Harness::Done;
246}
247