| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | // Test mixing OpenMP and TBB |
| 18 | |
/* SCR #471
   The code below is a workaround for building this test with the Microsoft
   compiler inside an Intel Compiler environment. In that setup the wrong
   "omp.h" file is included and the manifest section is missing from the
   .exe file, so it is restored here.

   As of Visual Studio 2010, crtassem.h is no longer shipped.
*/
| 26 | #if !defined(__INTEL_COMPILER) && _MSC_VER >= 1400 && _MSC_VER < 1600 |
| 27 | #include <crtassem.h> |
| 28 | #if !defined(_OPENMP) |
| 29 | #define _OPENMP |
| 30 | #if defined(_DEBUG) |
| 31 | #pragma comment(lib, "vcompd") |
| 32 | #else // _DEBUG |
| 33 | #pragma comment(lib, "vcomp") |
| 34 | #endif // _DEBUG |
| 35 | #endif // _OPENMP |
| 36 | |
| 37 | #if defined(_DEBUG) |
| 38 | #if defined(_M_IX86) |
| 39 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 40 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \ |
| 41 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 42 | "processorArchitecture='x86' " \ |
| 43 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 44 | #elif defined(_M_X64) |
| 45 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 46 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \ |
| 47 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 48 | "processorArchitecture='amd64' " \ |
| 49 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 50 | #elif defined(_M_IA64) |
| 51 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 52 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".DebugOpenMP' " \ |
| 53 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 54 | "processorArchitecture='ia64' " \ |
| 55 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 56 | #endif |
| 57 | #else // _DEBUG |
| 58 | #if defined(_M_IX86) |
| 59 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 60 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \ |
| 61 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 62 | "processorArchitecture='x86' " \ |
| 63 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 64 | #elif defined(_M_X64) |
| 65 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 66 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \ |
| 67 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 68 | "processorArchitecture='amd64' " \ |
| 69 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 70 | #elif defined(_M_IA64) |
| 71 | #pragma comment(linker,"/manifestdependency:\"type='win32' " \ |
| 72 | "name='" __LIBRARIES_ASSEMBLY_NAME_PREFIX ".OpenMP' " \ |
| 73 | "version='" _CRT_ASSEMBLY_VERSION "' " \ |
| 74 | "processorArchitecture='ia64' " \ |
| 75 | "publicKeyToken='" _VC_ASSEMBLY_PUBLICKEYTOKEN "'\"") |
| 76 | #endif |
| 77 | #endif // _DEBUG |
| 78 | #define _OPENMP_NOFORCE_MANIFEST |
| 79 | #endif |
| 80 | |
| 81 | #include <omp.h> |
| 82 | |
| 83 | |
typedef short T;

//! Single-threaded reference convolution.
/** Stores the m+n-1 elements of the convolution of a[0..m) with b[0..n) into c.
    Every output element is accumulated in a variable of type T, so each addition
    is narrowed back to T. */
void SerialConvolve( T c[], const T a[], int m, const T b[], int n ) {
    const int out_len = m+n-1;
    for( int k=0; k<out_len; ++k ) {
        // Restrict j so that both a[j] and b[k-j] stay inside their arrays.
        int lo = 0;
        if( k>=n )
            lo = k-n+1;
        int hi = m;
        if( k<m )
            hi = k+1;
        T acc = 0;
        for( int j=lo; j<hi; ++j ) {
            acc += a[j]*b[k-j];
        }
        c[k] = acc;
    }
}
| 96 | |
| 97 | #define OPENMP_ASYNC_SHUTDOWN_BROKEN (__INTEL_COMPILER<=1400 && __linux__) |
| 98 | #define TBB_PREVIEW_WAITING_FOR_WORKERS 1 |
| 99 | |
| 100 | #include "tbb/blocked_range.h" |
| 101 | #include "tbb/parallel_for.h" |
| 102 | #include "tbb/parallel_reduce.h" |
| 103 | #include "tbb/task_scheduler_init.h" |
| 104 | #include "harness.h" |
| 105 | |
| 106 | using namespace tbb; |
| 107 | |
| 108 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
| 109 | // Suppress overzealous warning about short+=short |
| 110 | #pragma warning( push ) |
| 111 | #pragma warning( disable: 4244 ) |
| 112 | #endif |
| 113 | |
| 114 | class InnerBody: NoAssign { |
| 115 | const T* my_a; |
| 116 | const T* my_b; |
| 117 | const int i; |
| 118 | public: |
| 119 | T sum; |
| 120 | InnerBody( T /*c*/[], const T a[], const T b[], int ii ) : |
| 121 | my_a(a), my_b(b), i(ii), sum(0) |
| 122 | {} |
| 123 | InnerBody( InnerBody& x, split ) : |
| 124 | my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) |
| 125 | { |
| 126 | } |
| 127 | void join( InnerBody& x ) {sum += x.sum;} |
| 128 | void operator()( const blocked_range<int>& range ) { |
| 129 | for( int j=range.begin(); j!=range.end(); ++j ) |
| 130 | sum += my_a[j]*my_b[i-j]; |
| 131 | } |
| 132 | }; |
| 133 | |
| 134 | #if _MSC_VER && !defined(__INTEL_COMPILER) |
| 135 | #pragma warning( pop ) |
| 136 | #endif |
| 137 | |
| 138 | //! Test OpenMMP loop around TBB loop |
| 139 | void OpenMP_TBB_Convolve( T c[], const T a[], int m, const T b[], int n ) { |
| 140 | REMARK("testing OpenMP loop around TBB loop\n" ); |
| 141 | #pragma omp parallel |
| 142 | { |
| 143 | task_scheduler_init init; |
| 144 | #pragma omp for |
| 145 | for( int i=0; i<m+n-1; ++i ) { |
| 146 | int start = i<n ? 0 : i-n+1; |
| 147 | int finish = i<m ? i+1 : m; |
| 148 | InnerBody body(c,a,b,i); |
| 149 | parallel_reduce( blocked_range<int>(start,finish,10), body ); |
| 150 | c[i] = body.sum; |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | class OuterBody: NoAssign { |
| 156 | const T* my_a; |
| 157 | const T* my_b; |
| 158 | T* my_c; |
| 159 | const int m; |
| 160 | const int n; |
| 161 | public: |
| 162 | OuterBody( T c[], const T a[], int m_, const T b[], int n_ ) : |
| 163 | my_a(a), my_b(b), my_c(c), m(m_), n(n_) |
| 164 | {} |
| 165 | void operator()( const blocked_range<int>& range ) const { |
| 166 | for( int i=range.begin(); i!=range.end(); ++i ) { |
| 167 | int start = i<n ? 0 : i-n+1; |
| 168 | int finish = i<m ? i+1 : m; |
| 169 | T sum = 0; |
| 170 | #pragma omp parallel for reduction(+:sum) |
| 171 | for( int j=start; j<finish; ++j ) |
| 172 | sum += my_a[j]*my_b[i-j]; |
| 173 | my_c[i] = sum; |
| 174 | } |
| 175 | } |
| 176 | }; |
| 177 | |
| 178 | //! Test TBB loop around OpenMP loop |
| 179 | void TBB_OpenMP_Convolve( T c[], const T a[], int m, const T b[], int n ) { |
| 180 | REMARK("testing TBB loop around OpenMP loop\n" ); |
| 181 | parallel_for( blocked_range<int>(0,m+n-1,10), OuterBody( c, a, m, b, n ) ); |
| 182 | } |
| 183 | |
| 184 | #if __INTEL_COMPILER |
| 185 | // A regression test on OpenMP affinity settings affecting TBB. |
| 186 | // Testing only with __INTEL_COMPILER because we do not provide interoperability with other OpenMP implementations. |
| 187 | |
| 188 | #define __TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP (KMP_VERSION_BUILD<20150922) |
| 189 | |
| 190 | void TestNumThreads() { |
| 191 | #if __TBB_NUM_THREADS_AFFECTED_BY_LIBIOMP |
| 192 | REPORT("Known issue: the default number of threads is affected by OpenMP affinity\n" ); |
| 193 | #else |
| 194 | REMARK("Compare default number of threads for OpenMP and TBB\n" ); |
| 195 | Harness::SetEnv("KMP_AFFINITY" ,"compact" ); |
| 196 | // Make an OpenMP call before initializing TBB |
| 197 | int omp_nthreads = omp_get_max_threads(); |
| 198 | #pragma omp parallel |
| 199 | {} |
| 200 | int tbb_nthreads = tbb::task_scheduler_init::default_num_threads(); |
| 201 | REMARK("# of threads (OpenMP): %d\n" , omp_nthreads); |
| 202 | REMARK("# of threads (TBB): %d\n" , tbb_nthreads); |
| 203 | // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads. |
| 204 | // If it's not true on some platforms, the test will need to be adjusted. |
| 205 | ASSERT( tbb_nthreads==omp_nthreads, "Initialization of TBB is possibly affected by OpenMP" ); |
| 206 | #endif |
| 207 | } |
| 208 | #endif // __INTEL_COMPILER |
| 209 | |
| 210 | const int M = 17*17; |
| 211 | const int N = 13*13; |
| 212 | T A[M], B[N]; |
| 213 | T expected[M+N], actual[M+N]; |
| 214 | |
| 215 | template <class Func> |
| 216 | void RunTest( Func F, int m, int n, int p, bool wait_workers = false ) { |
| 217 | task_scheduler_init init( p ); |
| 218 | memset( actual, -1, (m+n)*sizeof(T) ); |
| 219 | F( actual, A, m, B, n ); |
| 220 | ASSERT( memcmp(actual, expected, (m+n-1)*sizeof(T))==0, NULL ); |
| 221 | if (wait_workers) init.blocking_terminate(std::nothrow); |
| 222 | else init.terminate(); |
| 223 | } |
| 224 | |
| 225 | int TestMain () { |
| 226 | #if __INTEL_COMPILER |
| 227 | TestNumThreads(); // Testing initialization-related behavior; must be the first |
| 228 | #endif // __INTEL_COMPILER |
| 229 | MinThread = 1; |
| 230 | for( int p=MinThread; p<=MaxThread; ++p ) { |
| 231 | for( int m=1; m<=M; m*=17 ) { |
| 232 | for( int n=1; n<=N; n*=13 ) { |
| 233 | for( int i=0; i<m; ++i ) A[i] = T(1+i/5); |
| 234 | for( int i=0; i<n; ++i ) B[i] = T(1+i/7); |
| 235 | SerialConvolve( expected, A, m, B, n ); |
| 236 | RunTest( OpenMP_TBB_Convolve, m, n, p ); |
| 237 | RunTest( TBB_OpenMP_Convolve, m, n, p |
| 238 | #if OPENMP_ASYNC_SHUTDOWN_BROKEN |
| 239 | ,true |
| 240 | #endif |
| 241 | ); |
| 242 | } |
| 243 | } |
| 244 | } |
| 245 | return Harness::Done; |
| 246 | } |
| 247 | |