1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
// The test checks whether vectorization happens when the PPL-style
// parallel_for is used. The test relies on two ideas:
// 1. "pragma always assert" makes the compiler issue a compile-time error if
// the loop cannot be vectorized;
// 2. "#pragma ivdep" has a peculiarity that can also be used to detect
// successful vectorization. See the comment below.
23 | |
// For now, only Intel(R) C++ Compiler 14.0 and later is supported. Also, it
// makes no sense to run the test in debug mode.
26 | #define HARNESS_SKIP_TEST ( __INTEL_COMPILER < 1400 || TBB_USE_DEBUG ) |
27 | |
28 | // __TBB_ASSERT_ON_VECTORIZATION_FAILURE enables "pragma always assert" for |
29 | // Intel(R) C++ Compiler. |
30 | #define __TBB_ASSERT_ON_VECTORIZATION_FAILURE ( !HARNESS_SKIP_TEST ) |
31 | #include "tbb/parallel_for.h" |
32 | #include "tbb/task_scheduler_init.h" |
33 | |
34 | #include "harness.h" |
35 | #include "harness_assert.h" |
36 | |
37 | #include <algorithm> |
38 | |
39 | class Body : NoAssign { |
40 | int *out_, *in_; |
41 | public: |
42 | Body( int* out, int *in ) : out_(out), in_(in) {} |
43 | void operator() ( int i ) const { |
44 | out_[i] = in_[i] + 1; |
45 | } |
46 | }; |
47 | |
48 | int TestMain () { |
49 | // Should be big enough that the partitioner generated at least a one range |
50 | // with a size greater than 1. See the comment below. |
51 | const int N = 10000; |
52 | tbb::task_scheduler_init init(1); |
53 | int array1[N]; |
54 | std::fill( array1, array1+N, 0 ); |
55 | // Use the same array (with a shift) for both input and output |
56 | tbb::parallel_for( 0, N-1, Body(array1+1, array1) ); |
57 | |
58 | int array2[N]; |
59 | std::fill( array2, array2+N, 0 ); |
60 | Body b(array2+1, array2); |
61 | for ( int i=0; i<N-1; ++i ) |
62 | b(i); |
63 | |
64 | // The ppl-style parallel_for implementation has pragma ivdep before the |
65 | // range loop. This pragma suppresses the dependency of overlapping arrays |
66 | // in "Body". Thus the vectorizer should generate code that produces incorrect |
67 | // results. |
68 | ASSERT( !std::equal( array1, array1+N, array2 ), "The loop was not vectorized." ); |
69 | |
70 | return Harness::Done; |
71 | } |
72 | |