| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
// The test checks whether vectorization happens when the PPL-style
// parallel_for is used. The test implements two ideas:
// 1. "pragma always assert" issues a compile-time error if the vectorization
//    cannot be produced;
// 2. "#pragma ivdep" has a peculiarity which can also be used for detection of
//    successful vectorization. See the comment below.
| 23 | |
// For now, only Intel(R) C++ Compiler 14.0 and later is supported. Also, there
// is no sense in running the test in debug mode.
| 26 | #define HARNESS_SKIP_TEST ( __INTEL_COMPILER < 1400 || TBB_USE_DEBUG ) |
| 27 | |
| 28 | // __TBB_ASSERT_ON_VECTORIZATION_FAILURE enables "pragma always assert" for |
| 29 | // Intel(R) C++ Compiler. |
| 30 | #define __TBB_ASSERT_ON_VECTORIZATION_FAILURE ( !HARNESS_SKIP_TEST ) |
| 31 | #include "tbb/parallel_for.h" |
| 32 | #include "tbb/task_scheduler_init.h" |
| 33 | |
| 34 | #include "harness.h" |
| 35 | #include "harness_assert.h" |
| 36 | |
| 37 | #include <algorithm> |
| 38 | |
| 39 | class Body : NoAssign { |
| 40 | int *out_, *in_; |
| 41 | public: |
| 42 | Body( int* out, int *in ) : out_(out), in_(in) {} |
| 43 | void operator() ( int i ) const { |
| 44 | out_[i] = in_[i] + 1; |
| 45 | } |
| 46 | }; |
| 47 | |
| 48 | int TestMain () { |
| 49 | // Should be big enough that the partitioner generated at least a one range |
| 50 | // with a size greater than 1. See the comment below. |
| 51 | const int N = 10000; |
| 52 | tbb::task_scheduler_init init(1); |
| 53 | int array1[N]; |
| 54 | std::fill( array1, array1+N, 0 ); |
| 55 | // Use the same array (with a shift) for both input and output |
| 56 | tbb::parallel_for( 0, N-1, Body(array1+1, array1) ); |
| 57 | |
| 58 | int array2[N]; |
| 59 | std::fill( array2, array2+N, 0 ); |
| 60 | Body b(array2+1, array2); |
| 61 | for ( int i=0; i<N-1; ++i ) |
| 62 | b(i); |
| 63 | |
| 64 | // The ppl-style parallel_for implementation has pragma ivdep before the |
| 65 | // range loop. This pragma suppresses the dependency of overlapping arrays |
| 66 | // in "Body". Thus the vectorizer should generate code that produces incorrect |
| 67 | // results. |
| 68 | ASSERT( !std::equal( array1, array1+N, array2 ), "The loop was not vectorized." ); |
| 69 | |
| 70 | return Harness::Done; |
| 71 | } |
| 72 | |