1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
// The test checks whether vectorization happens when the PPL-style
// parallel_for is used. The test relies on two ideas:
//   1. "pragma always assert" issues a compile-time error if the loop
//      cannot be vectorized;
//   2. "#pragma ivdep" has a peculiarity that can also be used to detect
//      successful vectorization. See the comment below.
23
// For now, only Intel(R) C++ Compiler 14.0 and later is supported. Also,
// there is no point in running the test in debug mode.
26#define HARNESS_SKIP_TEST ( __INTEL_COMPILER < 1400 || TBB_USE_DEBUG )
27
28// __TBB_ASSERT_ON_VECTORIZATION_FAILURE enables "pragma always assert" for
29// Intel(R) C++ Compiler.
30#define __TBB_ASSERT_ON_VECTORIZATION_FAILURE ( !HARNESS_SKIP_TEST )
31#include "tbb/parallel_for.h"
32#include "tbb/task_scheduler_init.h"
33
34#include "harness.h"
35#include "harness_assert.h"
36
37#include <algorithm>
38
39class Body : NoAssign {
40 int *out_, *in_;
41public:
42 Body( int* out, int *in ) : out_(out), in_(in) {}
43 void operator() ( int i ) const {
44 out_[i] = in_[i] + 1;
45 }
46};
47
48int TestMain () {
49 // Should be big enough that the partitioner generated at least a one range
50 // with a size greater than 1. See the comment below.
51 const int N = 10000;
52 tbb::task_scheduler_init init(1);
53 int array1[N];
54 std::fill( array1, array1+N, 0 );
55 // Use the same array (with a shift) for both input and output
56 tbb::parallel_for( 0, N-1, Body(array1+1, array1) );
57
58 int array2[N];
59 std::fill( array2, array2+N, 0 );
60 Body b(array2+1, array2);
61 for ( int i=0; i<N-1; ++i )
62 b(i);
63
64 // The ppl-style parallel_for implementation has pragma ivdep before the
65 // range loop. This pragma suppresses the dependency of overlapping arrays
66 // in "Body". Thus the vectorizer should generate code that produces incorrect
67 // results.
68 ASSERT( !std::equal( array1, array1+N, array2 ), "The loop was not vectorized." );
69
70 return Harness::Done;
71}
72