1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | #if __TBB_MIC_OFFLOAD |
18 | #pragma offload_attribute (push,target(mic)) |
19 | #endif // __TBB_MIC_OFFLOAD |
20 | |
21 | #include <iostream> |
22 | #include <string> |
23 | #include <vector> |
24 | #include <algorithm> //std::max |
25 | |
26 | #include "tbb/parallel_for.h" |
27 | #include "tbb/blocked_range.h" |
28 | #include "tbb/tick_count.h" |
29 | |
30 | #if __TBB_MIC_OFFLOAD |
31 | #pragma offload_attribute (pop) |
32 | |
33 | class __declspec(target(mic)) SubStringFinder; |
34 | #endif // __TBB_MIC_OFFLOAD |
35 | |
36 | static const std::size_t N = 22; |
37 | |
38 | void SerialSubStringFinder ( const std::string &str, std::vector<std::size_t> &max_array, std::vector<std::size_t> &pos_array ) { |
39 | for (std::size_t i = 0; i < str.size(); ++i) { |
40 | std::size_t max_size = 0, max_pos = 0; |
41 | for (std::size_t j = 0; j < str.size(); ++j) |
42 | if (j != i) { |
43 | std::size_t limit = str.size()-(std::max)(i,j); |
44 | for (std::size_t k = 0; k < limit; ++k) { |
45 | if (str[i + k] != str[j + k]) |
46 | break; |
47 | if (k > max_size) { |
48 | max_size = k; |
49 | max_pos = j; |
50 | } |
51 | } |
52 | } |
53 | max_array[i] = max_size; |
54 | pos_array[i] = max_pos; |
55 | } |
56 | } |
57 | |
58 | class SubStringFinder { |
59 | const char *str; |
60 | const std::size_t len; |
61 | std::size_t *max_array; |
62 | std::size_t *pos_array; |
63 | public: |
64 | void operator() ( const tbb::blocked_range<std::size_t>& r ) const { |
65 | for (std::size_t i = r.begin(); i != r.end(); ++i) { |
66 | std::size_t max_size = 0, max_pos = 0; |
67 | for (std::size_t j = 0; j < len; ++j) { |
68 | if (j != i) { |
69 | std::size_t limit = len-(std::max)(i,j); |
70 | for (std::size_t k = 0; k < limit; ++k) { |
71 | if (str[i + k] != str[j + k]) |
72 | break; |
73 | if (k > max_size) { |
74 | max_size = k; |
75 | max_pos = j; |
76 | } |
77 | } |
78 | } |
79 | } |
80 | max_array[i] = max_size; |
81 | pos_array[i] = max_pos; |
82 | } |
83 | } |
84 | // We do not use std::vector for compatibility with offload execution |
85 | SubStringFinder( const char *s, const std::size_t s_len, std::size_t *m, std::size_t *p ) : |
86 | str(s), len(s_len), max_array(m), pos_array(p) { } |
87 | }; |
88 | |
89 | int main() { |
90 | using namespace tbb; |
91 | |
92 | std::string str[N] = { std::string("a" ), std::string("b" ) }; |
93 | for (std::size_t i = 2; i < N; ++i) |
94 | str[i] = str[i-1]+str[i-2]; |
95 | std::string &to_scan = str[N-1]; |
96 | const std::size_t num_elem = to_scan.size(); |
97 | |
98 | std::vector<std::size_t> max1(num_elem); |
99 | std::vector<std::size_t> pos1(num_elem); |
100 | std::vector<std::size_t> max2(num_elem); |
101 | std::vector<std::size_t> pos2(num_elem); |
102 | |
103 | std::cout << " Done building string." << std::endl; |
104 | |
105 | tick_count serial_t0 = tick_count::now(); |
106 | SerialSubStringFinder( to_scan, max2, pos2 ); |
107 | tick_count serial_t1 = tick_count::now(); |
108 | std::cout << " Done with serial version." << std::endl; |
109 | |
110 | tick_count parallel_t0 = tick_count::now(); |
111 | parallel_for(blocked_range<std::size_t>(0, num_elem, 100), |
112 | SubStringFinder( to_scan.c_str(), num_elem, &max1[0], &pos1[0] ) ); |
113 | tick_count parallel_t1 = tick_count::now(); |
114 | std::cout << " Done with parallel version." << std::endl; |
115 | |
116 | for (std::size_t i = 0; i < num_elem; ++i) { |
117 | if (max1[i] != max2[i] || pos1[i] != pos2[i]) { |
118 | std::cout << "ERROR: Serial and Parallel Results are Different!" << std::endl; |
119 | break; |
120 | } |
121 | } |
122 | std::cout << " Done validating results." << std::endl; |
123 | |
124 | std::cout << "Serial version ran in " << (serial_t1 - serial_t0).seconds() << " seconds" << std::endl |
125 | << "Parallel version ran in " << (parallel_t1 - parallel_t0).seconds() << " seconds" << std::endl |
126 | << "Resulting in a speedup of " << (serial_t1 - serial_t0).seconds() / (parallel_t1 - parallel_t0).seconds() << std::endl; |
127 | |
128 | #if __TBB_MIC_OFFLOAD |
129 | // Do offloadable version. Do the timing on host. |
130 | |
131 | std::vector<std::size_t> max3(num_elem); |
132 | std::vector<std::size_t> pos3(num_elem); |
133 | |
134 | std::size_t *max3_array = &max3[0]; // method data() for vector is not available in C++03 |
135 | std::size_t *pos3_array = &pos3[0]; |
136 | tick_count parallel_tt0 = tick_count::now(); |
137 | const char *to_scan_str = to_scan.c_str(); // Offload the string as a char array. |
138 | #pragma offload target(mic) in(num_elem) in(to_scan_str:length(num_elem)) out(max3_array,pos3_array:length(num_elem)) |
139 | { |
140 | parallel_for(blocked_range<std::size_t>(0, num_elem, 100), |
141 | SubStringFinder ( to_scan_str, num_elem, max3_array, pos3_array ) ); |
142 | } |
143 | tick_count parallel_tt1 = tick_count::now(); |
144 | std::cout << " Done with offloadable version." << std::endl; |
145 | |
146 | // Do validation of offloadable results on host. |
147 | for (std::size_t i = 0; i < num_elem; ++i) { |
148 | if (max1[i] != max3[i] || pos1[i] != pos3[i]) { |
149 | std::cout << "ERROR: Serial and Offloadable Results are Different!" << std::endl; |
150 | break; |
151 | } |
152 | } |
153 | std::cout << " Done validating offloadable results." << std::endl; |
154 | |
155 | std::cout << "Offloadable version ran in " << (parallel_tt1 - parallel_tt0).seconds() << " seconds" << std::endl |
156 | << "Resulting in a speedup of " << (serial_t1 - serial_t0).seconds() / (parallel_tt1 - parallel_tt0).seconds() |
157 | << " of offloadable version" << std::endl; |
158 | |
159 | #endif // __TBB_MIC_OFFLOAD |
160 | |
161 | return 0; |
162 | } |
163 | |