1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
17 | /* |
18 | This file contains the TBB-based implementation of convex hull algortihm. |
19 | It corresponds to the following settings in convex_hull_bench.cpp: |
20 | - USETBB defined to 1 |
21 | - USECONCVEC defined to 1 |
22 | - INIT_ONCE defined to 0 |
23 | - only buffered version is used |
24 | */ |
25 | #include "convex_hull.h" |
26 | |
27 | #include "tbb/task_scheduler_init.h" |
28 | #include "tbb/parallel_for.h" |
29 | #include "tbb/parallel_reduce.h" |
30 | #include "tbb/blocked_range.h" |
31 | #include "tbb/tick_count.h" |
32 | #include "tbb/concurrent_vector.h" |
33 | |
34 | typedef util::point<double> point_t; |
35 | typedef tbb::concurrent_vector< point_t > pointVec_t; |
36 | typedef tbb::blocked_range<size_t> range_t; |
37 | |
38 | void appendVector(const point_t* src, size_t srcSize, pointVec_t& dest) { |
39 | std::copy(src, src + srcSize, dest.grow_by(srcSize)); |
40 | } |
41 | |
42 | void appendVector(const pointVec_t& src, pointVec_t& dest) { |
43 | std::copy(src.begin(), src.end(), dest.grow_by(src.size())); |
44 | } |
45 | class FillRNDPointsVector_buf { |
46 | pointVec_t &points; |
47 | public: |
48 | static const size_t grainSize = cfg::generateGrainSize; |
49 | |
50 | explicit FillRNDPointsVector_buf(pointVec_t& _points) |
51 | : points(_points) {} |
52 | |
53 | void operator()(const range_t& range) const { |
54 | util::rng the_rng(range.begin()); |
55 | const size_t i_end = range.end(); |
56 | size_t count = 0, j = 0; |
57 | point_t tmp_vec[grainSize]; |
58 | |
59 | for(size_t i=range.begin(); i!=i_end; ++i) { |
60 | tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand); |
61 | } |
62 | //Here we have race condition. Elements being written to may be still under construction. |
63 | //For C++ 2003 it is workarounded by vector element type which default constructor does not touch memory, |
64 | //it being constructed on. See comments near default ctor of point class for more details. |
65 | //Strictly speaking it is UB. |
66 | //TODO: need to find more reliable/correct way |
67 | points.grow_to_at_least(range.end()); |
68 | std::copy(tmp_vec, tmp_vec+j,points.begin()+range.begin()); |
69 | } |
70 | }; |
71 | |
72 | void initialize(pointVec_t &points) { |
73 | //This function generate the same series of point on every call. |
74 | //Reproducibility is needed for benchmarking to produce reliable results. |
75 | //It is achieved through the following points: |
76 | // - FillRNDPointsVector_buf instance has its own local instance |
77 | // of random number generator, which in turn does not use any global data |
78 | // - tbb::simple_partitioner produce the same set of ranges on every call to |
79 | // tbb::parallel_for |
80 | // - local RNG instances are seeded by the starting indexes of corresponding ranges |
81 | // - grow_to_at_least() enables putting points into the resulting vector in deterministic order |
82 | // (unlike concurrent push_back or grow_by). |
83 | |
84 | // In the buffered version, a temporary storage for as much as grainSize elements |
85 | // is allocated inside the body. Since auto_partitioner may increase effective |
86 | // range size which would cause a crash, simple partitioner has to be used. |
87 | tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize), |
88 | FillRNDPointsVector_buf(points), tbb::simple_partitioner()); |
89 | } |
90 | |
91 | class FindXExtremum { |
92 | public: |
93 | typedef enum { |
94 | minX, maxX |
95 | } extremumType; |
96 | |
97 | static const size_t grainSize = cfg::findExtremumGrainSize; |
98 | |
99 | FindXExtremum(const pointVec_t& points_, extremumType exType_) |
100 | : points(points_), exType(exType_), extrXPoint(points[0]) {} |
101 | |
102 | FindXExtremum(const FindXExtremum& fxex, tbb::split) |
103 | // Can run in parallel with fxex.operator()() or fxex.join(). |
104 | // The data race reported by tools is harmless. |
105 | : points(fxex.points), exType(fxex.exType), extrXPoint(fxex.extrXPoint) {} |
106 | |
107 | void operator()(const range_t& range) { |
108 | const size_t i_end = range.end(); |
109 | if(!range.empty()) { |
110 | for(size_t i = range.begin(); i != i_end; ++i) { |
111 | if(closerToExtremum(points[i])) { |
112 | extrXPoint = points[i]; |
113 | } |
114 | } |
115 | } |
116 | } |
117 | |
118 | void join(const FindXExtremum &rhs) { |
119 | if(closerToExtremum(rhs.extrXPoint)) { |
120 | extrXPoint = rhs.extrXPoint; |
121 | } |
122 | } |
123 | |
124 | point_t extremeXPoint() { |
125 | return extrXPoint; |
126 | } |
127 | |
128 | private: |
129 | const pointVec_t &points; |
130 | const extremumType exType; |
131 | point_t extrXPoint; |
132 | bool closerToExtremum(const point_t &p) const { |
133 | switch(exType) { |
134 | case minX: |
135 | return p.x<extrXPoint.x; break; |
136 | case maxX: |
137 | return p.x>extrXPoint.x; break; |
138 | } |
139 | return false; // avoid warning |
140 | } |
141 | }; |
142 | |
143 | template <FindXExtremum::extremumType type> |
144 | point_t extremum(const pointVec_t &P) { |
145 | FindXExtremum fxBody(P, type); |
146 | tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody); |
147 | return fxBody.extremeXPoint(); |
148 | } |
149 | |
150 | class SplitByCP_buf { |
151 | const pointVec_t &initialSet; |
152 | pointVec_t &reducedSet; |
153 | point_t p1, p2; |
154 | point_t farPoint; |
155 | double howFar; |
156 | public: |
157 | static const size_t grainSize = cfg::divideGrainSize; |
158 | |
159 | SplitByCP_buf( point_t _p1, point_t _p2, |
160 | const pointVec_t &_initialSet, pointVec_t &_reducedSet) |
161 | : p1(_p1), p2(_p2), |
162 | initialSet(_initialSet), reducedSet(_reducedSet), |
163 | howFar(0), farPoint(p1) {} |
164 | |
165 | SplitByCP_buf(SplitByCP_buf& sbcp, tbb::split) |
166 | : p1(sbcp.p1), p2(sbcp.p2), |
167 | initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet), |
168 | howFar(0), farPoint(p1) {} |
169 | |
170 | void operator()(const range_t& range) { |
171 | const size_t i_end = range.end(); |
172 | size_t j = 0; |
173 | double cp; |
174 | point_t tmp_vec[grainSize]; |
175 | for(size_t i = range.begin(); i != i_end; ++i) { |
176 | if( (initialSet[i] != p1) && (initialSet[i] != p2) ) { |
177 | cp = util::cross_product(p1, p2, initialSet[i]); |
178 | if(cp>0) { |
179 | tmp_vec[j++] = initialSet[i]; |
180 | if(cp>howFar) { |
181 | farPoint = initialSet[i]; |
182 | howFar = cp; |
183 | } |
184 | } |
185 | } |
186 | } |
187 | |
188 | appendVector(tmp_vec, j, reducedSet); |
189 | } |
190 | |
191 | void join(const SplitByCP_buf& rhs) { |
192 | if(rhs.howFar>howFar) { |
193 | howFar = rhs.howFar; |
194 | farPoint = rhs.farPoint; |
195 | } |
196 | } |
197 | |
198 | point_t farthestPoint() const { |
199 | return farPoint; |
200 | } |
201 | }; |
202 | |
203 | point_t divide(const pointVec_t &P, pointVec_t &P_reduced, |
204 | const point_t &p1, const point_t &p2) { |
205 | SplitByCP_buf sbcpb(p1, p2, P, P_reduced); |
206 | // Must use simple_partitioner (see the comment in initialize() above) |
207 | tbb::parallel_reduce(range_t(0, P.size(), SplitByCP_buf::grainSize), |
208 | sbcpb, tbb::simple_partitioner()); |
209 | |
210 | if(util::verbose) { |
211 | std::stringstream ss; |
212 | ss << P.size() << " nodes in bucket" << ", " |
213 | << "dividing by: [ " << p1 << ", " << p2 << " ], " |
214 | << "farthest node: " << sbcpb.farthestPoint(); |
215 | util::OUTPUT.push_back(ss.str()); |
216 | } |
217 | |
218 | return sbcpb.farthestPoint(); |
219 | } |
220 | |
221 | void divide_and_conquer(const pointVec_t &P, pointVec_t &H, |
222 | point_t p1, point_t p2) { |
223 | assert(P.size() >= 2); |
224 | pointVec_t P_reduced; |
225 | pointVec_t H1, H2; |
226 | point_t p_far = divide(P, P_reduced, p1, p2); |
227 | if (P_reduced.size()<2) { |
228 | H.push_back(p1); |
229 | appendVector(P_reduced, H); |
230 | } |
231 | else { |
232 | divide_and_conquer(P_reduced, H1, p1, p_far); |
233 | divide_and_conquer(P_reduced, H2, p_far, p2); |
234 | |
235 | appendVector(H1, H); |
236 | appendVector(H2, H); |
237 | } |
238 | } |
239 | |
240 | void quickhull(const pointVec_t &points, pointVec_t &hull) { |
241 | if (points.size() < 2) { |
242 | appendVector(points, hull); |
243 | return; |
244 | } |
245 | |
246 | point_t p_maxx = extremum<FindXExtremum::maxX>(points); |
247 | point_t p_minx = extremum<FindXExtremum::minX>(points); |
248 | |
249 | pointVec_t H; |
250 | |
251 | divide_and_conquer(points, hull, p_maxx, p_minx); |
252 | divide_and_conquer(points, H, p_minx, p_maxx); |
253 | |
254 | appendVector(H, hull); |
255 | } |
256 | |
257 | int main(int argc, char* argv[]) { |
258 | util::my_time_t tm_main_begin = util::gettime(); |
259 | |
260 | util::ParseInputArgs(argc, argv); |
261 | |
262 | pointVec_t points; |
263 | pointVec_t hull; |
264 | int nthreads; |
265 | |
266 | points.reserve(cfg::numberOfPoints); |
267 | |
268 | if(!util::silent) { |
269 | std::cout << "Starting TBB-buffered version of QUICK HULL algorithm" << std::endl; |
270 | } |
271 | |
272 | for(nthreads=cfg::threads.first; nthreads<=cfg::threads.last; nthreads=cfg::threads.step(nthreads)) { |
273 | tbb::task_scheduler_init init(nthreads); |
274 | |
275 | points.clear(); |
276 | util::my_time_t tm_init = util::gettime(); |
277 | initialize(points); |
278 | util::my_time_t tm_start = util::gettime(); |
279 | if(!util::silent) { |
280 | std::cout <<"Init time on " <<nthreads<<" threads: " <<util::time_diff(tm_init, tm_start)<<" Points in input: " <<points.size()<<std::endl; |
281 | } |
282 | |
283 | tm_start = util::gettime(); |
284 | quickhull(points, hull); |
285 | util::my_time_t tm_end = util::gettime(); |
286 | if(!util::silent) { |
287 | std::cout <<"Time on " <<nthreads<<" threads: " <<util::time_diff(tm_start, tm_end)<<" Points in hull: " <<hull.size()<<std::endl; |
288 | } |
289 | hull.clear(); |
290 | } |
291 | utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime())); |
292 | return 0; |
293 | } |
294 | |