/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/*
    This file contains the TBB-based implementation of the convex hull algorithm.
    It corresponds to the following settings in convex_hull_bench.cpp:
    - USETBB defined to 1
    - USECONCVEC defined to 1
    - INIT_ONCE defined to 0
    - only the buffered version is used
*/
#include "convex_hull.h"

#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include "tbb/tick_count.h"
#include "tbb/concurrent_vector.h"

typedef util::point<double> point_t;
typedef tbb::concurrent_vector< point_t > pointVec_t;
typedef tbb::blocked_range<size_t> range_t;

void appendVector(const point_t* src, size_t srcSize, pointVec_t& dest) {
    std::copy(src, src + srcSize, dest.grow_by(srcSize));
}

void appendVector(const pointVec_t& src, pointVec_t& dest) {
    std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
}
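
// Both overloads rely on concurrent_vector::grow_by(n), which atomically appends n
// default-constructed elements and returns an iterator to the first of them, so each
// concurrent caller copies into its own disjoint slice of dest. A minimal usage sketch
// (illustrative only; assumes point_t has an (x, y) constructor as in convex_hull.h):
//
//     pointVec_t dest;
//     point_t local[2] = { point_t(0.0, 0.0), point_t(1.0, 1.0) };
//     appendVector(local, 2, dest);   // safe to call from several tasks at once
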
class FillRNDPointsVector_buf {
    pointVec_t &points;
public:
    static const size_t grainSize = cfg::generateGrainSize;

    explicit FillRNDPointsVector_buf(pointVec_t& _points)
        : points(_points) {}

    void operator()(const range_t& range) const {
        util::rng the_rng(range.begin());
        const size_t i_end = range.end();
        size_t count = 0, j = 0;
        point_t tmp_vec[grainSize];

        for(size_t i = range.begin(); i != i_end; ++i) {
            tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand);
        }
        // There is a race condition here: the elements being written to may still be
        // under construction by grow_to_at_least() on another thread. For C++03 this is
        // worked around by the vector element type, whose default constructor does not
        // touch the memory it is constructed on; see the comments near the default ctor
        // of the point class, and the sketch after this class, for details.
        // Strictly speaking, this is undefined behavior.
        // TODO: find a more reliable/correct way.
        points.grow_to_at_least(range.end());
        std::copy(tmp_vec, tmp_vec + j, points.begin() + range.begin());
    }
};
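
// The workaround mentioned in operator() above relies on the element type having a
// default constructor that leaves its memory untouched, so that the concurrent default
// construction performed by grow_to_at_least() cannot clobber data already copied in by
// another thread. A minimal sketch of such a type (hypothetical names; the real
// definition lives next to the default ctor of util::point in convex_hull.h):
//
//     template <typename T>
//     struct raw_point_sketch {
//         T x, y;
//         raw_point_sketch() {}                          // intentionally does not write x, y
//         raw_point_sketch(T x_, T y_) : x(x_), y(y_) {}
//     };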

void initialize(pointVec_t &points) {
    // This function generates the same series of points on every call.
    // Reproducibility is needed for benchmarking to produce reliable results.
    // It is achieved through the following means:
    // - each FillRNDPointsVector_buf instance has its own local random number
    //   generator, which in turn does not use any global data;
    // - tbb::simple_partitioner produces the same set of ranges on every call to
    //   tbb::parallel_for;
    // - local RNG instances are seeded with the starting indices of the corresponding ranges;
    // - grow_to_at_least() puts the points into the resulting vector in a deterministic order
    //   (unlike concurrent push_back or grow_by).

    // In the buffered version, temporary storage for up to grainSize elements is
    // allocated on the stack inside the body. auto_partitioner may enlarge the effective
    // range beyond grainSize and overflow that buffer, so simple_partitioner has to be
    // used; see the sketch after this function.
    tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize),
                      FillRNDPointsVector_buf(points), tbb::simple_partitioner());
}
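
// The invariant relied upon above: with tbb::simple_partitioner and a grainsize of
// FillRNDPointsVector_buf::grainSize, every subrange handed to the body satisfies
// range.size() <= grainSize, so the on-stack tmp_vec buffer cannot overflow. The
// assumption could be made explicit at the top of operator() (illustrative addition,
// not present in the original body):
//
//     assert(range.size() <= grainSize);   // could fire if auto_partitioner were used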

class FindXExtremum {
public:
    typedef enum {
        minX, maxX
    } extremumType;

    static const size_t grainSize = cfg::findExtremumGrainSize;

    FindXExtremum(const pointVec_t& points_, extremumType exType_)
        : points(points_), exType(exType_), extrXPoint(points[0]) {}

    FindXExtremum(const FindXExtremum& fxex, tbb::split)
        // Can run in parallel with fxex.operator()() or fxex.join().
        // The data race reported by tools is harmless.
        : points(fxex.points), exType(fxex.exType), extrXPoint(fxex.extrXPoint) {}

    void operator()(const range_t& range) {
        const size_t i_end = range.end();
        if(!range.empty()) {
            for(size_t i = range.begin(); i != i_end; ++i) {
                if(closerToExtremum(points[i])) {
                    extrXPoint = points[i];
                }
            }
        }
    }

    void join(const FindXExtremum &rhs) {
        if(closerToExtremum(rhs.extrXPoint)) {
            extrXPoint = rhs.extrXPoint;
        }
    }

    point_t extremeXPoint() {
        return extrXPoint;
    }

private:
    const pointVec_t &points;
    const extremumType exType;
    point_t extrXPoint;
    bool closerToExtremum(const point_t &p) const {
        switch(exType) {
            case minX: return p.x < extrXPoint.x;
            case maxX: return p.x > extrXPoint.x;
        }
        return false; // avoid warning
    }
};

template <FindXExtremum::extremumType type>
point_t extremum(const pointVec_t &P) {
    FindXExtremum fxBody(P, type);
    tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
    return fxBody.extremeXPoint();
}

class SplitByCP_buf {
    const pointVec_t &initialSet;
    pointVec_t &reducedSet;
    point_t p1, p2;
    point_t farPoint;
    double howFar;
public:
    static const size_t grainSize = cfg::divideGrainSize;

    SplitByCP_buf( point_t _p1, point_t _p2,
                   const pointVec_t &_initialSet, pointVec_t &_reducedSet )
        : initialSet(_initialSet), reducedSet(_reducedSet),
          p1(_p1), p2(_p2),
          farPoint(_p1), howFar(0) {}

    SplitByCP_buf(SplitByCP_buf& sbcp, tbb::split)
        : initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
          p1(sbcp.p1), p2(sbcp.p2),
          farPoint(sbcp.p1), howFar(0) {}

    void operator()(const range_t& range) {
        const size_t i_end = range.end();
        size_t j = 0;
        double cp;
        point_t tmp_vec[grainSize];
        for(size_t i = range.begin(); i != i_end; ++i) {
            if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
                cp = util::cross_product(p1, p2, initialSet[i]);
                if(cp > 0) {
                    tmp_vec[j++] = initialSet[i];
                    if(cp > howFar) {
                        farPoint = initialSet[i];
                        howFar = cp;
                    }
                }
            }
        }

        appendVector(tmp_vec, j, reducedSet);
    }

    void join(const SplitByCP_buf& rhs) {
        if(rhs.howFar > howFar) {
            howFar = rhs.howFar;
            farPoint = rhs.farPoint;
        }
    }

    point_t farthestPoint() const {
        return farPoint;
    }
};

point_t divide(const pointVec_t &P, pointVec_t &P_reduced,
               const point_t &p1, const point_t &p2) {
    SplitByCP_buf sbcpb(p1, p2, P, P_reduced);
    // Must use simple_partitioner (see the comment in initialize() above).
    tbb::parallel_reduce(range_t(0, P.size(), SplitByCP_buf::grainSize),
                         sbcpb, tbb::simple_partitioner());

    if(util::verbose) {
        std::stringstream ss;
        ss << P.size() << " nodes in bucket" << ", "
           << "dividing by: [ " << p1 << ", " << p2 << " ], "
           << "farthest node: " << sbcpb.farthestPoint();
        util::OUTPUT.push_back(ss.str());
    }

    return sbcpb.farthestPoint();
}

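// Recursively builds the part of the hull lying on the positive cross-product side of
// the directed segment p1->p2: divide() keeps only those points (and finds the farthest
// one); if fewer than two points survive, p1 and the survivors are emitted, otherwise
// the problem is split at the farthest point and the two halves are solved recursively.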
void divide_and_conquer(const pointVec_t &P, pointVec_t &H,
                        point_t p1, point_t p2) {
    assert(P.size() >= 2);
    pointVec_t P_reduced;
    pointVec_t H1, H2;
    point_t p_far = divide(P, P_reduced, p1, p2);
    if (P_reduced.size() < 2) {
        H.push_back(p1);
        appendVector(P_reduced, H);
    }
    else {
        divide_and_conquer(P_reduced, H1, p1, p_far);
        divide_and_conquer(P_reduced, H2, p_far, p2);

        appendVector(H1, H);
        appendVector(H2, H);
    }
}

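// Quickhull driver: the two x-extreme points split the hull into two chains; each chain
// is built independently by divide_and_conquer() and the results are concatenated.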
void quickhull(const pointVec_t &points, pointVec_t &hull) {
    if (points.size() < 2) {
        appendVector(points, hull);
        return;
    }

    point_t p_maxx = extremum<FindXExtremum::maxX>(points);
    point_t p_minx = extremum<FindXExtremum::minX>(points);

    pointVec_t H;

    divide_and_conquer(points, hull, p_maxx, p_minx);
    divide_and_conquer(points, H, p_minx, p_maxx);

    appendVector(H, hull);
}

int main(int argc, char* argv[]) {
    util::my_time_t tm_main_begin = util::gettime();

    util::ParseInputArgs(argc, argv);

    pointVec_t points;
    pointVec_t hull;
    int nthreads;

    points.reserve(cfg::numberOfPoints);

    if(!util::silent) {
        std::cout << "Starting TBB-buffered version of QUICK HULL algorithm" << std::endl;
    }

    for(nthreads = cfg::threads.first; nthreads <= cfg::threads.last; nthreads = cfg::threads.step(nthreads)) {
        tbb::task_scheduler_init init(nthreads);

        points.clear();
        util::my_time_t tm_init = util::gettime();
        initialize(points);
        util::my_time_t tm_start = util::gettime();
        if(!util::silent) {
            std::cout << "Init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start)
                      << " Points in input: " << points.size() << std::endl;
        }

        tm_start = util::gettime();
        quickhull(points, hull);
        util::my_time_t tm_end = util::gettime();
        if(!util::silent) {
            std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end)
                      << " Points in hull: " << hull.size() << std::endl;
        }
        hull.clear();
    }
    utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime()));
    return 0;
}