/*
    Copyright (c) 2005-2019 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

/*
    This file contains the TBB-based implementation of the convex hull algorithm.
    It corresponds to the following settings in convex_hull_bench.cpp:
    - USETBB defined to 1
    - USECONCVEC defined to 1
    - INIT_ONCE defined to 0
    - only the buffered version is used
*/
#include "convex_hull.h"

#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include "tbb/tick_count.h"
#include "tbb/concurrent_vector.h"

typedef util::point<double>               point_t;
typedef tbb::concurrent_vector< point_t > pointVec_t;
typedef tbb::blocked_range<size_t>        range_t;

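// Helpers that append a block of points to a concurrent vector. grow_by()
// atomically reserves slots for the new elements and returns an iterator to the
// first of them, so concurrent appends from different tasks never interleave.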
void appendVector(const point_t* src, size_t srcSize, pointVec_t& dest) {
    std::copy(src, src + srcSize, dest.grow_by(srcSize));
}

void appendVector(const pointVec_t& src, pointVec_t& dest) {
    std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
}
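
// parallel_for body: fills the element range [range.begin(), range.end()) of
// 'points' with reproducible random points, buffering up to grainSize of them
// locally before publishing them into the shared vector.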
class FillRNDPointsVector_buf {
    pointVec_t          &points;
public:
    static const size_t  grainSize = cfg::generateGrainSize;

    explicit FillRNDPointsVector_buf(pointVec_t& _points)
        : points(_points) {}

    void operator()(const range_t& range) const {
        util::rng the_rng(range.begin());
        const size_t i_end = range.end();
        size_t count = 0, j = 0;
        point_t tmp_vec[grainSize];

        for(size_t i = range.begin(); i != i_end; ++i) {
            tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand);
        }
        // Here we have a race condition: the elements being written to may still be
        // under construction by a concurrent grow_to_at_least() call. For C++03 this
        // is worked around by a vector element type whose default constructor does not
        // touch the memory it is constructed on; see the comments near the default
        // constructor of the point class for details. Strictly speaking, it is UB.
        // TODO: find a more reliable/correct way.
        points.grow_to_at_least(range.end());
        std::copy(tmp_vec, tmp_vec + j, points.begin() + range.begin());
    }
};

void initialize(pointVec_t &points) {
    // This function generates the same series of points on every call.
    // Reproducibility is needed for benchmarking to produce reliable results.
    // It is achieved through the following means:
    //      - each FillRNDPointsVector_buf instance has its own local instance of the
    //        random number generator, which in turn does not use any global data;
    //      - tbb::simple_partitioner produces the same set of ranges on every call to
    //        tbb::parallel_for;
    //      - local RNG instances are seeded by the starting indices of the corresponding ranges;
    //      - grow_to_at_least() puts points into the resulting vector in a deterministic
    //        order (unlike concurrent push_back or grow_by).

    // In the buffered version, temporary storage for as many as grainSize elements is
    // allocated inside the body. Since auto_partitioner may increase the effective range
    // size, which would overflow that buffer and cause a crash, simple_partitioner has
    // to be used.
    tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize),
                      FillRNDPointsVector_buf(points), tbb::simple_partitioner());
}

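// parallel_reduce body that finds the point with the minimal or maximal x coordinate.
// The two x-extreme points define the initial dividing line of the quickhull recursion.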
class FindXExtremum {
public:
    typedef enum {
        minX, maxX
    } extremumType;

    static const size_t  grainSize = cfg::findExtremumGrainSize;

    FindXExtremum(const pointVec_t& points_, extremumType exType_)
        : points(points_), exType(exType_), extrXPoint(points[0]) {}

    FindXExtremum(const FindXExtremum& fxex, tbb::split)
        // Can run in parallel with fxex.operator()() or fxex.join().
        // The data race reported by tools is harmless.
        : points(fxex.points), exType(fxex.exType), extrXPoint(fxex.extrXPoint) {}

    void operator()(const range_t& range) {
        const size_t i_end = range.end();
        if(!range.empty()) {
            for(size_t i = range.begin(); i != i_end; ++i) {
                if(closerToExtremum(points[i])) {
                    extrXPoint = points[i];
                }
            }
        }
    }

    void join(const FindXExtremum &rhs) {
        if(closerToExtremum(rhs.extrXPoint)) {
            extrXPoint = rhs.extrXPoint;
        }
    }

    point_t extremeXPoint() {
        return extrXPoint;
    }

private:
    const pointVec_t    &points;
    const extremumType   exType;
    point_t              extrXPoint;

    bool closerToExtremum(const point_t &p) const {
        switch(exType) {
            case minX: return p.x < extrXPoint.x;
            case maxX: return p.x > extrXPoint.x;
        }
        return false; // avoid warning
    }
};

template <FindXExtremum::extremumType type>
point_t extremum(const pointVec_t &P) {
    FindXExtremum fxBody(P, type);
    tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
    return fxBody.extremeXPoint();
}

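// parallel_reduce body used by divide(): keeps the points lying on the positive side
// of the oriented line p1->p2 (positive cross product), buffering up to grainSize of
// them before appending to reducedSet, and tracks the point farthest from that line.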
class SplitByCP_buf {
    const pointVec_t    &initialSet;
    pointVec_t          &reducedSet;
    point_t              p1, p2;
    point_t              farPoint;
    double               howFar;
public:
    static const size_t  grainSize = cfg::divideGrainSize;

    SplitByCP_buf( point_t _p1, point_t _p2,
                   const pointVec_t &_initialSet, pointVec_t &_reducedSet)
        : initialSet(_initialSet), reducedSet(_reducedSet),
          p1(_p1), p2(_p2),
          farPoint(p1), howFar(0) {}

    SplitByCP_buf(SplitByCP_buf& sbcp, tbb::split)
        : initialSet(sbcp.initialSet), reducedSet(sbcp.reducedSet),
          p1(sbcp.p1), p2(sbcp.p2),
          farPoint(p1), howFar(0) {}

    void operator()(const range_t& range) {
        const size_t i_end = range.end();
        size_t j = 0;
        double cp;
        point_t tmp_vec[grainSize];
        for(size_t i = range.begin(); i != i_end; ++i) {
            if( (initialSet[i] != p1) && (initialSet[i] != p2) ) {
                cp = util::cross_product(p1, p2, initialSet[i]);
                if(cp > 0) {
                    tmp_vec[j++] = initialSet[i];
                    if(cp > howFar) {
                        farPoint = initialSet[i];
                        howFar   = cp;
                    }
                }
            }
        }

        appendVector(tmp_vec, j, reducedSet);
    }

    void join(const SplitByCP_buf& rhs) {
        if(rhs.howFar > howFar) {
            howFar   = rhs.howFar;
            farPoint = rhs.farPoint;
        }
    }

    point_t farthestPoint() const {
        return farPoint;
    }
};

point_t divide(const pointVec_t &P, pointVec_t &P_reduced,
               const point_t &p1, const point_t &p2) {
    SplitByCP_buf sbcpb(p1, p2, P, P_reduced);
    // Must use simple_partitioner (see the comment in initialize() above).
    tbb::parallel_reduce(range_t(0, P.size(), SplitByCP_buf::grainSize),
                         sbcpb, tbb::simple_partitioner());

    if(util::verbose) {
        std::stringstream ss;
        ss << P.size() << " nodes in bucket" << ", "
           << "dividing by: [ " << p1 << ", " << p2 << " ], "
           << "farthest node: " << sbcpb.farthestPoint();
        util::OUTPUT.push_back(ss.str());
    }

    return sbcpb.farthestPoint();
}

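// Recursive quickhull step: reduce P to the points on the outer side of the line
// p1->p2, find the farthest of them, then build the partial hulls for (p1, p_far)
// and (p_far, p2) and concatenate them so the hull points stay in order.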
void divide_and_conquer(const pointVec_t &P, pointVec_t &H,
                        point_t p1, point_t p2) {
    assert(P.size() >= 2);
    pointVec_t P_reduced;
    pointVec_t H1, H2;
    point_t p_far = divide(P, P_reduced, p1, p2);
    if (P_reduced.size() < 2) {
        H.push_back(p1);
        appendVector(P_reduced, H);
    }
    else {
        divide_and_conquer(P_reduced, H1, p1, p_far);
        divide_and_conquer(P_reduced, H2, p_far, p2);

        appendVector(H1, H);
        appendVector(H2, H);
    }
}

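// Top-level quickhull: the points with the extreme x coordinates split the hull into
// two chains, each built by divide_and_conquer() and then concatenated.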
void quickhull(const pointVec_t &points, pointVec_t &hull) {
    if (points.size() < 2) {
        appendVector(points, hull);
        return;
    }

    point_t p_maxx = extremum<FindXExtremum::maxX>(points);
    point_t p_minx = extremum<FindXExtremum::minX>(points);

    pointVec_t H;

    divide_and_conquer(points, hull, p_maxx, p_minx);
    divide_and_conquer(points, H, p_minx, p_maxx);

    appendVector(H, hull);
}

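// Benchmark driver: for each thread count configured in cfg::threads, regenerate
// the input points and time the parallel quickhull.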
int main(int argc, char* argv[]) {
    util::my_time_t tm_main_begin = util::gettime();

    util::ParseInputArgs(argc, argv);

    pointVec_t      points;
    pointVec_t      hull;
    int             nthreads;

    points.reserve(cfg::numberOfPoints);

    if(!util::silent) {
        std::cout << "Starting TBB-buffered version of QUICK HULL algorithm" << std::endl;
    }

    for(nthreads = cfg::threads.first; nthreads <= cfg::threads.last; nthreads = cfg::threads.step(nthreads)) {
        tbb::task_scheduler_init init(nthreads);

        points.clear();
        util::my_time_t tm_init = util::gettime();
        initialize(points);
        util::my_time_t tm_start = util::gettime();
        if(!util::silent) {
            std::cout << "Init time on " << nthreads << " threads: " << util::time_diff(tm_init, tm_start)
                      << "  Points in input: " << points.size() << std::endl;
        }

        tm_start = util::gettime();
        quickhull(points, hull);
        util::my_time_t tm_end = util::gettime();
        if(!util::silent) {
            std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end)
                      << "  Points in hull: " << hull.size() << std::endl;
        }
        hull.clear();
    }
    utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime()));
    return 0;
}