1 | //---------------------------------------------------------------------- |
2 | // File: ANN.h |
3 | // Programmer: Sunil Arya and David Mount |
4 | // Description: Basic include file for approximate nearest |
5 | // neighbor searching. |
6 | // Last modified: 01/27/10 (Version 1.1.2) |
7 | //---------------------------------------------------------------------- |
8 | // Copyright (c) 1997-2010 University of Maryland and Sunil Arya and |
9 | // David Mount. All Rights Reserved. |
10 | // |
11 | // This software and related documentation is part of the Approximate |
12 | // Nearest Neighbor Library (ANN). This software is provided under |
13 | // the provisions of the Lesser GNU Public License (LGPL). See the |
14 | // file ../ReadMe.txt for further information. |
15 | // |
16 | // The University of Maryland (U.M.) and the authors make no |
17 | // representations about the suitability or fitness of this software for |
18 | // any purpose. It is provided "as is" without express or implied |
19 | // warranty. |
20 | //---------------------------------------------------------------------- |
21 | // History: |
22 | // Revision 0.1 03/04/98 |
23 | // Initial release |
24 | // Revision 1.0 04/01/05 |
25 | // Added copyright and revision information |
26 | // Added ANNcoordPrec for coordinate precision. |
27 | // Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet. |
28 | // Cleaned up C++ structure for modern compilers |
29 | // Revision 1.1 05/03/05 |
30 | // Added fixed-radius k-NN searching |
31 | // Revision 1.1.2 01/27/10 |
32 | // Fixed minor compilation bugs for new versions of gcc |
33 | //---------------------------------------------------------------------- |
34 | |
35 | //---------------------------------------------------------------------- |
36 | // ANN - approximate nearest neighbor searching |
37 | // ANN is a library for approximate nearest neighbor searching, |
38 | // based on the use of standard and priority search in kd-trees |
39 | // and balanced box-decomposition (bbd) trees. Here are some |
40 | // references to the main algorithmic techniques used here: |
41 | // |
42 | // kd-trees: |
43 | // Friedman, Bentley, and Finkel, ``An algorithm for finding |
44 | // best matches in logarithmic expected time,'' ACM |
45 | // Transactions on Mathematical Software, 3(3):209-226, 1977. |
46 | // |
47 | // Priority search in kd-trees: |
48 | // Arya and Mount, ``Algorithms for fast vector quantization,'' |
49 | // Proc. of DCC '93: Data Compression Conference, eds. J. A. |
50 | // Storer and M. Cohn, IEEE Press, 1993, 381-390. |
51 | // |
52 | // Approximate nearest neighbor search and bbd-trees: |
53 | // Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal |
54 | // algorithm for approximate nearest neighbor searching,'' |
55 | // 5th Ann. ACM-SIAM Symposium on Discrete Algorithms, |
56 | // 1994, 573-582. |
57 | //---------------------------------------------------------------------- |
58 | |
59 | #ifndef ANN_H |
60 | #define ANN_H |
61 | |
62 | #ifdef WIN32 |
63 | //---------------------------------------------------------------------- |
64 | // For Microsoft Visual C++, externally accessible symbols must be |
65 | // explicitly indicated with DLL_API, which is somewhat like "extern." |
66 | // |
67 | // The following ifdef block is the standard way of creating macros |
68 | // which make exporting from a DLL simpler. All files within this DLL |
69 | // are compiled with the DLL_EXPORTS preprocessor symbol defined on the |
70 | // command line. In contrast, projects that use (or import) the DLL |
71 | // objects do not define the DLL_EXPORTS symbol. This way any other |
72 | // project whose source files include this file see DLL_API functions as |
73 | // being imported from a DLL, wheras this DLL sees symbols defined with |
74 | // this macro as being exported. |
75 | //---------------------------------------------------------------------- |
76 | #ifdef DLL_EXPORTS |
77 | #define DLL_API __declspec(dllexport) |
78 | #else |
79 | #define DLL_API __declspec(dllimport) |
80 | #endif |
81 | //---------------------------------------------------------------------- |
82 | // DLL_API is ignored for all other systems |
83 | //---------------------------------------------------------------------- |
84 | #else |
85 | #define DLL_API |
86 | #endif |
87 | |
88 | //---------------------------------------------------------------------- |
89 | // basic includes |
90 | //---------------------------------------------------------------------- |
91 | |
92 | #include <cstdlib> // standard lib includes |
93 | #include <cmath> // math includes |
94 | #include <iostream> // I/O streams |
95 | #include <cstring> // C-style strings |
96 | |
97 | //---------------------------------------------------------------------- |
98 | // Limits |
99 | // There are a number of places where we use the maximum double value as |
100 | // default initializers (and others may be used, depending on the |
101 | // data/distance representation). These can usually be found in limits.h |
102 | // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX). |
103 | // |
104 | // Not all systems have these files. If you are using such a system, |
105 | // you should set the preprocessor symbol ANN_NO_LIMITS_H when |
106 | // compiling, and modify the statements below to generate the |
107 | // appropriate value. For practical purposes, this does not need to be |
108 | // the maximum double value. It is sufficient that it be at least as |
109 | // large than the maximum squared distance between between any two |
110 | // points. |
111 | //---------------------------------------------------------------------- |
112 | #ifdef ANN_NO_LIMITS_H // limits.h unavailable |
113 | #include <cvalues> // replacement for limits.h |
114 | const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double |
115 | #else |
116 | #include <climits> |
117 | #include <cfloat> |
118 | const double ANN_DBL_MAX = DBL_MAX; |
119 | #endif |
120 | |
121 | #define ANNversion "1.1.2" // ANN version and information |
122 | #define ANNversionCmt "" |
123 | #define ANNcopyright "David M. Mount and Sunil Arya" |
124 | #define ANNlatestRev "Jan 27, 2010" |
125 | |
126 | //---------------------------------------------------------------------- |
127 | // ANNbool |
128 | // This is a simple boolean type. Although ANSI C++ is supposed |
129 | // to support the type bool, some compilers do not have it. |
130 | //---------------------------------------------------------------------- |
131 | |
132 | enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++) |
133 | |
134 | //---------------------------------------------------------------------- |
135 | // ANNcoord, ANNdist |
136 | // ANNcoord and ANNdist are the types used for representing |
137 | // point coordinates and distances. They can be modified by the |
138 | // user, with some care. It is assumed that they are both numeric |
139 | // types, and that ANNdist is generally of an equal or higher type |
140 | // from ANNcoord. A variable of type ANNdist should be large |
141 | // enough to store the sum of squared components of a variable |
142 | // of type ANNcoord for the number of dimensions needed in the |
143 | // application. For example, the following combinations are |
144 | // legal: |
145 | // |
146 | // ANNcoord ANNdist |
147 | // --------- ------------------------------- |
148 | // short short, int, long, float, double |
149 | // int int, long, float, double |
150 | // long long, float, double |
151 | // float float, double |
152 | // double double |
153 | // |
154 | // It is the user's responsibility to make sure that overflow does |
155 | // not occur in distance calculation. |
156 | //---------------------------------------------------------------------- |
157 | |
158 | typedef double ANNcoord; // coordinate data type |
159 | typedef double ANNdist; // distance data type |
160 | |
161 | //---------------------------------------------------------------------- |
162 | // ANNidx |
163 | // ANNidx is a point index. When the data structure is built, the |
164 | // points are given as an array. Nearest neighbor results are |
165 | // returned as an integer index into this array. To make it |
166 | // clearer when this is happening, we define the integer type |
167 | // ANNidx. Indexing starts from 0. |
168 | // |
169 | // For fixed-radius near neighbor searching, it is possible that |
170 | // there are not k nearest neighbors within the search radius. To |
171 | // indicate this, the algorithm returns ANN_NULL_IDX as its result. |
172 | // It should be distinguishable from any valid array index. |
173 | //---------------------------------------------------------------------- |
174 | |
175 | typedef int ANNidx; // point index |
176 | const ANNidx ANN_NULL_IDX = -1; // a NULL point index |
177 | |
178 | //---------------------------------------------------------------------- |
179 | // Infinite distance: |
180 | // The code assumes that there is an "infinite distance" which it |
181 | // uses to initialize distances before performing nearest neighbor |
182 | // searches. It should be as larger or larger than any legitimate |
183 | // nearest neighbor distance. |
184 | // |
185 | // On most systems, these should be found in the standard include |
186 | // file <limits.h> or possibly <float.h>. If you do not have these |
187 | // file, some suggested values are listed below, assuming 64-bit |
188 | // long, 32-bit int and 16-bit short. |
189 | // |
190 | // ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>) |
191 | // ------- ------------ ------------------------------------ |
192 | // double DBL_MAX 1.79769313486231570e+308 |
193 | // float FLT_MAX 3.40282346638528860e+38 |
194 | // long LONG_MAX 0x7fffffffffffffff |
195 | // int INT_MAX 0x7fffffff |
196 | // short SHRT_MAX 0x7fff |
197 | //---------------------------------------------------------------------- |
198 | |
199 | const ANNdist ANN_DIST_INF = ANN_DBL_MAX; |
200 | |
201 | //---------------------------------------------------------------------- |
202 | // Significant digits for tree dumps: |
203 | // When floating point coordinates are used, the routine that dumps |
204 | // a tree needs to know roughly how many significant digits there |
205 | // are in a ANNcoord, so it can output points to full precision. |
206 | // This is defined to be ANNcoordPrec. On most systems these |
207 | // values can be found in the standard include files <limits.h> or |
208 | // <float.h>. For integer types, the value is essentially ignored. |
209 | // |
210 | // ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>) |
211 | // -------- ------------ ------------------------------------ |
212 | // double DBL_DIG 15 |
213 | // float FLT_DIG 6 |
214 | // long doesn't matter 19 |
215 | // int doesn't matter 10 |
216 | // short doesn't matter 5 |
217 | //---------------------------------------------------------------------- |
218 | |
219 | #ifdef DBL_DIG // number of sig. bits in ANNcoord |
220 | const int ANNcoordPrec = DBL_DIG; |
221 | #else |
222 | const int ANNcoordPrec = 15; // default precision |
223 | #endif |
224 | |
225 | //---------------------------------------------------------------------- |
226 | // Self match? |
227 | // In some applications, the nearest neighbor of a point is not |
228 | // allowed to be the point itself. This occurs, for example, when |
229 | // computing all nearest neighbors in a set. By setting the |
230 | // parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor |
231 | // is the closest point whose distance from the query point is |
232 | // strictly positive. |
233 | //---------------------------------------------------------------------- |
234 | |
235 | const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue; |
236 | |
237 | //---------------------------------------------------------------------- |
238 | // Norms and metrics: |
239 | // ANN supports any Minkowski norm for defining distance. In |
240 | // particular, for any p >= 1, the L_p Minkowski norm defines the |
241 | // length of a d-vector (v0, v1, ..., v(d-1)) to be |
242 | // |
243 | // (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p), |
244 | // |
245 | // (where ^ denotes exponentiation, and |.| denotes absolute |
246 | // value). The distance between two points is defined to be the |
247 | // norm of the vector joining them. Some common distance metrics |
248 | // include |
249 | // |
250 | // Euclidean metric p = 2 |
251 | // Manhattan metric p = 1 |
252 | // Max metric p = infinity |
253 | // |
254 | // In the case of the max metric, the norm is computed by taking |
255 | // the maxima of the absolute values of the components. ANN is |
256 | // highly "coordinate-based" and does not support general distances |
257 | // functions (e.g. those obeying just the triangle inequality). It |
258 | // also does not support distance functions based on |
259 | // inner-products. |
260 | // |
261 | // For the purpose of computing nearest neighbors, it is not |
262 | // necessary to compute the final power (1/p). Thus the only |
263 | // component that is used by the program is |v(i)|^p. |
264 | // |
265 | // ANN parameterizes the distance computation through the following |
266 | // macros. (Macros are used rather than procedures for |
267 | // efficiency.) Recall that the distance between two points is |
268 | // given by the length of the vector joining them, and the length |
269 | // or norm of a vector v is given by formula: |
270 | // |
271 | // |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1))) |
272 | // |
273 | // where ROOT, POW are unary functions and # is an associative and |
274 | // commutative binary operator mapping the following types: |
275 | // |
276 | // ** POW: ANNcoord --> ANNdist |
277 | // ** #: ANNdist x ANNdist --> ANNdist |
278 | // ** ROOT: ANNdist (>0) --> double |
279 | // |
280 | // For early termination in distance calculation (partial distance |
281 | // calculation) we assume that POW and # together are monotonically |
282 | // increasing on sequences of arguments, meaning that for all |
283 | // v0..vk and y: |
284 | // |
285 | // POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y). |
286 | // |
287 | // Incremental Distance Calculation: |
288 | // The program uses an optimized method of computing distances for |
289 | // kd-trees and bd-trees, called incremental distance calculation. |
290 | // It is used when distances are to be updated when only a single |
291 | // coordinate of a point has been changed. In order to use this, |
292 | // we assume that there is an incremental update function DIFF(x,y) |
293 | // for #, such that if: |
294 | // |
295 | // s = x0 # ... # xi # ... # xk |
296 | // |
297 | // then if s' is equal to s but with xi replaced by y, that is, |
298 | // |
299 | // s' = x0 # ... # y # ... # xk |
300 | // |
301 | // then the length of s' can be computed by: |
302 | // |
303 | // |s'| = |s| # DIFF(xi,y). |
304 | // |
305 | // Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity |
306 | // norm we make use of the fact that in the program this function |
307 | // is only invoked when y > xi, and hence DIFF(xi,y)=y. |
308 | // |
309 | // Finally, for approximate nearest neighbor queries we assume |
310 | // that POW and ROOT are related such that |
311 | // |
312 | // v*ROOT(x) = ROOT(POW(v)*x) |
313 | // |
314 | // Here are the values for the various Minkowski norms: |
315 | // |
316 | // L_p: p even: p odd: |
317 | // ------------------------- ------------------------ |
318 | // POW(v) = v^p POW(v) = |v|^p |
319 | // ROOT(x) = x^(1/p) ROOT(x) = x^(1/p) |
320 | // # = + # = + |
321 | // DIFF(x,y) = y - x DIFF(x,y) = y - x |
322 | // |
323 | // L_inf: |
324 | // POW(v) = |v| |
325 | // ROOT(x) = x |
326 | // # = max |
327 | // DIFF(x,y) = y |
328 | // |
329 | // By default the Euclidean norm is assumed. To change the norm, |
330 | // uncomment the appropriate set of macros below. |
331 | //---------------------------------------------------------------------- |
332 | |
333 | //---------------------------------------------------------------------- |
334 | // Use the following for the Euclidean norm |
335 | //---------------------------------------------------------------------- |
336 | #define ANN_POW(v) ((v)*(v)) |
337 | #define ANN_ROOT(x) sqrt(x) |
338 | #define ANN_SUM(x,y) ((x) + (y)) |
339 | #define ANN_DIFF(x,y) ((y) - (x)) |
340 | |
341 | //---------------------------------------------------------------------- |
342 | // Use the following for the L_1 (Manhattan) norm |
343 | //---------------------------------------------------------------------- |
344 | // #define ANN_POW(v) fabs(v) |
345 | // #define ANN_ROOT(x) (x) |
346 | // #define ANN_SUM(x,y) ((x) + (y)) |
347 | // #define ANN_DIFF(x,y) ((y) - (x)) |
348 | |
349 | //---------------------------------------------------------------------- |
350 | // Use the following for a general L_p norm |
351 | //---------------------------------------------------------------------- |
352 | // #define ANN_POW(v) pow(fabs(v),p) |
353 | // #define ANN_ROOT(x) pow(fabs(x),1/p) |
354 | // #define ANN_SUM(x,y) ((x) + (y)) |
355 | // #define ANN_DIFF(x,y) ((y) - (x)) |
356 | |
357 | //---------------------------------------------------------------------- |
358 | // Use the following for the L_infinity (Max) norm |
359 | //---------------------------------------------------------------------- |
360 | // #define ANN_POW(v) fabs(v) |
361 | // #define ANN_ROOT(x) (x) |
362 | // #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y)) |
363 | // #define ANN_DIFF(x,y) (y) |
364 | |
365 | //---------------------------------------------------------------------- |
366 | // Array types |
367 | // The following array types are of basic interest. A point is |
368 | // just a dimensionless array of coordinates, a point array is a |
369 | // dimensionless array of points. A distance array is a |
370 | // dimensionless array of distances and an index array is a |
371 | // dimensionless array of point indices. The latter two are used |
372 | // when returning the results of k-nearest neighbor queries. |
373 | //---------------------------------------------------------------------- |
374 | |
375 | typedef ANNcoord* ANNpoint; // a point |
376 | typedef ANNpoint* ANNpointArray; // an array of points |
377 | typedef ANNdist* ANNdistArray; // an array of distances |
378 | typedef ANNidx* ANNidxArray; // an array of point indices |
379 | |
380 | //---------------------------------------------------------------------- |
381 | // Basic point and array utilities: |
382 | // The following procedures are useful supplements to ANN's nearest |
383 | // neighbor capabilities. |
384 | // |
385 | // annDist(): |
386 | // Computes the (squared) distance between a pair of points. |
387 | // Note that this routine is not used internally by ANN for |
388 | // computing distance calculations. For reasons of efficiency |
389 | // this is done using incremental distance calculation. Thus, |
390 | // this routine cannot be modified as a method of changing the |
391 | // metric. |
392 | // |
393 | // Because points (somewhat like strings in C) are stored as |
394 | // pointers. Consequently, creating and destroying copies of |
395 | // points may require storage allocation. These procedures do |
396 | // this. |
397 | // |
398 | // annAllocPt() and annDeallocPt(): |
399 | // Allocate a deallocate storage for a single point, and |
400 | // return a pointer to it. The argument to AllocPt() is |
401 | // used to initialize all components. |
402 | // |
403 | // annAllocPts() and annDeallocPts(): |
404 | // Allocate and deallocate an array of points as well a |
405 | // place to store their coordinates, and initializes the |
406 | // points to point to their respective coordinates. It |
407 | // allocates point storage in a contiguous block large |
408 | // enough to store all the points. It performs no |
409 | // initialization. |
410 | // |
411 | // annCopyPt(): |
412 | // Creates a copy of a given point, allocating space for |
413 | // the new point. It returns a pointer to the newly |
414 | // allocated copy. |
415 | //---------------------------------------------------------------------- |
416 | |
417 | DLL_API ANNdist annDist( |
418 | int dim, // dimension of space |
419 | ANNpoint p, // points |
420 | ANNpoint q); |
421 | |
422 | DLL_API ANNpoint annAllocPt( |
423 | int dim, // dimension |
424 | ANNcoord c = 0); // coordinate value (all equal) |
425 | |
426 | DLL_API ANNpointArray annAllocPts( |
427 | int n, // number of points |
428 | int dim); // dimension |
429 | |
430 | DLL_API void annDeallocPt( |
431 | ANNpoint &p); // deallocate 1 point |
432 | |
433 | DLL_API void annDeallocPts( |
434 | ANNpointArray &pa); // point array |
435 | |
436 | DLL_API ANNpoint annCopyPt( |
437 | int dim, // dimension |
438 | ANNpoint source); // point to copy |
439 | |
440 | //---------------------------------------------------------------------- |
441 | //Overall structure: ANN supports a number of different data structures |
442 | //for approximate and exact nearest neighbor searching. These are: |
443 | // |
444 | // ANNbruteForce A simple brute-force search structure. |
445 | // ANNkd_tree A kd-tree tree search structure. ANNbd_tree |
446 | // A bd-tree tree search structure (a kd-tree with shrink |
447 | // capabilities). |
448 | // |
449 | // At a minimum, each of these data structures support k-nearest |
450 | // neighbor queries. The nearest neighbor query, annkSearch, |
451 | // returns an integer identifier and the distance to the nearest |
452 | // neighbor(s) and annRangeSearch returns the nearest points that |
453 | // lie within a given query ball. |
454 | // |
455 | // Each structure is built by invoking the appropriate constructor |
456 | // and passing it (at a minimum) the array of points, the total |
457 | // number of points and the dimension of the space. Each structure |
458 | // is also assumed to support a destructor and member functions |
459 | // that return basic information about the point set. |
460 | // |
461 | // Note that the array of points is not copied by the data |
462 | // structure (for reasons of space efficiency), and it is assumed |
463 | // to be constant throughout the lifetime of the search structure. |
464 | // |
465 | // The search algorithm, annkSearch, is given the query point (q), |
466 | // and the desired number of nearest neighbors to report (k), and |
467 | // the error bound (eps) (whose default value is 0, implying exact |
468 | // nearest neighbors). It returns two arrays which are assumed to |
469 | // contain at least k elements: one (nn_idx) contains the indices |
470 | // (within the point array) of the nearest neighbors and the other |
471 | // (dd) contains the squared distances to these nearest neighbors. |
472 | // |
473 | // The search algorithm, annkFRSearch, is a fixed-radius kNN |
474 | // search. In addition to a query point, it is given a (squared) |
475 | // radius bound. (This is done for consistency, because the search |
476 | // returns distances as squared quantities.) It does two things. |
477 | // First, it computes the k nearest neighbors within the radius |
478 | // bound, and second, it returns the total number of points lying |
479 | // within the radius bound. It is permitted to set k = 0, in which |
480 | // case it effectively answers a range counting query. If the |
481 | // error bound epsilon is positive, then the search is approximate |
482 | // in the sense that it is free to ignore any point that lies |
483 | // outside a ball of radius r/(1+epsilon), where r is the given |
484 | // (unsquared) radius bound. |
485 | // |
486 | // The generic object from which all the search structures are |
487 | // dervied is given below. It is a virtual object, and is useless |
488 | // by itself. |
489 | //---------------------------------------------------------------------- |
490 | |
491 | class DLL_API ANNpointSet { |
492 | public: |
493 | virtual ~ANNpointSet() {} // virtual distructor |
494 | |
495 | virtual void annkSearch( // approx k near neighbor search |
496 | ANNpoint q, // query point |
497 | int k, // number of near neighbors to return |
498 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
499 | ANNdistArray dd, // dist to near neighbors (modified) |
500 | double eps=0.0 // error bound |
501 | ) = 0; // pure virtual (defined elsewhere) |
502 | |
503 | virtual int annkFRSearch( // approx fixed-radius kNN search |
504 | ANNpoint q, // query point |
505 | ANNdist sqRad, // squared radius |
506 | int k = 0, // number of near neighbors to return |
507 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
508 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
509 | double eps=0.0 // error bound |
510 | ) = 0; // pure virtual (defined elsewhere) |
511 | |
512 | virtual int theDim() = 0; // return dimension of space |
513 | virtual int nPoints() = 0; // return number of points |
514 | // return pointer to points |
515 | virtual ANNpointArray thePoints() = 0; |
516 | }; |
517 | |
518 | //---------------------------------------------------------------------- |
519 | // Brute-force nearest neighbor search: |
520 | // The brute-force search structure is very simple but inefficient. |
521 | // It has been provided primarily for the sake of comparison with |
522 | // and validation of the more complex search structures. |
523 | // |
524 | // Query processing is the same as described above, but the value |
525 | // of epsilon is ignored, since all distance calculations are |
526 | // performed exactly. |
527 | // |
528 | // WARNING: This data structure is very slow, and should not be |
529 | // used unless the number of points is very small. |
530 | // |
531 | // Internal information: |
532 | // --------------------- |
533 | // This data structure bascially consists of the array of points |
534 | // (each a pointer to an array of coordinates). The search is |
535 | // performed by a simple linear scan of all the points. |
536 | //---------------------------------------------------------------------- |
537 | |
538 | class DLL_API ANNbruteForce: public ANNpointSet { |
539 | int dim; // dimension |
540 | int n_pts; // number of points |
541 | ANNpointArray pts; // point array |
542 | public: |
543 | ANNbruteForce( // constructor from point array |
544 | ANNpointArray pa, // point array |
545 | int n, // number of points |
546 | int dd); // dimension |
547 | |
548 | ~ANNbruteForce(); // destructor |
549 | |
550 | void annkSearch( // approx k near neighbor search |
551 | ANNpoint q, // query point |
552 | int k, // number of near neighbors to return |
553 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
554 | ANNdistArray dd, // dist to near neighbors (modified) |
555 | double eps=0.0); // error bound |
556 | |
557 | int annkFRSearch( // approx fixed-radius kNN search |
558 | ANNpoint q, // query point |
559 | ANNdist sqRad, // squared radius |
560 | int k = 0, // number of near neighbors to return |
561 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
562 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
563 | double eps=0.0); // error bound |
564 | |
565 | int theDim() // return dimension of space |
566 | { return dim; } |
567 | |
568 | int nPoints() // return number of points |
569 | { return n_pts; } |
570 | |
571 | ANNpointArray thePoints() // return pointer to points |
572 | { return pts; } |
573 | }; |
574 | |
575 | //---------------------------------------------------------------------- |
576 | // kd- and bd-tree splitting and shrinking rules |
577 | // kd-trees supports a collection of different splitting rules. |
578 | // In addition to the standard kd-tree splitting rule proposed |
579 | // by Friedman, Bentley, and Finkel, we have introduced a |
580 | // number of other splitting rules, which seem to perform |
581 | // as well or better (for the distributions we have tested). |
582 | // |
583 | // The splitting methods given below allow the user to tailor |
584 | // the data structure to the particular data set. They are |
585 | // are described in greater details in the kd_split.cc source |
586 | // file. The method ANN_KD_SUGGEST is the method chosen (rather |
587 | // subjectively) by the implementors as the one giving the |
588 | // fastest performance, and is the default splitting method. |
589 | // |
590 | // As with splitting rules, there are a number of different |
591 | // shrinking rules. The shrinking rule ANN_BD_NONE does no |
592 | // shrinking (and hence produces a kd-tree tree). The rule |
593 | // ANN_BD_SUGGEST uses the implementors favorite rule. |
594 | //---------------------------------------------------------------------- |
595 | |
596 | enum ANNsplitRule { |
597 | ANN_KD_STD = 0, // the optimized kd-splitting rule |
598 | ANN_KD_MIDPT = 1, // midpoint split |
599 | ANN_KD_FAIR = 2, // fair split |
600 | ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method |
601 | ANN_KD_SL_FAIR = 4, // sliding fair split method |
602 | ANN_KD_SUGGEST = 5}; // the authors' suggestion for best |
603 | const int ANN_N_SPLIT_RULES = 6; // number of split rules |
604 | |
605 | enum ANNshrinkRule { |
606 | ANN_BD_NONE = 0, // no shrinking at all (just kd-tree) |
607 | ANN_BD_SIMPLE = 1, // simple splitting |
608 | ANN_BD_CENTROID = 2, // centroid splitting |
609 | ANN_BD_SUGGEST = 3}; // the authors' suggested choice |
610 | const int ANN_N_SHRINK_RULES = 4; // number of shrink rules |
611 | |
612 | //---------------------------------------------------------------------- |
613 | // kd-tree: |
614 | // The main search data structure supported by ANN is a kd-tree. |
615 | // The main constructor is given a set of points and a choice of |
616 | // splitting method to use in building the tree. |
617 | // |
618 | // Construction: |
619 | // ------------- |
620 | // The constructor is given the point array, number of points, |
621 | // dimension, bucket size (default = 1), and the splitting rule |
622 | // (default = ANN_KD_SUGGEST). The point array is not copied, and |
623 | // is assumed to be kept constant throughout the lifetime of the |
624 | // search structure. There is also a "load" constructor that |
625 | // builds a tree from a file description that was created by the |
626 | // Dump operation. |
627 | // |
628 | // Search: |
629 | // ------- |
630 | // There are two search methods: |
631 | // |
632 | // Standard search (annkSearch()): |
633 | // Searches nodes in tree-traversal order, always visiting |
634 | // the closer child first. |
635 | // Priority search (annkPriSearch()): |
636 | // Searches nodes in order of increasing distance of the |
637 | // associated cell from the query point. For many |
638 | // distributions the standard search seems to work just |
639 | // fine, but priority search is safer for worst-case |
640 | // performance. |
641 | // |
642 | // Printing: |
643 | // --------- |
644 | // There are two methods provided for printing the tree. Print() |
645 | // is used to produce a "human-readable" display of the tree, with |
646 | // indenation, which is handy for debugging. Dump() produces a |
647 | // format that is suitable reading by another program. There is a |
648 | // "load" constructor, which constructs a tree which is assumed to |
649 | // have been saved by the Dump() procedure. |
650 | // |
651 | // Performance and Structure Statistics: |
652 | // ------------------------------------- |
653 | // The procedure getStats() collects statistics information on the |
654 | // tree (its size, height, etc.) See ANNperf.h for information on |
655 | // the stats structure it returns. |
656 | // |
657 | // Internal information: |
658 | // --------------------- |
659 | // The data structure consists of three major chunks of storage. |
660 | // The first (implicit) storage are the points themselves (pts), |
661 | // which have been provided by the users as an argument to the |
662 | // constructor, or are allocated dynamically if the tree is built |
663 | // using the load constructor). These should not be changed during |
664 | // the lifetime of the search structure. It is the user's |
665 | // responsibility to delete these after the tree is destroyed. |
666 | // |
667 | // The second is the tree itself (which is dynamically allocated in |
668 | // the constructor) and is given as a pointer to its root node |
669 | // (root). These nodes are automatically deallocated when the tree |
670 | // is deleted. See the file src/kd_tree.h for further information |
671 | // on the structure of the tree nodes. |
672 | // |
673 | // Each leaf of the tree does not contain a pointer directly to a |
674 | // point, but rather contains a pointer to a "bucket", which is an |
675 | // array consisting of point indices. The third major chunk of |
676 | // storage is an array (pidx), which is a large array in which all |
677 | // these bucket subarrays reside. (The reason for storing them |
678 | // separately is the buckets are typically small, but of varying |
679 | // sizes. This was done to avoid fragmentation.) This array is |
680 | // also deallocated when the tree is deleted. |
681 | // |
682 | // In addition to this, the tree consists of a number of other |
683 | // pieces of information which are used in searching and for |
684 | // subsequent tree operations. These consist of the following: |
685 | // |
686 | // dim Dimension of space |
687 | // n_pts Number of points currently in the tree |
688 | // n_max Maximum number of points that are allowed |
689 | // in the tree |
690 | // bkt_size Maximum bucket size (no. of points per leaf) |
691 | // bnd_box_lo Bounding box low point |
692 | // bnd_box_hi Bounding box high point |
693 | // splitRule Splitting method used |
694 | // |
695 | //---------------------------------------------------------------------- |
696 | |
697 | //---------------------------------------------------------------------- |
698 | // Some types and objects used by kd-tree functions |
699 | // See src/kd_tree.h and src/kd_tree.cpp for definitions |
700 | //---------------------------------------------------------------------- |
701 | class ANNkdStats; // stats on kd-tree |
702 | class ANNkd_node; // generic node in a kd-tree |
703 | typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node |
704 | |
705 | class DLL_API ANNkd_tree: public ANNpointSet { |
706 | protected: |
707 | int dim; // dimension of space |
708 | int n_pts; // number of points in tree |
709 | int bkt_size; // bucket size |
710 | ANNpointArray pts; // the points |
711 | ANNidxArray pidx; // point indices (to pts array) |
712 | ANNkd_ptr root; // root of kd-tree |
713 | ANNpoint bnd_box_lo; // bounding box low point |
714 | ANNpoint bnd_box_hi; // bounding box high point |
715 | |
716 | void SkeletonTree( // construct skeleton tree |
717 | int n, // number of points |
718 | int dd, // dimension |
719 | int bs, // bucket size |
720 | ANNpointArray pa = NULL, // point array (optional) |
721 | ANNidxArray pi = NULL); // point indices (optional) |
722 | |
723 | public: |
724 | ANNkd_tree( // build skeleton tree |
725 | int n = 0, // number of points |
726 | int dd = 0, // dimension |
727 | int bs = 1); // bucket size |
728 | |
729 | ANNkd_tree( // build from point array |
730 | ANNpointArray pa, // point array |
731 | int n, // number of points |
732 | int dd, // dimension |
733 | int bs = 1, // bucket size |
734 | ANNsplitRule split = ANN_KD_SUGGEST); // splitting method |
735 | |
736 | ANNkd_tree( // build from dump file |
737 | std::istream& in); // input stream for dump file |
738 | |
739 | ~ANNkd_tree(); // tree destructor |
740 | |
741 | void annkSearch( // approx k near neighbor search |
742 | ANNpoint q, // query point |
743 | int k, // number of near neighbors to return |
744 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
745 | ANNdistArray dd, // dist to near neighbors (modified) |
746 | double eps=0.0); // error bound |
747 | |
748 | void annkPriSearch( // priority k near neighbor search |
749 | ANNpoint q, // query point |
750 | int k, // number of near neighbors to return |
751 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
752 | ANNdistArray dd, // dist to near neighbors (modified) |
753 | double eps=0.0); // error bound |
754 | |
755 | int annkFRSearch( // approx fixed-radius kNN search |
756 | ANNpoint q, // the query point |
757 | ANNdist sqRad, // squared radius of query ball |
758 | int k = 0, // number of neighbors to return |
759 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
760 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
761 | double eps=0.0); // error bound |
762 | |
763 | int theDim() // return dimension of space |
764 | { return dim; } |
765 | |
766 | int nPoints() // return number of points |
767 | { return n_pts; } |
768 | |
769 | ANNpointArray thePoints() // return pointer to points |
770 | { return pts; } |
771 | |
772 | virtual void Print( // print the tree (for debugging) |
773 | ANNbool with_pts, // print points as well? |
774 | std::ostream& out); // output stream |
775 | |
776 | virtual void Dump( // dump entire tree |
777 | ANNbool with_pts, // print points as well? |
778 | std::ostream& out); // output stream |
779 | |
780 | virtual void getStats( // compute tree statistics |
781 | ANNkdStats& st); // the statistics (modified) |
782 | }; |
783 | |
784 | //---------------------------------------------------------------------- |
785 | // Box decomposition tree (bd-tree) |
786 | // The bd-tree is inherited from a kd-tree. The main difference |
787 | // in the bd-tree and the kd-tree is a new type of internal node |
788 | // called a shrinking node (in the kd-tree there is only one type |
789 | // of internal node, a splitting node). The shrinking node |
790 | // makes it possible to generate balanced trees in which the |
791 | // cells have bounded aspect ratio, by allowing the decomposition |
792 | // to zoom in on regions of dense point concentration. Although |
793 | // this is a nice idea in theory, few point distributions are so |
794 | // densely clustered that this is really needed. |
795 | //---------------------------------------------------------------------- |
796 | |
797 | class DLL_API ANNbd_tree: public ANNkd_tree { |
798 | public: |
799 | ANNbd_tree( // build skeleton tree |
800 | int n, // number of points |
801 | int dd, // dimension |
802 | int bs = 1) // bucket size |
803 | : ANNkd_tree(n, dd, bs) {} // build base kd-tree |
804 | |
805 | ANNbd_tree( // build from point array |
806 | ANNpointArray pa, // point array |
807 | int n, // number of points |
808 | int dd, // dimension |
809 | int bs = 1, // bucket size |
810 | ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule |
811 | ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule |
812 | |
813 | ANNbd_tree( // build from dump file |
814 | std::istream& in); // input stream for dump file |
815 | }; |
816 | |
817 | //---------------------------------------------------------------------- |
818 | // Other functions |
819 | // annMaxPtsVisit Sets a limit on the maximum number of points |
820 | // to visit in the search. |
821 | // annClose Can be called when all use of ANN is finished. |
822 | // It clears up a minor memory leak. |
823 | //---------------------------------------------------------------------- |
824 | |
825 | DLL_API void annMaxPtsVisit( // max. pts to visit in search |
826 | int maxPts); // the limit |
827 | |
828 | DLL_API void annClose(); // called to end use of ANN |
829 | |
830 | #endif |
831 | |