1//----------------------------------------------------------------------
2// File: ANN.h
3// Programmer: Sunil Arya and David Mount
4// Description: Basic include file for approximate nearest
5// neighbor searching.
6// Last modified: 01/27/10 (Version 1.1.2)
7//----------------------------------------------------------------------
8// Copyright (c) 1997-2010 University of Maryland and Sunil Arya and
9// David Mount. All Rights Reserved.
10//
11// This software and related documentation is part of the Approximate
12// Nearest Neighbor Library (ANN). This software is provided under
13// the provisions of the Lesser GNU Public License (LGPL). See the
14// file ../ReadMe.txt for further information.
15//
16// The University of Maryland (U.M.) and the authors make no
17// representations about the suitability or fitness of this software for
18// any purpose. It is provided "as is" without express or implied
19// warranty.
20//----------------------------------------------------------------------
21// History:
22// Revision 0.1 03/04/98
23// Initial release
24// Revision 1.0 04/01/05
25// Added copyright and revision information
26// Added ANNcoordPrec for coordinate precision.
27// Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
28// Cleaned up C++ structure for modern compilers
29// Revision 1.1 05/03/05
30// Added fixed-radius k-NN searching
31// Revision 1.1.2 01/27/10
32// Fixed minor compilation bugs for new versions of gcc
33//----------------------------------------------------------------------
34
35//----------------------------------------------------------------------
36// ANN - approximate nearest neighbor searching
37// ANN is a library for approximate nearest neighbor searching,
38// based on the use of standard and priority search in kd-trees
39// and balanced box-decomposition (bbd) trees. Here are some
40// references to the main algorithmic techniques used here:
41//
42// kd-trees:
43// Friedman, Bentley, and Finkel, ``An algorithm for finding
44// best matches in logarithmic expected time,'' ACM
45// Transactions on Mathematical Software, 3(3):209-226, 1977.
46//
47// Priority search in kd-trees:
48// Arya and Mount, ``Algorithms for fast vector quantization,''
49// Proc. of DCC '93: Data Compression Conference, eds. J. A.
50// Storer and M. Cohn, IEEE Press, 1993, 381-390.
51//
52// Approximate nearest neighbor search and bbd-trees:
53// Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
54// algorithm for approximate nearest neighbor searching,''
55// 5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
56// 1994, 573-582.
57//----------------------------------------------------------------------
58
59#ifndef ANN_H
60#define ANN_H
61
62#ifdef WIN32
63 //----------------------------------------------------------------------
64 // For Microsoft Visual C++, externally accessible symbols must be
65 // explicitly indicated with DLL_API, which is somewhat like "extern."
66 //
67 // The following ifdef block is the standard way of creating macros
68 // which make exporting from a DLL simpler. All files within this DLL
69 // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
70 // command line. In contrast, projects that use (or import) the DLL
71 // objects do not define the DLL_EXPORTS symbol. This way any other
72 // project whose source files include this file see DLL_API functions as
73 // being imported from a DLL, wheras this DLL sees symbols defined with
74 // this macro as being exported.
75 //----------------------------------------------------------------------
76 #ifdef DLL_EXPORTS
77 #define DLL_API __declspec(dllexport)
78 #else
79 #define DLL_API __declspec(dllimport)
80 #endif
81 //----------------------------------------------------------------------
82 // DLL_API is ignored for all other systems
83 //----------------------------------------------------------------------
84#else
85 #define DLL_API
86#endif
87
88//----------------------------------------------------------------------
89// basic includes
90//----------------------------------------------------------------------
91
92#include <cstdlib> // standard lib includes
93#include <cmath> // math includes
94#include <iostream> // I/O streams
95#include <cstring> // C-style strings
96
97//----------------------------------------------------------------------
98// Limits
99// There are a number of places where we use the maximum double value as
100// default initializers (and others may be used, depending on the
101// data/distance representation). These can usually be found in limits.h
102// (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
103//
104// Not all systems have these files. If you are using such a system,
105// you should set the preprocessor symbol ANN_NO_LIMITS_H when
106// compiling, and modify the statements below to generate the
107// appropriate value. For practical purposes, this does not need to be
108// the maximum double value. It is sufficient that it be at least as
109// large than the maximum squared distance between between any two
110// points.
111//----------------------------------------------------------------------
112#ifdef ANN_NO_LIMITS_H // limits.h unavailable
113 #include <cvalues> // replacement for limits.h
114 const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
115#else
116 #include <climits>
117 #include <cfloat>
118 const double ANN_DBL_MAX = DBL_MAX;
119#endif
120
121#define ANNversion "1.1.2" // ANN version and information
122#define ANNversionCmt ""
123#define ANNcopyright "David M. Mount and Sunil Arya"
124#define ANNlatestRev "Jan 27, 2010"
125
126//----------------------------------------------------------------------
127// ANNbool
128// This is a simple boolean type. Although ANSI C++ is supposed
129// to support the type bool, some compilers do not have it.
130//----------------------------------------------------------------------
131
132enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
133
134//----------------------------------------------------------------------
135// ANNcoord, ANNdist
136// ANNcoord and ANNdist are the types used for representing
137// point coordinates and distances. They can be modified by the
138// user, with some care. It is assumed that they are both numeric
139// types, and that ANNdist is generally of an equal or higher type
140// from ANNcoord. A variable of type ANNdist should be large
141// enough to store the sum of squared components of a variable
142// of type ANNcoord for the number of dimensions needed in the
143// application. For example, the following combinations are
144// legal:
145//
146// ANNcoord ANNdist
147// --------- -------------------------------
148// short short, int, long, float, double
149// int int, long, float, double
150// long long, float, double
151// float float, double
152// double double
153//
154// It is the user's responsibility to make sure that overflow does
155// not occur in distance calculation.
156//----------------------------------------------------------------------
157
158typedef double ANNcoord; // coordinate data type
159typedef double ANNdist; // distance data type
160
161//----------------------------------------------------------------------
162// ANNidx
163// ANNidx is a point index. When the data structure is built, the
164// points are given as an array. Nearest neighbor results are
165// returned as an integer index into this array. To make it
166// clearer when this is happening, we define the integer type
167// ANNidx. Indexing starts from 0.
168//
169// For fixed-radius near neighbor searching, it is possible that
170// there are not k nearest neighbors within the search radius. To
171// indicate this, the algorithm returns ANN_NULL_IDX as its result.
172// It should be distinguishable from any valid array index.
173//----------------------------------------------------------------------
174
175typedef int ANNidx; // point index
176const ANNidx ANN_NULL_IDX = -1; // a NULL point index
177
178//----------------------------------------------------------------------
179// Infinite distance:
180// The code assumes that there is an "infinite distance" which it
181// uses to initialize distances before performing nearest neighbor
182// searches. It should be as larger or larger than any legitimate
183// nearest neighbor distance.
184//
185// On most systems, these should be found in the standard include
186// file <limits.h> or possibly <float.h>. If you do not have these
187// file, some suggested values are listed below, assuming 64-bit
188// long, 32-bit int and 16-bit short.
189//
190// ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>)
191// ------- ------------ ------------------------------------
192// double DBL_MAX 1.79769313486231570e+308
193// float FLT_MAX 3.40282346638528860e+38
194// long LONG_MAX 0x7fffffffffffffff
195// int INT_MAX 0x7fffffff
196// short SHRT_MAX 0x7fff
197//----------------------------------------------------------------------
198
199const ANNdist ANN_DIST_INF = ANN_DBL_MAX;
200
201//----------------------------------------------------------------------
202// Significant digits for tree dumps:
203// When floating point coordinates are used, the routine that dumps
204// a tree needs to know roughly how many significant digits there
205// are in a ANNcoord, so it can output points to full precision.
206// This is defined to be ANNcoordPrec. On most systems these
207// values can be found in the standard include files <limits.h> or
208// <float.h>. For integer types, the value is essentially ignored.
209//
210// ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>)
211// -------- ------------ ------------------------------------
212// double DBL_DIG 15
213// float FLT_DIG 6
214// long doesn't matter 19
215// int doesn't matter 10
216// short doesn't matter 5
217//----------------------------------------------------------------------
218
219#ifdef DBL_DIG // number of sig. bits in ANNcoord
220 const int ANNcoordPrec = DBL_DIG;
221#else
222 const int ANNcoordPrec = 15; // default precision
223#endif
224
225//----------------------------------------------------------------------
226// Self match?
227// In some applications, the nearest neighbor of a point is not
228// allowed to be the point itself. This occurs, for example, when
229// computing all nearest neighbors in a set. By setting the
230// parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
231// is the closest point whose distance from the query point is
232// strictly positive.
233//----------------------------------------------------------------------
234
235const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue;
236
237//----------------------------------------------------------------------
238// Norms and metrics:
239// ANN supports any Minkowski norm for defining distance. In
240// particular, for any p >= 1, the L_p Minkowski norm defines the
241// length of a d-vector (v0, v1, ..., v(d-1)) to be
242//
243// (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
244//
245// (where ^ denotes exponentiation, and |.| denotes absolute
246// value). The distance between two points is defined to be the
247// norm of the vector joining them. Some common distance metrics
248// include
249//
250// Euclidean metric p = 2
251// Manhattan metric p = 1
252// Max metric p = infinity
253//
254// In the case of the max metric, the norm is computed by taking
255// the maxima of the absolute values of the components. ANN is
256// highly "coordinate-based" and does not support general distances
257// functions (e.g. those obeying just the triangle inequality). It
258// also does not support distance functions based on
259// inner-products.
260//
261// For the purpose of computing nearest neighbors, it is not
262// necessary to compute the final power (1/p). Thus the only
263// component that is used by the program is |v(i)|^p.
264//
265// ANN parameterizes the distance computation through the following
266// macros. (Macros are used rather than procedures for
267// efficiency.) Recall that the distance between two points is
268// given by the length of the vector joining them, and the length
269// or norm of a vector v is given by formula:
270//
271// |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
272//
273// where ROOT, POW are unary functions and # is an associative and
274// commutative binary operator mapping the following types:
275//
276// ** POW: ANNcoord --> ANNdist
277// ** #: ANNdist x ANNdist --> ANNdist
278// ** ROOT: ANNdist (>0) --> double
279//
280// For early termination in distance calculation (partial distance
281// calculation) we assume that POW and # together are monotonically
282// increasing on sequences of arguments, meaning that for all
283// v0..vk and y:
284//
285// POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
286//
287// Incremental Distance Calculation:
288// The program uses an optimized method of computing distances for
289// kd-trees and bd-trees, called incremental distance calculation.
290// It is used when distances are to be updated when only a single
291// coordinate of a point has been changed. In order to use this,
292// we assume that there is an incremental update function DIFF(x,y)
293// for #, such that if:
294//
295// s = x0 # ... # xi # ... # xk
296//
297// then if s' is equal to s but with xi replaced by y, that is,
298//
299// s' = x0 # ... # y # ... # xk
300//
301// then the length of s' can be computed by:
302//
303// |s'| = |s| # DIFF(xi,y).
304//
305// Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity
306// norm we make use of the fact that in the program this function
307// is only invoked when y > xi, and hence DIFF(xi,y)=y.
308//
309// Finally, for approximate nearest neighbor queries we assume
310// that POW and ROOT are related such that
311//
312// v*ROOT(x) = ROOT(POW(v)*x)
313//
314// Here are the values for the various Minkowski norms:
315//
316// L_p: p even: p odd:
317// ------------------------- ------------------------
318// POW(v) = v^p POW(v) = |v|^p
319// ROOT(x) = x^(1/p) ROOT(x) = x^(1/p)
320// # = + # = +
321// DIFF(x,y) = y - x DIFF(x,y) = y - x
322//
323// L_inf:
324// POW(v) = |v|
325// ROOT(x) = x
326// # = max
327// DIFF(x,y) = y
328//
329// By default the Euclidean norm is assumed. To change the norm,
330// uncomment the appropriate set of macros below.
331//----------------------------------------------------------------------
332
333//----------------------------------------------------------------------
334// Use the following for the Euclidean norm
335//----------------------------------------------------------------------
336#define ANN_POW(v) ((v)*(v))
337#define ANN_ROOT(x) sqrt(x)
338#define ANN_SUM(x,y) ((x) + (y))
339#define ANN_DIFF(x,y) ((y) - (x))
340
341//----------------------------------------------------------------------
342// Use the following for the L_1 (Manhattan) norm
343//----------------------------------------------------------------------
344// #define ANN_POW(v) fabs(v)
345// #define ANN_ROOT(x) (x)
346// #define ANN_SUM(x,y) ((x) + (y))
347// #define ANN_DIFF(x,y) ((y) - (x))
348
349//----------------------------------------------------------------------
350// Use the following for a general L_p norm
351//----------------------------------------------------------------------
352// #define ANN_POW(v) pow(fabs(v),p)
353// #define ANN_ROOT(x) pow(fabs(x),1/p)
354// #define ANN_SUM(x,y) ((x) + (y))
355// #define ANN_DIFF(x,y) ((y) - (x))
356
357//----------------------------------------------------------------------
358// Use the following for the L_infinity (Max) norm
359//----------------------------------------------------------------------
360// #define ANN_POW(v) fabs(v)
361// #define ANN_ROOT(x) (x)
362// #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y))
363// #define ANN_DIFF(x,y) (y)
364
365//----------------------------------------------------------------------
366// Array types
367// The following array types are of basic interest. A point is
368// just a dimensionless array of coordinates, a point array is a
369// dimensionless array of points. A distance array is a
370// dimensionless array of distances and an index array is a
371// dimensionless array of point indices. The latter two are used
372// when returning the results of k-nearest neighbor queries.
373//----------------------------------------------------------------------
374
375typedef ANNcoord* ANNpoint; // a point
376typedef ANNpoint* ANNpointArray; // an array of points
377typedef ANNdist* ANNdistArray; // an array of distances
378typedef ANNidx* ANNidxArray; // an array of point indices
379
380//----------------------------------------------------------------------
381// Basic point and array utilities:
382// The following procedures are useful supplements to ANN's nearest
383// neighbor capabilities.
384//
385// annDist():
386// Computes the (squared) distance between a pair of points.
387// Note that this routine is not used internally by ANN for
388// computing distance calculations. For reasons of efficiency
389// this is done using incremental distance calculation. Thus,
390// this routine cannot be modified as a method of changing the
391// metric.
392//
393// Because points (somewhat like strings in C) are stored as
394// pointers. Consequently, creating and destroying copies of
395// points may require storage allocation. These procedures do
396// this.
397//
398// annAllocPt() and annDeallocPt():
399// Allocate a deallocate storage for a single point, and
400// return a pointer to it. The argument to AllocPt() is
401// used to initialize all components.
402//
403// annAllocPts() and annDeallocPts():
404// Allocate and deallocate an array of points as well a
405// place to store their coordinates, and initializes the
406// points to point to their respective coordinates. It
407// allocates point storage in a contiguous block large
408// enough to store all the points. It performs no
409// initialization.
410//
411// annCopyPt():
412// Creates a copy of a given point, allocating space for
413// the new point. It returns a pointer to the newly
414// allocated copy.
415//----------------------------------------------------------------------
416
417DLL_API ANNdist annDist(
418 int dim, // dimension of space
419 ANNpoint p, // points
420 ANNpoint q);
421
422DLL_API ANNpoint annAllocPt(
423 int dim, // dimension
424 ANNcoord c = 0); // coordinate value (all equal)
425
426DLL_API ANNpointArray annAllocPts(
427 int n, // number of points
428 int dim); // dimension
429
430DLL_API void annDeallocPt(
431 ANNpoint &p); // deallocate 1 point
432
433DLL_API void annDeallocPts(
434 ANNpointArray &pa); // point array
435
436DLL_API ANNpoint annCopyPt(
437 int dim, // dimension
438 ANNpoint source); // point to copy
439
440//----------------------------------------------------------------------
441//Overall structure: ANN supports a number of different data structures
442//for approximate and exact nearest neighbor searching. These are:
443//
444// ANNbruteForce A simple brute-force search structure.
445// ANNkd_tree A kd-tree tree search structure. ANNbd_tree
446// A bd-tree tree search structure (a kd-tree with shrink
447// capabilities).
448//
449// At a minimum, each of these data structures support k-nearest
450// neighbor queries. The nearest neighbor query, annkSearch,
451// returns an integer identifier and the distance to the nearest
452// neighbor(s) and annRangeSearch returns the nearest points that
453// lie within a given query ball.
454//
455// Each structure is built by invoking the appropriate constructor
456// and passing it (at a minimum) the array of points, the total
457// number of points and the dimension of the space. Each structure
458// is also assumed to support a destructor and member functions
459// that return basic information about the point set.
460//
461// Note that the array of points is not copied by the data
462// structure (for reasons of space efficiency), and it is assumed
463// to be constant throughout the lifetime of the search structure.
464//
465// The search algorithm, annkSearch, is given the query point (q),
466// and the desired number of nearest neighbors to report (k), and
467// the error bound (eps) (whose default value is 0, implying exact
468// nearest neighbors). It returns two arrays which are assumed to
469// contain at least k elements: one (nn_idx) contains the indices
470// (within the point array) of the nearest neighbors and the other
471// (dd) contains the squared distances to these nearest neighbors.
472//
473// The search algorithm, annkFRSearch, is a fixed-radius kNN
474// search. In addition to a query point, it is given a (squared)
475// radius bound. (This is done for consistency, because the search
476// returns distances as squared quantities.) It does two things.
477// First, it computes the k nearest neighbors within the radius
478// bound, and second, it returns the total number of points lying
479// within the radius bound. It is permitted to set k = 0, in which
480// case it effectively answers a range counting query. If the
481// error bound epsilon is positive, then the search is approximate
482// in the sense that it is free to ignore any point that lies
483// outside a ball of radius r/(1+epsilon), where r is the given
484// (unsquared) radius bound.
485//
486// The generic object from which all the search structures are
487// dervied is given below. It is a virtual object, and is useless
488// by itself.
489//----------------------------------------------------------------------
490
491class DLL_API ANNpointSet {
492public:
493 virtual ~ANNpointSet() {} // virtual distructor
494
495 virtual void annkSearch( // approx k near neighbor search
496 ANNpoint q, // query point
497 int k, // number of near neighbors to return
498 ANNidxArray nn_idx, // nearest neighbor array (modified)
499 ANNdistArray dd, // dist to near neighbors (modified)
500 double eps=0.0 // error bound
501 ) = 0; // pure virtual (defined elsewhere)
502
503 virtual int annkFRSearch( // approx fixed-radius kNN search
504 ANNpoint q, // query point
505 ANNdist sqRad, // squared radius
506 int k = 0, // number of near neighbors to return
507 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
508 ANNdistArray dd = NULL, // dist to near neighbors (modified)
509 double eps=0.0 // error bound
510 ) = 0; // pure virtual (defined elsewhere)
511
512 virtual int theDim() = 0; // return dimension of space
513 virtual int nPoints() = 0; // return number of points
514 // return pointer to points
515 virtual ANNpointArray thePoints() = 0;
516};
517
518//----------------------------------------------------------------------
519// Brute-force nearest neighbor search:
520// The brute-force search structure is very simple but inefficient.
521// It has been provided primarily for the sake of comparison with
522// and validation of the more complex search structures.
523//
524// Query processing is the same as described above, but the value
525// of epsilon is ignored, since all distance calculations are
526// performed exactly.
527//
528// WARNING: This data structure is very slow, and should not be
529// used unless the number of points is very small.
530//
531// Internal information:
532// ---------------------
533// This data structure bascially consists of the array of points
534// (each a pointer to an array of coordinates). The search is
535// performed by a simple linear scan of all the points.
536//----------------------------------------------------------------------
537
538class DLL_API ANNbruteForce: public ANNpointSet {
539 int dim; // dimension
540 int n_pts; // number of points
541 ANNpointArray pts; // point array
542public:
543 ANNbruteForce( // constructor from point array
544 ANNpointArray pa, // point array
545 int n, // number of points
546 int dd); // dimension
547
548 ~ANNbruteForce(); // destructor
549
550 void annkSearch( // approx k near neighbor search
551 ANNpoint q, // query point
552 int k, // number of near neighbors to return
553 ANNidxArray nn_idx, // nearest neighbor array (modified)
554 ANNdistArray dd, // dist to near neighbors (modified)
555 double eps=0.0); // error bound
556
557 int annkFRSearch( // approx fixed-radius kNN search
558 ANNpoint q, // query point
559 ANNdist sqRad, // squared radius
560 int k = 0, // number of near neighbors to return
561 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
562 ANNdistArray dd = NULL, // dist to near neighbors (modified)
563 double eps=0.0); // error bound
564
565 int theDim() // return dimension of space
566 { return dim; }
567
568 int nPoints() // return number of points
569 { return n_pts; }
570
571 ANNpointArray thePoints() // return pointer to points
572 { return pts; }
573};
574
575//----------------------------------------------------------------------
576// kd- and bd-tree splitting and shrinking rules
577// kd-trees supports a collection of different splitting rules.
578// In addition to the standard kd-tree splitting rule proposed
579// by Friedman, Bentley, and Finkel, we have introduced a
580// number of other splitting rules, which seem to perform
581// as well or better (for the distributions we have tested).
582//
583// The splitting methods given below allow the user to tailor
584// the data structure to the particular data set. They are
585// are described in greater details in the kd_split.cc source
586// file. The method ANN_KD_SUGGEST is the method chosen (rather
587// subjectively) by the implementors as the one giving the
588// fastest performance, and is the default splitting method.
589//
590// As with splitting rules, there are a number of different
591// shrinking rules. The shrinking rule ANN_BD_NONE does no
592// shrinking (and hence produces a kd-tree tree). The rule
593// ANN_BD_SUGGEST uses the implementors favorite rule.
594//----------------------------------------------------------------------
595
596enum ANNsplitRule {
597 ANN_KD_STD = 0, // the optimized kd-splitting rule
598 ANN_KD_MIDPT = 1, // midpoint split
599 ANN_KD_FAIR = 2, // fair split
600 ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method
601 ANN_KD_SL_FAIR = 4, // sliding fair split method
602 ANN_KD_SUGGEST = 5}; // the authors' suggestion for best
603const int ANN_N_SPLIT_RULES = 6; // number of split rules
604
605enum ANNshrinkRule {
606 ANN_BD_NONE = 0, // no shrinking at all (just kd-tree)
607 ANN_BD_SIMPLE = 1, // simple splitting
608 ANN_BD_CENTROID = 2, // centroid splitting
609 ANN_BD_SUGGEST = 3}; // the authors' suggested choice
610const int ANN_N_SHRINK_RULES = 4; // number of shrink rules
611
612//----------------------------------------------------------------------
613// kd-tree:
614// The main search data structure supported by ANN is a kd-tree.
615// The main constructor is given a set of points and a choice of
616// splitting method to use in building the tree.
617//
618// Construction:
619// -------------
620// The constructor is given the point array, number of points,
621// dimension, bucket size (default = 1), and the splitting rule
622// (default = ANN_KD_SUGGEST). The point array is not copied, and
623// is assumed to be kept constant throughout the lifetime of the
624// search structure. There is also a "load" constructor that
625// builds a tree from a file description that was created by the
626// Dump operation.
627//
628// Search:
629// -------
630// There are two search methods:
631//
632// Standard search (annkSearch()):
633// Searches nodes in tree-traversal order, always visiting
634// the closer child first.
635// Priority search (annkPriSearch()):
636// Searches nodes in order of increasing distance of the
637// associated cell from the query point. For many
638// distributions the standard search seems to work just
639// fine, but priority search is safer for worst-case
640// performance.
641//
642// Printing:
643// ---------
644// There are two methods provided for printing the tree. Print()
645// is used to produce a "human-readable" display of the tree, with
646// indenation, which is handy for debugging. Dump() produces a
647// format that is suitable reading by another program. There is a
648// "load" constructor, which constructs a tree which is assumed to
649// have been saved by the Dump() procedure.
650//
651// Performance and Structure Statistics:
652// -------------------------------------
653// The procedure getStats() collects statistics information on the
654// tree (its size, height, etc.) See ANNperf.h for information on
655// the stats structure it returns.
656//
657// Internal information:
658// ---------------------
659// The data structure consists of three major chunks of storage.
660// The first (implicit) storage are the points themselves (pts),
661// which have been provided by the users as an argument to the
662// constructor, or are allocated dynamically if the tree is built
663// using the load constructor). These should not be changed during
664// the lifetime of the search structure. It is the user's
665// responsibility to delete these after the tree is destroyed.
666//
667// The second is the tree itself (which is dynamically allocated in
668// the constructor) and is given as a pointer to its root node
669// (root). These nodes are automatically deallocated when the tree
670// is deleted. See the file src/kd_tree.h for further information
671// on the structure of the tree nodes.
672//
673// Each leaf of the tree does not contain a pointer directly to a
674// point, but rather contains a pointer to a "bucket", which is an
675// array consisting of point indices. The third major chunk of
676// storage is an array (pidx), which is a large array in which all
677// these bucket subarrays reside. (The reason for storing them
678// separately is the buckets are typically small, but of varying
679// sizes. This was done to avoid fragmentation.) This array is
680// also deallocated when the tree is deleted.
681//
682// In addition to this, the tree consists of a number of other
683// pieces of information which are used in searching and for
684// subsequent tree operations. These consist of the following:
685//
686// dim Dimension of space
687// n_pts Number of points currently in the tree
688// n_max Maximum number of points that are allowed
689// in the tree
690// bkt_size Maximum bucket size (no. of points per leaf)
691// bnd_box_lo Bounding box low point
692// bnd_box_hi Bounding box high point
693// splitRule Splitting method used
694//
695//----------------------------------------------------------------------
696
697//----------------------------------------------------------------------
698// Some types and objects used by kd-tree functions
699// See src/kd_tree.h and src/kd_tree.cpp for definitions
700//----------------------------------------------------------------------
701class ANNkdStats; // stats on kd-tree
702class ANNkd_node; // generic node in a kd-tree
703typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node
704
705class DLL_API ANNkd_tree: public ANNpointSet {
706protected:
707 int dim; // dimension of space
708 int n_pts; // number of points in tree
709 int bkt_size; // bucket size
710 ANNpointArray pts; // the points
711 ANNidxArray pidx; // point indices (to pts array)
712 ANNkd_ptr root; // root of kd-tree
713 ANNpoint bnd_box_lo; // bounding box low point
714 ANNpoint bnd_box_hi; // bounding box high point
715
716 void SkeletonTree( // construct skeleton tree
717 int n, // number of points
718 int dd, // dimension
719 int bs, // bucket size
720 ANNpointArray pa = NULL, // point array (optional)
721 ANNidxArray pi = NULL); // point indices (optional)
722
723public:
724 ANNkd_tree( // build skeleton tree
725 int n = 0, // number of points
726 int dd = 0, // dimension
727 int bs = 1); // bucket size
728
729 ANNkd_tree( // build from point array
730 ANNpointArray pa, // point array
731 int n, // number of points
732 int dd, // dimension
733 int bs = 1, // bucket size
734 ANNsplitRule split = ANN_KD_SUGGEST); // splitting method
735
736 ANNkd_tree( // build from dump file
737 std::istream& in); // input stream for dump file
738
739 ~ANNkd_tree(); // tree destructor
740
741 void annkSearch( // approx k near neighbor search
742 ANNpoint q, // query point
743 int k, // number of near neighbors to return
744 ANNidxArray nn_idx, // nearest neighbor array (modified)
745 ANNdistArray dd, // dist to near neighbors (modified)
746 double eps=0.0); // error bound
747
748 void annkPriSearch( // priority k near neighbor search
749 ANNpoint q, // query point
750 int k, // number of near neighbors to return
751 ANNidxArray nn_idx, // nearest neighbor array (modified)
752 ANNdistArray dd, // dist to near neighbors (modified)
753 double eps=0.0); // error bound
754
755 int annkFRSearch( // approx fixed-radius kNN search
756 ANNpoint q, // the query point
757 ANNdist sqRad, // squared radius of query ball
758 int k = 0, // number of neighbors to return
759 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
760 ANNdistArray dd = NULL, // dist to near neighbors (modified)
761 double eps=0.0); // error bound
762
763 int theDim() // return dimension of space
764 { return dim; }
765
766 int nPoints() // return number of points
767 { return n_pts; }
768
769 ANNpointArray thePoints() // return pointer to points
770 { return pts; }
771
772 virtual void Print( // print the tree (for debugging)
773 ANNbool with_pts, // print points as well?
774 std::ostream& out); // output stream
775
776 virtual void Dump( // dump entire tree
777 ANNbool with_pts, // print points as well?
778 std::ostream& out); // output stream
779
780 virtual void getStats( // compute tree statistics
781 ANNkdStats& st); // the statistics (modified)
782};
783
784//----------------------------------------------------------------------
785// Box decomposition tree (bd-tree)
786// The bd-tree is inherited from a kd-tree. The main difference
787// in the bd-tree and the kd-tree is a new type of internal node
788// called a shrinking node (in the kd-tree there is only one type
789// of internal node, a splitting node). The shrinking node
790// makes it possible to generate balanced trees in which the
791// cells have bounded aspect ratio, by allowing the decomposition
792// to zoom in on regions of dense point concentration. Although
793// this is a nice idea in theory, few point distributions are so
794// densely clustered that this is really needed.
795//----------------------------------------------------------------------
796
797class DLL_API ANNbd_tree: public ANNkd_tree {
798public:
799 ANNbd_tree( // build skeleton tree
800 int n, // number of points
801 int dd, // dimension
802 int bs = 1) // bucket size
803 : ANNkd_tree(n, dd, bs) {} // build base kd-tree
804
805 ANNbd_tree( // build from point array
806 ANNpointArray pa, // point array
807 int n, // number of points
808 int dd, // dimension
809 int bs = 1, // bucket size
810 ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule
811 ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule
812
813 ANNbd_tree( // build from dump file
814 std::istream& in); // input stream for dump file
815};
816
817//----------------------------------------------------------------------
818// Other functions
819// annMaxPtsVisit Sets a limit on the maximum number of points
820// to visit in the search.
821// annClose Can be called when all use of ANN is finished.
822// It clears up a minor memory leak.
823//----------------------------------------------------------------------
824
825DLL_API void annMaxPtsVisit( // max. pts to visit in search
826 int maxPts); // the limit
827
828DLL_API void annClose(); // called to end use of ANN
829
830#endif
831