ANN.h source code [include/ANN/ANN.h]

1	//----------------------------------------------------------------------
2	// File: ANN.h
3	// Programmer: Sunil Arya and David Mount
4	// Description: Basic include file for approximate nearest
5	// neighbor searching.
6	// Last modified: 01/27/10 (Version 1.1.2)
7	//----------------------------------------------------------------------
8	// Copyright (c) 1997-2010 University of Maryland and Sunil Arya and
9	// David Mount. All Rights Reserved.
10	//
11	// This software and related documentation is part of the Approximate
12	// Nearest Neighbor Library (ANN). This software is provided under
13	// the provisions of the Lesser GNU Public License (LGPL). See the
14	// file ../ReadMe.txt for further information.
15	//
16	// The University of Maryland (U.M.) and the authors make no
17	// representations about the suitability or fitness of this software for
18	// any purpose. It is provided "as is" without express or implied
19	// warranty.
20	//----------------------------------------------------------------------
21	// History:
22	// Revision 0.1 03/04/98
23	// Initial release
24	// Revision 1.0 04/01/05
25	// Added copyright and revision information
26	// Added ANNcoordPrec for coordinate precision.
27	// Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
28	// Cleaned up C++ structure for modern compilers
29	// Revision 1.1 05/03/05
30	// Added fixed-radius k-NN searching
31	// Revision 1.1.2 01/27/10
32	// Fixed minor compilation bugs for new versions of gcc
33	//----------------------------------------------------------------------
34
35	//----------------------------------------------------------------------
36	// ANN - approximate nearest neighbor searching
37	// ANN is a library for approximate nearest neighbor searching,
38	// based on the use of standard and priority search in kd-trees
39	// and balanced box-decomposition (bbd) trees. Here are some
40	// references to the main algorithmic techniques used here:
41	//
42	// kd-trees:
43	// Friedman, Bentley, and Finkel, ``An algorithm for finding
44	// best matches in logarithmic expected time,'' ACM
45	// Transactions on Mathematical Software, 3(3):209-226, 1977.
46	//
47	// Priority search in kd-trees:
48	// Arya and Mount, ``Algorithms for fast vector quantization,''
49	// Proc. of DCC '93: Data Compression Conference, eds. J. A.
50	// Storer and M. Cohn, IEEE Press, 1993, 381-390.
51	//
52	// Approximate nearest neighbor search and bbd-trees:
53	// Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
54	// algorithm for approximate nearest neighbor searching,''
55	// 5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
56	// 1994, 573-582.
57	//----------------------------------------------------------------------
58
59	#ifndef ANN_H
60	#define ANN_H
61
62	#ifdef WIN32
63	//----------------------------------------------------------------------
64	// For Microsoft Visual C++, externally accessible symbols must be
65	// explicitly indicated with DLL_API, which is somewhat like "extern."
66	//
67	// The following ifdef block is the standard way of creating macros
68	// which make exporting from a DLL simpler. All files within this DLL
69	// are compiled with the DLL_EXPORTS preprocessor symbol defined on the
70	// command line. In contrast, projects that use (or import) the DLL
71	// objects do not define the DLL_EXPORTS symbol. This way any other
72	// project whose source files include this file see DLL_API functions as
73	// being imported from a DLL, wheras this DLL sees symbols defined with
74	// this macro as being exported.
75	//----------------------------------------------------------------------
76	#ifdef DLL_EXPORTS
77	#define DLL_API __declspec(dllexport)
78	#else
79	#define DLL_API __declspec(dllimport)
80	#endif
81	//----------------------------------------------------------------------
82	// DLL_API is ignored for all other systems
83	//----------------------------------------------------------------------
84	#else
85	#define DLL_API
86	#endif
87
88	//----------------------------------------------------------------------
89	// basic includes
90	//----------------------------------------------------------------------
91
92	#include <cstdlib> // standard lib includes
93	#include <cmath> // math includes
94	#include <iostream> // I/O streams
95	#include <cstring> // C-style strings
96
97	//----------------------------------------------------------------------
98	// Limits
99	// There are a number of places where we use the maximum double value as
100	// default initializers (and others may be used, depending on the
101	// data/distance representation). These can usually be found in limits.h
102	// (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
103	//
104	// Not all systems have these files. If you are using such a system,
105	// you should set the preprocessor symbol ANN_NO_LIMITS_H when
106	// compiling, and modify the statements below to generate the
107	// appropriate value. For practical purposes, this does not need to be
108	// the maximum double value. It is sufficient that it be at least as
109	// large than the maximum squared distance between between any two
110	// points.
111	//----------------------------------------------------------------------
112	#ifdef ANN_NO_LIMITS_H // limits.h unavailable
113	#include <cvalues> // replacement for limits.h
114	const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double
115	#else
116	#include <climits>
117	#include <cfloat>
118	const double ANN_DBL_MAX = DBL_MAX;
119	#endif
120
121	#define ANNversion "1.1.2" // ANN version and information
122	#define ANNversionCmt ""
123	#define ANNcopyright "David M. Mount and Sunil Arya"
124	#define ANNlatestRev "Jan 27, 2010"
125
126	//----------------------------------------------------------------------
127	// ANNbool
128	// This is a simple boolean type. Although ANSI C++ is supposed
129	// to support the type bool, some compilers do not have it.
130	//----------------------------------------------------------------------
131
132	enum ANNbool {ANNfalse = `0`, ANNtrue = `1`}; // ANN boolean type (non ANSI C++)
133
134	//----------------------------------------------------------------------
135	// ANNcoord, ANNdist
136	// ANNcoord and ANNdist are the types used for representing
137	// point coordinates and distances. They can be modified by the
138	// user, with some care. It is assumed that they are both numeric
139	// types, and that ANNdist is generally of an equal or higher type
140	// from ANNcoord. A variable of type ANNdist should be large
141	// enough to store the sum of squared components of a variable
142	// of type ANNcoord for the number of dimensions needed in the
143	// application. For example, the following combinations are
144	// legal:
145	//
146	// ANNcoord ANNdist
147	// --------- -------------------------------
148	// short short, int, long, float, double
149	// int int, long, float, double
150	// long long, float, double
151	// float float, double
152	// double double
153	//
154	// It is the user's responsibility to make sure that overflow does
155	// not occur in distance calculation.
156	//----------------------------------------------------------------------
157
158	typedef double ANNcoord; // coordinate data type
159	typedef double ANNdist; // distance data type
160
161	//----------------------------------------------------------------------
162	// ANNidx
163	// ANNidx is a point index. When the data structure is built, the
164	// points are given as an array. Nearest neighbor results are
165	// returned as an integer index into this array. To make it
166	// clearer when this is happening, we define the integer type
167	// ANNidx. Indexing starts from 0.
168	//
169	// For fixed-radius near neighbor searching, it is possible that
170	// there are not k nearest neighbors within the search radius. To
171	// indicate this, the algorithm returns ANN_NULL_IDX as its result.
172	// It should be distinguishable from any valid array index.
173	//----------------------------------------------------------------------
174
175	typedef int ANNidx; // point index
176	const ANNidx ANN_NULL_IDX = -`1`; // a NULL point index
177
178	//----------------------------------------------------------------------
179	// Infinite distance:
180	// The code assumes that there is an "infinite distance" which it
181	// uses to initialize distances before performing nearest neighbor
182	// searches. It should be as larger or larger than any legitimate
183	// nearest neighbor distance.
184	//
185	// On most systems, these should be found in the standard include
186	// file <limits.h> or possibly <float.h>. If you do not have these
187	// file, some suggested values are listed below, assuming 64-bit
188	// long, 32-bit int and 16-bit short.
189	//
190	// ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>)
191	// ------- ------------ ------------------------------------
192	// double DBL_MAX 1.79769313486231570e+308
193	// float FLT_MAX 3.40282346638528860e+38
194	// long LONG_MAX 0x7fffffffffffffff
195	// int INT_MAX 0x7fffffff
196	// short SHRT_MAX 0x7fff
197	//----------------------------------------------------------------------
198
199	const ANNdist ANN_DIST_INF = ANN_DBL_MAX;
200
201	//----------------------------------------------------------------------
202	// Significant digits for tree dumps:
203	// When floating point coordinates are used, the routine that dumps
204	// a tree needs to know roughly how many significant digits there
205	// are in a ANNcoord, so it can output points to full precision.
206	// This is defined to be ANNcoordPrec. On most systems these
207	// values can be found in the standard include files <limits.h> or
208	// <float.h>. For integer types, the value is essentially ignored.
209	//
210	// ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>)
211	// -------- ------------ ------------------------------------
212	// double DBL_DIG 15
213	// float FLT_DIG 6
214	// long doesn't matter 19
215	// int doesn't matter 10
216	// short doesn't matter 5
217	//----------------------------------------------------------------------
218
219	#ifdef DBL_DIG // number of sig. bits in ANNcoord
220	const int ANNcoordPrec = DBL_DIG;
221	#else
222	const int ANNcoordPrec = `15`; // default precision
223	#endif
224
225	//----------------------------------------------------------------------
226	// Self match?
227	// In some applications, the nearest neighbor of a point is not
228	// allowed to be the point itself. This occurs, for example, when
229	// computing all nearest neighbors in a set. By setting the
230	// parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
231	// is the closest point whose distance from the query point is
232	// strictly positive.
233	//----------------------------------------------------------------------
234
235	const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue;
236
237	//----------------------------------------------------------------------
238	// Norms and metrics:
239	// ANN supports any Minkowski norm for defining distance. In
240	// particular, for any p >= 1, the L_p Minkowski norm defines the
241	// length of a d-vector (v0, v1, ..., v(d-1)) to be
242	//
243	// (\|v0\|^p + \|v1\|^p + ... + \|v(d-1)\|^p)^(1/p),
244	//
245	// (where ^ denotes exponentiation, and \|.\| denotes absolute
246	// value). The distance between two points is defined to be the
247	// norm of the vector joining them. Some common distance metrics
248	// include
249	//
250	// Euclidean metric p = 2
251	// Manhattan metric p = 1
252	// Max metric p = infinity
253	//
254	// In the case of the max metric, the norm is computed by taking
255	// the maxima of the absolute values of the components. ANN is
256	// highly "coordinate-based" and does not support general distances
257	// functions (e.g. those obeying just the triangle inequality). It
258	// also does not support distance functions based on
259	// inner-products.
260	//
261	// For the purpose of computing nearest neighbors, it is not
262	// necessary to compute the final power (1/p). Thus the only
263	// component that is used by the program is \|v(i)\|^p.
264	//
265	// ANN parameterizes the distance computation through the following
266	// macros. (Macros are used rather than procedures for
267	// efficiency.) Recall that the distance between two points is
268	// given by the length of the vector joining them, and the length
269	// or norm of a vector v is given by formula:
270	//
271	// \|v\| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
272	//
273	// where ROOT, POW are unary functions and # is an associative and
274	// commutative binary operator mapping the following types:
275	//
276	// * POW: ANNcoord --> ANNdist*
277	// * #: ANNdist x ANNdist --> ANNdist*
278	// * ROOT: ANNdist (>0) --> double*
279	//
280	// For early termination in distance calculation (partial distance
281	// calculation) we assume that POW and # together are monotonically
282	// increasing on sequences of arguments, meaning that for all
283	// v0..vk and y:
284	//
285	// POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
286	//
287	// Incremental Distance Calculation:
288	// The program uses an optimized method of computing distances for
289	// kd-trees and bd-trees, called incremental distance calculation.
290	// It is used when distances are to be updated when only a single
291	// coordinate of a point has been changed. In order to use this,
292	// we assume that there is an incremental update function DIFF(x,y)
293	// for #, such that if:
294	//
295	// s = x0 # ... # xi # ... # xk
296	//
297	// then if s' is equal to s but with xi replaced by y, that is,
298	//
299	// s' = x0 # ... # y # ... # xk
300	//
301	// then the length of s' can be computed by:
302	//
303	// \|s'\| = \|s\| # DIFF(xi,y).
304	//
305	// Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity
306	// norm we make use of the fact that in the program this function
307	// is only invoked when y > xi, and hence DIFF(xi,y)=y.
308	//
309	// Finally, for approximate nearest neighbor queries we assume
310	// that POW and ROOT are related such that
311	//
312	// vROOT(x) = ROOT(POW(v)x)
313	//
314	// Here are the values for the various Minkowski norms:
315	//
316	// L_p: p even: p odd:
317	// ------------------------- ------------------------
318	// POW(v) = v^p POW(v) = \|v\|^p
319	// ROOT(x) = x^(1/p) ROOT(x) = x^(1/p)
320	// # = + # = +
321	// DIFF(x,y) = y - x DIFF(x,y) = y - x
322	//
323	// L_inf:
324	// POW(v) = \|v\|
325	// ROOT(x) = x
326	// # = max
327	// DIFF(x,y) = y
328	//
329	// By default the Euclidean norm is assumed. To change the norm,
330	// uncomment the appropriate set of macros below.
331	//----------------------------------------------------------------------
332
333	//----------------------------------------------------------------------
334	// Use the following for the Euclidean norm
335	//----------------------------------------------------------------------
336	#define ANN_POW(v) ((v)*(v))
337	#define ANN_ROOT(x) sqrt(x)
338	#define ANN_SUM(x,y) ((x) + (y))
339	#define ANN_DIFF(x,y) ((y) - (x))
340
341	//----------------------------------------------------------------------
342	// Use the following for the L_1 (Manhattan) norm
343	//----------------------------------------------------------------------
344	// #define ANN_POW(v) fabs(v)
345	// #define ANN_ROOT(x) (x)
346	// #define ANN_SUM(x,y) ((x) + (y))
347	// #define ANN_DIFF(x,y) ((y) - (x))
348
349	//----------------------------------------------------------------------
350	// Use the following for a general L_p norm
351	//----------------------------------------------------------------------
352	// #define ANN_POW(v) pow(fabs(v),p)
353	// #define ANN_ROOT(x) pow(fabs(x),1/p)
354	// #define ANN_SUM(x,y) ((x) + (y))
355	// #define ANN_DIFF(x,y) ((y) - (x))
356
357	//----------------------------------------------------------------------
358	// Use the following for the L_infinity (Max) norm
359	//----------------------------------------------------------------------
360	// #define ANN_POW(v) fabs(v)
361	// #define ANN_ROOT(x) (x)
362	// #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y))
363	// #define ANN_DIFF(x,y) (y)
364
365	//----------------------------------------------------------------------
366	// Array types
367	// The following array types are of basic interest. A point is
368	// just a dimensionless array of coordinates, a point array is a
369	// dimensionless array of points. A distance array is a
370	// dimensionless array of distances and an index array is a
371	// dimensionless array of point indices. The latter two are used
372	// when returning the results of k-nearest neighbor queries.
373	//----------------------------------------------------------------------
374
375	typedef ANNcoord* ANNpoint; // a point
376	typedef ANNpoint* ANNpointArray; // an array of points
377	typedef ANNdist* ANNdistArray; // an array of distances
378	typedef ANNidx* ANNidxArray; // an array of point indices
379
380	//----------------------------------------------------------------------
381	// Basic point and array utilities:
382	// The following procedures are useful supplements to ANN's nearest
383	// neighbor capabilities.
384	//
385	// annDist():
386	// Computes the (squared) distance between a pair of points.
387	// Note that this routine is not used internally by ANN for
388	// computing distance calculations. For reasons of efficiency
389	// this is done using incremental distance calculation. Thus,
390	// this routine cannot be modified as a method of changing the
391	// metric.
392	//
393	// Because points (somewhat like strings in C) are stored as
394	// pointers. Consequently, creating and destroying copies of
395	// points may require storage allocation. These procedures do
396	// this.
397	//
398	// annAllocPt() and annDeallocPt():
399	// Allocate a deallocate storage for a single point, and
400	// return a pointer to it. The argument to AllocPt() is
401	// used to initialize all components.
402	//
403	// annAllocPts() and annDeallocPts():
404	// Allocate and deallocate an array of points as well a
405	// place to store their coordinates, and initializes the
406	// points to point to their respective coordinates. It
407	// allocates point storage in a contiguous block large
408	// enough to store all the points. It performs no
409	// initialization.
410	//
411	// annCopyPt():
412	// Creates a copy of a given point, allocating space for
413	// the new point. It returns a pointer to the newly
414	// allocated copy.
415	//----------------------------------------------------------------------
416
417	DLL_API ANNdist annDist(
418	int dim, // dimension of space
419	ANNpoint p, // points
420	ANNpoint q);
421
422	DLL_API ANNpoint annAllocPt(
423	int dim, // dimension
424	ANNcoord c = `0`); // coordinate value (all equal)
425
426	DLL_API ANNpointArray annAllocPts(
427	int n, // number of points
428	int dim); // dimension
429
430	DLL_API void annDeallocPt(
431	ANNpoint &p); // deallocate 1 point
432
433	DLL_API void annDeallocPts(
434	ANNpointArray &pa); // point array
435
436	DLL_API ANNpoint annCopyPt(
437	int dim, // dimension
438	ANNpoint source); // point to copy
439
440	//----------------------------------------------------------------------
441	//Overall structure: ANN supports a number of different data structures
442	//for approximate and exact nearest neighbor searching. These are:
443	//
444	// ANNbruteForce A simple brute-force search structure.
445	// ANNkd_tree A kd-tree tree search structure. ANNbd_tree
446	// A bd-tree tree search structure (a kd-tree with shrink
447	// capabilities).
448	//
449	// At a minimum, each of these data structures support k-nearest
450	// neighbor queries. The nearest neighbor query, annkSearch,
451	// returns an integer identifier and the distance to the nearest
452	// neighbor(s) and annRangeSearch returns the nearest points that
453	// lie within a given query ball.
454	//
455	// Each structure is built by invoking the appropriate constructor
456	// and passing it (at a minimum) the array of points, the total
457	// number of points and the dimension of the space. Each structure
458	// is also assumed to support a destructor and member functions
459	// that return basic information about the point set.
460	//
461	// Note that the array of points is not copied by the data
462	// structure (for reasons of space efficiency), and it is assumed
463	// to be constant throughout the lifetime of the search structure.
464	//
465	// The search algorithm, annkSearch, is given the query point (q),
466	// and the desired number of nearest neighbors to report (k), and
467	// the error bound (eps) (whose default value is 0, implying exact
468	// nearest neighbors). It returns two arrays which are assumed to
469	// contain at least k elements: one (nn_idx) contains the indices
470	// (within the point array) of the nearest neighbors and the other
471	// (dd) contains the squared distances to these nearest neighbors.
472	//
473	// The search algorithm, annkFRSearch, is a fixed-radius kNN
474	// search. In addition to a query point, it is given a (squared)
475	// radius bound. (This is done for consistency, because the search
476	// returns distances as squared quantities.) It does two things.
477	// First, it computes the k nearest neighbors within the radius
478	// bound, and second, it returns the total number of points lying
479	// within the radius bound. It is permitted to set k = 0, in which
480	// case it effectively answers a range counting query. If the
481	// error bound epsilon is positive, then the search is approximate
482	// in the sense that it is free to ignore any point that lies
483	// outside a ball of radius r/(1+epsilon), where r is the given
484	// (unsquared) radius bound.
485	//
486	// The generic object from which all the search structures are
487	// dervied is given below. It is a virtual object, and is useless
488	// by itself.
489	//----------------------------------------------------------------------
490
491	class DLL_API ANNpointSet {
492	public:
493	virtual ~ANNpointSet() {} // virtual distructor
494
495	virtual void annkSearch( // approx k near neighbor search
496	ANNpoint q, // query point
497	int k, // number of near neighbors to return
498	ANNidxArray nn_idx, // nearest neighbor array (modified)
499	ANNdistArray dd, // dist to near neighbors (modified)
500	double eps=`0.0` // error bound
501	) = `0`; // pure virtual (defined elsewhere)
502
503	virtual int annkFRSearch( // approx fixed-radius kNN search
504	ANNpoint q, // query point
505	ANNdist sqRad, // squared radius
506	int k = `0`, // number of near neighbors to return
507	ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
508	ANNdistArray dd = NULL, // dist to near neighbors (modified)
509	double eps=`0.0` // error bound
510	) = `0`; // pure virtual (defined elsewhere)
511
512	virtual int theDim() = `0`; // return dimension of space
513	virtual int nPoints() = `0`; // return number of points
514	// return pointer to points
515	virtual ANNpointArray thePoints() = `0`;
516	};
517
518	//----------------------------------------------------------------------
519	// Brute-force nearest neighbor search:
520	// The brute-force search structure is very simple but inefficient.
521	// It has been provided primarily for the sake of comparison with
522	// and validation of the more complex search structures.
523	//
524	// Query processing is the same as described above, but the value
525	// of epsilon is ignored, since all distance calculations are
526	// performed exactly.
527	//
528	// WARNING: This data structure is very slow, and should not be
529	// used unless the number of points is very small.
530	//
531	// Internal information:
532	// ---------------------
533	// This data structure bascially consists of the array of points
534	// (each a pointer to an array of coordinates). The search is
535	// performed by a simple linear scan of all the points.
536	//----------------------------------------------------------------------
537
538	class DLL_API ANNbruteForce: public ANNpointSet {
539	int dim; // dimension
540	int n_pts; // number of points
541	ANNpointArray pts; // point array
542	public:
543	ANNbruteForce( // constructor from point array
544	ANNpointArray pa, // point array
545	int n, // number of points
546	int dd); // dimension
547
548	~ANNbruteForce(); // destructor
549
550	void annkSearch( // approx k near neighbor search
551	ANNpoint q, // query point
552	int k, // number of near neighbors to return
553	ANNidxArray nn_idx, // nearest neighbor array (modified)
554	ANNdistArray dd, // dist to near neighbors (modified)
555	double eps=`0.0`); // error bound
556
557	int annkFRSearch( // approx fixed-radius kNN search
558	ANNpoint q, // query point
559	ANNdist sqRad, // squared radius
560	int k = `0`, // number of near neighbors to return
561	ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
562	ANNdistArray dd = NULL, // dist to near neighbors (modified)
563	double eps=`0.0`); // error bound
564
565	int theDim() // return dimension of space
566	{ return dim; }
567
568	int nPoints() // return number of points
569	{ return n_pts; }
570
571	ANNpointArray thePoints() // return pointer to points
572	{ return pts; }
573	};
574
575	//----------------------------------------------------------------------
576	// kd- and bd-tree splitting and shrinking rules
577	// kd-trees supports a collection of different splitting rules.
578	// In addition to the standard kd-tree splitting rule proposed
579	// by Friedman, Bentley, and Finkel, we have introduced a
580	// number of other splitting rules, which seem to perform
581	// as well or better (for the distributions we have tested).
582	//
583	// The splitting methods given below allow the user to tailor
584	// the data structure to the particular data set. They are
585	// are described in greater details in the kd_split.cc source
586	// file. The method ANN_KD_SUGGEST is the method chosen (rather
587	// subjectively) by the implementors as the one giving the
588	// fastest performance, and is the default splitting method.
589	//
590	// As with splitting rules, there are a number of different
591	// shrinking rules. The shrinking rule ANN_BD_NONE does no
592	// shrinking (and hence produces a kd-tree tree). The rule
593	// ANN_BD_SUGGEST uses the implementors favorite rule.
594	//----------------------------------------------------------------------
595
596	enum ANNsplitRule {
597	ANN_KD_STD = `0`, // the optimized kd-splitting rule
598	ANN_KD_MIDPT = `1`, // midpoint split
599	ANN_KD_FAIR = `2`, // fair split
600	ANN_KD_SL_MIDPT = `3`, // sliding midpoint splitting method
601	ANN_KD_SL_FAIR = `4`, // sliding fair split method
602	ANN_KD_SUGGEST = `5`}; // the authors' suggestion for best
603	const int ANN_N_SPLIT_RULES = `6`; // number of split rules
604
605	enum ANNshrinkRule {
606	ANN_BD_NONE = `0`, // no shrinking at all (just kd-tree)
607	ANN_BD_SIMPLE = `1`, // simple splitting
608	ANN_BD_CENTROID = `2`, // centroid splitting
609	ANN_BD_SUGGEST = `3`}; // the authors' suggested choice
610	const int ANN_N_SHRINK_RULES = `4`; // number of shrink rules
611
612	//----------------------------------------------------------------------
613	// kd-tree:
614	// The main search data structure supported by ANN is a kd-tree.
615	// The main constructor is given a set of points and a choice of
616	// splitting method to use in building the tree.
617	//
618	// Construction:
619	// -------------
620	// The constructor is given the point array, number of points,
621	// dimension, bucket size (default = 1), and the splitting rule
622	// (default = ANN_KD_SUGGEST). The point array is not copied, and
623	// is assumed to be kept constant throughout the lifetime of the
624	// search structure. There is also a "load" constructor that
625	// builds a tree from a file description that was created by the
626	// Dump operation.
627	//
628	// Search:
629	// -------
630	// There are two search methods:
631	//
632	// Standard search (annkSearch()):
633	// Searches nodes in tree-traversal order, always visiting
634	// the closer child first.
635	// Priority search (annkPriSearch()):
636	// Searches nodes in order of increasing distance of the
637	// associated cell from the query point. For many
638	// distributions the standard search seems to work just
639	// fine, but priority search is safer for worst-case
640	// performance.
641	//
642	// Printing:
643	// ---------
644	// There are two methods provided for printing the tree. Print()
645	// is used to produce a "human-readable" display of the tree, with
646	// indenation, which is handy for debugging. Dump() produces a
647	// format that is suitable reading by another program. There is a
648	// "load" constructor, which constructs a tree which is assumed to
649	// have been saved by the Dump() procedure.
650	//
651	// Performance and Structure Statistics:
652	// -------------------------------------
653	// The procedure getStats() collects statistics information on the
654	// tree (its size, height, etc.) See ANNperf.h for information on
655	// the stats structure it returns.
656	//
657	// Internal information:
658	// ---------------------
659	// The data structure consists of three major chunks of storage.
660	// The first (implicit) storage are the points themselves (pts),
661	// which have been provided by the users as an argument to the
662	// constructor, or are allocated dynamically if the tree is built
663	// using the load constructor). These should not be changed during
664	// the lifetime of the search structure. It is the user's
665	// responsibility to delete these after the tree is destroyed.
666	//
667	// The second is the tree itself (which is dynamically allocated in
668	// the constructor) and is given as a pointer to its root node
669	// (root). These nodes are automatically deallocated when the tree
670	// is deleted. See the file src/kd_tree.h for further information
671	// on the structure of the tree nodes.
672	//
673	// Each leaf of the tree does not contain a pointer directly to a
674	// point, but rather contains a pointer to a "bucket", which is an
675	// array consisting of point indices. The third major chunk of
676	// storage is an array (pidx), which is a large array in which all
677	// these bucket subarrays reside. (The reason for storing them
678	// separately is the buckets are typically small, but of varying
679	// sizes. This was done to avoid fragmentation.) This array is
680	// also deallocated when the tree is deleted.
681	//
682	// In addition to this, the tree consists of a number of other
683	// pieces of information which are used in searching and for
684	// subsequent tree operations. These consist of the following:
685	//
686	// dim Dimension of space
687	// n_pts Number of points currently in the tree
688	// n_max Maximum number of points that are allowed
689	// in the tree
690	// bkt_size Maximum bucket size (no. of points per leaf)
691	// bnd_box_lo Bounding box low point
692	// bnd_box_hi Bounding box high point
693	// splitRule Splitting method used
694	//
695	//----------------------------------------------------------------------
696
697	//----------------------------------------------------------------------
698	// Some types and objects used by kd-tree functions
699	// See src/kd_tree.h and src/kd_tree.cpp for definitions
700	//----------------------------------------------------------------------
701	class ANNkdStats; // stats on kd-tree
702	class ANNkd_node; // generic node in a kd-tree
703	typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node
704
705	class DLL_API ANNkd_tree: public ANNpointSet {
706	protected:
707	int dim; // dimension of space
708	int n_pts; // number of points in tree
709	int bkt_size; // bucket size
710	ANNpointArray pts; // the points
711	ANNidxArray pidx; // point indices (to pts array)
712	ANNkd_ptr root; // root of kd-tree
713	ANNpoint bnd_box_lo; // bounding box low point
714	ANNpoint bnd_box_hi; // bounding box high point
715
716	void SkeletonTree( // construct skeleton tree
717	int n, // number of points
718	int dd, // dimension
719	int bs, // bucket size
720	ANNpointArray pa = NULL, // point array (optional)
721	ANNidxArray pi = NULL); // point indices (optional)
722
723	public:
724	ANNkd_tree( // build skeleton tree
725	int n = `0`, // number of points
726	int dd = `0`, // dimension
727	int bs = `1`); // bucket size
728
729	ANNkd_tree( // build from point array
730	ANNpointArray pa, // point array
731	int n, // number of points
732	int dd, // dimension
733	int bs = `1`, // bucket size
734	ANNsplitRule split = ANN_KD_SUGGEST); // splitting method
735
736	ANNkd_tree( // build from dump file
737	std::istream& in); // input stream for dump file
738
739	~ANNkd_tree(); // tree destructor
740
741	void annkSearch( // approx k near neighbor search
742	ANNpoint q, // query point
743	int k, // number of near neighbors to return
744	ANNidxArray nn_idx, // nearest neighbor array (modified)
745	ANNdistArray dd, // dist to near neighbors (modified)
746	double eps=`0.0`); // error bound
747
748	void annkPriSearch( // priority k near neighbor search
749	ANNpoint q, // query point
750	int k, // number of near neighbors to return
751	ANNidxArray nn_idx, // nearest neighbor array (modified)
752	ANNdistArray dd, // dist to near neighbors (modified)
753	double eps=`0.0`); // error bound
754
755	int annkFRSearch( // approx fixed-radius kNN search
756	ANNpoint q, // the query point
757	ANNdist sqRad, // squared radius of query ball
758	int k = `0`, // number of neighbors to return
759	ANNidxArray nn_idx = NULL, // nearest neighbor array (modified)
760	ANNdistArray dd = NULL, // dist to near neighbors (modified)
761	double eps=`0.0`); // error bound
762
763	int theDim() // return dimension of space
764	{ return dim; }
765
766	int nPoints() // return number of points
767	{ return n_pts; }
768
769	ANNpointArray thePoints() // return pointer to points
770	{ return pts; }
771
772	virtual void Print( // print the tree (for debugging)
773	ANNbool with_pts, // print points as well?
774	std::ostream& out); // output stream
775
776	virtual void Dump( // dump entire tree
777	ANNbool with_pts, // print points as well?
778	std::ostream& out); // output stream
779
780	virtual void getStats( // compute tree statistics
781	ANNkdStats& st); // the statistics (modified)
782	};
783
784	//----------------------------------------------------------------------
785	// Box decomposition tree (bd-tree)
786	// The bd-tree is inherited from a kd-tree. The main difference
787	// in the bd-tree and the kd-tree is a new type of internal node
788	// called a shrinking node (in the kd-tree there is only one type
789	// of internal node, a splitting node). The shrinking node
790	// makes it possible to generate balanced trees in which the
791	// cells have bounded aspect ratio, by allowing the decomposition
792	// to zoom in on regions of dense point concentration. Although
793	// this is a nice idea in theory, few point distributions are so
794	// densely clustered that this is really needed.
795	//----------------------------------------------------------------------
796
797	class DLL_API ANNbd_tree: public ANNkd_tree {
798	public:
799	ANNbd_tree( // build skeleton tree
800	int n, // number of points
801	int dd, // dimension
802	int bs = `1`) // bucket size
803	: ANNkd_tree (n, dd, bs) {} // build base kd-tree
804
805	ANNbd_tree( // build from point array
806	ANNpointArray pa, // point array
807	int n, // number of points
808	int dd, // dimension
809	int bs = `1`, // bucket size
810	ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule
811	ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule
812
813	ANNbd_tree( // build from dump file
814	std::istream& in); // input stream for dump file
815	};
816
817	//----------------------------------------------------------------------
818	// Other functions
819	// annMaxPtsVisit Sets a limit on the maximum number of points
820	// to visit in the search.
821	// annClose Can be called when all use of ANN is finished.
822	// It clears up a minor memory leak.
823	//----------------------------------------------------------------------
824
825	DLL_API void annMaxPtsVisit( // max. pts to visit in search
826	int maxPts); // the limit
827
828	DLL_API void annClose(); // called to end use of ANN
829
830	#endif
831

Browse the source code of include/ANN/ANN.h