greatCircleDistance.cpp source code [ClickHouse/dbms/src/Functions/greatCircleDistance.cpp]

1	#include <DataTypes/DataTypesNumber.h>
2	#include <Columns/ColumnsNumber.h>
3	#include <Columns/ColumnConst.h>
4	#include <Common/typeid_cast.h>
5	#include <Common/assert_cast.h>
6	#include <Functions/IFunctionImpl.h>
7	#include <Functions/FunctionHelpers.h>
8	#include <Functions/FunctionFactory.h>
9	#include <ext/range.h>
10	#include <cmath>
11
12
13	namespace DB
14	{
15
16	/* Calculates the distance between two geographical locations.*
17	* There are three variants:
18	* greatCircleAngle: calculates the distance on a sphere in degrees: https://en.wikipedia.org/wiki/Great-circle_distance
19	* greatCircleDistance: calculates the distance on a sphere in meters.
20	* geoDistance: calculates the distance on WGS-84 ellipsoid in meters.
21	*
22	* The function calculates distance between two points on Earth specified by longitude and latitude in degrees.
23	*
24	* Latitude must be in [-90, 90], longitude must be [-180, 180].
25	*
26	* Original code of this implementation of this function is here:
27	* https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
28	* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
29	* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
30	* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity
31	* and calculated constants once for cosine, arcsine(sqrt) and look up table.
32	*/
33
34	namespace
35	{
36
37	constexpr double PI = `3.14159265358979323846`;
38	constexpr float RAD_IN_DEG = static_cast<float>(PI / `180.0`);
39	constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / `360.0`);
40
41	constexpr size_t COS_LUT_SIZE = `1024`; // maxerr 0.00063%
42	constexpr size_t ASIN_SQRT_LUT_SIZE = `512`;
43	constexpr size_t METRIC_LUT_SIZE = `1024`;
44
45	/* Earth radius in meters using WGS84 authalic radius.*
46	* We use this value to be consistent with H3 library.
47	*/
48	constexpr float EARTH_RADIUS = `6371007.180918475`;
49	constexpr float EARTH_DIAMETER = `2` * EARTH_RADIUS;
50
51
52	float cos_lut[COS_LUT_SIZE + `1`]; /// cos(x) table
53	float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + `1`]; /// asin(sqrt(x)) earth_diameter table*
54
55	float sphere_metric_lut[METRIC_LUT_SIZE + `1`]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
56	float sphere_metric_meters_lut[METRIC_LUT_SIZE + `1`]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
57	float wgs84_metric_meters_lut[`2` * (METRIC_LUT_SIZE + `1`)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
58
59
60	inline double sqr(double v)
61	{
62	return v * v;
63	}
64
65	inline float sqrf(float v)
66	{
67	return v * v;
68	}
69
70	void geodistInit()
71	{
72	for (size_t i = `0`; i <= COS_LUT_SIZE; ++i)
73	cos_lut[i] = static_cast<float>(cos(`2` * PI * i / COS_LUT_SIZE)); // [0, 2 pi] -> [0, COS_LUT_SIZE]*
74
75	for (size_t i = `0`; i <= ASIN_SQRT_LUT_SIZE; ++i)
76	asin_sqrt_lut[i] = static_cast<float>(asin(
77	sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
78
79	for (size_t i = `0`; i <= METRIC_LUT_SIZE; ++i)
80	{
81	double latitude = i * (PI / METRIC_LUT_SIZE) - PI * `0.5`; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
82
83	/// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
84	/// depending on the latitude (in radians).
85
86	/// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
87	wgs84_metric_meters_lut[i * `2`] = static_cast<float>(sqr(`111132.09` - `566.05` * cos(`2` * latitude) + `1.20` * cos(`4` * latitude)));
88	wgs84_metric_meters_lut[i * `2` + `1`] = static_cast<float>(sqr(`111415.13` * cos(latitude) - `94.55` * cos(`3` * latitude) + `0.12` * cos(`5` * latitude)));
89
90	sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / `360`) * cos(latitude)));
91
92	sphere_metric_lut[i] = cosf(latitude);
93	}
94	}
95
96	inline float geodistDegDiff(float f)
97	{
98	f = fabsf(f);
99	while (f > `360`)
100	f -= `360`;
101	if (f > `180`)
102	f = `360` - f;
103	return f;
104	}
105
106	inline float geodistFastCos(float x)
107	{
108	float y = fabsf(x) * (COS_LUT_SIZE / PI / `2`);
109	size_t i = static_cast<size_t>(y);
110	y -= i;
111	i &= (COS_LUT_SIZE - `1`);
112	return cos_lut[i] + (cos_lut[i + `1`] - cos_lut[i]) * y;
113	}
114
115	inline float geodistFastSin(float x)
116	{
117	float y = fabsf(x) * (COS_LUT_SIZE / PI / `2`);
118	size_t i = static_cast<size_t>(y);
119	y -= i;
120	i = (i - COS_LUT_SIZE / `4`) & (COS_LUT_SIZE - `1`); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
121	return cos_lut[i] + (cos_lut[i + `1`] - cos_lut[i]) * y;
122	}
123
124	/// fast implementation of asin(sqrt(x))
125	/// max error in floats 0.00369%, in doubles 0.00072%
126	inline float geodistFastAsinSqrt(float x)
127	{
128	if (x < `0.122f`)
129	{
130	// distance under 4546 km, Taylor error under 0.00072%
131	float y = sqrtf(x);
132	return y + x * y * `0.166666666666666f` + x * x * y * `0.075f` + x * x * x * y * `0.044642857142857f`;
133	}
134	if (x < `0.948f`)
135	{
136	// distance under 17083 km, 512-entry LUT error under 0.00072%
137	x *= ASIN_SQRT_LUT_SIZE;
138	size_t i = static_cast<size_t>(x);
139	return asin_sqrt_lut[i] + (asin_sqrt_lut[i + `1`] - asin_sqrt_lut[i]) * (x - i);
140	}
141	return asinf(sqrtf(x)); // distance over 17083 km, just compute exact
142	}
143
144
145	enum class Method
146	{
147	SPHERE_DEGREES,
148	SPHERE_METERS,
149	WGS84_METERS,
150	};
151
152
153	template <Method method>
154	float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
155	{
156	float lat_diff = geodistDegDiff(lat1deg - lat2deg);
157	float lon_diff = geodistDegDiff(lon1deg - lon2deg);
158
159	if (lon_diff < `13`)
160	{
161	// points are close enough; use flat ellipsoid model
162	// interpolate metric coefficients using latitudes midpoint
163
164	/// Why comparing only difference in longitude?
165	/// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude.
166	/// (Remember how a plane flies from Moscow to New York)
167	/// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
168
169	float latitude_midpoint = (lat1deg + lat2deg + `180`) * METRIC_LUT_SIZE / `360`; // [-90, 90] degrees -> [0, KTABLE] indexes
170	size_t latitude_midpoint_index = static_cast<size_t>(latitude_midpoint) & (METRIC_LUT_SIZE - `1`);
171
172	/// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
173
174	float k_lat;
175	float k_lon;
176
177	if constexpr (method == Method::SPHERE_DEGREES)
178	{
179	k_lat = `1`;
180
181	k_lon = sphere_metric_lut[latitude_midpoint_index]
182	+ (sphere_metric_lut[latitude_midpoint_index + `1`] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
183	}
184	else if constexpr (method == Method::SPHERE_METERS)
185	{
186	k_lat = sqr(EARTH_DIAMETER * PI / `360`);
187
188	k_lon = sphere_metric_meters_lut[latitude_midpoint_index]
189	+ (sphere_metric_meters_lut[latitude_midpoint_index + `1`] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
190	}
191	else if constexpr (method == Method::WGS84_METERS)
192	{
193	k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * `2`]
194	+ (wgs84_metric_meters_lut[(latitude_midpoint_index + `1`) * `2`] - wgs84_metric_meters_lut[latitude_midpoint_index * `2`]) * (latitude_midpoint - latitude_midpoint_index);
195
196	k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * `2` + `1`]
197	+ (wgs84_metric_meters_lut[(latitude_midpoint_index + `1`) * `2` + `1`] - wgs84_metric_meters_lut[latitude_midpoint_index * `2` + `1`]) * (latitude_midpoint - latitude_midpoint_index);
198	}
199
200	/// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates.
201	return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
202	}
203	else
204	{
205	// points too far away; use haversine
206
207	float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF))
208	+ geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF));
209
210	if constexpr (method == Method::SPHERE_DEGREES)
211	return (`360.0f` / PI) * geodistFastAsinSqrt(a);
212	else
213	return EARTH_DIAMETER * geodistFastAsinSqrt(a);
214	}
215	}
216
217	}
218
219
220	template <Method method>
221	class FunctionGeoDistance : public IFunction
222	{
223	public:
224	static constexpr auto name =
225	(method == Method::SPHERE_DEGREES) ? "greatCircleAngle"
226	: ((method == Method::SPHERE_METERS) ? "greatCircleDistance"
227	: "geoDistance");
228
229	static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeoDistance<method>>(); }
230
231	private:
232	String getName() const override { return name; }
233	size_t getNumberOfArguments() const override { return `4`; }
234
235	bool useDefaultImplementationForConstants() const override { return true; }
236
237	DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
238	{
239	for (const auto arg_idx : ext::range(`0`, arguments.size()))
240	{
241	const auto arg = arguments [arg_idx].get();
242	if (!isNumber(WhichDataType (arg)))
243	throw Exception(
244	"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + `1`) + " of function " + getName() + ". Must be numeric",
245	ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
246	}
247
248	return std::make_shared<DataTypeFloat32>();
249	}
250
251	void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
252	{
253	auto dst = ColumnVector<Float32>::create();
254	auto & dst_data = dst ->getData();
255	dst_data.resize(input_rows_count);
256
257	const IColumn & col_lon1 = *block.getByPosition(arguments [`0`]).column;
258	const IColumn & col_lat1 = *block.getByPosition(arguments [`1`]).column;
259	const IColumn & col_lon2 = *block.getByPosition(arguments [`2`]).column;
260	const IColumn & col_lat2 = *block.getByPosition(arguments [`3`]).column;
261
262	for (size_t row_num = `0`; row_num < input_rows_count; ++row_num)
263	dst_data [row_num] = distance<method>(
264	col_lon1.getFloat32(row_num), col_lat1.getFloat32(row_num),
265	col_lon2.getFloat32(row_num), col_lat2.getFloat32(row_num));
266
267	block.getByPosition(result).column = std::move(dst);
268	}
269	};
270
271
272	void registerFunctionGeoDistance(FunctionFactory & factory)
273	{
274	geodistInit();
275	factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>();
276	factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>();
277	factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>();
278	}
279
280	}
281
282

Browse the source code of ClickHouse/dbms/src/Functions/greatCircleDistance.cpp