1#include <DataTypes/DataTypesNumber.h>
2#include <Columns/ColumnsNumber.h>
3#include <Columns/ColumnConst.h>
4#include <Common/typeid_cast.h>
5#include <Common/assert_cast.h>
6#include <Functions/IFunctionImpl.h>
7#include <Functions/FunctionHelpers.h>
8#include <Functions/FunctionFactory.h>
9#include <ext/range.h>
10#include <cmath>
11
12
13namespace DB
14{
15
16/** Calculates the distance between two geographical locations.
17 * There are three variants:
18 * greatCircleAngle: calculates the distance on a sphere in degrees: https://en.wikipedia.org/wiki/Great-circle_distance
19 * greatCircleDistance: calculates the distance on a sphere in meters.
20 * geoDistance: calculates the distance on WGS-84 ellipsoid in meters.
21 *
22 * The function calculates distance between two points on Earth specified by longitude and latitude in degrees.
23 *
24 * Latitude must be in [-90, 90], longitude must be [-180, 180].
25 *
26 * Original code of this implementation of this function is here:
27 * https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
28 * Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
29 * Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
30 * The main idea of this implementation is optimisations based on Taylor series, trigonometric identity
31 * and calculated constants once for cosine, arcsine(sqrt) and look up table.
32 */
33
34namespace
35{
36
37constexpr double PI = 3.14159265358979323846;
38constexpr float RAD_IN_DEG = static_cast<float>(PI / 180.0);
39constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / 360.0);
40
41constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063%
42constexpr size_t ASIN_SQRT_LUT_SIZE = 512;
43constexpr size_t METRIC_LUT_SIZE = 1024;
44
45/** Earth radius in meters using WGS84 authalic radius.
46 * We use this value to be consistent with H3 library.
47 */
48constexpr float EARTH_RADIUS = 6371007.180918475;
49constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS;
50
51
52float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table
53float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table
54
55float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude
56float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude
57float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude
58
59
60inline double sqr(double v)
61{
62 return v * v;
63}
64
65inline float sqrf(float v)
66{
67 return v * v;
68}
69
70void geodistInit()
71{
72 for (size_t i = 0; i <= COS_LUT_SIZE; ++i)
73 cos_lut[i] = static_cast<float>(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE]
74
75 for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i)
76 asin_sqrt_lut[i] = static_cast<float>(asin(
77 sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE]
78
79 for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i)
80 {
81 double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE]
82
83 /// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees),
84 /// depending on the latitude (in radians).
85
86 /// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67
87 wgs84_metric_meters_lut[i * 2] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude)));
88 wgs84_metric_meters_lut[i * 2 + 1] = static_cast<float>(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude)));
89
90 sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude)));
91
92 sphere_metric_lut[i] = cosf(latitude);
93 }
94}
95
96inline float geodistDegDiff(float f)
97{
98 f = fabsf(f);
99 while (f > 360)
100 f -= 360;
101 if (f > 180)
102 f = 360 - f;
103 return f;
104}
105
106inline float geodistFastCos(float x)
107{
108 float y = fabsf(x) * (COS_LUT_SIZE / PI / 2);
109 size_t i = static_cast<size_t>(y);
110 y -= i;
111 i &= (COS_LUT_SIZE - 1);
112 return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
113}
114
115inline float geodistFastSin(float x)
116{
117 float y = fabsf(x) * (COS_LUT_SIZE / PI / 2);
118 size_t i = static_cast<size_t>(y);
119 y -= i;
120 i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
121 return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y;
122}
123
124/// fast implementation of asin(sqrt(x))
125/// max error in floats 0.00369%, in doubles 0.00072%
126inline float geodistFastAsinSqrt(float x)
127{
128 if (x < 0.122f)
129 {
130 // distance under 4546 km, Taylor error under 0.00072%
131 float y = sqrtf(x);
132 return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
133 }
134 if (x < 0.948f)
135 {
136 // distance under 17083 km, 512-entry LUT error under 0.00072%
137 x *= ASIN_SQRT_LUT_SIZE;
138 size_t i = static_cast<size_t>(x);
139 return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i);
140 }
141 return asinf(sqrtf(x)); // distance over 17083 km, just compute exact
142}
143
144
145enum class Method
146{
147 SPHERE_DEGREES,
148 SPHERE_METERS,
149 WGS84_METERS,
150};
151
152
153template <Method method>
154float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg)
155{
156 float lat_diff = geodistDegDiff(lat1deg - lat2deg);
157 float lon_diff = geodistDegDiff(lon1deg - lon2deg);
158
159 if (lon_diff < 13)
160 {
161 // points are close enough; use flat ellipsoid model
162 // interpolate metric coefficients using latitudes midpoint
163
164 /// Why comparing only difference in longitude?
165 /// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude.
166 /// (Remember how a plane flies from Moscow to New York)
167 /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line.
168
169 float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
170 size_t latitude_midpoint_index = static_cast<size_t>(latitude_midpoint) & (METRIC_LUT_SIZE - 1);
171
172 /// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1".
173
174 float k_lat;
175 float k_lon;
176
177 if constexpr (method == Method::SPHERE_DEGREES)
178 {
179 k_lat = 1;
180
181 k_lon = sphere_metric_lut[latitude_midpoint_index]
182 + (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
183 }
184 else if constexpr (method == Method::SPHERE_METERS)
185 {
186 k_lat = sqr(EARTH_DIAMETER * PI / 360);
187
188 k_lon = sphere_metric_meters_lut[latitude_midpoint_index]
189 + (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index);
190 }
191 else if constexpr (method == Method::WGS84_METERS)
192 {
193 k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2]
194 + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index);
195
196 k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]
197 + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index);
198 }
199
200 /// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates.
201 return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff);
202 }
203 else
204 {
205 // points too far away; use haversine
206
207 float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF))
208 + geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF));
209
210 if constexpr (method == Method::SPHERE_DEGREES)
211 return (360.0f / PI) * geodistFastAsinSqrt(a);
212 else
213 return EARTH_DIAMETER * geodistFastAsinSqrt(a);
214 }
215}
216
217}
218
219
220template <Method method>
221class FunctionGeoDistance : public IFunction
222{
223public:
224 static constexpr auto name =
225 (method == Method::SPHERE_DEGREES) ? "greatCircleAngle"
226 : ((method == Method::SPHERE_METERS) ? "greatCircleDistance"
227 : "geoDistance");
228
229 static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeoDistance<method>>(); }
230
231private:
232 String getName() const override { return name; }
233 size_t getNumberOfArguments() const override { return 4; }
234
235 bool useDefaultImplementationForConstants() const override { return true; }
236
237 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
238 {
239 for (const auto arg_idx : ext::range(0, arguments.size()))
240 {
241 const auto arg = arguments[arg_idx].get();
242 if (!isNumber(WhichDataType(arg)))
243 throw Exception(
244 "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be numeric",
245 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
246 }
247
248 return std::make_shared<DataTypeFloat32>();
249 }
250
251 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
252 {
253 auto dst = ColumnVector<Float32>::create();
254 auto & dst_data = dst->getData();
255 dst_data.resize(input_rows_count);
256
257 const IColumn & col_lon1 = *block.getByPosition(arguments[0]).column;
258 const IColumn & col_lat1 = *block.getByPosition(arguments[1]).column;
259 const IColumn & col_lon2 = *block.getByPosition(arguments[2]).column;
260 const IColumn & col_lat2 = *block.getByPosition(arguments[3]).column;
261
262 for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
263 dst_data[row_num] = distance<method>(
264 col_lon1.getFloat32(row_num), col_lat1.getFloat32(row_num),
265 col_lon2.getFloat32(row_num), col_lat2.getFloat32(row_num));
266
267 block.getByPosition(result).column = std::move(dst);
268 }
269};
270
271
272void registerFunctionGeoDistance(FunctionFactory & factory)
273{
274 geodistInit();
275 factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>();
276 factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>();
277 factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>();
278}
279
280}
281
282