| 1 | #include <DataTypes/DataTypesNumber.h> |
| 2 | #include <Columns/ColumnsNumber.h> |
| 3 | #include <Columns/ColumnConst.h> |
| 4 | #include <Common/typeid_cast.h> |
| 5 | #include <Common/assert_cast.h> |
| 6 | #include <Functions/IFunctionImpl.h> |
| 7 | #include <Functions/FunctionHelpers.h> |
| 8 | #include <Functions/FunctionFactory.h> |
| 9 | #include <ext/range.h> |
| 10 | #include <cmath> |
| 11 | |
| 12 | |
| 13 | namespace DB |
| 14 | { |
| 15 | |
| 16 | /** Calculates the distance between two geographical locations. |
| 17 | * There are three variants: |
| 18 | * greatCircleAngle: calculates the distance on a sphere in degrees: https://en.wikipedia.org/wiki/Great-circle_distance |
| 19 | * greatCircleDistance: calculates the distance on a sphere in meters. |
| 20 | * geoDistance: calculates the distance on WGS-84 ellipsoid in meters. |
| 21 | * |
| 22 | * The function calculates distance between two points on Earth specified by longitude and latitude in degrees. |
| 23 | * |
| 24 | * Latitude must be in [-90, 90], longitude must be [-180, 180]. |
| 25 | * |
| 26 | * Original code of this implementation of this function is here: |
| 27 | * https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825. |
| 28 | * Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license. |
| 29 | * Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf |
| 30 | * The main idea of this implementation is optimisations based on Taylor series, trigonometric identity |
| 31 | * and calculated constants once for cosine, arcsine(sqrt) and look up table. |
| 32 | */ |
| 33 | |
| 34 | namespace |
| 35 | { |
| 36 | |
| 37 | constexpr double PI = 3.14159265358979323846; |
| 38 | constexpr float RAD_IN_DEG = static_cast<float>(PI / 180.0); |
| 39 | constexpr float RAD_IN_DEG_HALF = static_cast<float>(PI / 360.0); |
| 40 | |
| 41 | constexpr size_t COS_LUT_SIZE = 1024; // maxerr 0.00063% |
| 42 | constexpr size_t ASIN_SQRT_LUT_SIZE = 512; |
| 43 | constexpr size_t METRIC_LUT_SIZE = 1024; |
| 44 | |
| 45 | /** Earth radius in meters using WGS84 authalic radius. |
| 46 | * We use this value to be consistent with H3 library. |
| 47 | */ |
| 48 | constexpr float EARTH_RADIUS = 6371007.180918475; |
| 49 | constexpr float EARTH_DIAMETER = 2 * EARTH_RADIUS; |
| 50 | |
| 51 | |
| 52 | float cos_lut[COS_LUT_SIZE + 1]; /// cos(x) table |
| 53 | float asin_sqrt_lut[ASIN_SQRT_LUT_SIZE + 1]; /// asin(sqrt(x)) * earth_diameter table |
| 54 | |
| 55 | float sphere_metric_lut[METRIC_LUT_SIZE + 1]; /// sphere metric, unitless: the distance in degrees for one degree across longitude depending on latitude |
| 56 | float sphere_metric_meters_lut[METRIC_LUT_SIZE + 1]; /// sphere metric: the distance in meters for one degree across longitude depending on latitude |
| 57 | float wgs84_metric_meters_lut[2 * (METRIC_LUT_SIZE + 1)]; /// ellipsoid metric: the distance in meters across one degree latitude/longitude depending on latitude |
| 58 | |
| 59 | |
| 60 | inline double sqr(double v) |
| 61 | { |
| 62 | return v * v; |
| 63 | } |
| 64 | |
| 65 | inline float sqrf(float v) |
| 66 | { |
| 67 | return v * v; |
| 68 | } |
| 69 | |
| 70 | void geodistInit() |
| 71 | { |
| 72 | for (size_t i = 0; i <= COS_LUT_SIZE; ++i) |
| 73 | cos_lut[i] = static_cast<float>(cos(2 * PI * i / COS_LUT_SIZE)); // [0, 2 * pi] -> [0, COS_LUT_SIZE] |
| 74 | |
| 75 | for (size_t i = 0; i <= ASIN_SQRT_LUT_SIZE; ++i) |
| 76 | asin_sqrt_lut[i] = static_cast<float>(asin( |
| 77 | sqrt(static_cast<double>(i) / ASIN_SQRT_LUT_SIZE))); // [0, 1] -> [0, ASIN_SQRT_LUT_SIZE] |
| 78 | |
| 79 | for (size_t i = 0; i <= METRIC_LUT_SIZE; ++i) |
| 80 | { |
| 81 | double latitude = i * (PI / METRIC_LUT_SIZE) - PI * 0.5; // [-pi / 2, pi / 2] -> [0, METRIC_LUT_SIZE] |
| 82 | |
| 83 | /// Squared metric coefficients (for the distance in meters) on a tangent plane, for latitude and longitude (in degrees), |
| 84 | /// depending on the latitude (in radians). |
| 85 | |
| 86 | /// https://github.com/mapbox/cheap-ruler/blob/master/index.js#L67 |
| 87 | wgs84_metric_meters_lut[i * 2] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * latitude) + 1.20 * cos(4 * latitude))); |
| 88 | wgs84_metric_meters_lut[i * 2 + 1] = static_cast<float>(sqr(111415.13 * cos(latitude) - 94.55 * cos(3 * latitude) + 0.12 * cos(5 * latitude))); |
| 89 | |
| 90 | sphere_metric_meters_lut[i] = static_cast<float>(sqr((EARTH_DIAMETER * PI / 360) * cos(latitude))); |
| 91 | |
| 92 | sphere_metric_lut[i] = cosf(latitude); |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | inline float geodistDegDiff(float f) |
| 97 | { |
| 98 | f = fabsf(f); |
| 99 | while (f > 360) |
| 100 | f -= 360; |
| 101 | if (f > 180) |
| 102 | f = 360 - f; |
| 103 | return f; |
| 104 | } |
| 105 | |
| 106 | inline float geodistFastCos(float x) |
| 107 | { |
| 108 | float y = fabsf(x) * (COS_LUT_SIZE / PI / 2); |
| 109 | size_t i = static_cast<size_t>(y); |
| 110 | y -= i; |
| 111 | i &= (COS_LUT_SIZE - 1); |
| 112 | return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; |
| 113 | } |
| 114 | |
| 115 | inline float geodistFastSin(float x) |
| 116 | { |
| 117 | float y = fabsf(x) * (COS_LUT_SIZE / PI / 2); |
| 118 | size_t i = static_cast<size_t>(y); |
| 119 | y -= i; |
| 120 | i = (i - COS_LUT_SIZE / 4) & (COS_LUT_SIZE - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2 |
| 121 | return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; |
| 122 | } |
| 123 | |
| 124 | /// fast implementation of asin(sqrt(x)) |
| 125 | /// max error in floats 0.00369%, in doubles 0.00072% |
| 126 | inline float geodistFastAsinSqrt(float x) |
| 127 | { |
| 128 | if (x < 0.122f) |
| 129 | { |
| 130 | // distance under 4546 km, Taylor error under 0.00072% |
| 131 | float y = sqrtf(x); |
| 132 | return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f; |
| 133 | } |
| 134 | if (x < 0.948f) |
| 135 | { |
| 136 | // distance under 17083 km, 512-entry LUT error under 0.00072% |
| 137 | x *= ASIN_SQRT_LUT_SIZE; |
| 138 | size_t i = static_cast<size_t>(x); |
| 139 | return asin_sqrt_lut[i] + (asin_sqrt_lut[i + 1] - asin_sqrt_lut[i]) * (x - i); |
| 140 | } |
| 141 | return asinf(sqrtf(x)); // distance over 17083 km, just compute exact |
| 142 | } |
| 143 | |
| 144 | |
| 145 | enum class Method |
| 146 | { |
| 147 | SPHERE_DEGREES, |
| 148 | SPHERE_METERS, |
| 149 | WGS84_METERS, |
| 150 | }; |
| 151 | |
| 152 | |
| 153 | template <Method method> |
| 154 | float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) |
| 155 | { |
| 156 | float lat_diff = geodistDegDiff(lat1deg - lat2deg); |
| 157 | float lon_diff = geodistDegDiff(lon1deg - lon2deg); |
| 158 | |
| 159 | if (lon_diff < 13) |
| 160 | { |
| 161 | // points are close enough; use flat ellipsoid model |
| 162 | // interpolate metric coefficients using latitudes midpoint |
| 163 | |
| 164 | /// Why comparing only difference in longitude? |
| 165 | /// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude. |
| 166 | /// (Remember how a plane flies from Moscow to New York) |
| 167 | /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line. |
| 168 | |
| 169 | float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, KTABLE] indexes |
| 170 | size_t latitude_midpoint_index = static_cast<size_t>(latitude_midpoint) & (METRIC_LUT_SIZE - 1); |
| 171 | |
| 172 | /// This is linear interpolation between two table items at index "latitude_midpoint_index" and "latitude_midpoint_index + 1". |
| 173 | |
| 174 | float k_lat; |
| 175 | float k_lon; |
| 176 | |
| 177 | if constexpr (method == Method::SPHERE_DEGREES) |
| 178 | { |
| 179 | k_lat = 1; |
| 180 | |
| 181 | k_lon = sphere_metric_lut[latitude_midpoint_index] |
| 182 | + (sphere_metric_lut[latitude_midpoint_index + 1] - sphere_metric_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); |
| 183 | } |
| 184 | else if constexpr (method == Method::SPHERE_METERS) |
| 185 | { |
| 186 | k_lat = sqr(EARTH_DIAMETER * PI / 360); |
| 187 | |
| 188 | k_lon = sphere_metric_meters_lut[latitude_midpoint_index] |
| 189 | + (sphere_metric_meters_lut[latitude_midpoint_index + 1] - sphere_metric_meters_lut[latitude_midpoint_index]) * (latitude_midpoint - latitude_midpoint_index); |
| 190 | } |
| 191 | else if constexpr (method == Method::WGS84_METERS) |
| 192 | { |
| 193 | k_lat = wgs84_metric_meters_lut[latitude_midpoint_index * 2] |
| 194 | + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2] - wgs84_metric_meters_lut[latitude_midpoint_index * 2]) * (latitude_midpoint - latitude_midpoint_index); |
| 195 | |
| 196 | k_lon = wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1] |
| 197 | + (wgs84_metric_meters_lut[(latitude_midpoint_index + 1) * 2 + 1] - wgs84_metric_meters_lut[latitude_midpoint_index * 2 + 1]) * (latitude_midpoint - latitude_midpoint_index); |
| 198 | } |
| 199 | |
| 200 | /// Metric on a tangent plane: it differs from Euclidean metric only by scale of coordinates. |
| 201 | return sqrtf(k_lat * lat_diff * lat_diff + k_lon * lon_diff * lon_diff); |
| 202 | } |
| 203 | else |
| 204 | { |
| 205 | // points too far away; use haversine |
| 206 | |
| 207 | float a = sqrf(geodistFastSin(lat_diff * RAD_IN_DEG_HALF)) |
| 208 | + geodistFastCos(lat1deg * RAD_IN_DEG) * geodistFastCos(lat2deg * RAD_IN_DEG) * sqrf(geodistFastSin(lon_diff * RAD_IN_DEG_HALF)); |
| 209 | |
| 210 | if constexpr (method == Method::SPHERE_DEGREES) |
| 211 | return (360.0f / PI) * geodistFastAsinSqrt(a); |
| 212 | else |
| 213 | return EARTH_DIAMETER * geodistFastAsinSqrt(a); |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | } |
| 218 | |
| 219 | |
| 220 | template <Method method> |
| 221 | class FunctionGeoDistance : public IFunction |
| 222 | { |
| 223 | public: |
| 224 | static constexpr auto name = |
| 225 | (method == Method::SPHERE_DEGREES) ? "greatCircleAngle" |
| 226 | : ((method == Method::SPHERE_METERS) ? "greatCircleDistance" |
| 227 | : "geoDistance" ); |
| 228 | |
| 229 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeoDistance<method>>(); } |
| 230 | |
| 231 | private: |
| 232 | String getName() const override { return name; } |
| 233 | size_t getNumberOfArguments() const override { return 4; } |
| 234 | |
| 235 | bool useDefaultImplementationForConstants() const override { return true; } |
| 236 | |
| 237 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
| 238 | { |
| 239 | for (const auto arg_idx : ext::range(0, arguments.size())) |
| 240 | { |
| 241 | const auto arg = arguments[arg_idx].get(); |
| 242 | if (!isNumber(WhichDataType(arg))) |
| 243 | throw Exception( |
| 244 | "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be numeric" , |
| 245 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 246 | } |
| 247 | |
| 248 | return std::make_shared<DataTypeFloat32>(); |
| 249 | } |
| 250 | |
| 251 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
| 252 | { |
| 253 | auto dst = ColumnVector<Float32>::create(); |
| 254 | auto & dst_data = dst->getData(); |
| 255 | dst_data.resize(input_rows_count); |
| 256 | |
| 257 | const IColumn & col_lon1 = *block.getByPosition(arguments[0]).column; |
| 258 | const IColumn & col_lat1 = *block.getByPosition(arguments[1]).column; |
| 259 | const IColumn & col_lon2 = *block.getByPosition(arguments[2]).column; |
| 260 | const IColumn & col_lat2 = *block.getByPosition(arguments[3]).column; |
| 261 | |
| 262 | for (size_t row_num = 0; row_num < input_rows_count; ++row_num) |
| 263 | dst_data[row_num] = distance<method>( |
| 264 | col_lon1.getFloat32(row_num), col_lat1.getFloat32(row_num), |
| 265 | col_lon2.getFloat32(row_num), col_lat2.getFloat32(row_num)); |
| 266 | |
| 267 | block.getByPosition(result).column = std::move(dst); |
| 268 | } |
| 269 | }; |
| 270 | |
| 271 | |
| 272 | void registerFunctionGeoDistance(FunctionFactory & factory) |
| 273 | { |
| 274 | geodistInit(); |
| 275 | factory.registerFunction<FunctionGeoDistance<Method::SPHERE_DEGREES>>(); |
| 276 | factory.registerFunction<FunctionGeoDistance<Method::SPHERE_METERS>>(); |
| 277 | factory.registerFunction<FunctionGeoDistance<Method::WGS84_METERS>>(); |
| 278 | } |
| 279 | |
| 280 | } |
| 281 | |
| 282 | |