1#include <Core/Types.h>
2#include <Functions/GeoUtils.h>
3
4namespace
5{
6
7using namespace DB;
8
9const char geohash_base32_encode_lookup_table[32] = {
10 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
11 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm',
12 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
13 'y', 'z',
14};
15
16// TODO: this could be halved by excluding 128-255 range.
17const UInt8 geohash_base32_decode_lookup_table[256] = {
18 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
19 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
20 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
21 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
22 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
23 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
24 0xFF, 0xFF, 10, 11, 12, 13, 14, 15, 16, 0xFF, 17, 18, 0xFF, 19, 20, 0xFF,
25 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
26 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
27 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
28 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
29 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
30 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
31 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
32 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
33 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
34};
35
36const size_t BITS_PER_SYMBOL = 5;
37const size_t MAX_PRECISION = 12;
38const size_t MAX_BITS = MAX_PRECISION * BITS_PER_SYMBOL * 1.5;
39const Float64 LON_MIN = -180;
40const Float64 LON_MAX = 180;
41const Float64 LAT_MIN = -90;
42const Float64 LAT_MAX = 90;
43
44using Encoded = std::array<UInt8, MAX_BITS>;
45
46enum CoordType
47{
48 LATITUDE,
49 LONGITUDE,
50};
51
52inline UInt8 singleCoordBitsPrecision(UInt8 precision, CoordType type)
53{
54 // Single coordinate occupies only half of the total bits.
55 const UInt8 bits = (precision * BITS_PER_SYMBOL) / 2;
56 if (precision & 0x1 && type == LONGITUDE)
57 {
58 return bits + 1;
59 }
60
61 return bits;
62}
63
64inline Encoded encodeCoordinate(Float64 coord, Float64 min, Float64 max, UInt8 bits)
65{
66 Encoded result;
67 result.fill(0);
68
69 for (size_t i = 0; i < bits; ++i)
70 {
71 const Float64 mid = (max + min) / 2;
72 if (coord >= mid)
73 {
74 result[i] = 1;
75 min = mid;
76 }
77 else
78 {
79 result[i] = 0;
80 max = mid;
81 }
82 }
83
84 return result;
85}
86
87inline Float64 decodeCoordinate(const Encoded & coord, Float64 min, Float64 max, UInt8 bits)
88{
89 Float64 mid = (max + min) / 2;
90 for (size_t i = 0; i < bits; ++i)
91 {
92 const auto c = coord[i];
93 if (c == 1)
94 {
95 min = mid;
96 }
97 else
98 {
99 max = mid;
100 }
101
102 mid = (max + min) / 2;
103 }
104
105 return mid;
106}
107
108inline Encoded merge(const Encoded & encodedLon, const Encoded & encodedLat, UInt8 precision)
109{
110 Encoded result;
111 result.fill(0);
112
113 const auto bits = (precision * BITS_PER_SYMBOL) / 2;
114 UInt8 i = 0;
115 for (; i < bits; ++i)
116 {
117 result[i * 2 + 0] = encodedLon[i];
118 result[i * 2 + 1] = encodedLat[i];
119 }
120 // in case of even precision, add last bit of longitude
121 if (precision & 0x1)
122 {
123 result[i * 2] = encodedLon[i];
124 }
125
126 return result;
127}
128
129inline std::tuple<Encoded, Encoded> split(const Encoded & combined, UInt8 precision)
130{
131 Encoded lat, lon;
132 lat.fill(0);
133 lon.fill(0);
134
135 UInt8 i = 0;
136 for (; i < precision * BITS_PER_SYMBOL - 1; i += 2)
137 {
138 // longitude is even bits
139 lon[i/2] = combined[i];
140 lat[i/2] = combined[i + 1];
141 }
142 // precision is even, read the last bit as lat.
143 if (precision & 0x1)
144 {
145 lon[i/2] = combined[precision * BITS_PER_SYMBOL - 1];
146 }
147
148 return std::tie(lon, lat);
149}
150
151inline void base32Encode(const Encoded & binary, UInt8 precision, char * out)
152{
153 extern const char geohash_base32_encode_lookup_table[32];
154
155 for (UInt8 i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL)
156 {
157 UInt8 v = binary[i];
158 v <<= 1;
159 v |= binary[i + 1];
160 v <<= 1;
161 v |= binary[i + 2];
162 v <<= 1;
163 v |= binary[i + 3];
164 v <<= 1;
165 v |= binary[i + 4];
166
167 assert(v < 32);
168
169 *out = geohash_base32_encode_lookup_table[v];
170 ++out;
171 }
172}
173
174inline Encoded base32Decode(const char * encoded_string, size_t encoded_length)
175{
176 extern const UInt8 geohash_base32_decode_lookup_table[256];
177
178 Encoded result;
179
180 for (size_t i = 0; i < encoded_length; ++i)
181 {
182 const UInt8 c = static_cast<UInt8>(encoded_string[i]);
183 const UInt8 decoded = geohash_base32_decode_lookup_table[c] & 0x1F;
184 result[i * 5 + 4] = (decoded >> 0) & 0x01;
185 result[i * 5 + 3] = (decoded >> 1) & 0x01;
186 result[i * 5 + 2] = (decoded >> 2) & 0x01;
187 result[i * 5 + 1] = (decoded >> 3) & 0x01;
188 result[i * 5 + 0] = (decoded >> 4) & 0x01;
189 }
190
191 return result;
192}
193
194inline Float64 getMaxSpan(CoordType type)
195{
196 if (type == LONGITUDE)
197 {
198 return LON_MAX - LON_MIN;
199 }
200
201 return LAT_MAX - LAT_MIN;
202}
203
204inline Float64 getSpan(UInt8 precision, CoordType type)
205{
206 const auto bits = singleCoordBitsPrecision(precision, type);
207 // since every bit of precision divides span by 2, divide max span by 2^bits.
208 return ldexp(getMaxSpan(type), -1 * bits);
209}
210
211inline UInt8 geohashPrecision(UInt8 precision)
212{
213 if (precision == 0 || precision > MAX_PRECISION)
214 {
215 precision = MAX_PRECISION;
216 }
217
218 return precision;
219}
220
221inline size_t geohashEncodeImpl(Float64 longitude, Float64 latitude, UInt8 precision, char * out)
222{
223 const Encoded combined = merge(
224 encodeCoordinate(longitude, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE)),
225 encodeCoordinate(latitude, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)),
226 precision);
227
228 base32Encode(combined, precision, out);
229
230 return precision;
231}
232
233}
234
235namespace DB
236{
237
238namespace ErrorCodes
239{
240extern const int ARGUMENT_OUT_OF_BOUND;
241}
242
243namespace GeoUtils
244{
245
246size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char * out)
247{
248 precision = geohashPrecision(precision);
249 return geohashEncodeImpl(longitude, latitude, precision, out);
250}
251
252void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * longitude, Float64 * latitude)
253{
254 const UInt8 precision = std::min(encoded_len, static_cast<size_t>(MAX_PRECISION));
255 if (precision == 0)
256 {
257 // Empty string is converted to (0, 0)
258 *longitude = 0;
259 *latitude = 0;
260 return;
261 }
262
263 Encoded lat_encoded, lon_encoded;
264 std::tie(lon_encoded, lat_encoded) = split(base32Decode(encoded_string, precision), precision);
265
266 *longitude = decodeCoordinate(lon_encoded, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE));
267 *latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE));
268}
269
270GeohashesInBoxPreparedArgs geohashesInBoxPrepare(const Float64 longitude_min,
271 const Float64 latitude_min,
272 const Float64 longitude_max,
273 const Float64 latitude_max,
274 UInt8 precision)
275{
276 precision = geohashPrecision(precision);
277
278 if (longitude_max < longitude_min || latitude_max < latitude_min)
279 {
280 return {};
281 }
282
283 const auto lon_step = getSpan(precision, LONGITUDE);
284 const auto lat_step = getSpan(precision, LATITUDE);
285
286 // align max to the right(or up) border of geohash grid cell to ensure that cell is in result.
287 Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
288 Float64 lat_min = floor(latitude_min / lat_step) * lat_step;
289 Float64 lon_max = ceil(longitude_max / lon_step) * lon_step;
290 Float64 lat_max = ceil(latitude_max / lat_step) * lat_step;
291
292 const auto lon_span = lon_max - lon_min;
293 const auto lat_span = lat_max - lat_min;
294 // in case of a very small (or zero) span, produce at least 1 item.
295 const auto items_count = std::max(size_t{1}, static_cast<size_t>(ceil(lon_span/lon_step * lat_span/lat_step)));
296
297 return GeohashesInBoxPreparedArgs{
298 items_count,
299 precision,
300 lon_min,
301 lat_min,
302 lon_max,
303 lat_max,
304 lon_step,
305 lat_step
306 };
307}
308
309UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
310{
311 if (args.items_count == 0
312 || args.precision == 0
313 || args.precision > MAX_PRECISION
314 || args.latitude_min > args.latitude_max
315 || args.longitude_min > args.longitude_max
316 || args.longitude_step <= 0
317 || args.latitude_step <= 0)
318 {
319 return 0;
320 }
321
322 UInt64 items = 0;
323 for (auto lon = args.longitude_min; lon < args.longitude_max; lon += args.longitude_step)
324 {
325 for (auto lat = args.latitude_min; lat < args.latitude_max; lat += args.latitude_step)
326 {
327 assert(items <= args.items_count);
328
329 size_t l = geohashEncodeImpl(lon, lat, args.precision, out);
330 out += l;
331 *out = '\0';
332 ++out;
333
334 ++items;
335 }
336 }
337
338 if (items == 0)
339 {
340 size_t l = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
341 out += l;
342 *out = '\0';
343 ++out;
344
345 ++items;
346 }
347
348 return items;
349}
350
351}
352
353}
354