1 | #include <Functions/IFunctionImpl.h> |
2 | #include <Functions/FunctionFactory.h> |
3 | #include <Functions/FunctionHelpers.h> |
4 | #include <Functions/GeoUtils.h> |
5 | |
6 | #include <Columns/ColumnArray.h> |
7 | #include <Columns/ColumnString.h> |
8 | #include <DataTypes/DataTypeArray.h> |
9 | #include <DataTypes/DataTypeString.h> |
10 | |
11 | #include <memory> |
12 | #include <string> |
13 | |
14 | namespace DB |
15 | { |
16 | |
17 | namespace ErrorCodes |
18 | { |
19 | extern const int LOGICAL_ERROR; |
20 | extern const int ILLEGAL_TYPE_OF_ARGUMENT; |
21 | extern const int TOO_LARGE_ARRAY_SIZE; |
22 | } |
23 | |
24 | class FunctionGeohashesInBox : public IFunction |
25 | { |
26 | public: |
27 | static constexpr auto name = "geohashesInBox" ; |
28 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeohashesInBox>(); } |
29 | |
30 | String getName() const override { return name; } |
31 | |
32 | size_t getNumberOfArguments() const override { return 5; } |
33 | |
34 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
35 | { |
36 | validateArgumentType(*this, arguments, 0, isFloat, "float" ); |
37 | validateArgumentType(*this, arguments, 1, isFloat, "float" ); |
38 | validateArgumentType(*this, arguments, 2, isFloat, "float" ); |
39 | validateArgumentType(*this, arguments, 3, isFloat, "float" ); |
40 | validateArgumentType(*this, arguments, 4, isUInt8, "integer" ); |
41 | |
42 | if (!(arguments[0]->equals(*arguments[1]) && |
43 | arguments[0]->equals(*arguments[2]) && |
44 | arguments[0]->equals(*arguments[3]))) |
45 | { |
46 | throw Exception("Illegal type of argument of " + getName() + |
47 | " all coordinate arguments must have the same type, instead they are:" + |
48 | arguments[0]->getName() + ", " + |
49 | arguments[1]->getName() + ", " + |
50 | arguments[2]->getName() + ", " + |
51 | arguments[3]->getName() + "." , |
52 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
53 | } |
54 | |
55 | return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()); |
56 | } |
57 | |
58 | bool useDefaultImplementationForConstants() const override { return true; } |
59 | |
60 | template <typename LonAndLatType, typename PrecisionType> |
61 | void execute(const IColumn * lon_min_column, |
62 | const IColumn * lat_min_column, |
63 | const IColumn * lon_max_column, |
64 | const IColumn * lat_max_column, |
65 | const IColumn * precision_column, |
66 | ColumnPtr & result) |
67 | { |
68 | static constexpr size_t max_array_size = 10'000'000; |
69 | |
70 | const auto * lon_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_min_column); |
71 | const auto * lat_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_min_column); |
72 | const auto * lon_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_max_column); |
73 | const auto * lat_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_max_column); |
74 | auto * precision = checkAndGetColumn<ColumnVector<PrecisionType>>(precision_column); |
75 | if (precision == nullptr) |
76 | { |
77 | precision = checkAndGetColumnConstData<ColumnVector<PrecisionType>>(precision_column); |
78 | } |
79 | |
80 | if (!lon_min || !lat_min || !lon_max || !lat_max || !precision) |
81 | { |
82 | throw Exception("Unsupported argument types for function " + getName() + " : " + |
83 | lon_min_column->getName() + ", " + |
84 | lat_min_column->getName() + ", " + |
85 | lon_max_column->getName() + ", " + |
86 | lat_max_column->getName() + "." , |
87 | ErrorCodes::LOGICAL_ERROR); |
88 | } |
89 | |
90 | const size_t total_rows = lat_min->size(); |
91 | |
92 | auto col_res = ColumnArray::create(ColumnString::create()); |
93 | ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData()); |
94 | ColumnArray::Offsets & res_offsets = col_res->getOffsets(); |
95 | ColumnString::Chars & res_strings_chars = res_strings.getChars(); |
96 | ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets(); |
97 | |
98 | for (size_t row = 0; row < total_rows; ++row) |
99 | { |
100 | const Float64 lon_min_value = lon_min->getElement(row); |
101 | const Float64 lat_min_value = lat_min->getElement(row); |
102 | const Float64 lon_max_value = lon_max->getElement(row); |
103 | const Float64 lat_max_value = lat_max->getElement(row); |
104 | |
105 | const auto prepared_args = GeoUtils::geohashesInBoxPrepare( |
106 | lon_min_value, lat_min_value, lon_max_value, lat_max_value, |
107 | precision->getElement(row % precision->size())); |
108 | if (prepared_args.items_count > max_array_size) |
109 | { |
110 | throw Exception(getName() + " would produce " + std::to_string(prepared_args.items_count) + |
111 | " array elements, which is bigger than the allowed maximum of " + std::to_string(max_array_size), |
112 | ErrorCodes::TOO_LARGE_ARRAY_SIZE); |
113 | } |
114 | |
115 | res_strings_offsets.reserve(res_strings_offsets.size() + prepared_args.items_count); |
116 | res_strings_chars.resize(res_strings_chars.size() + prepared_args.items_count * (prepared_args.precision + 1)); |
117 | const auto starting_offset = res_strings_offsets.empty() ? 0 : res_strings_offsets.back(); |
118 | char * out = reinterpret_cast<char *>(res_strings_chars.data() + starting_offset); |
119 | |
120 | // Actually write geohashes into preallocated buffer. |
121 | GeoUtils::geohashesInBox(prepared_args, out); |
122 | |
123 | for (UInt8 i = 1; i <= prepared_args.items_count ; ++i) |
124 | { |
125 | res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i); |
126 | } |
127 | res_offsets.push_back((res_offsets.empty() ? 0 : res_offsets.back()) + prepared_args.items_count); |
128 | } |
129 | if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size()) |
130 | { |
131 | throw Exception("String column size mismatch (internal logical error)" , ErrorCodes::LOGICAL_ERROR); |
132 | } |
133 | |
134 | if (!res_offsets.empty() && res_offsets.back() != res_strings.size()) |
135 | { |
136 | throw Exception("Arrary column size mismatch (internal logical error)" + |
137 | std::to_string(res_offsets.back()) + " != " + std::to_string(res_strings.size()), |
138 | ErrorCodes::LOGICAL_ERROR); |
139 | } |
140 | |
141 | result = std::move(col_res); |
142 | } |
143 | |
144 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override |
145 | { |
146 | const IColumn * lon_min = block.getByPosition(arguments[0]).column.get(); |
147 | const IColumn * lat_min = block.getByPosition(arguments[1]).column.get(); |
148 | const IColumn * lon_max = block.getByPosition(arguments[2]).column.get(); |
149 | const IColumn * lat_max = block.getByPosition(arguments[3]).column.get(); |
150 | const IColumn * prec = block.getByPosition(arguments[4]).column.get(); |
151 | ColumnPtr & res = block.getByPosition(result).column; |
152 | |
153 | if (checkColumn<ColumnVector<Float32>>(lon_min)) |
154 | { |
155 | execute<Float32, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res); |
156 | } |
157 | else |
158 | { |
159 | execute<Float64, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res); |
160 | } |
161 | } |
162 | }; |
163 | |
164 | void registerFunctionGeohashesInBox(FunctionFactory & factory) |
165 | { |
166 | factory.registerFunction<FunctionGeohashesInBox>(); |
167 | } |
168 | |
169 | } |
170 | |