| 1 | #include <Functions/IFunctionImpl.h> |
| 2 | #include <Functions/FunctionFactory.h> |
| 3 | #include <Functions/FunctionHelpers.h> |
| 4 | #include <Functions/GeoUtils.h> |
| 5 | |
| 6 | #include <Columns/ColumnArray.h> |
| 7 | #include <Columns/ColumnString.h> |
| 8 | #include <DataTypes/DataTypeArray.h> |
| 9 | #include <DataTypes/DataTypeString.h> |
| 10 | |
| 11 | #include <memory> |
| 12 | #include <string> |
| 13 | |
| 14 | namespace DB |
| 15 | { |
| 16 | |
/// Error codes referenced by this translation unit; they are only declared here.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;  /// coordinate arguments have differing types
    extern const int LOGICAL_ERROR;             /// internal consistency checks failed
    extern const int TOO_LARGE_ARRAY_SIZE;      /// a single row would produce too many geohashes
}
| 23 | |
| 24 | class FunctionGeohashesInBox : public IFunction |
| 25 | { |
| 26 | public: |
| 27 | static constexpr auto name = "geohashesInBox" ; |
| 28 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeohashesInBox>(); } |
| 29 | |
| 30 | String getName() const override { return name; } |
| 31 | |
| 32 | size_t getNumberOfArguments() const override { return 5; } |
| 33 | |
| 34 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
| 35 | { |
| 36 | validateArgumentType(*this, arguments, 0, isFloat, "float" ); |
| 37 | validateArgumentType(*this, arguments, 1, isFloat, "float" ); |
| 38 | validateArgumentType(*this, arguments, 2, isFloat, "float" ); |
| 39 | validateArgumentType(*this, arguments, 3, isFloat, "float" ); |
| 40 | validateArgumentType(*this, arguments, 4, isUInt8, "integer" ); |
| 41 | |
| 42 | if (!(arguments[0]->equals(*arguments[1]) && |
| 43 | arguments[0]->equals(*arguments[2]) && |
| 44 | arguments[0]->equals(*arguments[3]))) |
| 45 | { |
| 46 | throw Exception("Illegal type of argument of " + getName() + |
| 47 | " all coordinate arguments must have the same type, instead they are:" + |
| 48 | arguments[0]->getName() + ", " + |
| 49 | arguments[1]->getName() + ", " + |
| 50 | arguments[2]->getName() + ", " + |
| 51 | arguments[3]->getName() + "." , |
| 52 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 53 | } |
| 54 | |
| 55 | return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()); |
| 56 | } |
| 57 | |
| 58 | bool useDefaultImplementationForConstants() const override { return true; } |
| 59 | |
| 60 | template <typename LonAndLatType, typename PrecisionType> |
| 61 | void execute(const IColumn * lon_min_column, |
| 62 | const IColumn * lat_min_column, |
| 63 | const IColumn * lon_max_column, |
| 64 | const IColumn * lat_max_column, |
| 65 | const IColumn * precision_column, |
| 66 | ColumnPtr & result) |
| 67 | { |
| 68 | static constexpr size_t max_array_size = 10'000'000; |
| 69 | |
| 70 | const auto * lon_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_min_column); |
| 71 | const auto * lat_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_min_column); |
| 72 | const auto * lon_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_max_column); |
| 73 | const auto * lat_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_max_column); |
| 74 | auto * precision = checkAndGetColumn<ColumnVector<PrecisionType>>(precision_column); |
| 75 | if (precision == nullptr) |
| 76 | { |
| 77 | precision = checkAndGetColumnConstData<ColumnVector<PrecisionType>>(precision_column); |
| 78 | } |
| 79 | |
| 80 | if (!lon_min || !lat_min || !lon_max || !lat_max || !precision) |
| 81 | { |
| 82 | throw Exception("Unsupported argument types for function " + getName() + " : " + |
| 83 | lon_min_column->getName() + ", " + |
| 84 | lat_min_column->getName() + ", " + |
| 85 | lon_max_column->getName() + ", " + |
| 86 | lat_max_column->getName() + "." , |
| 87 | ErrorCodes::LOGICAL_ERROR); |
| 88 | } |
| 89 | |
| 90 | const size_t total_rows = lat_min->size(); |
| 91 | |
| 92 | auto col_res = ColumnArray::create(ColumnString::create()); |
| 93 | ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData()); |
| 94 | ColumnArray::Offsets & res_offsets = col_res->getOffsets(); |
| 95 | ColumnString::Chars & res_strings_chars = res_strings.getChars(); |
| 96 | ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets(); |
| 97 | |
| 98 | for (size_t row = 0; row < total_rows; ++row) |
| 99 | { |
| 100 | const Float64 lon_min_value = lon_min->getElement(row); |
| 101 | const Float64 lat_min_value = lat_min->getElement(row); |
| 102 | const Float64 lon_max_value = lon_max->getElement(row); |
| 103 | const Float64 lat_max_value = lat_max->getElement(row); |
| 104 | |
| 105 | const auto prepared_args = GeoUtils::geohashesInBoxPrepare( |
| 106 | lon_min_value, lat_min_value, lon_max_value, lat_max_value, |
| 107 | precision->getElement(row % precision->size())); |
| 108 | if (prepared_args.items_count > max_array_size) |
| 109 | { |
| 110 | throw Exception(getName() + " would produce " + std::to_string(prepared_args.items_count) + |
| 111 | " array elements, which is bigger than the allowed maximum of " + std::to_string(max_array_size), |
| 112 | ErrorCodes::TOO_LARGE_ARRAY_SIZE); |
| 113 | } |
| 114 | |
| 115 | res_strings_offsets.reserve(res_strings_offsets.size() + prepared_args.items_count); |
| 116 | res_strings_chars.resize(res_strings_chars.size() + prepared_args.items_count * (prepared_args.precision + 1)); |
| 117 | const auto starting_offset = res_strings_offsets.empty() ? 0 : res_strings_offsets.back(); |
| 118 | char * out = reinterpret_cast<char *>(res_strings_chars.data() + starting_offset); |
| 119 | |
| 120 | // Actually write geohashes into preallocated buffer. |
| 121 | GeoUtils::geohashesInBox(prepared_args, out); |
| 122 | |
| 123 | for (UInt8 i = 1; i <= prepared_args.items_count ; ++i) |
| 124 | { |
| 125 | res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i); |
| 126 | } |
| 127 | res_offsets.push_back((res_offsets.empty() ? 0 : res_offsets.back()) + prepared_args.items_count); |
| 128 | } |
| 129 | if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size()) |
| 130 | { |
| 131 | throw Exception("String column size mismatch (internal logical error)" , ErrorCodes::LOGICAL_ERROR); |
| 132 | } |
| 133 | |
| 134 | if (!res_offsets.empty() && res_offsets.back() != res_strings.size()) |
| 135 | { |
| 136 | throw Exception("Arrary column size mismatch (internal logical error)" + |
| 137 | std::to_string(res_offsets.back()) + " != " + std::to_string(res_strings.size()), |
| 138 | ErrorCodes::LOGICAL_ERROR); |
| 139 | } |
| 140 | |
| 141 | result = std::move(col_res); |
| 142 | } |
| 143 | |
| 144 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override |
| 145 | { |
| 146 | const IColumn * lon_min = block.getByPosition(arguments[0]).column.get(); |
| 147 | const IColumn * lat_min = block.getByPosition(arguments[1]).column.get(); |
| 148 | const IColumn * lon_max = block.getByPosition(arguments[2]).column.get(); |
| 149 | const IColumn * lat_max = block.getByPosition(arguments[3]).column.get(); |
| 150 | const IColumn * prec = block.getByPosition(arguments[4]).column.get(); |
| 151 | ColumnPtr & res = block.getByPosition(result).column; |
| 152 | |
| 153 | if (checkColumn<ColumnVector<Float32>>(lon_min)) |
| 154 | { |
| 155 | execute<Float32, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res); |
| 156 | } |
| 157 | else |
| 158 | { |
| 159 | execute<Float64, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res); |
| 160 | } |
| 161 | } |
| 162 | }; |
| 163 | |
| 164 | void registerFunctionGeohashesInBox(FunctionFactory & factory) |
| 165 | { |
| 166 | factory.registerFunction<FunctionGeohashesInBox>(); |
| 167 | } |
| 168 | |
| 169 | } |
| 170 | |