| 1 | #include <Functions/IFunctionImpl.h> |
| 2 | #include <Functions/FunctionFactory.h> |
| 3 | #include <Functions/FunctionHelpers.h> |
| 4 | #include <DataTypes/DataTypeArray.h> |
| 5 | #include <Columns/ColumnArray.h> |
| 6 | #include <Columns/ColumnNullable.h> |
| 7 | #include <Columns/ColumnString.h> |
| 8 | #include <Columns/ColumnFixedString.h> |
| 9 | #include <Common/typeid_cast.h> |
| 10 | #include <Common/assert_cast.h> |
| 11 | |
| 12 | |
| 13 | namespace DB |
| 14 | { |
| 15 | |
| 16 | namespace ErrorCodes |
| 17 | { |
| 18 | extern const int LOGICAL_ERROR; |
| 19 | extern const int ILLEGAL_COLUMN; |
| 20 | extern const int ILLEGAL_TYPE_OF_ARGUMENT; |
| 21 | } |
| 22 | |
| 23 | |
| 24 | /** emptyArrayToSingle(arr) - replace empty arrays with arrays of one element with a default value. |
| 25 | */ |
| 26 | class FunctionEmptyArrayToSingle : public IFunction |
| 27 | { |
| 28 | public: |
| 29 | static constexpr auto name = "emptyArrayToSingle" ; |
| 30 | static FunctionPtr create(const Context &) { return std::make_shared<FunctionEmptyArrayToSingle>(); } |
| 31 | |
| 32 | String getName() const override { return name; } |
| 33 | |
| 34 | size_t getNumberOfArguments() const override { return 1; } |
| 35 | bool useDefaultImplementationForConstants() const override { return true; } |
| 36 | bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } |
| 37 | |
| 38 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
| 39 | { |
| 40 | const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()); |
| 41 | if (!array_type) |
| 42 | throw Exception("Argument for function " + getName() + " must be array." , |
| 43 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 44 | |
| 45 | return arguments[0]; |
| 46 | } |
| 47 | |
| 48 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; |
| 49 | }; |
| 50 | |
| 51 | |
| 52 | namespace |
| 53 | { |
| 54 | namespace FunctionEmptyArrayToSingleImpl |
| 55 | { |
| 56 | bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) |
| 57 | { |
| 58 | if (const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[0]).column.get())) |
| 59 | { |
| 60 | if (const_array->getValue<Array>().empty()) |
| 61 | { |
| 62 | auto nested_type = typeid_cast<const DataTypeArray &>(*block.getByPosition(arguments[0]).type).getNestedType(); |
| 63 | |
| 64 | block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst( |
| 65 | input_rows_count, |
| 66 | Array{nested_type->getDefault()}); |
| 67 | } |
| 68 | else |
| 69 | block.getByPosition(result).column = block.getByPosition(arguments[0]).column; |
| 70 | |
| 71 | return true; |
| 72 | } |
| 73 | else |
| 74 | return false; |
| 75 | } |
| 76 | |
| 77 | template <typename T, bool nullable> |
| 78 | bool executeNumber( |
| 79 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
| 80 | IColumn & res_data_col, ColumnArray::Offsets & res_offsets, |
| 81 | const NullMap * src_null_map, |
| 82 | NullMap * res_null_map) |
| 83 | { |
| 84 | if (const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data)) |
| 85 | { |
| 86 | const PaddedPODArray<T> & src_data_vec = src_data_concrete->getData(); |
| 87 | PaddedPODArray<T> & res_data = assert_cast<ColumnVector<T> &>(res_data_col).getData(); |
| 88 | |
| 89 | size_t size = src_offsets.size(); |
| 90 | res_offsets.resize(size); |
| 91 | res_data.reserve(src_data_vec.size()); |
| 92 | |
| 93 | if (nullable) |
| 94 | res_null_map->reserve(src_null_map->size()); |
| 95 | |
| 96 | ColumnArray::Offset src_prev_offset = 0; |
| 97 | ColumnArray::Offset res_prev_offset = 0; |
| 98 | |
| 99 | for (size_t i = 0; i < size; ++i) |
| 100 | { |
| 101 | if (src_offsets[i] != src_prev_offset) |
| 102 | { |
| 103 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
| 104 | res_data.resize(res_prev_offset + size_to_write); |
| 105 | memcpy(&res_data[res_prev_offset], &src_data_vec[src_prev_offset], size_to_write * sizeof(T)); |
| 106 | |
| 107 | if (nullable) |
| 108 | { |
| 109 | res_null_map->resize(res_prev_offset + size_to_write); |
| 110 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
| 111 | } |
| 112 | |
| 113 | res_prev_offset += size_to_write; |
| 114 | res_offsets[i] = res_prev_offset; |
| 115 | } |
| 116 | else |
| 117 | { |
| 118 | res_data.push_back(T()); |
| 119 | ++res_prev_offset; |
| 120 | res_offsets[i] = res_prev_offset; |
| 121 | |
| 122 | if (nullable) |
| 123 | res_null_map->push_back(1); /// Push NULL. |
| 124 | } |
| 125 | |
| 126 | src_prev_offset = src_offsets[i]; |
| 127 | } |
| 128 | |
| 129 | return true; |
| 130 | } |
| 131 | else |
| 132 | return false; |
| 133 | } |
| 134 | |
| 135 | |
| 136 | template <bool nullable> |
| 137 | bool executeFixedString( |
| 138 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
| 139 | IColumn & res_data_col, ColumnArray::Offsets & res_offsets, |
| 140 | const NullMap * src_null_map, |
| 141 | NullMap * res_null_map) |
| 142 | { |
| 143 | if (const ColumnFixedString * src_data_concrete = checkAndGetColumn<ColumnFixedString>(&src_data)) |
| 144 | { |
| 145 | const size_t n = src_data_concrete->getN(); |
| 146 | const ColumnFixedString::Chars & src_data_vec = src_data_concrete->getChars(); |
| 147 | |
| 148 | auto concrete_res_data = typeid_cast<ColumnFixedString *>(&res_data_col); |
| 149 | if (!concrete_res_data) |
| 150 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
| 151 | |
| 152 | ColumnFixedString::Chars & res_data = concrete_res_data->getChars(); |
| 153 | size_t size = src_offsets.size(); |
| 154 | res_offsets.resize(size); |
| 155 | res_data.reserve(src_data_vec.size()); |
| 156 | |
| 157 | if (nullable) |
| 158 | res_null_map->reserve(src_null_map->size()); |
| 159 | |
| 160 | ColumnArray::Offset src_prev_offset = 0; |
| 161 | ColumnArray::Offset res_prev_offset = 0; |
| 162 | |
| 163 | for (size_t i = 0; i < size; ++i) |
| 164 | { |
| 165 | if (src_offsets[i] != src_prev_offset) |
| 166 | { |
| 167 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
| 168 | size_t prev_res_data_size = res_data.size(); |
| 169 | res_data.resize(prev_res_data_size + size_to_write * n); |
| 170 | memcpy(&res_data[prev_res_data_size], &src_data_vec[src_prev_offset * n], size_to_write * n); |
| 171 | |
| 172 | if (nullable) |
| 173 | { |
| 174 | res_null_map->resize(res_prev_offset + size_to_write); |
| 175 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
| 176 | } |
| 177 | |
| 178 | res_prev_offset += size_to_write; |
| 179 | res_offsets[i] = res_prev_offset; |
| 180 | } |
| 181 | else |
| 182 | { |
| 183 | size_t prev_res_data_size = res_data.size(); |
| 184 | res_data.resize(prev_res_data_size + n); |
| 185 | memset(&res_data[prev_res_data_size], 0, n); |
| 186 | ++res_prev_offset; |
| 187 | res_offsets[i] = res_prev_offset; |
| 188 | |
| 189 | if (nullable) |
| 190 | res_null_map->push_back(1); |
| 191 | } |
| 192 | |
| 193 | src_prev_offset = src_offsets[i]; |
| 194 | } |
| 195 | |
| 196 | return true; |
| 197 | } |
| 198 | else |
| 199 | return false; |
| 200 | } |
| 201 | |
| 202 | |
| 203 | template <bool nullable> |
| 204 | bool executeString( |
| 205 | const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, |
| 206 | IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, |
| 207 | const NullMap * src_null_map, |
| 208 | NullMap * res_null_map) |
| 209 | { |
| 210 | if (const ColumnString * src_data_concrete = checkAndGetColumn<ColumnString>(&src_data)) |
| 211 | { |
| 212 | const ColumnString::Offsets & src_string_offsets = src_data_concrete->getOffsets(); |
| 213 | |
| 214 | auto concrete_res_string_offsets = typeid_cast<ColumnString *>(&res_data_col); |
| 215 | if (!concrete_res_string_offsets) |
| 216 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
| 217 | ColumnString::Offsets & res_string_offsets = concrete_res_string_offsets->getOffsets(); |
| 218 | |
| 219 | const ColumnString::Chars & src_data_vec = src_data_concrete->getChars(); |
| 220 | |
| 221 | auto concrete_res_data = typeid_cast<ColumnString *>(&res_data_col); |
| 222 | if (!concrete_res_data) |
| 223 | throw Exception{"Internal error" , ErrorCodes::LOGICAL_ERROR}; |
| 224 | ColumnString::Chars & res_data = concrete_res_data->getChars(); |
| 225 | |
| 226 | size_t size = src_array_offsets.size(); |
| 227 | res_array_offsets.resize(size); |
| 228 | res_string_offsets.reserve(src_string_offsets.size()); |
| 229 | res_data.reserve(src_data_vec.size()); |
| 230 | |
| 231 | if (nullable) |
| 232 | res_null_map->reserve(src_null_map->size()); |
| 233 | |
| 234 | ColumnArray::Offset src_array_prev_offset = 0; |
| 235 | ColumnArray::Offset res_array_prev_offset = 0; |
| 236 | |
| 237 | ColumnString::Offset src_string_prev_offset = 0; |
| 238 | ColumnString::Offset res_string_prev_offset = 0; |
| 239 | |
| 240 | for (size_t i = 0; i < size; ++i) |
| 241 | { |
| 242 | if (src_array_offsets[i] != src_array_prev_offset) |
| 243 | { |
| 244 | size_t array_size = src_array_offsets[i] - src_array_prev_offset; |
| 245 | |
| 246 | size_t bytes_to_copy = 0; |
| 247 | size_t from_string_prev_offset_local = src_string_prev_offset; |
| 248 | for (size_t j = 0; j < array_size; ++j) |
| 249 | { |
| 250 | size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; |
| 251 | |
| 252 | res_string_prev_offset += string_size; |
| 253 | res_string_offsets.push_back(res_string_prev_offset); |
| 254 | |
| 255 | from_string_prev_offset_local += string_size; |
| 256 | bytes_to_copy += string_size; |
| 257 | } |
| 258 | |
| 259 | size_t res_data_old_size = res_data.size(); |
| 260 | res_data.resize(res_data_old_size + bytes_to_copy); |
| 261 | memcpy(&res_data[res_data_old_size], &src_data_vec[src_string_prev_offset], bytes_to_copy); |
| 262 | |
| 263 | if (nullable) |
| 264 | { |
| 265 | res_null_map->resize(res_array_prev_offset + array_size); |
| 266 | memcpy(&(*res_null_map)[res_array_prev_offset], &(*src_null_map)[src_array_prev_offset], array_size); |
| 267 | } |
| 268 | |
| 269 | res_array_prev_offset += array_size; |
| 270 | res_array_offsets[i] = res_array_prev_offset; |
| 271 | } |
| 272 | else |
| 273 | { |
| 274 | res_data.push_back(0); /// An empty string, including zero at the end. |
| 275 | |
| 276 | if (nullable) |
| 277 | res_null_map->push_back(1); |
| 278 | |
| 279 | ++res_string_prev_offset; |
| 280 | res_string_offsets.push_back(res_string_prev_offset); |
| 281 | |
| 282 | ++res_array_prev_offset; |
| 283 | res_array_offsets[i] = res_array_prev_offset; |
| 284 | } |
| 285 | |
| 286 | src_array_prev_offset = src_array_offsets[i]; |
| 287 | |
| 288 | if (src_array_prev_offset) |
| 289 | src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; |
| 290 | } |
| 291 | |
| 292 | return true; |
| 293 | } |
| 294 | else |
| 295 | return false; |
| 296 | } |
| 297 | |
| 298 | |
| 299 | template <bool nullable> |
| 300 | void executeGeneric( |
| 301 | const IColumn & src_data, const ColumnArray::Offsets & src_offsets, |
| 302 | IColumn & res_data, ColumnArray::Offsets & res_offsets, |
| 303 | const NullMap * src_null_map, |
| 304 | NullMap * res_null_map) |
| 305 | { |
| 306 | size_t size = src_offsets.size(); |
| 307 | res_offsets.resize(size); |
| 308 | res_data.reserve(src_data.size()); |
| 309 | |
| 310 | if (nullable) |
| 311 | res_null_map->reserve(src_null_map->size()); |
| 312 | |
| 313 | ColumnArray::Offset src_prev_offset = 0; |
| 314 | ColumnArray::Offset res_prev_offset = 0; |
| 315 | |
| 316 | for (size_t i = 0; i < size; ++i) |
| 317 | { |
| 318 | if (src_offsets[i] != src_prev_offset) |
| 319 | { |
| 320 | size_t size_to_write = src_offsets[i] - src_prev_offset; |
| 321 | res_data.insertRangeFrom(src_data, src_prev_offset, size_to_write); |
| 322 | |
| 323 | if (nullable) |
| 324 | { |
| 325 | res_null_map->resize(res_prev_offset + size_to_write); |
| 326 | memcpy(&(*res_null_map)[res_prev_offset], &(*src_null_map)[src_prev_offset], size_to_write); |
| 327 | } |
| 328 | |
| 329 | res_prev_offset += size_to_write; |
| 330 | res_offsets[i] = res_prev_offset; |
| 331 | } |
| 332 | else |
| 333 | { |
| 334 | res_data.insertDefault(); |
| 335 | ++res_prev_offset; |
| 336 | res_offsets[i] = res_prev_offset; |
| 337 | |
| 338 | if (nullable) |
| 339 | res_null_map->push_back(1); |
| 340 | } |
| 341 | |
| 342 | src_prev_offset = src_offsets[i]; |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | |
| 347 | template <bool nullable> |
| 348 | void executeDispatch( |
| 349 | const IColumn & src_data, const ColumnArray::Offsets & src_array_offsets, |
| 350 | IColumn & res_data_col, ColumnArray::Offsets & res_array_offsets, |
| 351 | const NullMap * src_null_map, |
| 352 | NullMap * res_null_map) |
| 353 | { |
| 354 | if (!(executeNumber<UInt8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 355 | || executeNumber<UInt16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 356 | || executeNumber<UInt32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 357 | || executeNumber<UInt64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 358 | || executeNumber<Int8, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 359 | || executeNumber<Int16, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 360 | || executeNumber<Int32, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 361 | || executeNumber<Int64, nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 362 | || executeNumber<Float32, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 363 | || executeNumber<Float64, nullable>(src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 364 | || executeString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map) |
| 365 | || executeFixedString<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map))) |
| 366 | executeGeneric<nullable> (src_data, src_array_offsets, res_data_col, res_array_offsets, src_null_map, res_null_map); |
| 367 | } |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | |
| 372 | void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) |
| 373 | { |
| 374 | if (FunctionEmptyArrayToSingleImpl::executeConst(block, arguments, result, input_rows_count)) |
| 375 | return; |
| 376 | |
| 377 | const ColumnArray * array = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get()); |
| 378 | if (!array) |
| 379 | throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), |
| 380 | ErrorCodes::ILLEGAL_COLUMN); |
| 381 | |
| 382 | MutableColumnPtr res_ptr = array->cloneEmpty(); |
| 383 | ColumnArray & res = assert_cast<ColumnArray &>(*res_ptr); |
| 384 | |
| 385 | const IColumn & src_data = array->getData(); |
| 386 | const ColumnArray::Offsets & src_offsets = array->getOffsets(); |
| 387 | IColumn & res_data = res.getData(); |
| 388 | ColumnArray::Offsets & res_offsets = res.getOffsets(); |
| 389 | |
| 390 | const NullMap * src_null_map = nullptr; |
| 391 | NullMap * res_null_map = nullptr; |
| 392 | |
| 393 | const IColumn * inner_col; |
| 394 | IColumn * inner_res_col; |
| 395 | |
| 396 | auto nullable_col = checkAndGetColumn<ColumnNullable>(src_data); |
| 397 | if (nullable_col) |
| 398 | { |
| 399 | inner_col = &nullable_col->getNestedColumn(); |
| 400 | src_null_map = &nullable_col->getNullMapData(); |
| 401 | |
| 402 | auto & nullable_res_col = assert_cast<ColumnNullable &>(res_data); |
| 403 | inner_res_col = &nullable_res_col.getNestedColumn(); |
| 404 | res_null_map = &nullable_res_col.getNullMapData(); |
| 405 | } |
| 406 | else |
| 407 | { |
| 408 | inner_col = &src_data; |
| 409 | inner_res_col = &res_data; |
| 410 | } |
| 411 | |
| 412 | if (nullable_col) |
| 413 | FunctionEmptyArrayToSingleImpl::executeDispatch<true>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); |
| 414 | else |
| 415 | FunctionEmptyArrayToSingleImpl::executeDispatch<false>(*inner_col, src_offsets, *inner_res_col, res_offsets, src_null_map, res_null_map); |
| 416 | |
| 417 | block.getByPosition(result).column = std::move(res_ptr); |
| 418 | } |
| 419 | |
| 420 | |
| 421 | void registerFunctionEmptyArrayToSingle(FunctionFactory & factory) |
| 422 | { |
| 423 | factory.registerFunction<FunctionEmptyArrayToSingle>(); |
| 424 | } |
| 425 | |
| 426 | } |
| 427 | |