#include <Common/Arena.h>
#include <Common/SipHash.h>
#include <Common/NaNUtils.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <DataStreams/ColumnGathererStream.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_COLUMN;
    extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
    extern const int NOT_IMPLEMENTED;
}


ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_)
    : nested_column(std::move(nested_column_)), null_map(std::move(null_map_))
{
    /// ColumnNullable cannot have a constant nested column. But a constant argument could be passed. Materialize it.
    nested_column = getNestedColumn().convertToFullColumnIfConst();

    if (!getNestedColumn().canBeInsideNullable())
        throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN};

    if (isColumnConst(*null_map))
        throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN};
}


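/// Mix the null flag into the hash first; the nested value is hashed only for non-NULL rows,
/// so a NULL and a default value of the nested type produce different hashes.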
void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
{
    const auto & arr = getNullMapData();
    hash.update(arr[n]);
    if (arr[n] == 0)
        getNestedColumn().updateHashWithValue(n, hash);
}


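/// The copied prefix keeps its null flags; rows added beyond the current size are marked NULL,
/// while the nested column fills them with default values.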
MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const
{
    MutableColumnPtr new_nested_col = getNestedColumn().cloneResized(new_size);
    auto new_null_map = ColumnUInt8::create();

    if (new_size > 0)
    {
        new_null_map->getData().resize(new_size);

        size_t count = std::min(size(), new_size);
        memcpy(new_null_map->getData().data(), getNullMapData().data(), count * sizeof(getNullMapData()[0]));

        /// If resizing to a larger size, mark all new values as NULL.
        if (new_size > count)
            memset(&new_null_map->getData()[count], 1, new_size - count);
    }

    return ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map));
}


Field ColumnNullable::operator[](size_t n) const
{
    return isNullAt(n) ? Null() : getNestedColumn()[n];
}


void ColumnNullable::get(size_t n, Field & res) const
{
    if (isNullAt(n))
        res = Null();
    else
        getNestedColumn().get(n, res);
}

StringRef ColumnNullable::getDataAt(size_t /*n*/) const
{
    throw Exception{"Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED};
}

void ColumnNullable::insertData(const char * pos, size_t length)
{
    if (pos == nullptr)
    {
        getNestedColumn().insertDefault();
        getNullMapData().push_back(1);
    }
    else
    {
        getNestedColumn().insertData(pos, length);
        getNullMapData().push_back(0);
    }
}

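/// Serialized layout in the arena: a single null-flag byte, followed by the nested value only when the flag is 0.
/// The flag byte and the nested payload are allocated contiguously, so the returned StringRef spans both.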
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    const auto & arr = getNullMapData();
    static constexpr auto s = sizeof(arr[0]);

    auto pos = arena.allocContinue(s, begin);
    memcpy(pos, &arr[n], s);

    if (arr[n])
        return StringRef(pos, s);

    auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);

    /// serializeValueIntoArena may reallocate the arena, so take the pointer from nested_ref.data and step it back to include the null flag.
    return StringRef(nested_ref.data - s, nested_ref.size + s);
}

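/// Inverse of serializeValueIntoArena: read the null-flag byte, then the nested value only if the flag is 0
/// (a NULL row gets a default value in the nested column).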
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
{
    UInt8 val = *reinterpret_cast<const UInt8 *>(pos);
    pos += sizeof(val);

    getNullMapData().push_back(val);

    if (val == 0)
        pos = getNestedColumn().deserializeAndInsertFromArena(pos);
    else
        getNestedColumn().insertDefault();

    return pos;
}

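/// Note: insertRangeFrom and insertFrom expect src to be a ColumnNullable;
/// the *FromNotNullable variants below handle plain (non-nullable) source columns.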
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
    const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src);
    getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length);
    getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length);
}

void ColumnNullable::insert(const Field & x)
{
    if (x.isNull())
    {
        getNestedColumn().insertDefault();
        getNullMapData().push_back(1);
    }
    else
    {
        getNestedColumn().insert(x);
        getNullMapData().push_back(0);
    }
}

void ColumnNullable::insertFrom(const IColumn & src, size_t n)
{
    const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
    getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n);
    getNullMapData().push_back(src_concrete.getNullMapData()[n]);
}

void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
{
    getNestedColumn().insertFrom(src, n);
    getNullMapData().push_back(0);
}

void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
{
    getNestedColumn().insertRangeFrom(src, start, length);
    getNullMapData().resize_fill(getNullMapData().size() + length, 0);
}

void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
{
    for (size_t i = 0; i < length; ++i)
        insertFromNotNullable(src, position);
}

void ColumnNullable::popBack(size_t n)
{
    getNestedColumn().popBack(n);
    getNullMapColumn().popBack(n);
}

ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint) const
{
    ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint);
    ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint);
    return ColumnNullable::create(filtered_data, filtered_null_map);
}

ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
{
    ColumnPtr permuted_data = getNestedColumn().permute(perm, limit);
    ColumnPtr permuted_null_map = getNullMapColumn().permute(perm, limit);
    return ColumnNullable::create(permuted_data, permuted_null_map);
}

ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
{
    ColumnPtr indexed_data = getNestedColumn().index(indexes, limit);
    ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit);
    return ColumnNullable::create(indexed_data, indexed_null_map);
}

int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
    /// NULL values share the properties of NaN values.
    /// Here the last parameter of compareAt is called null_direction_hint
    /// instead of the usual nan_direction_hint and is used to implement
    /// the ordering specified by either NULLS FIRST or NULLS LAST in the
    /// ORDER BY clause.

    const ColumnNullable & nullable_rhs = assert_cast<const ColumnNullable &>(rhs_);

    bool lval_is_null = isNullAt(n);
    bool rval_is_null = nullable_rhs.isNullAt(m);

    if (unlikely(lval_is_null || rval_is_null))
    {
        if (lval_is_null && rval_is_null)
            return 0;
        else
            return lval_is_null ? null_direction_hint : -null_direction_hint;
    }

    const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
    return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}

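/// Obtain the permutation of the nested column first, then move all NULL rows to one end:
/// to the end when (null_direction_hint > 0) != reverse, otherwise to the beginning.
/// The relative order of non-NULL rows is preserved.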
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
    /// Cannot pass the limit because of the unknown number of NULLs.
    getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);

    if ((null_direction_hint > 0) != reverse)
    {
        /// Shift all NULL values to the end.

        size_t read_idx = 0;
        size_t write_idx = 0;
        size_t end_idx = res.size();

        if (!limit)
            limit = end_idx;
        else
            limit = std::min(end_idx, limit);

        while (read_idx < limit && !isNullAt(res[read_idx]))
        {
            ++read_idx;
            ++write_idx;
        }

        ++read_idx;

        /// Invariants:
        ///  write_idx < read_idx
        ///  write_idx points to a NULL
        ///  read_idx will be incremented to the position of the next non-NULL
        ///  there is a range of NULLs between write_idx and read_idx - 1.
        /// We move non-NULL elements from the end of this range towards its beginning,
        /// so the range of NULLs "bubbles" towards the end.
        /// The relative order of NULL elements may change,
        /// but the relative order of non-NULL elements is preserved.

        while (read_idx < end_idx && write_idx < limit)
        {
            if (!isNullAt(res[read_idx]))
            {
                std::swap(res[read_idx], res[write_idx]);
                ++write_idx;
            }
            ++read_idx;
        }
    }
    else
    {
        /// Shift all NULL values to the beginning.

        ssize_t read_idx = res.size() - 1;
        ssize_t write_idx = res.size() - 1;

        while (read_idx >= 0 && !isNullAt(res[read_idx]))
        {
            --read_idx;
            --write_idx;
        }

        --read_idx;

        while (read_idx >= 0 && write_idx >= 0)
        {
            if (!isNullAt(res[read_idx]))
            {
                std::swap(res[read_idx], res[write_idx]);
                --write_idx;
            }
            --read_idx;
        }
    }
}

void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
    gatherer.gather(*this);
}

void ColumnNullable::reserve(size_t n)
{
    getNestedColumn().reserve(n);
    getNullMapData().reserve(n);
}

size_t ColumnNullable::byteSize() const
{
    return getNestedColumn().byteSize() + getNullMapColumn().byteSize();
}

size_t ColumnNullable::allocatedBytes() const
{
    return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes();
}

void ColumnNullable::protect()
{
    getNestedColumn().protect();
    getNullMapColumn().protect();
}


namespace
{

/// The following function implements a slightly more general version
/// of getExtremes() than the implementation from ColumnVector.
/// It takes into account the possible presence of NULL values.
template <typename T>
void getExtremesFromNullableContent(const ColumnVector<T> & col, const NullMap & null_map, Field & min, Field & max)
{
    const auto & data = col.getData();
    size_t size = data.size();

    if (size == 0)
    {
        min = Null();
        max = Null();
        return;
    }

    bool has_not_null = false;
    bool has_not_nan = false;

    T cur_min = 0;
    T cur_max = 0;

    for (size_t i = 0; i < size; ++i)
    {
        const T x = data[i];

        if (null_map[i])
            continue;

        if (!has_not_null)
        {
            cur_min = x;
            cur_max = x;
            has_not_null = true;
            has_not_nan = !isNaN(x);
            continue;
        }

        if (isNaN(x))
            continue;

        if (!has_not_nan)
        {
            cur_min = x;
            cur_max = x;
            has_not_nan = true;
            continue;
        }

        if (x < cur_min)
            cur_min = x;
        else if (x > cur_max)
            cur_max = x;
    }

    if (has_not_null)
    {
        min = cur_min;
        max = cur_max;
    }
}

}


void ColumnNullable::getExtremes(Field & min, Field & max) const
{
    min = Null();
    max = Null();

    const auto & null_map_data = getNullMapData();

    if (const auto col_i8 = typeid_cast<const ColumnInt8 *>(nested_column.get()))
        getExtremesFromNullableContent<Int8>(*col_i8, null_map_data, min, max);
    else if (const auto col_i16 = typeid_cast<const ColumnInt16 *>(nested_column.get()))
        getExtremesFromNullableContent<Int16>(*col_i16, null_map_data, min, max);
    else if (const auto col_i32 = typeid_cast<const ColumnInt32 *>(nested_column.get()))
        getExtremesFromNullableContent<Int32>(*col_i32, null_map_data, min, max);
    else if (const auto col_i64 = typeid_cast<const ColumnInt64 *>(nested_column.get()))
        getExtremesFromNullableContent<Int64>(*col_i64, null_map_data, min, max);
    else if (const auto col_u8 = typeid_cast<const ColumnUInt8 *>(nested_column.get()))
        getExtremesFromNullableContent<UInt8>(*col_u8, null_map_data, min, max);
    else if (const auto col_u16 = typeid_cast<const ColumnUInt16 *>(nested_column.get()))
        getExtremesFromNullableContent<UInt16>(*col_u16, null_map_data, min, max);
    else if (const auto col_u32 = typeid_cast<const ColumnUInt32 *>(nested_column.get()))
        getExtremesFromNullableContent<UInt32>(*col_u32, null_map_data, min, max);
    else if (const auto col_u64 = typeid_cast<const ColumnUInt64 *>(nested_column.get()))
        getExtremesFromNullableContent<UInt64>(*col_u64, null_map_data, min, max);
    else if (const auto col_f32 = typeid_cast<const ColumnFloat32 *>(nested_column.get()))
        getExtremesFromNullableContent<Float32>(*col_f32, null_map_data, min, max);
    else if (const auto col_f64 = typeid_cast<const ColumnFloat64 *>(nested_column.get()))
        getExtremesFromNullableContent<Float64>(*col_f64, null_map_data, min, max);
}


ColumnPtr ColumnNullable::replicate(const Offsets & offsets) const
{
    ColumnPtr replicated_data = getNestedColumn().replicate(offsets);
    ColumnPtr replicated_null_map = getNullMapColumn().replicate(offsets);
    return ColumnNullable::create(replicated_data, replicated_null_map);
}


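/// OR the own null map with the given map (or with its negation when `negative` is true):
/// a row becomes NULL if it is NULL according to either map.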
template <bool negative>
void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map)
{
    NullMap & arr1 = getNullMapData();
    const NullMap & arr2 = map.getData();

    if (arr1.size() != arr2.size())
        throw Exception{"Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR};

    for (size_t i = 0, size = arr1.size(); i < size; ++i)
        arr1[i] |= negative ^ arr2[i];
}


void ColumnNullable::applyNullMap(const ColumnUInt8 & map)
{
    applyNullMapImpl<false>(map);
}

void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map)
{
    applyNullMapImpl<true>(map);
}


void ColumnNullable::applyNullMap(const ColumnNullable & other)
{
    applyNullMap(other.getNullMapColumn());
}


void ColumnNullable::checkConsistency() const
{
    if (null_map->size() != getNestedColumn().size())
        throw Exception("Logical error: Sizes of nested column and null map of Nullable column are not equal",
            ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT);
}

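/// Wrap a column into Nullable with an all-zero null map.
/// Already-nullable columns are returned unchanged; constant columns are unwrapped,
/// made nullable, and wrapped back into ColumnConst so that the result stays constant.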
ColumnPtr makeNullable(const ColumnPtr & column)
{
    if (isColumnNullable(*column))
        return column;

    if (isColumnConst(*column))
        return ColumnConst::create(makeNullable(assert_cast<const ColumnConst &>(*column).getDataColumnPtr()), column->size());

    return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
}

}