1 | #include <Common/Arena.h> |
2 | #include <Common/SipHash.h> |
3 | #include <Common/NaNUtils.h> |
4 | #include <Common/typeid_cast.h> |
5 | #include <Common/assert_cast.h> |
6 | #include <Columns/ColumnNullable.h> |
7 | #include <Columns/ColumnConst.h> |
8 | #include <DataStreams/ColumnGathererStream.h> |
9 | |
10 | |
11 | namespace DB |
12 | { |
13 | |
14 | namespace ErrorCodes |
15 | { |
16 | extern const int LOGICAL_ERROR; |
17 | extern const int ILLEGAL_COLUMN; |
18 | extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; |
19 | } |
20 | |
21 | |
22 | ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_) |
23 | : nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) |
24 | { |
25 | /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. |
26 | nested_column = getNestedColumn().convertToFullColumnIfConst(); |
27 | |
28 | if (!getNestedColumn().canBeInsideNullable()) |
29 | throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column" , ErrorCodes::ILLEGAL_COLUMN}; |
30 | |
31 | if (isColumnConst(*null_map)) |
32 | throw Exception{"ColumnNullable cannot have constant null map" , ErrorCodes::ILLEGAL_COLUMN}; |
33 | } |
34 | |
35 | |
36 | void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const |
37 | { |
38 | const auto & arr = getNullMapData(); |
39 | hash.update(arr[n]); |
40 | if (arr[n] == 0) |
41 | getNestedColumn().updateHashWithValue(n, hash); |
42 | } |
43 | |
44 | |
45 | MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const |
46 | { |
47 | MutableColumnPtr new_nested_col = getNestedColumn().cloneResized(new_size); |
48 | auto new_null_map = ColumnUInt8::create(); |
49 | |
50 | if (new_size > 0) |
51 | { |
52 | new_null_map->getData().resize(new_size); |
53 | |
54 | size_t count = std::min(size(), new_size); |
55 | memcpy(new_null_map->getData().data(), getNullMapData().data(), count * sizeof(getNullMapData()[0])); |
56 | |
57 | /// If resizing to bigger one, set all new values to NULLs. |
58 | if (new_size > count) |
59 | memset(&new_null_map->getData()[count], 1, new_size - count); |
60 | } |
61 | |
62 | return ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map)); |
63 | } |
64 | |
65 | |
66 | Field ColumnNullable::operator[](size_t n) const |
67 | { |
68 | return isNullAt(n) ? Null() : getNestedColumn()[n]; |
69 | } |
70 | |
71 | |
72 | void ColumnNullable::get(size_t n, Field & res) const |
73 | { |
74 | if (isNullAt(n)) |
75 | res = Null(); |
76 | else |
77 | getNestedColumn().get(n, res); |
78 | } |
79 | |
80 | StringRef ColumnNullable::getDataAt(size_t /*n*/) const |
81 | { |
82 | throw Exception{"Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED}; |
83 | } |
84 | |
85 | void ColumnNullable::insertData(const char * pos, size_t length) |
86 | { |
87 | if (pos == nullptr) |
88 | { |
89 | getNestedColumn().insertDefault(); |
90 | getNullMapData().push_back(1); |
91 | } |
92 | else |
93 | { |
94 | getNestedColumn().insertData(pos, length); |
95 | getNullMapData().push_back(0); |
96 | } |
97 | } |
98 | |
99 | StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const |
100 | { |
101 | const auto & arr = getNullMapData(); |
102 | static constexpr auto s = sizeof(arr[0]); |
103 | |
104 | auto pos = arena.allocContinue(s, begin); |
105 | memcpy(pos, &arr[n], s); |
106 | |
107 | if (arr[n]) |
108 | return StringRef(pos, s); |
109 | |
110 | auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin); |
111 | |
112 | /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back. |
113 | return StringRef(nested_ref.data - s, nested_ref.size + s); |
114 | } |
115 | |
116 | const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos) |
117 | { |
118 | UInt8 val = *reinterpret_cast<const UInt8 *>(pos); |
119 | pos += sizeof(val); |
120 | |
121 | getNullMapData().push_back(val); |
122 | |
123 | if (val == 0) |
124 | pos = getNestedColumn().deserializeAndInsertFromArena(pos); |
125 | else |
126 | getNestedColumn().insertDefault(); |
127 | |
128 | return pos; |
129 | } |
130 | |
131 | void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) |
132 | { |
133 | const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src); |
134 | getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); |
135 | getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length); |
136 | } |
137 | |
138 | void ColumnNullable::insert(const Field & x) |
139 | { |
140 | if (x.isNull()) |
141 | { |
142 | getNestedColumn().insertDefault(); |
143 | getNullMapData().push_back(1); |
144 | } |
145 | else |
146 | { |
147 | getNestedColumn().insert(x); |
148 | getNullMapData().push_back(0); |
149 | } |
150 | } |
151 | |
152 | void ColumnNullable::insertFrom(const IColumn & src, size_t n) |
153 | { |
154 | const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src); |
155 | getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); |
156 | getNullMapData().push_back(src_concrete.getNullMapData()[n]); |
157 | } |
158 | |
159 | void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n) |
160 | { |
161 | getNestedColumn().insertFrom(src, n); |
162 | getNullMapData().push_back(0); |
163 | } |
164 | |
165 | void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length) |
166 | { |
167 | getNestedColumn().insertRangeFrom(src, start, length); |
168 | getNullMapData().resize_fill(getNullMapData().size() + length, 0); |
169 | } |
170 | |
171 | void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length) |
172 | { |
173 | for (size_t i = 0; i < length; ++i) |
174 | insertFromNotNullable(src, position); |
175 | } |
176 | |
177 | void ColumnNullable::popBack(size_t n) |
178 | { |
179 | getNestedColumn().popBack(n); |
180 | getNullMapColumn().popBack(n); |
181 | } |
182 | |
183 | ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint) const |
184 | { |
185 | ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint); |
186 | ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint); |
187 | return ColumnNullable::create(filtered_data, filtered_null_map); |
188 | } |
189 | |
190 | ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const |
191 | { |
192 | ColumnPtr permuted_data = getNestedColumn().permute(perm, limit); |
193 | ColumnPtr permuted_null_map = getNullMapColumn().permute(perm, limit); |
194 | return ColumnNullable::create(permuted_data, permuted_null_map); |
195 | } |
196 | |
197 | ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const |
198 | { |
199 | ColumnPtr indexed_data = getNestedColumn().index(indexes, limit); |
200 | ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit); |
201 | return ColumnNullable::create(indexed_data, indexed_null_map); |
202 | } |
203 | |
204 | int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const |
205 | { |
206 | /// NULL values share the properties of NaN values. |
207 | /// Here the last parameter of compareAt is called null_direction_hint |
208 | /// instead of the usual nan_direction_hint and is used to implement |
209 | /// the ordering specified by either NULLS FIRST or NULLS LAST in the |
210 | /// ORDER BY construction. |
211 | |
212 | const ColumnNullable & nullable_rhs = assert_cast<const ColumnNullable &>(rhs_); |
213 | |
214 | bool lval_is_null = isNullAt(n); |
215 | bool rval_is_null = nullable_rhs.isNullAt(m); |
216 | |
217 | if (unlikely(lval_is_null || rval_is_null)) |
218 | { |
219 | if (lval_is_null && rval_is_null) |
220 | return 0; |
221 | else |
222 | return lval_is_null ? null_direction_hint : -null_direction_hint; |
223 | } |
224 | |
225 | const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); |
226 | return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); |
227 | } |
228 | |
229 | void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const |
230 | { |
231 | /// Cannot pass limit because of unknown amount of NULLs. |
232 | getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); |
233 | |
234 | if ((null_direction_hint > 0) != reverse) |
235 | { |
236 | /// Shift all NULL values to the end. |
237 | |
238 | size_t read_idx = 0; |
239 | size_t write_idx = 0; |
240 | size_t end_idx = res.size(); |
241 | |
242 | if (!limit) |
243 | limit = end_idx; |
244 | else |
245 | limit = std::min(end_idx, limit); |
246 | |
247 | while (read_idx < limit && !isNullAt(res[read_idx])) |
248 | { |
249 | ++read_idx; |
250 | ++write_idx; |
251 | } |
252 | |
253 | ++read_idx; |
254 | |
255 | /// Invariants: |
256 | /// write_idx < read_idx |
257 | /// write_idx points to NULL |
258 | /// read_idx will be incremented to position of next not-NULL |
259 | /// there are range of NULLs between write_idx and read_idx - 1, |
260 | /// We are moving elements from end to begin of this range, |
261 | /// so range will "bubble" towards the end. |
262 | /// Relative order of NULL elements could be changed, |
263 | /// but relative order of non-NULLs is preserved. |
264 | |
265 | while (read_idx < end_idx && write_idx < limit) |
266 | { |
267 | if (!isNullAt(res[read_idx])) |
268 | { |
269 | std::swap(res[read_idx], res[write_idx]); |
270 | ++write_idx; |
271 | } |
272 | ++read_idx; |
273 | } |
274 | } |
275 | else |
276 | { |
277 | /// Shift all NULL values to the beginning. |
278 | |
279 | ssize_t read_idx = res.size() - 1; |
280 | ssize_t write_idx = res.size() - 1; |
281 | |
282 | while (read_idx >= 0 && !isNullAt(res[read_idx])) |
283 | { |
284 | --read_idx; |
285 | --write_idx; |
286 | } |
287 | |
288 | --read_idx; |
289 | |
290 | while (read_idx >= 0 && write_idx >= 0) |
291 | { |
292 | if (!isNullAt(res[read_idx])) |
293 | { |
294 | std::swap(res[read_idx], res[write_idx]); |
295 | --write_idx; |
296 | } |
297 | --read_idx; |
298 | } |
299 | } |
300 | } |
301 | |
302 | void ColumnNullable::gather(ColumnGathererStream & gatherer) |
303 | { |
304 | gatherer.gather(*this); |
305 | } |
306 | |
307 | void ColumnNullable::reserve(size_t n) |
308 | { |
309 | getNestedColumn().reserve(n); |
310 | getNullMapData().reserve(n); |
311 | } |
312 | |
313 | size_t ColumnNullable::byteSize() const |
314 | { |
315 | return getNestedColumn().byteSize() + getNullMapColumn().byteSize(); |
316 | } |
317 | |
318 | size_t ColumnNullable::allocatedBytes() const |
319 | { |
320 | return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes(); |
321 | } |
322 | |
323 | void ColumnNullable::protect() |
324 | { |
325 | getNestedColumn().protect(); |
326 | getNullMapColumn().protect(); |
327 | } |
328 | |
329 | |
330 | namespace |
331 | { |
332 | |
333 | /// The following function implements a slightly more general version |
334 | /// of getExtremes() than the implementation from ColumnVector. |
335 | /// It takes into account the possible presence of nullable values. |
336 | template <typename T> |
337 | void getExtremesFromNullableContent(const ColumnVector<T> & col, const NullMap & null_map, Field & min, Field & max) |
338 | { |
339 | const auto & data = col.getData(); |
340 | size_t size = data.size(); |
341 | |
342 | if (size == 0) |
343 | { |
344 | min = Null(); |
345 | max = Null(); |
346 | return; |
347 | } |
348 | |
349 | bool has_not_null = false; |
350 | bool has_not_nan = false; |
351 | |
352 | T cur_min = 0; |
353 | T cur_max = 0; |
354 | |
355 | for (size_t i = 0; i < size; ++i) |
356 | { |
357 | const T x = data[i]; |
358 | |
359 | if (null_map[i]) |
360 | continue; |
361 | |
362 | if (!has_not_null) |
363 | { |
364 | cur_min = x; |
365 | cur_max = x; |
366 | has_not_null = true; |
367 | has_not_nan = !isNaN(x); |
368 | continue; |
369 | } |
370 | |
371 | if (isNaN(x)) |
372 | continue; |
373 | |
374 | if (!has_not_nan) |
375 | { |
376 | cur_min = x; |
377 | cur_max = x; |
378 | has_not_nan = true; |
379 | continue; |
380 | } |
381 | |
382 | if (x < cur_min) |
383 | cur_min = x; |
384 | else if (x > cur_max) |
385 | cur_max = x; |
386 | } |
387 | |
388 | if (has_not_null) |
389 | { |
390 | min = cur_min; |
391 | max = cur_max; |
392 | } |
393 | } |
394 | |
395 | } |
396 | |
397 | |
398 | void ColumnNullable::getExtremes(Field & min, Field & max) const |
399 | { |
400 | min = Null(); |
401 | max = Null(); |
402 | |
403 | const auto & null_map_data = getNullMapData(); |
404 | |
405 | if (const auto col_i8 = typeid_cast<const ColumnInt8 *>(nested_column.get())) |
406 | getExtremesFromNullableContent<Int8>(*col_i8, null_map_data, min, max); |
407 | else if (const auto col_i16 = typeid_cast<const ColumnInt16 *>(nested_column.get())) |
408 | getExtremesFromNullableContent<Int16>(*col_i16, null_map_data, min, max); |
409 | else if (const auto col_i32 = typeid_cast<const ColumnInt32 *>(nested_column.get())) |
410 | getExtremesFromNullableContent<Int32>(*col_i32, null_map_data, min, max); |
411 | else if (const auto col_i64 = typeid_cast<const ColumnInt64 *>(nested_column.get())) |
412 | getExtremesFromNullableContent<Int64>(*col_i64, null_map_data, min, max); |
413 | else if (const auto col_u8 = typeid_cast<const ColumnUInt8 *>(nested_column.get())) |
414 | getExtremesFromNullableContent<UInt8>(*col_u8, null_map_data, min, max); |
415 | else if (const auto col_u16 = typeid_cast<const ColumnUInt16 *>(nested_column.get())) |
416 | getExtremesFromNullableContent<UInt16>(*col_u16, null_map_data, min, max); |
417 | else if (const auto col_u32 = typeid_cast<const ColumnUInt32 *>(nested_column.get())) |
418 | getExtremesFromNullableContent<UInt32>(*col_u32, null_map_data, min, max); |
419 | else if (const auto col_u64 = typeid_cast<const ColumnUInt64 *>(nested_column.get())) |
420 | getExtremesFromNullableContent<UInt64>(*col_u64, null_map_data, min, max); |
421 | else if (const auto col_f32 = typeid_cast<const ColumnFloat32 *>(nested_column.get())) |
422 | getExtremesFromNullableContent<Float32>(*col_f32, null_map_data, min, max); |
423 | else if (const auto col_f64 = typeid_cast<const ColumnFloat64 *>(nested_column.get())) |
424 | getExtremesFromNullableContent<Float64>(*col_f64, null_map_data, min, max); |
425 | } |
426 | |
427 | |
428 | ColumnPtr ColumnNullable::replicate(const Offsets & offsets) const |
429 | { |
430 | ColumnPtr replicated_data = getNestedColumn().replicate(offsets); |
431 | ColumnPtr replicated_null_map = getNullMapColumn().replicate(offsets); |
432 | return ColumnNullable::create(replicated_data, replicated_null_map); |
433 | } |
434 | |
435 | |
436 | template <bool negative> |
437 | void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map) |
438 | { |
439 | NullMap & arr1 = getNullMapData(); |
440 | const NullMap & arr2 = map.getData(); |
441 | |
442 | if (arr1.size() != arr2.size()) |
443 | throw Exception{"Inconsistent sizes of ColumnNullable objects" , ErrorCodes::LOGICAL_ERROR}; |
444 | |
445 | for (size_t i = 0, size = arr1.size(); i < size; ++i) |
446 | arr1[i] |= negative ^ arr2[i]; |
447 | } |
448 | |
449 | |
450 | void ColumnNullable::applyNullMap(const ColumnUInt8 & map) |
451 | { |
452 | applyNullMapImpl<false>(map); |
453 | } |
454 | |
455 | void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map) |
456 | { |
457 | applyNullMapImpl<true>(map); |
458 | } |
459 | |
460 | |
461 | void ColumnNullable::applyNullMap(const ColumnNullable & other) |
462 | { |
463 | applyNullMap(other.getNullMapColumn()); |
464 | } |
465 | |
466 | |
467 | void ColumnNullable::checkConsistency() const |
468 | { |
469 | if (null_map->size() != getNestedColumn().size()) |
470 | throw Exception("Logical error: Sizes of nested column and null map of Nullable column are not equal" , |
471 | ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT); |
472 | } |
473 | |
474 | ColumnPtr makeNullable(const ColumnPtr & column) |
475 | { |
476 | if (isColumnNullable(*column)) |
477 | return column; |
478 | |
479 | if (isColumnConst(*column)) |
480 | return ColumnConst::create(makeNullable(assert_cast<const ColumnConst &>(*column).getDataColumnPtr()), column->size()); |
481 | |
482 | return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0)); |
483 | } |
484 | |
485 | } |
486 | |