1#include <Common/Arena.h>
2#include <Common/SipHash.h>
3#include <Common/NaNUtils.h>
4#include <Common/typeid_cast.h>
5#include <Common/assert_cast.h>
6#include <Columns/ColumnNullable.h>
7#include <Columns/ColumnConst.h>
8#include <DataStreams/ColumnGathererStream.h>
9
10
11namespace DB
12{
13
14namespace ErrorCodes
15{
16 extern const int LOGICAL_ERROR;
17 extern const int ILLEGAL_COLUMN;
18 extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
19}
20
21
22ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_)
23 : nested_column(std::move(nested_column_)), null_map(std::move(null_map_))
24{
25 /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
26 nested_column = getNestedColumn().convertToFullColumnIfConst();
27
28 if (!getNestedColumn().canBeInsideNullable())
29 throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN};
30
31 if (isColumnConst(*null_map))
32 throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN};
33}
34
35
36void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
37{
38 const auto & arr = getNullMapData();
39 hash.update(arr[n]);
40 if (arr[n] == 0)
41 getNestedColumn().updateHashWithValue(n, hash);
42}
43
44
45MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const
46{
47 MutableColumnPtr new_nested_col = getNestedColumn().cloneResized(new_size);
48 auto new_null_map = ColumnUInt8::create();
49
50 if (new_size > 0)
51 {
52 new_null_map->getData().resize(new_size);
53
54 size_t count = std::min(size(), new_size);
55 memcpy(new_null_map->getData().data(), getNullMapData().data(), count * sizeof(getNullMapData()[0]));
56
57 /// If resizing to bigger one, set all new values to NULLs.
58 if (new_size > count)
59 memset(&new_null_map->getData()[count], 1, new_size - count);
60 }
61
62 return ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map));
63}
64
65
66Field ColumnNullable::operator[](size_t n) const
67{
68 return isNullAt(n) ? Null() : getNestedColumn()[n];
69}
70
71
72void ColumnNullable::get(size_t n, Field & res) const
73{
74 if (isNullAt(n))
75 res = Null();
76 else
77 getNestedColumn().get(n, res);
78}
79
80StringRef ColumnNullable::getDataAt(size_t /*n*/) const
81{
82 throw Exception{"Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED};
83}
84
85void ColumnNullable::insertData(const char * pos, size_t length)
86{
87 if (pos == nullptr)
88 {
89 getNestedColumn().insertDefault();
90 getNullMapData().push_back(1);
91 }
92 else
93 {
94 getNestedColumn().insertData(pos, length);
95 getNullMapData().push_back(0);
96 }
97}
98
99StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
100{
101 const auto & arr = getNullMapData();
102 static constexpr auto s = sizeof(arr[0]);
103
104 auto pos = arena.allocContinue(s, begin);
105 memcpy(pos, &arr[n], s);
106
107 if (arr[n])
108 return StringRef(pos, s);
109
110 auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
111
112 /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
113 return StringRef(nested_ref.data - s, nested_ref.size + s);
114}
115
116const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
117{
118 UInt8 val = *reinterpret_cast<const UInt8 *>(pos);
119 pos += sizeof(val);
120
121 getNullMapData().push_back(val);
122
123 if (val == 0)
124 pos = getNestedColumn().deserializeAndInsertFromArena(pos);
125 else
126 getNestedColumn().insertDefault();
127
128 return pos;
129}
130
131void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
132{
133 const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src);
134 getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length);
135 getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length);
136}
137
138void ColumnNullable::insert(const Field & x)
139{
140 if (x.isNull())
141 {
142 getNestedColumn().insertDefault();
143 getNullMapData().push_back(1);
144 }
145 else
146 {
147 getNestedColumn().insert(x);
148 getNullMapData().push_back(0);
149 }
150}
151
152void ColumnNullable::insertFrom(const IColumn & src, size_t n)
153{
154 const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
155 getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n);
156 getNullMapData().push_back(src_concrete.getNullMapData()[n]);
157}
158
159void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
160{
161 getNestedColumn().insertFrom(src, n);
162 getNullMapData().push_back(0);
163}
164
165void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
166{
167 getNestedColumn().insertRangeFrom(src, start, length);
168 getNullMapData().resize_fill(getNullMapData().size() + length, 0);
169}
170
171void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
172{
173 for (size_t i = 0; i < length; ++i)
174 insertFromNotNullable(src, position);
175}
176
177void ColumnNullable::popBack(size_t n)
178{
179 getNestedColumn().popBack(n);
180 getNullMapColumn().popBack(n);
181}
182
183ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint) const
184{
185 ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint);
186 ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint);
187 return ColumnNullable::create(filtered_data, filtered_null_map);
188}
189
190ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
191{
192 ColumnPtr permuted_data = getNestedColumn().permute(perm, limit);
193 ColumnPtr permuted_null_map = getNullMapColumn().permute(perm, limit);
194 return ColumnNullable::create(permuted_data, permuted_null_map);
195}
196
197ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
198{
199 ColumnPtr indexed_data = getNestedColumn().index(indexes, limit);
200 ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit);
201 return ColumnNullable::create(indexed_data, indexed_null_map);
202}
203
204int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
205{
206 /// NULL values share the properties of NaN values.
207 /// Here the last parameter of compareAt is called null_direction_hint
208 /// instead of the usual nan_direction_hint and is used to implement
209 /// the ordering specified by either NULLS FIRST or NULLS LAST in the
210 /// ORDER BY construction.
211
212 const ColumnNullable & nullable_rhs = assert_cast<const ColumnNullable &>(rhs_);
213
214 bool lval_is_null = isNullAt(n);
215 bool rval_is_null = nullable_rhs.isNullAt(m);
216
217 if (unlikely(lval_is_null || rval_is_null))
218 {
219 if (lval_is_null && rval_is_null)
220 return 0;
221 else
222 return lval_is_null ? null_direction_hint : -null_direction_hint;
223 }
224
225 const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
226 return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
227}
228
229void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
230{
231 /// Cannot pass limit because of unknown amount of NULLs.
232 getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
233
234 if ((null_direction_hint > 0) != reverse)
235 {
236 /// Shift all NULL values to the end.
237
238 size_t read_idx = 0;
239 size_t write_idx = 0;
240 size_t end_idx = res.size();
241
242 if (!limit)
243 limit = end_idx;
244 else
245 limit = std::min(end_idx, limit);
246
247 while (read_idx < limit && !isNullAt(res[read_idx]))
248 {
249 ++read_idx;
250 ++write_idx;
251 }
252
253 ++read_idx;
254
255 /// Invariants:
256 /// write_idx < read_idx
257 /// write_idx points to NULL
258 /// read_idx will be incremented to position of next not-NULL
259 /// there are range of NULLs between write_idx and read_idx - 1,
260 /// We are moving elements from end to begin of this range,
261 /// so range will "bubble" towards the end.
262 /// Relative order of NULL elements could be changed,
263 /// but relative order of non-NULLs is preserved.
264
265 while (read_idx < end_idx && write_idx < limit)
266 {
267 if (!isNullAt(res[read_idx]))
268 {
269 std::swap(res[read_idx], res[write_idx]);
270 ++write_idx;
271 }
272 ++read_idx;
273 }
274 }
275 else
276 {
277 /// Shift all NULL values to the beginning.
278
279 ssize_t read_idx = res.size() - 1;
280 ssize_t write_idx = res.size() - 1;
281
282 while (read_idx >= 0 && !isNullAt(res[read_idx]))
283 {
284 --read_idx;
285 --write_idx;
286 }
287
288 --read_idx;
289
290 while (read_idx >= 0 && write_idx >= 0)
291 {
292 if (!isNullAt(res[read_idx]))
293 {
294 std::swap(res[read_idx], res[write_idx]);
295 --write_idx;
296 }
297 --read_idx;
298 }
299 }
300}
301
302void ColumnNullable::gather(ColumnGathererStream & gatherer)
303{
304 gatherer.gather(*this);
305}
306
307void ColumnNullable::reserve(size_t n)
308{
309 getNestedColumn().reserve(n);
310 getNullMapData().reserve(n);
311}
312
313size_t ColumnNullable::byteSize() const
314{
315 return getNestedColumn().byteSize() + getNullMapColumn().byteSize();
316}
317
318size_t ColumnNullable::allocatedBytes() const
319{
320 return getNestedColumn().allocatedBytes() + getNullMapColumn().allocatedBytes();
321}
322
323void ColumnNullable::protect()
324{
325 getNestedColumn().protect();
326 getNullMapColumn().protect();
327}
328
329
330namespace
331{
332
333/// The following function implements a slightly more general version
334/// of getExtremes() than the implementation from ColumnVector.
335/// It takes into account the possible presence of nullable values.
336template <typename T>
337void getExtremesFromNullableContent(const ColumnVector<T> & col, const NullMap & null_map, Field & min, Field & max)
338{
339 const auto & data = col.getData();
340 size_t size = data.size();
341
342 if (size == 0)
343 {
344 min = Null();
345 max = Null();
346 return;
347 }
348
349 bool has_not_null = false;
350 bool has_not_nan = false;
351
352 T cur_min = 0;
353 T cur_max = 0;
354
355 for (size_t i = 0; i < size; ++i)
356 {
357 const T x = data[i];
358
359 if (null_map[i])
360 continue;
361
362 if (!has_not_null)
363 {
364 cur_min = x;
365 cur_max = x;
366 has_not_null = true;
367 has_not_nan = !isNaN(x);
368 continue;
369 }
370
371 if (isNaN(x))
372 continue;
373
374 if (!has_not_nan)
375 {
376 cur_min = x;
377 cur_max = x;
378 has_not_nan = true;
379 continue;
380 }
381
382 if (x < cur_min)
383 cur_min = x;
384 else if (x > cur_max)
385 cur_max = x;
386 }
387
388 if (has_not_null)
389 {
390 min = cur_min;
391 max = cur_max;
392 }
393}
394
395}
396
397
398void ColumnNullable::getExtremes(Field & min, Field & max) const
399{
400 min = Null();
401 max = Null();
402
403 const auto & null_map_data = getNullMapData();
404
405 if (const auto col_i8 = typeid_cast<const ColumnInt8 *>(nested_column.get()))
406 getExtremesFromNullableContent<Int8>(*col_i8, null_map_data, min, max);
407 else if (const auto col_i16 = typeid_cast<const ColumnInt16 *>(nested_column.get()))
408 getExtremesFromNullableContent<Int16>(*col_i16, null_map_data, min, max);
409 else if (const auto col_i32 = typeid_cast<const ColumnInt32 *>(nested_column.get()))
410 getExtremesFromNullableContent<Int32>(*col_i32, null_map_data, min, max);
411 else if (const auto col_i64 = typeid_cast<const ColumnInt64 *>(nested_column.get()))
412 getExtremesFromNullableContent<Int64>(*col_i64, null_map_data, min, max);
413 else if (const auto col_u8 = typeid_cast<const ColumnUInt8 *>(nested_column.get()))
414 getExtremesFromNullableContent<UInt8>(*col_u8, null_map_data, min, max);
415 else if (const auto col_u16 = typeid_cast<const ColumnUInt16 *>(nested_column.get()))
416 getExtremesFromNullableContent<UInt16>(*col_u16, null_map_data, min, max);
417 else if (const auto col_u32 = typeid_cast<const ColumnUInt32 *>(nested_column.get()))
418 getExtremesFromNullableContent<UInt32>(*col_u32, null_map_data, min, max);
419 else if (const auto col_u64 = typeid_cast<const ColumnUInt64 *>(nested_column.get()))
420 getExtremesFromNullableContent<UInt64>(*col_u64, null_map_data, min, max);
421 else if (const auto col_f32 = typeid_cast<const ColumnFloat32 *>(nested_column.get()))
422 getExtremesFromNullableContent<Float32>(*col_f32, null_map_data, min, max);
423 else if (const auto col_f64 = typeid_cast<const ColumnFloat64 *>(nested_column.get()))
424 getExtremesFromNullableContent<Float64>(*col_f64, null_map_data, min, max);
425}
426
427
428ColumnPtr ColumnNullable::replicate(const Offsets & offsets) const
429{
430 ColumnPtr replicated_data = getNestedColumn().replicate(offsets);
431 ColumnPtr replicated_null_map = getNullMapColumn().replicate(offsets);
432 return ColumnNullable::create(replicated_data, replicated_null_map);
433}
434
435
436template <bool negative>
437void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map)
438{
439 NullMap & arr1 = getNullMapData();
440 const NullMap & arr2 = map.getData();
441
442 if (arr1.size() != arr2.size())
443 throw Exception{"Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR};
444
445 for (size_t i = 0, size = arr1.size(); i < size; ++i)
446 arr1[i] |= negative ^ arr2[i];
447}
448
449
450void ColumnNullable::applyNullMap(const ColumnUInt8 & map)
451{
452 applyNullMapImpl<false>(map);
453}
454
455void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map)
456{
457 applyNullMapImpl<true>(map);
458}
459
460
461void ColumnNullable::applyNullMap(const ColumnNullable & other)
462{
463 applyNullMap(other.getNullMapColumn());
464}
465
466
467void ColumnNullable::checkConsistency() const
468{
469 if (null_map->size() != getNestedColumn().size())
470 throw Exception("Logical error: Sizes of nested column and null map of Nullable column are not equal",
471 ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT);
472}
473
474ColumnPtr makeNullable(const ColumnPtr & column)
475{
476 if (isColumnNullable(*column))
477 return column;
478
479 if (isColumnConst(*column))
480 return ColumnConst::create(makeNullable(assert_cast<const ColumnConst &>(*column).getDataColumnPtr()), column->size());
481
482 return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0));
483}
484
485}
486