1 | #include <Columns/ColumnLowCardinality.h> |
2 | #include <Columns/ColumnsNumber.h> |
3 | #include <DataStreams/ColumnGathererStream.h> |
4 | #include <DataTypes/NumberTraits.h> |
5 | #include <Common/HashTable/HashMap.h> |
6 | #include <Common/assert_cast.h> |
7 | |
8 | |
9 | namespace DB |
10 | { |
11 | |
12 | namespace |
13 | { |
14 | template <typename T> |
15 | PaddedPODArray<T> * getIndexesData(IColumn & indexes) |
16 | { |
17 | auto * column = typeid_cast<ColumnVector<T> *>(&indexes); |
18 | if (column) |
19 | return &column->getData(); |
20 | |
21 | return nullptr; |
22 | } |
23 | |
24 | template <typename T> |
25 | MutableColumnPtr mapUniqueIndexImplRef(PaddedPODArray<T> & index) |
26 | { |
27 | PaddedPODArray<T> copy(index.cbegin(), index.cend()); |
28 | |
29 | HashMap<T, T> hash_map; |
30 | for (auto val : index) |
31 | hash_map.insert({val, hash_map.size()}); |
32 | |
33 | auto res_col = ColumnVector<T>::create(); |
34 | auto & data = res_col->getData(); |
35 | |
36 | data.resize(hash_map.size()); |
37 | for (const auto & val : hash_map) |
38 | data[val.getMapped()] = val.getKey(); |
39 | |
40 | for (auto & ind : index) |
41 | ind = hash_map[ind]; |
42 | |
43 | for (size_t i = 0; i < index.size(); ++i) |
44 | if (data[index[i]] != copy[i]) |
45 | throw Exception("Expected " + toString(data[index[i]]) + ", but got " + toString(copy[i]), ErrorCodes::LOGICAL_ERROR); |
46 | |
47 | return res_col; |
48 | } |
49 | |
50 | template <typename T> |
51 | MutableColumnPtr mapUniqueIndexImpl(PaddedPODArray<T> & index) |
52 | { |
53 | if (index.empty()) |
54 | return ColumnVector<T>::create(); |
55 | |
56 | auto size = index.size(); |
57 | |
58 | T max_val = index[0]; |
59 | for (size_t i = 1; i < size; ++i) |
60 | max_val = std::max(max_val, index[i]); |
61 | |
62 | /// May happen when dictionary is shared. |
63 | if (max_val > size) |
64 | return mapUniqueIndexImplRef(index); |
65 | |
66 | auto map_size = UInt64(max_val) + 1; |
67 | PaddedPODArray<T> map(map_size, 0); |
68 | T zero_pos_value = index[0]; |
69 | index[0] = 0; |
70 | T cur_pos = 0; |
71 | for (size_t i = 1; i < size; ++i) |
72 | { |
73 | T val = index[i]; |
74 | if (val != zero_pos_value && map[val] == 0) |
75 | { |
76 | ++cur_pos; |
77 | map[val] = cur_pos; |
78 | } |
79 | |
80 | index[i] = map[val]; |
81 | } |
82 | |
83 | auto res_col = ColumnVector<T>::create(UInt64(cur_pos) + 1); |
84 | auto & data = res_col->getData(); |
85 | data[0] = zero_pos_value; |
86 | for (size_t i = 0; i < map_size; ++i) |
87 | { |
88 | auto val = map[i]; |
89 | if (val) |
90 | data[val] = static_cast<T>(i); |
91 | } |
92 | |
93 | return res_col; |
94 | } |
95 | |
96 | /// Returns unique values of column. Write new index to column. |
97 | MutableColumnPtr mapUniqueIndex(IColumn & column) |
98 | { |
99 | if (auto * data_uint8 = getIndexesData<UInt8>(column)) |
100 | return mapUniqueIndexImpl(*data_uint8); |
101 | else if (auto * data_uint16 = getIndexesData<UInt16>(column)) |
102 | return mapUniqueIndexImpl(*data_uint16); |
103 | else if (auto * data_uint32 = getIndexesData<UInt32>(column)) |
104 | return mapUniqueIndexImpl(*data_uint32); |
105 | else if (auto * data_uint64 = getIndexesData<UInt64>(column)) |
106 | return mapUniqueIndexImpl(*data_uint64); |
107 | else |
108 | throw Exception("Indexes column for getUniqueIndex must be ColumnUInt, got" + column.getName(), |
109 | ErrorCodes::LOGICAL_ERROR); |
110 | } |
111 | } |
112 | |
113 | |
114 | ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared) |
115 | : dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_)) |
116 | { |
117 | idx.check(getDictionary().size()); |
118 | } |
119 | |
120 | void ColumnLowCardinality::insert(const Field & x) |
121 | { |
122 | compactIfSharedDictionary(); |
123 | idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x)); |
124 | idx.check(getDictionary().size()); |
125 | } |
126 | |
127 | void ColumnLowCardinality::insertDefault() |
128 | { |
129 | idx.insertPosition(getDictionary().getDefaultValueIndex()); |
130 | } |
131 | |
132 | void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) |
133 | { |
134 | auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src); |
135 | |
136 | if (!low_cardinality_src) |
137 | throw Exception("Expected ColumnLowCardinality, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN); |
138 | |
139 | size_t position = low_cardinality_src->getIndexes().getUInt(n); |
140 | |
141 | if (&low_cardinality_src->getDictionary() == &getDictionary()) |
142 | { |
143 | /// Dictionary is shared with src column. Insert only index. |
144 | idx.insertPosition(position); |
145 | } |
146 | else |
147 | { |
148 | compactIfSharedDictionary(); |
149 | const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn(); |
150 | idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position)); |
151 | } |
152 | |
153 | idx.check(getDictionary().size()); |
154 | } |
155 | |
156 | void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) |
157 | { |
158 | compactIfSharedDictionary(); |
159 | idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n)); |
160 | idx.check(getDictionary().size()); |
161 | } |
162 | |
163 | void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) |
164 | { |
165 | auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src); |
166 | |
167 | if (!low_cardinality_src) |
168 | throw Exception("Expected ColumnLowCardinality, got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN); |
169 | |
170 | if (&low_cardinality_src->getDictionary() == &getDictionary()) |
171 | { |
172 | /// Dictionary is shared with src column. Insert only indexes. |
173 | idx.insertPositionsRange(low_cardinality_src->getIndexes(), start, length); |
174 | } |
175 | else |
176 | { |
177 | compactIfSharedDictionary(); |
178 | |
179 | /// TODO: Support native insertion from other unique column. It will help to avoid null map creation. |
180 | |
181 | auto sub_idx = (*low_cardinality_src->getIndexes().cut(start, length)).mutate(); |
182 | auto idx_map = mapUniqueIndex(*sub_idx); |
183 | |
184 | auto src_nested = low_cardinality_src->getDictionary().getNestedColumn(); |
185 | auto used_keys = src_nested->index(*idx_map, 0); |
186 | |
187 | auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size()); |
188 | idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length); |
189 | } |
190 | idx.check(getDictionary().size()); |
191 | } |
192 | |
193 | void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length) |
194 | { |
195 | compactIfSharedDictionary(); |
196 | auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length); |
197 | idx.insertPositionsRange(*inserted_indexes, 0, length); |
198 | idx.check(getDictionary().size()); |
199 | } |
200 | |
201 | void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions) |
202 | { |
203 | Index(positions.getPtr()).check(keys.size()); |
204 | compactIfSharedDictionary(); |
205 | auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size()); |
206 | idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size()); |
207 | idx.check(getDictionary().size()); |
208 | } |
209 | |
210 | void ColumnLowCardinality::insertData(const char * pos, size_t length) |
211 | { |
212 | compactIfSharedDictionary(); |
213 | idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length)); |
214 | idx.check(getDictionary().size()); |
215 | } |
216 | |
217 | StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const |
218 | { |
219 | return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin); |
220 | } |
221 | |
222 | const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * pos) |
223 | { |
224 | compactIfSharedDictionary(); |
225 | |
226 | const char * new_pos; |
227 | idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos)); |
228 | |
229 | idx.check(getDictionary().size()); |
230 | return new_pos; |
231 | } |
232 | |
233 | void ColumnLowCardinality::gather(ColumnGathererStream & gatherer) |
234 | { |
235 | gatherer.gather(*this); |
236 | } |
237 | |
238 | MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const |
239 | { |
240 | auto unique_ptr = dictionary.getColumnUniquePtr(); |
241 | if (size == 0) |
242 | unique_ptr = unique_ptr->cloneEmpty(); |
243 | |
244 | return ColumnLowCardinality::create((*std::move(unique_ptr)).mutate(), getIndexes().cloneResized(size)); |
245 | } |
246 | |
247 | int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const |
248 | { |
249 | const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs); |
250 | size_t n_index = getIndexes().getUInt(n); |
251 | size_t m_index = low_cardinality_column.getIndexes().getUInt(m); |
252 | return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); |
253 | } |
254 | |
255 | void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const |
256 | { |
257 | if (limit == 0) |
258 | limit = size(); |
259 | |
260 | size_t unique_limit = getDictionary().size(); |
261 | Permutation unique_perm; |
262 | getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); |
263 | |
264 | /// TODO: optimize with sse. |
265 | |
266 | /// Get indexes per row in column_unique. |
267 | std::vector<std::vector<size_t>> indexes_per_row(getDictionary().size()); |
268 | size_t indexes_size = getIndexes().size(); |
269 | for (size_t row = 0; row < indexes_size; ++row) |
270 | indexes_per_row[getIndexes().getUInt(row)].push_back(row); |
271 | |
272 | /// Replicate permutation. |
273 | size_t perm_size = std::min(indexes_size, limit); |
274 | res.resize(perm_size); |
275 | size_t perm_index = 0; |
276 | for (size_t row = 0; row < unique_perm.size() && perm_index < perm_size; ++row) |
277 | { |
278 | const auto & row_indexes = indexes_per_row[unique_perm[row]]; |
279 | for (auto row_index : row_indexes) |
280 | { |
281 | res[perm_index] = row_index; |
282 | ++perm_index; |
283 | |
284 | if (perm_index == perm_size) |
285 | break; |
286 | } |
287 | } |
288 | } |
289 | |
290 | std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const |
291 | { |
292 | auto columns = getIndexes().scatter(num_columns, selector); |
293 | for (auto & column : columns) |
294 | { |
295 | auto unique_ptr = dictionary.getColumnUniquePtr(); |
296 | column = ColumnLowCardinality::create((*std::move(unique_ptr)).mutate(), std::move(column)); |
297 | } |
298 | |
299 | return columns; |
300 | } |
301 | |
302 | void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique) |
303 | { |
304 | if (!empty()) |
305 | throw Exception("Can't set ColumnUnique for ColumnLowCardinality because is't not empty." , |
306 | ErrorCodes::LOGICAL_ERROR); |
307 | |
308 | dictionary.setShared(column_unique); |
309 | } |
310 | |
311 | ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const |
312 | { |
313 | auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate(); |
314 | /// Create column with new indexes and old dictionary. |
315 | /// Dictionary is shared, but will be recreated after compactInplace call. |
316 | auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions)); |
317 | /// Will create new dictionary. |
318 | column->compactInplace(); |
319 | |
320 | return column; |
321 | } |
322 | |
323 | void ColumnLowCardinality::compactInplace() |
324 | { |
325 | auto positions = idx.detachPositions(); |
326 | dictionary.compact(positions); |
327 | idx.attachPositions(std::move(positions)); |
328 | } |
329 | |
330 | void ColumnLowCardinality::compactIfSharedDictionary() |
331 | { |
332 | if (dictionary.isShared()) |
333 | compactInplace(); |
334 | } |
335 | |
336 | |
337 | ColumnLowCardinality::DictionaryEncodedColumn |
338 | ColumnLowCardinality::getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const |
339 | { |
340 | MutableColumnPtr sub_indexes = (*std::move(idx.getPositions()->cut(offset, limit))).mutate(); |
341 | auto indexes_map = mapUniqueIndex(*sub_indexes); |
342 | auto sub_keys = getDictionary().getNestedColumn()->index(*indexes_map, 0); |
343 | |
344 | return {std::move(sub_keys), std::move(sub_indexes)}; |
345 | } |
346 | |
347 | ColumnPtr ColumnLowCardinality::countKeys() const |
348 | { |
349 | const auto & nested_column = getDictionary().getNestedColumn(); |
350 | size_t dict_size = nested_column->size(); |
351 | |
352 | auto counter = ColumnUInt64::create(dict_size, 0); |
353 | idx.countKeys(counter->getData()); |
354 | return counter; |
355 | } |
356 | |
357 | bool ColumnLowCardinality::containsNull() const |
358 | { |
359 | return getDictionary().nestedColumnIsNullable() && idx.containsDefault(); |
360 | } |
361 | |
362 | |
363 | ColumnLowCardinality::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {} |
364 | |
365 | ColumnLowCardinality::Index::Index(MutableColumnPtr && positions_) : positions(std::move(positions_)) |
366 | { |
367 | updateSizeOfType(); |
368 | } |
369 | |
370 | ColumnLowCardinality::Index::Index(ColumnPtr positions_) : positions(std::move(positions_)) |
371 | { |
372 | updateSizeOfType(); |
373 | } |
374 | |
375 | template <typename Callback> |
376 | void ColumnLowCardinality::Index::callForType(Callback && callback, size_t size_of_type) |
377 | { |
378 | switch (size_of_type) |
379 | { |
380 | case sizeof(UInt8): { callback(UInt8()); break; } |
381 | case sizeof(UInt16): { callback(UInt16()); break; } |
382 | case sizeof(UInt32): { callback(UInt32()); break; } |
383 | case sizeof(UInt64): { callback(UInt64()); break; } |
384 | default: { |
385 | throw Exception("Unexpected size of index type for ColumnLowCardinality: " + toString(size_of_type), |
386 | ErrorCodes::LOGICAL_ERROR); |
387 | } |
388 | } |
389 | } |
390 | |
391 | size_t ColumnLowCardinality::Index::getSizeOfIndexType(const IColumn & column, size_t hint) |
392 | { |
393 | auto checkFor = [&](auto type) { return typeid_cast<const ColumnVector<decltype(type)> *>(&column) != nullptr; }; |
394 | auto tryGetSizeFor = [&](auto type) -> size_t { return checkFor(type) ? sizeof(decltype(type)) : 0; }; |
395 | |
396 | if (hint) |
397 | { |
398 | size_t size = 0; |
399 | callForType([&](auto type) { size = tryGetSizeFor(type); }, hint); |
400 | |
401 | if (size) |
402 | return size; |
403 | } |
404 | |
405 | if (auto size = tryGetSizeFor(UInt8())) |
406 | return size; |
407 | if (auto size = tryGetSizeFor(UInt16())) |
408 | return size; |
409 | if (auto size = tryGetSizeFor(UInt32())) |
410 | return size; |
411 | if (auto size = tryGetSizeFor(UInt64())) |
412 | return size; |
413 | |
414 | throw Exception("Unexpected indexes type for ColumnLowCardinality. Expected UInt, got " + column.getName(), |
415 | ErrorCodes::ILLEGAL_COLUMN); |
416 | } |
417 | |
418 | void ColumnLowCardinality::Index::attachPositions(ColumnPtr positions_) |
419 | { |
420 | positions = std::move(positions_); |
421 | updateSizeOfType(); |
422 | } |
423 | |
424 | template <typename IndexType> |
425 | typename ColumnVector<IndexType>::Container & ColumnLowCardinality::Index::getPositionsData() |
426 | { |
427 | auto * positions_ptr = typeid_cast<ColumnVector<IndexType> *>(positions->assumeMutable().get()); |
428 | if (!positions_ptr) |
429 | throw Exception("Invalid indexes type for ColumnLowCardinality." |
430 | " Expected UInt" + toString(8 * sizeof(IndexType)) + ", got " + positions->getName(), |
431 | ErrorCodes::LOGICAL_ERROR); |
432 | |
433 | return positions_ptr->getData(); |
434 | } |
435 | |
436 | template <typename IndexType> |
437 | const typename ColumnVector<IndexType>::Container & ColumnLowCardinality::Index::getPositionsData() const |
438 | { |
439 | const auto * positions_ptr = typeid_cast<const ColumnVector<IndexType> *>(positions.get()); |
440 | if (!positions_ptr) |
441 | throw Exception("Invalid indexes type for ColumnLowCardinality." |
442 | " Expected UInt" + toString(8 * sizeof(IndexType)) + ", got " + positions->getName(), |
443 | ErrorCodes::LOGICAL_ERROR); |
444 | |
445 | return positions_ptr->getData(); |
446 | } |
447 | |
448 | template <typename IndexType> |
449 | void ColumnLowCardinality::Index::convertPositions() |
450 | { |
451 | auto convert = [&](auto x) |
452 | { |
453 | using CurIndexType = decltype(x); |
454 | auto & data = getPositionsData<CurIndexType>(); |
455 | |
456 | if (sizeof(CurIndexType) > sizeof(IndexType)) |
457 | throw Exception("Converting indexes to smaller type: from " + toString(sizeof(CurIndexType)) + |
458 | " to " + toString(sizeof(IndexType)), ErrorCodes::LOGICAL_ERROR); |
459 | |
460 | if (sizeof(CurIndexType) != sizeof(IndexType)) |
461 | { |
462 | size_t size = data.size(); |
463 | auto new_positions = ColumnVector<IndexType>::create(size); |
464 | auto & new_data = new_positions->getData(); |
465 | |
466 | /// TODO: Optimize with SSE? |
467 | for (size_t i = 0; i < size; ++i) |
468 | new_data[i] = data[i]; |
469 | |
470 | positions = std::move(new_positions); |
471 | size_of_type = sizeof(IndexType); |
472 | } |
473 | }; |
474 | |
475 | callForType(std::move(convert), size_of_type); |
476 | |
477 | checkSizeOfType(); |
478 | } |
479 | |
480 | void ColumnLowCardinality::Index::expandType() |
481 | { |
482 | auto expand = [&](auto type) |
483 | { |
484 | using CurIndexType = decltype(type); |
485 | constexpr auto next_size = NumberTraits::nextSize(sizeof(CurIndexType)); |
486 | if (next_size == sizeof(CurIndexType)) |
487 | throw Exception("Can't expand indexes type for ColumnLowCardinality from type: " |
488 | + demangle(typeid(CurIndexType).name()), ErrorCodes::LOGICAL_ERROR); |
489 | |
490 | using NewIndexType = typename NumberTraits::Construct<false, false, next_size>::Type; |
491 | convertPositions<NewIndexType>(); |
492 | }; |
493 | |
494 | callForType(std::move(expand), size_of_type); |
495 | } |
496 | |
497 | UInt64 ColumnLowCardinality::Index::getMaxPositionForCurrentType() const |
498 | { |
499 | UInt64 value = 0; |
500 | callForType([&](auto type) { value = std::numeric_limits<decltype(type)>::max(); }, size_of_type); |
501 | return value; |
502 | } |
503 | |
504 | size_t ColumnLowCardinality::Index::getPositionAt(size_t row) const |
505 | { |
506 | size_t pos; |
507 | auto getPosition = [&](auto type) |
508 | { |
509 | using CurIndexType = decltype(type); |
510 | pos = getPositionsData<CurIndexType>()[row]; |
511 | }; |
512 | |
513 | callForType(std::move(getPosition), size_of_type); |
514 | return pos; |
515 | } |
516 | |
517 | void ColumnLowCardinality::Index::insertPosition(UInt64 position) |
518 | { |
519 | while (position > getMaxPositionForCurrentType()) |
520 | expandType(); |
521 | |
522 | positions->insert(position); |
523 | checkSizeOfType(); |
524 | } |
525 | |
526 | void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit) |
527 | { |
528 | auto insertForType = [&](auto type) |
529 | { |
530 | using ColumnType = decltype(type); |
531 | const auto * column_ptr = typeid_cast<const ColumnVector<ColumnType> *>(&column); |
532 | |
533 | if (!column_ptr) |
534 | return false; |
535 | |
536 | if (size_of_type < sizeof(ColumnType)) |
537 | convertPositions<ColumnType>(); |
538 | |
539 | if (size_of_type == sizeof(ColumnType)) |
540 | positions->insertRangeFrom(column, offset, limit); |
541 | else |
542 | { |
543 | auto copy = [&](auto cur_type) |
544 | { |
545 | using CurIndexType = decltype(cur_type); |
546 | auto & positions_data = getPositionsData<CurIndexType>(); |
547 | const auto & column_data = column_ptr->getData(); |
548 | |
549 | UInt64 size = positions_data.size(); |
550 | positions_data.resize(size + limit); |
551 | |
552 | for (UInt64 i = 0; i < limit; ++i) |
553 | positions_data[size + i] = column_data[offset + i]; |
554 | }; |
555 | |
556 | callForType(std::move(copy), size_of_type); |
557 | } |
558 | |
559 | return true; |
560 | }; |
561 | |
562 | if (!insertForType(UInt8()) && |
563 | !insertForType(UInt16()) && |
564 | !insertForType(UInt32()) && |
565 | !insertForType(UInt64())) |
566 | throw Exception("Invalid column for ColumnLowCardinality index. Expected UInt, got " + column.getName(), |
567 | ErrorCodes::ILLEGAL_COLUMN); |
568 | |
569 | checkSizeOfType(); |
570 | } |
571 | |
572 | void ColumnLowCardinality::Index::check(size_t /*max_dictionary_size*/) |
573 | { |
574 | /// TODO: remove |
575 | /* |
576 | auto check = [&](auto cur_type) |
577 | { |
578 | using CurIndexType = decltype(cur_type); |
579 | auto & positions_data = getPositionsData<CurIndexType>(); |
580 | |
581 | for (size_t i = 0; i < positions_data.size(); ++i) |
582 | { |
583 | if (positions_data[i] >= max_dictionary_size) |
584 | { |
585 | throw Exception("Found index " + toString(positions_data[i]) + " at position " + toString(i) |
586 | + " which is grated or equal than dictionary size " + toString(max_dictionary_size), |
587 | ErrorCodes::LOGICAL_ERROR); |
588 | } |
589 | } |
590 | }; |
591 | |
592 | callForType(std::move(check), size_of_type); |
593 | */ |
594 | } |
595 | |
596 | void ColumnLowCardinality::Index::checkSizeOfType() |
597 | { |
598 | if (size_of_type != getSizeOfIndexType(*positions, size_of_type)) |
599 | throw Exception("Invalid size of type. Expected " + toString(8 * size_of_type) + |
600 | ", but positions are " + positions->getName(), ErrorCodes::LOGICAL_ERROR); |
601 | } |
602 | |
603 | void ColumnLowCardinality::Index::countKeys(ColumnUInt64::Container & counts) const |
604 | { |
605 | auto counter = [&](auto x) |
606 | { |
607 | using CurIndexType = decltype(x); |
608 | auto & data = getPositionsData<CurIndexType>(); |
609 | for (auto pos : data) |
610 | ++counts[pos]; |
611 | }; |
612 | callForType(std::move(counter), size_of_type); |
613 | } |
614 | |
615 | bool ColumnLowCardinality::Index::containsDefault() const |
616 | { |
617 | bool contains = false; |
618 | |
619 | auto check_contains_default = [&](auto x) |
620 | { |
621 | using CurIndexType = decltype(x); |
622 | auto & data = getPositionsData<CurIndexType>(); |
623 | for (auto pos : data) |
624 | { |
625 | if (pos == 0) |
626 | { |
627 | contains = true; |
628 | break; |
629 | } |
630 | } |
631 | }; |
632 | |
633 | callForType(std::move(check_contains_default), size_of_type); |
634 | return contains; |
635 | } |
636 | |
637 | |
638 | ColumnLowCardinality::Dictionary::Dictionary(MutableColumnPtr && column_unique_, bool is_shared) |
639 | : column_unique(std::move(column_unique_)), shared(is_shared) |
640 | { |
641 | checkColumn(*column_unique); |
642 | } |
643 | ColumnLowCardinality::Dictionary::Dictionary(ColumnPtr column_unique_, bool is_shared) |
644 | : column_unique(std::move(column_unique_)), shared(is_shared) |
645 | { |
646 | checkColumn(*column_unique); |
647 | } |
648 | |
649 | void ColumnLowCardinality::Dictionary::checkColumn(const IColumn & column) |
650 | { |
651 | |
652 | if (!dynamic_cast<const IColumnUnique *>(&column)) |
653 | throw Exception("ColumnUnique expected as an argument of ColumnLowCardinality." , ErrorCodes::ILLEGAL_COLUMN); |
654 | } |
655 | |
656 | void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & column_unique_) |
657 | { |
658 | checkColumn(*column_unique_); |
659 | |
660 | column_unique = column_unique_; |
661 | shared = true; |
662 | } |
663 | |
664 | void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions) |
665 | { |
666 | auto new_column_unique = column_unique->cloneEmpty(); |
667 | |
668 | auto & unique = getColumnUnique(); |
669 | auto & new_unique = static_cast<IColumnUnique &>(*new_column_unique); |
670 | |
671 | auto indexes = mapUniqueIndex(positions->assumeMutableRef()); |
672 | auto sub_keys = unique.getNestedColumn()->index(*indexes, 0); |
673 | auto new_indexes = new_unique.uniqueInsertRangeFrom(*sub_keys, 0, sub_keys->size()); |
674 | |
675 | positions = (*new_indexes->index(*positions, 0)).mutate(); |
676 | column_unique = std::move(new_column_unique); |
677 | |
678 | shared = false; |
679 | } |
680 | |
681 | } |
682 | |