1#pragma once
2#include <Columns/IColumn.h>
3#include <Columns/IColumnUnique.h>
4#include <Common/typeid_cast.h>
5#include <Common/assert_cast.h>
6#include <AggregateFunctions/AggregateFunctionCount.h>
7#include "ColumnsNumber.h"
8
9
10namespace DB
11{
12
/// Error codes referenced by this header. The constants are only declared here (extern);
/// their definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    /// Used by ColumnLowCardinality::getIndexAt; declared here so that the header
    /// does not depend on another include to bring the name into scope.
    extern const int LOGICAL_ERROR;
}
17
18class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinality>
19{
20 friend class COWHelper<IColumn, ColumnLowCardinality>;
21
22 ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
23 ColumnLowCardinality(const ColumnLowCardinality & other) = default;
24
25public:
26 /** Create immutable column using immutable arguments. This arguments may be shared with other columns.
27 * Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
28 */
29 using Base = COWHelper<IColumn, ColumnLowCardinality>;
30 static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false)
31 {
32 return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
33 }
34
35 static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false)
36 {
37 return Base::create(std::move(column_unique), std::move(indexes), is_shared);
38 }
39
40 std::string getName() const override { return "ColumnLowCardinality"; }
41 const char * getFamilyName() const override { return "ColumnLowCardinality"; }
42
43 ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); }
44 ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); }
45
46 MutableColumnPtr cloneResized(size_t size) const override;
47 size_t size() const override { return getIndexes().size(); }
48
49 Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; }
50 void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); }
51
52 StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); }
53 StringRef getDataAtWithTerminatingZero(size_t n) const override
54 {
55 return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n));
56 }
57
58 UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); }
59 UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
60 Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
61 Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); }
62 Float32 getFloat32(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat32(n)); }
63 bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); }
64 bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
65 ColumnPtr cut(size_t start, size_t length) const override
66 {
67 return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length));
68 }
69
70 void insert(const Field & x) override;
71 void insertDefault() override;
72
73 void insertFrom(const IColumn & src, size_t n) override;
74 void insertFromFullColumn(const IColumn & src, size_t n);
75
76 void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
77 void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length);
78 void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions);
79
80 void insertData(const char * pos, size_t length) override;
81
82 void popBack(size_t n) override { idx.popBack(n); }
83
84 StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
85
86 const char * deserializeAndInsertFromArena(const char * pos) override;
87
88 void updateHashWithValue(size_t n, SipHash & hash) const override
89 {
90 return getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
91 }
92
93 ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
94 {
95 return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
96 }
97
98 ColumnPtr permute(const Permutation & perm, size_t limit) const override
99 {
100 return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
101 }
102
103 ColumnPtr index(const IColumn & indexes_, size_t limit) const override
104 {
105 return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
106 }
107
108 int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
109
110 void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
111
112 ColumnPtr replicate(const Offsets & offsets) const override
113 {
114 return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
115 }
116
117 std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
118
119 void gather(ColumnGathererStream & gatherer_stream) override;
120
121 void getExtremes(Field & min, Field & max) const override
122 {
123 return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize
124 }
125
126 void reserve(size_t n) override { idx.reserve(n); }
127
128 size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
129 size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
130
131 void forEachSubcolumn(ColumnCallback callback) override
132 {
133 callback(idx.getPositionsPtr());
134
135 /// Column doesn't own dictionary if it's shared.
136 if (!dictionary.isShared())
137 callback(dictionary.getColumnUniquePtr());
138 }
139
140 bool structureEquals(const IColumn & rhs) const override
141 {
142 if (auto rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs))
143 return idx.getPositions()->structureEquals(*rhs_low_cardinality->idx.getPositions())
144 && dictionary.getColumnUnique().structureEquals(rhs_low_cardinality->dictionary.getColumnUnique());
145 return false;
146 }
147
148 bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); }
149 bool isFixedAndContiguous() const override { return false; }
150 size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
151 bool isNumeric() const override { return getDictionary().isNumeric(); }
152 bool lowCardinality() const override { return true; }
153 bool isNullable() const override { return isColumnNullable(*dictionary.getColumnUniquePtr()); }
154
155 const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
156 const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); }
157 /// IColumnUnique & getUnique() { return static_cast<IColumnUnique &>(*column_unique); }
158 /// ColumnPtr getUniquePtr() const { return column_unique; }
159
160 /// IColumn & getIndexes() { return *idx.getPositions(); }
161 const IColumn & getIndexes() const { return *idx.getPositions(); }
162 const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); }
163 size_t getSizeOfIndexType() const { return idx.getSizeOfIndexType(); }
164
165 ALWAYS_INLINE size_t getIndexAt(size_t row) const
166 {
167 const IColumn * indexes = &getIndexes();
168
169 switch (idx.getSizeOfIndexType())
170 {
171 case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(indexes)->getElement(row);
172 case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(indexes)->getElement(row);
173 case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(indexes)->getElement(row);
174 case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(indexes)->getElement(row);
175 default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
176 }
177 }
178
179 ///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); }
180
181 /// Set shared ColumnUnique for empty low cardinality column.
182 void setSharedDictionary(const ColumnPtr & column_unique);
183 bool isSharedDictionary() const { return dictionary.isShared(); }
184
185 /// Create column with new dictionary from column part.
186 /// Dictionary will have only keys that are mentioned in index.
187 MutablePtr cutAndCompact(size_t start, size_t length) const;
188
189 struct DictionaryEncodedColumn
190 {
191 ColumnPtr dictionary;
192 ColumnPtr indexes;
193 };
194
195 DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const;
196
197 ColumnPtr countKeys() const;
198
199 bool containsNull() const;
200
201 class Index
202 {
203 public:
204 Index();
205 Index(const Index & other) = default;
206 explicit Index(MutableColumnPtr && positions_);
207 explicit Index(ColumnPtr positions_);
208
209 const ColumnPtr & getPositions() const { return positions; }
210 WrappedPtr & getPositionsPtr() { return positions; }
211 size_t getPositionAt(size_t row) const;
212 void insertPosition(UInt64 position);
213 void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit);
214
215 void popBack(size_t n) { positions->popBack(n); }
216 void reserve(size_t n) { positions->reserve(n); }
217
218 UInt64 getMaxPositionForCurrentType() const;
219
220 static size_t getSizeOfIndexType(const IColumn & column, size_t hint);
221 size_t getSizeOfIndexType() const { return size_of_type; }
222
223 void check(size_t max_dictionary_size);
224 void checkSizeOfType();
225
226 ColumnPtr detachPositions() { return std::move(positions); }
227 void attachPositions(ColumnPtr positions_);
228
229 void countKeys(ColumnUInt64::Container & counts) const;
230
231 bool containsDefault() const;
232
233 private:
234 WrappedPtr positions;
235 size_t size_of_type = 0;
236
237 void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); }
238 void expandType();
239
240 template <typename IndexType>
241 typename ColumnVector<IndexType>::Container & getPositionsData();
242
243 template <typename IndexType>
244 const typename ColumnVector<IndexType>::Container & getPositionsData() const;
245
246 template <typename IndexType>
247 void convertPositions();
248
249 template <typename Callback>
250 static void callForType(Callback && callback, size_t size_of_type);
251 };
252
253private:
254 class Dictionary
255 {
256 public:
257 Dictionary(const Dictionary & other) = default;
258 explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
259 explicit Dictionary(ColumnPtr column_unique, bool is_shared);
260
261 const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
262 WrappedPtr & getColumnUniquePtr() { return column_unique; }
263
264 const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
265 IColumnUnique & getColumnUnique() { return static_cast<IColumnUnique &>(*column_unique); }
266
267 /// Dictionary may be shared for several mutable columns.
268 /// Immutable columns may have the same column unique, which isn't necessarily shared dictionary.
269 void setShared(const ColumnPtr & dictionary);
270 bool isShared() const { return shared; }
271
272 /// Create new dictionary with only keys that are mentioned in positions.
273 void compact(ColumnPtr & positions);
274
275 private:
276 WrappedPtr column_unique;
277 bool shared = false;
278
279 void checkColumn(const IColumn & column);
280 };
281
282 Dictionary dictionary;
283 Index idx;
284
285 void compactInplace();
286 void compactIfSharedDictionary();
287};
288
289
290
291}
292