1 | #pragma once |
2 | #include <Columns/IColumn.h> |
3 | #include <Columns/IColumnUnique.h> |
4 | #include <Common/typeid_cast.h> |
5 | #include <Common/assert_cast.h> |
6 | #include <AggregateFunctions/AggregateFunctionCount.h> |
7 | #include "ColumnsNumber.h" |
8 | |
9 | |
10 | namespace DB |
11 | { |
12 | |
13 | namespace ErrorCodes |
14 | { |
15 | extern const int ILLEGAL_COLUMN; |
16 | } |
17 | |
18 | class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinality> |
19 | { |
20 | friend class COWHelper<IColumn, ColumnLowCardinality>; |
21 | |
22 | ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false); |
23 | ColumnLowCardinality(const ColumnLowCardinality & other) = default; |
24 | |
25 | public: |
26 | /** Create immutable column using immutable arguments. This arguments may be shared with other columns. |
27 | * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. |
28 | */ |
29 | using Base = COWHelper<IColumn, ColumnLowCardinality>; |
30 | static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false) |
31 | { |
32 | return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared); |
33 | } |
34 | |
35 | static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false) |
36 | { |
37 | return Base::create(std::move(column_unique), std::move(indexes), is_shared); |
38 | } |
39 | |
40 | std::string getName() const override { return "ColumnLowCardinality" ; } |
41 | const char * getFamilyName() const override { return "ColumnLowCardinality" ; } |
42 | |
43 | ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); } |
44 | ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); } |
45 | |
46 | MutableColumnPtr cloneResized(size_t size) const override; |
47 | size_t size() const override { return getIndexes().size(); } |
48 | |
49 | Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; } |
50 | void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); } |
51 | |
52 | StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); } |
53 | StringRef getDataAtWithTerminatingZero(size_t n) const override |
54 | { |
55 | return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n)); |
56 | } |
57 | |
58 | UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } |
59 | UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } |
60 | Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } |
61 | Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); } |
62 | Float32 getFloat32(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat32(n)); } |
63 | bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); } |
64 | bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); } |
65 | ColumnPtr cut(size_t start, size_t length) const override |
66 | { |
67 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length)); |
68 | } |
69 | |
70 | void insert(const Field & x) override; |
71 | void insertDefault() override; |
72 | |
73 | void insertFrom(const IColumn & src, size_t n) override; |
74 | void insertFromFullColumn(const IColumn & src, size_t n); |
75 | |
76 | void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; |
77 | void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); |
78 | void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); |
79 | |
80 | void insertData(const char * pos, size_t length) override; |
81 | |
82 | void popBack(size_t n) override { idx.popBack(n); } |
83 | |
84 | StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; |
85 | |
86 | const char * deserializeAndInsertFromArena(const char * pos) override; |
87 | |
88 | void updateHashWithValue(size_t n, SipHash & hash) const override |
89 | { |
90 | return getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); |
91 | } |
92 | |
93 | ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override |
94 | { |
95 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint)); |
96 | } |
97 | |
98 | ColumnPtr permute(const Permutation & perm, size_t limit) const override |
99 | { |
100 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit)); |
101 | } |
102 | |
103 | ColumnPtr index(const IColumn & indexes_, size_t limit) const override |
104 | { |
105 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); |
106 | } |
107 | |
108 | int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; |
109 | |
110 | void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; |
111 | |
112 | ColumnPtr replicate(const Offsets & offsets) const override |
113 | { |
114 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); |
115 | } |
116 | |
117 | std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override; |
118 | |
119 | void gather(ColumnGathererStream & gatherer_stream) override; |
120 | |
121 | void getExtremes(Field & min, Field & max) const override |
122 | { |
123 | return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize |
124 | } |
125 | |
126 | void reserve(size_t n) override { idx.reserve(n); } |
127 | |
128 | size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); } |
129 | size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); } |
130 | |
131 | void forEachSubcolumn(ColumnCallback callback) override |
132 | { |
133 | callback(idx.getPositionsPtr()); |
134 | |
135 | /// Column doesn't own dictionary if it's shared. |
136 | if (!dictionary.isShared()) |
137 | callback(dictionary.getColumnUniquePtr()); |
138 | } |
139 | |
140 | bool structureEquals(const IColumn & rhs) const override |
141 | { |
142 | if (auto rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs)) |
143 | return idx.getPositions()->structureEquals(*rhs_low_cardinality->idx.getPositions()) |
144 | && dictionary.getColumnUnique().structureEquals(rhs_low_cardinality->dictionary.getColumnUnique()); |
145 | return false; |
146 | } |
147 | |
148 | bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } |
149 | bool isFixedAndContiguous() const override { return false; } |
150 | size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } |
151 | bool isNumeric() const override { return getDictionary().isNumeric(); } |
152 | bool lowCardinality() const override { return true; } |
153 | bool isNullable() const override { return isColumnNullable(*dictionary.getColumnUniquePtr()); } |
154 | |
155 | const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); } |
156 | const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); } |
157 | /// IColumnUnique & getUnique() { return static_cast<IColumnUnique &>(*column_unique); } |
158 | /// ColumnPtr getUniquePtr() const { return column_unique; } |
159 | |
160 | /// IColumn & getIndexes() { return *idx.getPositions(); } |
161 | const IColumn & getIndexes() const { return *idx.getPositions(); } |
162 | const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); } |
163 | size_t getSizeOfIndexType() const { return idx.getSizeOfIndexType(); } |
164 | |
165 | ALWAYS_INLINE size_t getIndexAt(size_t row) const |
166 | { |
167 | const IColumn * indexes = &getIndexes(); |
168 | |
169 | switch (idx.getSizeOfIndexType()) |
170 | { |
171 | case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(indexes)->getElement(row); |
172 | case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(indexes)->getElement(row); |
173 | case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(indexes)->getElement(row); |
174 | case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(indexes)->getElement(row); |
175 | default: throw Exception("Unexpected size of index type for low cardinality column." , ErrorCodes::LOGICAL_ERROR); |
176 | } |
177 | } |
178 | |
179 | ///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); } |
180 | |
181 | /// Set shared ColumnUnique for empty low cardinality column. |
182 | void setSharedDictionary(const ColumnPtr & column_unique); |
183 | bool isSharedDictionary() const { return dictionary.isShared(); } |
184 | |
185 | /// Create column with new dictionary from column part. |
186 | /// Dictionary will have only keys that are mentioned in index. |
187 | MutablePtr cutAndCompact(size_t start, size_t length) const; |
188 | |
189 | struct DictionaryEncodedColumn |
190 | { |
191 | ColumnPtr dictionary; |
192 | ColumnPtr indexes; |
193 | }; |
194 | |
195 | DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const; |
196 | |
197 | ColumnPtr countKeys() const; |
198 | |
199 | bool containsNull() const; |
200 | |
201 | class Index |
202 | { |
203 | public: |
204 | Index(); |
205 | Index(const Index & other) = default; |
206 | explicit Index(MutableColumnPtr && positions_); |
207 | explicit Index(ColumnPtr positions_); |
208 | |
209 | const ColumnPtr & getPositions() const { return positions; } |
210 | WrappedPtr & getPositionsPtr() { return positions; } |
211 | size_t getPositionAt(size_t row) const; |
212 | void insertPosition(UInt64 position); |
213 | void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit); |
214 | |
215 | void popBack(size_t n) { positions->popBack(n); } |
216 | void reserve(size_t n) { positions->reserve(n); } |
217 | |
218 | UInt64 getMaxPositionForCurrentType() const; |
219 | |
220 | static size_t getSizeOfIndexType(const IColumn & column, size_t hint); |
221 | size_t getSizeOfIndexType() const { return size_of_type; } |
222 | |
223 | void check(size_t max_dictionary_size); |
224 | void checkSizeOfType(); |
225 | |
226 | ColumnPtr detachPositions() { return std::move(positions); } |
227 | void attachPositions(ColumnPtr positions_); |
228 | |
229 | void countKeys(ColumnUInt64::Container & counts) const; |
230 | |
231 | bool containsDefault() const; |
232 | |
233 | private: |
234 | WrappedPtr positions; |
235 | size_t size_of_type = 0; |
236 | |
237 | void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); } |
238 | void expandType(); |
239 | |
240 | template <typename IndexType> |
241 | typename ColumnVector<IndexType>::Container & getPositionsData(); |
242 | |
243 | template <typename IndexType> |
244 | const typename ColumnVector<IndexType>::Container & getPositionsData() const; |
245 | |
246 | template <typename IndexType> |
247 | void convertPositions(); |
248 | |
249 | template <typename Callback> |
250 | static void callForType(Callback && callback, size_t size_of_type); |
251 | }; |
252 | |
253 | private: |
254 | class Dictionary |
255 | { |
256 | public: |
257 | Dictionary(const Dictionary & other) = default; |
258 | explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared); |
259 | explicit Dictionary(ColumnPtr column_unique, bool is_shared); |
260 | |
261 | const ColumnPtr & getColumnUniquePtr() const { return column_unique; } |
262 | WrappedPtr & getColumnUniquePtr() { return column_unique; } |
263 | |
264 | const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); } |
265 | IColumnUnique & getColumnUnique() { return static_cast<IColumnUnique &>(*column_unique); } |
266 | |
267 | /// Dictionary may be shared for several mutable columns. |
268 | /// Immutable columns may have the same column unique, which isn't necessarily shared dictionary. |
269 | void setShared(const ColumnPtr & dictionary); |
270 | bool isShared() const { return shared; } |
271 | |
272 | /// Create new dictionary with only keys that are mentioned in positions. |
273 | void compact(ColumnPtr & positions); |
274 | |
275 | private: |
276 | WrappedPtr column_unique; |
277 | bool shared = false; |
278 | |
279 | void checkColumn(const IColumn & column); |
280 | }; |
281 | |
282 | Dictionary dictionary; |
283 | Index idx; |
284 | |
285 | void compactInplace(); |
286 | void compactIfSharedDictionary(); |
287 | }; |
288 | |
289 | |
290 | |
291 | } |
292 | |