| 1 | #pragma once |
| 2 | #include <Columns/IColumn.h> |
| 3 | #include <Columns/IColumnUnique.h> |
| 4 | #include <Common/typeid_cast.h> |
| 5 | #include <Common/assert_cast.h> |
| 6 | #include <AggregateFunctions/AggregateFunctionCount.h> |
| 7 | #include "ColumnsNumber.h" |
| 8 | |
| 9 | |
| 10 | namespace DB |
| 11 | { |
| 12 | |
| 13 | namespace ErrorCodes |
| 14 | { |
| 15 | extern const int ILLEGAL_COLUMN; |
| 16 | } |
| 17 | |
| 18 | class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinality> |
| 19 | { |
| 20 | friend class COWHelper<IColumn, ColumnLowCardinality>; |
| 21 | |
| 22 | ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false); |
| 23 | ColumnLowCardinality(const ColumnLowCardinality & other) = default; |
| 24 | |
| 25 | public: |
| 26 | /** Create immutable column using immutable arguments. This arguments may be shared with other columns. |
| 27 | * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. |
| 28 | */ |
| 29 | using Base = COWHelper<IColumn, ColumnLowCardinality>; |
| 30 | static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false) |
| 31 | { |
| 32 | return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared); |
| 33 | } |
| 34 | |
| 35 | static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false) |
| 36 | { |
| 37 | return Base::create(std::move(column_unique), std::move(indexes), is_shared); |
| 38 | } |
| 39 | |
| 40 | std::string getName() const override { return "ColumnLowCardinality" ; } |
| 41 | const char * getFamilyName() const override { return "ColumnLowCardinality" ; } |
| 42 | |
| 43 | ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); } |
| 44 | ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); } |
| 45 | |
| 46 | MutableColumnPtr cloneResized(size_t size) const override; |
| 47 | size_t size() const override { return getIndexes().size(); } |
| 48 | |
| 49 | Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; } |
| 50 | void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); } |
| 51 | |
| 52 | StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); } |
| 53 | StringRef getDataAtWithTerminatingZero(size_t n) const override |
| 54 | { |
| 55 | return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n)); |
| 56 | } |
| 57 | |
| 58 | UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } |
| 59 | UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } |
| 60 | Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } |
| 61 | Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); } |
| 62 | Float32 getFloat32(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat32(n)); } |
| 63 | bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); } |
| 64 | bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); } |
| 65 | ColumnPtr cut(size_t start, size_t length) const override |
| 66 | { |
| 67 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length)); |
| 68 | } |
| 69 | |
| 70 | void insert(const Field & x) override; |
| 71 | void insertDefault() override; |
| 72 | |
| 73 | void insertFrom(const IColumn & src, size_t n) override; |
| 74 | void insertFromFullColumn(const IColumn & src, size_t n); |
| 75 | |
| 76 | void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; |
| 77 | void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); |
| 78 | void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); |
| 79 | |
| 80 | void insertData(const char * pos, size_t length) override; |
| 81 | |
| 82 | void popBack(size_t n) override { idx.popBack(n); } |
| 83 | |
| 84 | StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; |
| 85 | |
| 86 | const char * deserializeAndInsertFromArena(const char * pos) override; |
| 87 | |
| 88 | void updateHashWithValue(size_t n, SipHash & hash) const override |
| 89 | { |
| 90 | return getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); |
| 91 | } |
| 92 | |
| 93 | ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override |
| 94 | { |
| 95 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint)); |
| 96 | } |
| 97 | |
| 98 | ColumnPtr permute(const Permutation & perm, size_t limit) const override |
| 99 | { |
| 100 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit)); |
| 101 | } |
| 102 | |
| 103 | ColumnPtr index(const IColumn & indexes_, size_t limit) const override |
| 104 | { |
| 105 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); |
| 106 | } |
| 107 | |
| 108 | int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; |
| 109 | |
| 110 | void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; |
| 111 | |
| 112 | ColumnPtr replicate(const Offsets & offsets) const override |
| 113 | { |
| 114 | return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); |
| 115 | } |
| 116 | |
| 117 | std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override; |
| 118 | |
| 119 | void gather(ColumnGathererStream & gatherer_stream) override; |
| 120 | |
| 121 | void getExtremes(Field & min, Field & max) const override |
| 122 | { |
| 123 | return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize |
| 124 | } |
| 125 | |
| 126 | void reserve(size_t n) override { idx.reserve(n); } |
| 127 | |
| 128 | size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); } |
| 129 | size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); } |
| 130 | |
| 131 | void forEachSubcolumn(ColumnCallback callback) override |
| 132 | { |
| 133 | callback(idx.getPositionsPtr()); |
| 134 | |
| 135 | /// Column doesn't own dictionary if it's shared. |
| 136 | if (!dictionary.isShared()) |
| 137 | callback(dictionary.getColumnUniquePtr()); |
| 138 | } |
| 139 | |
| 140 | bool structureEquals(const IColumn & rhs) const override |
| 141 | { |
| 142 | if (auto rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs)) |
| 143 | return idx.getPositions()->structureEquals(*rhs_low_cardinality->idx.getPositions()) |
| 144 | && dictionary.getColumnUnique().structureEquals(rhs_low_cardinality->dictionary.getColumnUnique()); |
| 145 | return false; |
| 146 | } |
| 147 | |
| 148 | bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } |
| 149 | bool isFixedAndContiguous() const override { return false; } |
| 150 | size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } |
| 151 | bool isNumeric() const override { return getDictionary().isNumeric(); } |
| 152 | bool lowCardinality() const override { return true; } |
| 153 | bool isNullable() const override { return isColumnNullable(*dictionary.getColumnUniquePtr()); } |
| 154 | |
| 155 | const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); } |
| 156 | const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); } |
| 157 | /// IColumnUnique & getUnique() { return static_cast<IColumnUnique &>(*column_unique); } |
| 158 | /// ColumnPtr getUniquePtr() const { return column_unique; } |
| 159 | |
| 160 | /// IColumn & getIndexes() { return *idx.getPositions(); } |
| 161 | const IColumn & getIndexes() const { return *idx.getPositions(); } |
| 162 | const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); } |
| 163 | size_t getSizeOfIndexType() const { return idx.getSizeOfIndexType(); } |
| 164 | |
| 165 | ALWAYS_INLINE size_t getIndexAt(size_t row) const |
| 166 | { |
| 167 | const IColumn * indexes = &getIndexes(); |
| 168 | |
| 169 | switch (idx.getSizeOfIndexType()) |
| 170 | { |
| 171 | case sizeof(UInt8): return assert_cast<const ColumnUInt8 *>(indexes)->getElement(row); |
| 172 | case sizeof(UInt16): return assert_cast<const ColumnUInt16 *>(indexes)->getElement(row); |
| 173 | case sizeof(UInt32): return assert_cast<const ColumnUInt32 *>(indexes)->getElement(row); |
| 174 | case sizeof(UInt64): return assert_cast<const ColumnUInt64 *>(indexes)->getElement(row); |
| 175 | default: throw Exception("Unexpected size of index type for low cardinality column." , ErrorCodes::LOGICAL_ERROR); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | ///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); } |
| 180 | |
| 181 | /// Set shared ColumnUnique for empty low cardinality column. |
| 182 | void setSharedDictionary(const ColumnPtr & column_unique); |
| 183 | bool isSharedDictionary() const { return dictionary.isShared(); } |
| 184 | |
| 185 | /// Create column with new dictionary from column part. |
| 186 | /// Dictionary will have only keys that are mentioned in index. |
| 187 | MutablePtr cutAndCompact(size_t start, size_t length) const; |
| 188 | |
| 189 | struct DictionaryEncodedColumn |
| 190 | { |
| 191 | ColumnPtr dictionary; |
| 192 | ColumnPtr indexes; |
| 193 | }; |
| 194 | |
| 195 | DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const; |
| 196 | |
| 197 | ColumnPtr countKeys() const; |
| 198 | |
| 199 | bool containsNull() const; |
| 200 | |
| 201 | class Index |
| 202 | { |
| 203 | public: |
| 204 | Index(); |
| 205 | Index(const Index & other) = default; |
| 206 | explicit Index(MutableColumnPtr && positions_); |
| 207 | explicit Index(ColumnPtr positions_); |
| 208 | |
| 209 | const ColumnPtr & getPositions() const { return positions; } |
| 210 | WrappedPtr & getPositionsPtr() { return positions; } |
| 211 | size_t getPositionAt(size_t row) const; |
| 212 | void insertPosition(UInt64 position); |
| 213 | void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit); |
| 214 | |
| 215 | void popBack(size_t n) { positions->popBack(n); } |
| 216 | void reserve(size_t n) { positions->reserve(n); } |
| 217 | |
| 218 | UInt64 getMaxPositionForCurrentType() const; |
| 219 | |
| 220 | static size_t getSizeOfIndexType(const IColumn & column, size_t hint); |
| 221 | size_t getSizeOfIndexType() const { return size_of_type; } |
| 222 | |
| 223 | void check(size_t max_dictionary_size); |
| 224 | void checkSizeOfType(); |
| 225 | |
| 226 | ColumnPtr detachPositions() { return std::move(positions); } |
| 227 | void attachPositions(ColumnPtr positions_); |
| 228 | |
| 229 | void countKeys(ColumnUInt64::Container & counts) const; |
| 230 | |
| 231 | bool containsDefault() const; |
| 232 | |
| 233 | private: |
| 234 | WrappedPtr positions; |
| 235 | size_t size_of_type = 0; |
| 236 | |
| 237 | void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); } |
| 238 | void expandType(); |
| 239 | |
| 240 | template <typename IndexType> |
| 241 | typename ColumnVector<IndexType>::Container & getPositionsData(); |
| 242 | |
| 243 | template <typename IndexType> |
| 244 | const typename ColumnVector<IndexType>::Container & getPositionsData() const; |
| 245 | |
| 246 | template <typename IndexType> |
| 247 | void convertPositions(); |
| 248 | |
| 249 | template <typename Callback> |
| 250 | static void callForType(Callback && callback, size_t size_of_type); |
| 251 | }; |
| 252 | |
| 253 | private: |
| 254 | class Dictionary |
| 255 | { |
| 256 | public: |
| 257 | Dictionary(const Dictionary & other) = default; |
| 258 | explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared); |
| 259 | explicit Dictionary(ColumnPtr column_unique, bool is_shared); |
| 260 | |
| 261 | const ColumnPtr & getColumnUniquePtr() const { return column_unique; } |
| 262 | WrappedPtr & getColumnUniquePtr() { return column_unique; } |
| 263 | |
| 264 | const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); } |
| 265 | IColumnUnique & getColumnUnique() { return static_cast<IColumnUnique &>(*column_unique); } |
| 266 | |
| 267 | /// Dictionary may be shared for several mutable columns. |
| 268 | /// Immutable columns may have the same column unique, which isn't necessarily shared dictionary. |
| 269 | void setShared(const ColumnPtr & dictionary); |
| 270 | bool isShared() const { return shared; } |
| 271 | |
| 272 | /// Create new dictionary with only keys that are mentioned in positions. |
| 273 | void compact(ColumnPtr & positions); |
| 274 | |
| 275 | private: |
| 276 | WrappedPtr column_unique; |
| 277 | bool shared = false; |
| 278 | |
| 279 | void checkColumn(const IColumn & column); |
| 280 | }; |
| 281 | |
| 282 | Dictionary dictionary; |
| 283 | Index idx; |
| 284 | |
| 285 | void compactInplace(); |
| 286 | void compactIfSharedDictionary(); |
| 287 | }; |
| 288 | |
| 289 | |
| 290 | |
| 291 | } |
| 292 | |