1 | #include <Columns/ColumnString.h> |
2 | #include <Columns/ColumnConst.h> |
3 | #include <Common/typeid_cast.h> |
4 | #include <Common/assert_cast.h> |
5 | #include <Interpreters/SetVariants.h> |
6 | |
7 | |
8 | namespace DB |
9 | { |
10 | |
/// Error codes thrown from this file. Only declared here (extern); the definition is not in this file.
11 | namespace ErrorCodes |
12 | { |
13 | extern const int LOGICAL_ERROR; |
14 | } |
15 | |
16 | template <typename Variant> |
17 | void SetVariantsTemplate<Variant>::init(Type type_) |
18 | { |
19 | type = type_; |
20 | |
21 | switch (type) |
22 | { |
23 | case Type::EMPTY: break; |
24 | |
25 | #define M(NAME) \ |
26 | case Type::NAME: NAME = std::make_unique<typename decltype(NAME)::element_type>(); break; |
27 | APPLY_FOR_SET_VARIANTS(M) |
28 | #undef M |
29 | } |
30 | } |
31 | |
32 | template <typename Variant> |
33 | size_t SetVariantsTemplate<Variant>::getTotalRowCount() const |
34 | { |
35 | switch (type) |
36 | { |
37 | case Type::EMPTY: return 0; |
38 | |
39 | #define M(NAME) \ |
40 | case Type::NAME: return NAME->data.size(); |
41 | APPLY_FOR_SET_VARIANTS(M) |
42 | #undef M |
43 | } |
44 | |
45 | __builtin_unreachable(); |
46 | } |
47 | |
48 | template <typename Variant> |
49 | size_t SetVariantsTemplate<Variant>::getTotalByteCount() const |
50 | { |
51 | switch (type) |
52 | { |
53 | case Type::EMPTY: return 0; |
54 | |
55 | #define M(NAME) \ |
56 | case Type::NAME: return NAME->data.getBufferSizeInBytes(); |
57 | APPLY_FOR_SET_VARIANTS(M) |
58 | #undef M |
59 | } |
60 | |
61 | __builtin_unreachable(); |
62 | } |
63 | |
64 | template <typename Variant> |
65 | typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes) |
66 | { |
67 | /// Check if at least one of the specified keys is nullable. |
68 | /// Create a set of nested key columns from the corresponding key columns. |
69 | /// Here "nested" means that, if a key column is nullable, we take its nested |
70 | /// column; otherwise we take the key column as is. |
71 | ColumnRawPtrs nested_key_columns; |
72 | nested_key_columns.reserve(key_columns.size()); |
73 | bool has_nullable_key = false; |
74 | |
75 | for (const auto & col : key_columns) |
76 | { |
77 | if (auto * nullable = checkAndGetColumn<ColumnNullable>(*col)) |
78 | { |
79 | nested_key_columns.push_back(&nullable->getNestedColumn()); |
80 | has_nullable_key = true; |
81 | } |
82 | else |
83 | nested_key_columns.push_back(col); |
84 | } |
85 | |
86 | size_t keys_size = nested_key_columns.size(); |
87 | |
88 | bool all_fixed = true; |
89 | size_t keys_bytes = 0; |
90 | key_sizes.resize(keys_size); |
91 | for (size_t j = 0; j < keys_size; ++j) |
92 | { |
93 | if (!nested_key_columns[j]->isFixedAndContiguous()) |
94 | { |
95 | all_fixed = false; |
96 | break; |
97 | } |
98 | key_sizes[j] = nested_key_columns[j]->sizeOfValueIfFixed(); |
99 | keys_bytes += key_sizes[j]; |
100 | } |
101 | |
102 | if (has_nullable_key) |
103 | { |
104 | /// At least one key is nullable. Therefore we choose a method |
105 | /// that takes into account this fact. |
106 | if ((keys_size == 1) && (nested_key_columns[0]->isNumeric())) |
107 | { |
108 | /// We have exactly one key and it is nullable. We shall add it a tag |
109 | /// which specifies whether its value is null or not. |
110 | size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed(); |
111 | if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8)) |
112 | return Type::nullable_keys128; |
113 | else |
114 | throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8." , |
115 | ErrorCodes::LOGICAL_ERROR}; |
116 | } |
117 | |
118 | if (all_fixed) |
119 | { |
120 | /// Pack if possible all the keys along with information about which key values are nulls |
121 | /// into a fixed 16- or 32-byte blob. |
122 | if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value)) |
123 | throw Exception{"Aggregator: keys sizes overflow" , ErrorCodes::LOGICAL_ERROR}; |
124 | if ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16) |
125 | return Type::nullable_keys128; |
126 | if ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32) |
127 | return Type::nullable_keys256; |
128 | } |
129 | |
130 | /// Fallback case. |
131 | return Type::hashed; |
132 | } |
133 | |
134 | /// If there is one numeric key that fits into 64 bits |
135 | if (keys_size == 1 && nested_key_columns[0]->isNumeric() && !nested_key_columns[0]->lowCardinality()) |
136 | { |
137 | size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed(); |
138 | if (size_of_field == 1) |
139 | return Type::key8; |
140 | if (size_of_field == 2) |
141 | return Type::key16; |
142 | if (size_of_field == 4) |
143 | return Type::key32; |
144 | if (size_of_field == 8) |
145 | return Type::key64; |
146 | if (size_of_field == 16) |
147 | return Type::keys128; |
148 | throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16." , ErrorCodes::LOGICAL_ERROR); |
149 | } |
150 | |
151 | /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys |
152 | if (all_fixed && keys_bytes <= 16) |
153 | return Type::keys128; |
154 | if (all_fixed && keys_bytes <= 32) |
155 | return Type::keys256; |
156 | |
157 | /// If there is single string key, use hash table of it's values. |
158 | if (keys_size == 1 |
159 | && (typeid_cast<const ColumnString *>(nested_key_columns[0]) |
160 | || (isColumnConst(*nested_key_columns[0]) && typeid_cast<const ColumnString *>(&assert_cast<const ColumnConst *>(nested_key_columns[0])->getDataColumn())))) |
161 | return Type::key_string; |
162 | |
163 | if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0])) |
164 | return Type::key_fixed_string; |
165 | |
166 | /// Otherwise, will use set of cryptographic hashes of unambiguously serialized values. |
167 | return Type::hashed; |
168 | } |
169 | |
/// Explicit instantiations of the template for NonClearableSet and ClearableSet.
170 | template struct SetVariantsTemplate<NonClearableSet>; |
171 | template struct SetVariantsTemplate<ClearableSet>; |
172 | |
173 | } |
174 | |