1#include <Columns/ColumnString.h>
2#include <Columns/ColumnConst.h>
3#include <Common/typeid_cast.h>
4#include <Common/assert_cast.h>
5#include <Interpreters/SetVariants.h>
6
7
8namespace DB
9{
10
/// Error codes referenced in this translation unit; they are only declared here
/// (extern), the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
} // namespace ErrorCodes
15
16template <typename Variant>
17void SetVariantsTemplate<Variant>::init(Type type_)
18{
19 type = type_;
20
21 switch (type)
22 {
23 case Type::EMPTY: break;
24
25 #define M(NAME) \
26 case Type::NAME: NAME = std::make_unique<typename decltype(NAME)::element_type>(); break;
27 APPLY_FOR_SET_VARIANTS(M)
28 #undef M
29 }
30}
31
32template <typename Variant>
33size_t SetVariantsTemplate<Variant>::getTotalRowCount() const
34{
35 switch (type)
36 {
37 case Type::EMPTY: return 0;
38
39 #define M(NAME) \
40 case Type::NAME: return NAME->data.size();
41 APPLY_FOR_SET_VARIANTS(M)
42 #undef M
43 }
44
45 __builtin_unreachable();
46}
47
48template <typename Variant>
49size_t SetVariantsTemplate<Variant>::getTotalByteCount() const
50{
51 switch (type)
52 {
53 case Type::EMPTY: return 0;
54
55 #define M(NAME) \
56 case Type::NAME: return NAME->data.getBufferSizeInBytes();
57 APPLY_FOR_SET_VARIANTS(M)
58 #undef M
59 }
60
61 __builtin_unreachable();
62}
63
64template <typename Variant>
65typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes)
66{
67 /// Check if at least one of the specified keys is nullable.
68 /// Create a set of nested key columns from the corresponding key columns.
69 /// Here "nested" means that, if a key column is nullable, we take its nested
70 /// column; otherwise we take the key column as is.
71 ColumnRawPtrs nested_key_columns;
72 nested_key_columns.reserve(key_columns.size());
73 bool has_nullable_key = false;
74
75 for (const auto & col : key_columns)
76 {
77 if (auto * nullable = checkAndGetColumn<ColumnNullable>(*col))
78 {
79 nested_key_columns.push_back(&nullable->getNestedColumn());
80 has_nullable_key = true;
81 }
82 else
83 nested_key_columns.push_back(col);
84 }
85
86 size_t keys_size = nested_key_columns.size();
87
88 bool all_fixed = true;
89 size_t keys_bytes = 0;
90 key_sizes.resize(keys_size);
91 for (size_t j = 0; j < keys_size; ++j)
92 {
93 if (!nested_key_columns[j]->isFixedAndContiguous())
94 {
95 all_fixed = false;
96 break;
97 }
98 key_sizes[j] = nested_key_columns[j]->sizeOfValueIfFixed();
99 keys_bytes += key_sizes[j];
100 }
101
102 if (has_nullable_key)
103 {
104 /// At least one key is nullable. Therefore we choose a method
105 /// that takes into account this fact.
106 if ((keys_size == 1) && (nested_key_columns[0]->isNumeric()))
107 {
108 /// We have exactly one key and it is nullable. We shall add it a tag
109 /// which specifies whether its value is null or not.
110 size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed();
111 if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
112 return Type::nullable_keys128;
113 else
114 throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
115 ErrorCodes::LOGICAL_ERROR};
116 }
117
118 if (all_fixed)
119 {
120 /// Pack if possible all the keys along with information about which key values are nulls
121 /// into a fixed 16- or 32-byte blob.
122 if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value))
123 throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR};
124 if ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16)
125 return Type::nullable_keys128;
126 if ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32)
127 return Type::nullable_keys256;
128 }
129
130 /// Fallback case.
131 return Type::hashed;
132 }
133
134 /// If there is one numeric key that fits into 64 bits
135 if (keys_size == 1 && nested_key_columns[0]->isNumeric() && !nested_key_columns[0]->lowCardinality())
136 {
137 size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed();
138 if (size_of_field == 1)
139 return Type::key8;
140 if (size_of_field == 2)
141 return Type::key16;
142 if (size_of_field == 4)
143 return Type::key32;
144 if (size_of_field == 8)
145 return Type::key64;
146 if (size_of_field == 16)
147 return Type::keys128;
148 throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
149 }
150
151 /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
152 if (all_fixed && keys_bytes <= 16)
153 return Type::keys128;
154 if (all_fixed && keys_bytes <= 32)
155 return Type::keys256;
156
157 /// If there is single string key, use hash table of it's values.
158 if (keys_size == 1
159 && (typeid_cast<const ColumnString *>(nested_key_columns[0])
160 || (isColumnConst(*nested_key_columns[0]) && typeid_cast<const ColumnString *>(&assert_cast<const ColumnConst *>(nested_key_columns[0])->getDataColumn()))))
161 return Type::key_string;
162
163 if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0]))
164 return Type::key_fixed_string;
165
166 /// Otherwise, will use set of cryptographic hashes of unambiguously serialized values.
167 return Type::hashed;
168}
169
170template struct SetVariantsTemplate<NonClearableSet>;
171template struct SetVariantsTemplate<ClearableSet>;
172
173}
174