1#include <Functions/IFunctionImpl.h>
2#include <Functions/FunctionHelpers.h>
3#include <DataTypes/DataTypeArray.h>
4#include <DataTypes/DataTypesNumber.h>
5#include <Columns/ColumnArray.h>
6#include <Columns/ColumnNullable.h>
7#include <Columns/ColumnsNumber.h>
8#include <Columns/ColumnString.h>
9#include <Interpreters/AggregationCommon.h>
10#include <Common/HashTable/ClearableHashMap.h>
11#include <Common/ColumnsHashing.h>
12
13
14namespace DB
15{
16
17namespace ErrorCodes
18{
19 extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
20 extern const int ILLEGAL_COLUMN;
21 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
22 extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
23}
24
25class FunctionArrayEnumerateUniq;
26class FunctionArrayEnumerateDense;
27
28template <typename Derived>
29class FunctionArrayEnumerateExtended : public IFunction
30{
31public:
32 static FunctionPtr create(const Context &) { return std::make_shared<Derived>(); }
33
34 String getName() const override { return Derived::name; }
35
36 bool isVariadic() const override { return true; }
37 size_t getNumberOfArguments() const override { return 0; }
38 bool useDefaultImplementationForConstants() const override { return true; }
39
40 DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
41 {
42 if (arguments.size() == 0)
43 throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
44 + toString(arguments.size()) + ", should be at least 1.",
45 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
46
47 for (size_t i = 0; i < arguments.size(); ++i)
48 {
49 const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[i].get());
50 if (!array_type)
51 throw Exception("All arguments for function " + getName() + " must be arrays but argument " +
52 toString(i + 1) + " has type " + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
53 }
54
55 return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt32>());
56 }
57
58 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
59
60private:
61 /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
62 static constexpr size_t INITIAL_SIZE_DEGREE = 9;
63
64 template <typename T>
65 struct MethodOneNumber
66 {
67 using Set = ClearableHashMap<T, UInt32, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
68 HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
69 using Method = ColumnsHashing::HashMethodOneNumber<typename Set::value_type, UInt32, T, false>;
70 };
71
72 struct MethodString
73 {
74 using Set = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
75 HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
76 using Method = ColumnsHashing::HashMethodString<typename Set::value_type, UInt32, false, false>;
77 };
78
79 struct MethodFixedString
80 {
81 using Set = ClearableHashMap<StringRef, UInt32, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
82 HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;
83 using Method = ColumnsHashing::HashMethodFixedString<typename Set::value_type, UInt32, false, false>;
84 };
85
86 struct MethodFixed
87 {
88 using Set = ClearableHashMap<UInt128, UInt32, UInt128HashCRC32, HashTableGrower<INITIAL_SIZE_DEGREE>,
89 HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
90 using Method = ColumnsHashing::HashMethodKeysFixed<typename Set::value_type, UInt128, UInt32, false, false, false>;
91 };
92
93 struct MethodHashed
94 {
95 using Set = ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
96 HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>;
97 using Method = ColumnsHashing::HashMethodHashed<typename Set::value_type, UInt32, false>;
98 };
99
100 template <typename Method>
101 void executeMethod(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
102 const NullMap * null_map, ColumnUInt32::Container & res_values);
103
104 template <typename Method, bool has_null_map>
105 void executeMethodImpl(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const Sizes & key_sizes,
106 const NullMap * null_map, ColumnUInt32::Container & res_values);
107
108 template <typename T>
109 bool executeNumber(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
110 bool executeString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
111 bool executeFixedString(const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values);
112 bool execute128bit(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
113 void executeHashed(const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values);
114};
115
116
117template <typename Derived>
118void FunctionArrayEnumerateExtended<Derived>::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
119{
120 const ColumnArray::Offsets * offsets = nullptr;
121 size_t num_arguments = arguments.size();
122 ColumnRawPtrs data_columns(num_arguments);
123
124 Columns array_holders;
125 ColumnPtr offsets_column;
126 for (size_t i = 0; i < num_arguments; ++i)
127 {
128 const ColumnPtr & array_ptr = block.getByPosition(arguments[i]).column;
129 const ColumnArray * array = checkAndGetColumn<ColumnArray>(array_ptr.get());
130 if (!array)
131 {
132 const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(
133 block.getByPosition(arguments[i]).column.get());
134 if (!const_array)
135 throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName()
136 + " of " + toString(i + 1) + "-th argument of function " + getName(),
137 ErrorCodes::ILLEGAL_COLUMN);
138 array_holders.emplace_back(const_array->convertToFullColumn());
139 array = checkAndGetColumn<ColumnArray>(array_holders.back().get());
140 }
141
142 const ColumnArray::Offsets & offsets_i = array->getOffsets();
143 if (i == 0)
144 {
145 offsets = &offsets_i;
146 offsets_column = array->getOffsetsPtr();
147 }
148 else if (offsets_i != *offsets)
149 throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
150 ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
151
152 auto * array_data = &array->getData();
153 data_columns[i] = array_data;
154 }
155
156 const NullMap * null_map = nullptr;
157
158 for (size_t i = 0; i < num_arguments; ++i)
159 {
160 if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(*data_columns[i]))
161 {
162 if (num_arguments == 1)
163 data_columns[i] = &nullable_col->getNestedColumn();
164
165 null_map = &nullable_col->getNullMapData();
166 break;
167 }
168 }
169
170 auto res_nested = ColumnUInt32::create();
171
172 ColumnUInt32::Container & res_values = res_nested->getData();
173 if (!offsets->empty())
174 res_values.resize(offsets->back());
175
176 if (num_arguments == 1)
177 {
178 if (!(executeNumber<UInt8>(*offsets, *data_columns[0], null_map, res_values)
179 || executeNumber<UInt16>(*offsets, *data_columns[0], null_map, res_values)
180 || executeNumber<UInt32>(*offsets, *data_columns[0], null_map, res_values)
181 || executeNumber<UInt64>(*offsets, *data_columns[0], null_map, res_values)
182 || executeNumber<Int8>(*offsets, *data_columns[0], null_map, res_values)
183 || executeNumber<Int16>(*offsets, *data_columns[0], null_map, res_values)
184 || executeNumber<Int32>(*offsets, *data_columns[0], null_map, res_values)
185 || executeNumber<Int64>(*offsets, *data_columns[0], null_map, res_values)
186 || executeNumber<Float32>(*offsets, *data_columns[0], null_map, res_values)
187 || executeNumber<Float64>(*offsets, *data_columns[0], null_map, res_values)
188 || executeString(*offsets, *data_columns[0], null_map, res_values)
189 || executeFixedString(*offsets, *data_columns[0], null_map, res_values)))
190 executeHashed(*offsets, data_columns, res_values);
191 }
192 else
193 {
194 if (!execute128bit(*offsets, data_columns, res_values))
195 executeHashed(*offsets, data_columns, res_values);
196 }
197
198 block.getByPosition(result).column = ColumnArray::create(std::move(res_nested), offsets_column);
199}
200
201template <typename Derived>
202template <typename Method, bool has_null_map>
203void FunctionArrayEnumerateExtended<Derived>::executeMethodImpl(
204 const ColumnArray::Offsets & offsets,
205 const ColumnRawPtrs & columns,
206 const Sizes & key_sizes,
207 [[maybe_unused]] const NullMap * null_map,
208 ColumnUInt32::Container & res_values)
209{
210 typename Method::Set indices;
211 typename Method::Method method(columns, key_sizes, nullptr);
212 Arena pool; /// Won't use it;
213
214 ColumnArray::Offset prev_off = 0;
215
216 if constexpr (std::is_same_v<Derived, FunctionArrayEnumerateUniq>)
217 {
218 // Unique
219 for (size_t off : offsets)
220 {
221 indices.clear();
222 UInt32 null_count = 0;
223 for (size_t j = prev_off; j < off; ++j)
224 {
225 if constexpr (has_null_map)
226 {
227 if ((*null_map)[j])
228 {
229 res_values[j] = ++null_count;
230 continue;
231 }
232 }
233
234 auto emplace_result = method.emplaceKey(indices, j, pool);
235 auto idx = emplace_result.getMapped() + 1;
236 emplace_result.setMapped(idx);
237
238 res_values[j] = idx;
239 }
240 prev_off = off;
241 }
242 }
243 else
244 {
245 // Dense
246 for (size_t off : offsets)
247 {
248 indices.clear();
249 UInt32 rank = 0;
250 [[maybe_unused]] UInt32 null_index = 0;
251 for (size_t j = prev_off; j < off; ++j)
252 {
253 if constexpr (has_null_map)
254 {
255 if ((*null_map)[j])
256 {
257 if (!null_index)
258 null_index = ++rank;
259
260 res_values[j] = null_index;
261 continue;
262 }
263 }
264
265 auto emplace_result = method.emplaceKey(indices, j, pool);
266 auto idx = emplace_result.getMapped();
267
268 if (!idx)
269 {
270 idx = ++rank;
271 emplace_result.setMapped(idx);
272 }
273
274 res_values[j] = idx;
275 }
276 prev_off = off;
277 }
278 }
279}
280
281template <typename Derived>
282template <typename Method>
283void FunctionArrayEnumerateExtended<Derived>::executeMethod(
284 const ColumnArray::Offsets & offsets,
285 const ColumnRawPtrs & columns,
286 const Sizes & key_sizes,
287 const NullMap * null_map,
288 ColumnUInt32::Container & res_values)
289{
290 if (null_map)
291 executeMethodImpl<Method, true>(offsets, columns, key_sizes, null_map, res_values);
292 else
293 executeMethodImpl<Method, false>(offsets, columns, key_sizes, null_map, res_values);
294
295}
296
297template <typename Derived>
298template <typename T>
299bool FunctionArrayEnumerateExtended<Derived>::executeNumber(
300 const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
301{
302 const auto * nested = checkAndGetColumn<ColumnVector<T>>(&data);
303 if (!nested)
304 return false;
305
306 executeMethod<MethodOneNumber<T>>(offsets, {nested}, {}, null_map, res_values);
307 return true;
308}
309
310template <typename Derived>
311bool FunctionArrayEnumerateExtended<Derived>::executeString(
312 const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
313{
314 const auto * nested = checkAndGetColumn<ColumnString>(&data);
315 if (nested)
316 executeMethod<MethodString>(offsets, {nested}, {}, null_map, res_values);
317
318 return nested;
319}
320
321template <typename Derived>
322bool FunctionArrayEnumerateExtended<Derived>::executeFixedString(
323 const ColumnArray::Offsets & offsets, const IColumn & data, const NullMap * null_map, ColumnUInt32::Container & res_values)
324{
325 const auto * nested = checkAndGetColumn<ColumnString>(&data);
326 if (nested)
327 executeMethod<MethodFixedString>(offsets, {nested}, {}, null_map, res_values);
328
329 return nested;
330}
331
332template <typename Derived>
333bool FunctionArrayEnumerateExtended<Derived>::execute128bit(
334 const ColumnArray::Offsets & offsets,
335 const ColumnRawPtrs & columns,
336 ColumnUInt32::Container & res_values)
337{
338 size_t count = columns.size();
339 size_t keys_bytes = 0;
340 Sizes key_sizes(count);
341
342 for (size_t j = 0; j < count; ++j)
343 {
344 if (!columns[j]->isFixedAndContiguous())
345 return false;
346 key_sizes[j] = columns[j]->sizeOfValueIfFixed();
347 keys_bytes += key_sizes[j];
348 }
349
350 executeMethod<MethodFixed>(offsets, columns, key_sizes, nullptr, res_values);
351 return true;
352}
353
354template <typename Derived>
355void FunctionArrayEnumerateExtended<Derived>::executeHashed(
356 const ColumnArray::Offsets & offsets,
357 const ColumnRawPtrs & columns,
358 ColumnUInt32::Container & res_values)
359{
360 executeMethod<MethodHashed>(offsets, columns, {}, nullptr, res_values);
361}
362
363}
364