1 | #include <Functions/IFunctionImpl.h> |
2 | #include <Functions/FunctionFactory.h> |
3 | #include <Functions/FunctionHelpers.h> |
4 | #include <DataTypes/DataTypesNumber.h> |
5 | #include <DataTypes/DataTypeArray.h> |
6 | #include <DataTypes/getLeastSupertype.h> |
7 | #include <Columns/ColumnArray.h> |
8 | #include <Columns/ColumnVector.h> |
9 | #include <Interpreters/castColumn.h> |
10 | #include <numeric> |
11 | |
12 | |
13 | namespace DB |
14 | { |
15 | |
16 | namespace ErrorCodes |
17 | { |
18 | extern const int ARGUMENT_OUT_OF_BOUND; |
19 | extern const int ILLEGAL_COLUMN; |
20 | extern const int ILLEGAL_TYPE_OF_ARGUMENT; |
21 | extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; |
22 | } |
23 | |
24 | |
25 | class FunctionRange : public IFunction |
26 | { |
27 | public: |
28 | static constexpr auto name = "range" ; |
29 | static constexpr size_t max_elements = 100'000'000; |
30 | static FunctionPtr create(const Context & context_) { return std::make_shared<FunctionRange>(context_); } |
31 | FunctionRange(const Context & context_) : context(context_) {} |
32 | |
33 | private: |
34 | const Context & context; |
35 | String getName() const override { return name; } |
36 | |
37 | size_t getNumberOfArguments() const override { return 0; } |
38 | bool isVariadic() const override { return true; } |
39 | bool useDefaultImplementationForConstants() const override { return true; } |
40 | |
41 | DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override |
42 | { |
43 | if (arguments.size() > 3 || arguments.empty()) |
44 | { |
45 | throw Exception{"Function " + getName() + " needs 1..3 arguments; passed " |
46 | + std::to_string(arguments.size()) + "." , |
47 | ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; |
48 | } |
49 | |
50 | for (const auto & arg : arguments) |
51 | { |
52 | if (!isUnsignedInteger(arg)) |
53 | throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(), |
54 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; |
55 | } |
56 | |
57 | DataTypePtr common_type = getLeastSupertype(arguments); |
58 | return std::make_shared<DataTypeArray>(common_type); |
59 | } |
60 | |
61 | template <typename T> |
62 | bool executeInternal(Block & block, const IColumn * arg, const size_t result) |
63 | { |
64 | if (const auto in = checkAndGetColumn<ColumnVector<T>>(arg)) |
65 | { |
66 | const auto & in_data = in->getData(); |
67 | const auto total_values = std::accumulate(std::begin(in_data), std::end(in_data), size_t{}, |
68 | [this] (const size_t lhs, const size_t rhs) |
69 | { |
70 | const auto sum = lhs + rhs; |
71 | if (sum < lhs) |
72 | throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing" , |
73 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
74 | |
75 | return sum; |
76 | }); |
77 | |
78 | if (total_values > max_elements) |
79 | throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + |
80 | " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), |
81 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
82 | |
83 | auto data_col = ColumnVector<T>::create(total_values); |
84 | auto offsets_col = ColumnArray::ColumnOffsets::create(in->size()); |
85 | |
86 | auto & out_data = data_col->getData(); |
87 | auto & out_offsets = offsets_col->getData(); |
88 | |
89 | IColumn::Offset offset{}; |
90 | for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx) |
91 | { |
92 | for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx) |
93 | out_data[offset + elem_idx] = elem_idx; |
94 | |
95 | offset += in_data[row_idx]; |
96 | out_offsets[row_idx] = offset; |
97 | } |
98 | |
99 | block.getByPosition(result).column = ColumnArray::create(std::move(data_col), std::move(offsets_col)); |
100 | return true; |
101 | } |
102 | else |
103 | return false; |
104 | } |
105 | |
106 | template <typename T> |
107 | bool executeConstStartStep(Block & block, const IColumn * end_arg, const T start, const T step, const size_t input_rows_count, const size_t result) |
108 | { |
109 | auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg); |
110 | if (!end_column) |
111 | { |
112 | return false; |
113 | } |
114 | |
115 | const auto & end_data = end_column->getData(); |
116 | |
117 | size_t total_values = 0; |
118 | size_t pre_values = 0; |
119 | |
120 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
121 | { |
122 | if (start < end_data[row_idx] && step == 0) |
123 | throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero" , |
124 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
125 | |
126 | pre_values += start >= end_data[row_idx] ? 0 |
127 | : (end_data[row_idx] - start - 1) / step + 1; |
128 | |
129 | if (pre_values < total_values) |
130 | throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing" , |
131 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
132 | |
133 | total_values = pre_values; |
134 | if (total_values > max_elements) |
135 | throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + |
136 | " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), |
137 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
138 | } |
139 | |
140 | auto data_col = ColumnVector<T>::create(total_values); |
141 | auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); |
142 | |
143 | auto & out_data = data_col->getData(); |
144 | auto & out_offsets = offsets_col->getData(); |
145 | |
146 | IColumn::Offset offset{}; |
147 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
148 | { |
149 | for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step) |
150 | out_data[offset++] = st; |
151 | |
152 | out_offsets[row_idx] = offset; |
153 | } |
154 | |
155 | block.getByPosition(result).column = ColumnArray::create(std::move(data_col), std::move(offsets_col)); |
156 | return true; |
157 | } |
158 | |
159 | template <typename T> |
160 | bool executeConstStep(Block & block, const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count, const size_t result) |
161 | { |
162 | auto start_column = checkAndGetColumn<ColumnVector<T>>(start_arg); |
163 | auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg); |
164 | if (!end_column || !start_column) |
165 | { |
166 | return false; |
167 | } |
168 | |
169 | const auto & start_data = start_column->getData(); |
170 | const auto & end_data = end_column->getData(); |
171 | |
172 | size_t total_values = 0; |
173 | size_t pre_values = 0; |
174 | |
175 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
176 | { |
177 | if (start_data[row_idx] < end_data[row_idx] && step == 0) |
178 | throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero" , |
179 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
180 | |
181 | pre_values += start_data[row_idx] >= end_data[row_idx] ? 0 |
182 | : (end_data[row_idx] - start_data[row_idx] - 1) / step + 1; |
183 | |
184 | if (pre_values < total_values) |
185 | throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing" , |
186 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
187 | |
188 | total_values = pre_values; |
189 | if (total_values > max_elements) |
190 | throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + |
191 | " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), |
192 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
193 | } |
194 | |
195 | auto data_col = ColumnVector<T>::create(total_values); |
196 | auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); |
197 | |
198 | auto & out_data = data_col->getData(); |
199 | auto & out_offsets = offsets_col->getData(); |
200 | |
201 | IColumn::Offset offset{}; |
202 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
203 | { |
204 | for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st < ed; st += step) |
205 | out_data[offset++] = st; |
206 | |
207 | out_offsets[row_idx] = offset; |
208 | } |
209 | |
210 | block.getByPosition(result).column = ColumnArray::create(std::move(data_col), std::move(offsets_col)); |
211 | return true; |
212 | } |
213 | |
214 | template <typename T> |
215 | bool executeConstStart(Block & block, const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count, const size_t result) |
216 | { |
217 | auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg); |
218 | auto step_column = checkAndGetColumn<ColumnVector<T>>(step_arg); |
219 | if (!end_column || !step_column) |
220 | { |
221 | return false; |
222 | } |
223 | |
224 | const auto & end_data = end_column->getData(); |
225 | const auto & step_data = step_column->getData(); |
226 | |
227 | size_t total_values = 0; |
228 | size_t pre_values = 0; |
229 | |
230 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
231 | { |
232 | if (start < end_data[row_idx] && step_data[row_idx] == 0) |
233 | throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero" , |
234 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
235 | |
236 | pre_values += start >= end_data[row_idx] ? 0 |
237 | : (end_data[row_idx] - start - 1) / step_data[row_idx] + 1; |
238 | |
239 | if (pre_values < total_values) |
240 | throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing" , |
241 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
242 | |
243 | total_values = pre_values; |
244 | if (total_values > max_elements) |
245 | throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + |
246 | " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), |
247 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
248 | } |
249 | |
250 | auto data_col = ColumnVector<T>::create(total_values); |
251 | auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); |
252 | |
253 | auto & out_data = data_col->getData(); |
254 | auto & out_offsets = offsets_col->getData(); |
255 | |
256 | IColumn::Offset offset{}; |
257 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
258 | { |
259 | for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step_data[row_idx]) |
260 | out_data[offset++] = st; |
261 | |
262 | out_offsets[row_idx] = offset; |
263 | } |
264 | |
265 | block.getByPosition(result).column = ColumnArray::create(std::move(data_col), std::move(offsets_col)); |
266 | return true; |
267 | } |
268 | |
269 | template <typename T> |
270 | bool executeGeneric(Block & block, const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count, const size_t result) |
271 | { |
272 | auto start_column = checkAndGetColumn<ColumnVector<T>>(start_col); |
273 | auto end_column = checkAndGetColumn<ColumnVector<T>>(end_col); |
274 | auto step_column = checkAndGetColumn<ColumnVector<T>>(step_col); |
275 | |
276 | if (!start_column || !end_column || !step_column) |
277 | { |
278 | return false; |
279 | } |
280 | |
281 | const auto & start_data = start_column->getData(); |
282 | const auto & end_start = end_column->getData(); |
283 | const auto & step_data = step_column->getData(); |
284 | |
285 | size_t total_values = 0; |
286 | size_t pre_values = 0; |
287 | |
288 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
289 | { |
290 | if (start_data[row_idx] < end_start[row_idx] && step_data[row_idx] == 0) |
291 | throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero" , |
292 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
293 | |
294 | pre_values += start_data[row_idx] >= end_start[row_idx] ? 0 |
295 | : (end_start[row_idx] -start_data[row_idx] - 1) / (step_data[row_idx]) + 1; |
296 | |
297 | if (pre_values < total_values) |
298 | throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing" , |
299 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
300 | |
301 | total_values = pre_values; |
302 | if (total_values > max_elements) |
303 | throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + |
304 | " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), |
305 | ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
306 | } |
307 | |
308 | auto data_col = ColumnVector<T>::create(total_values); |
309 | auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); |
310 | |
311 | auto & out_data = data_col->getData(); |
312 | auto & out_offsets = offsets_col->getData(); |
313 | |
314 | IColumn::Offset offset{}; |
315 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) |
316 | { |
317 | for (size_t st = start_data[row_idx], ed = end_start[row_idx]; st < ed; st += step_data[row_idx]) |
318 | out_data[offset++] = st; |
319 | |
320 | out_offsets[row_idx] = offset; |
321 | } |
322 | |
323 | block.getByPosition(result).column = ColumnArray::create(std::move(data_col), std::move(offsets_col)); |
324 | return true; |
325 | } |
326 | |
327 | void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override |
328 | { |
329 | if (arguments.size() == 1) |
330 | { |
331 | const auto col = block.getByPosition(arguments[0]).column.get(); |
332 | if (!executeInternal<UInt8>(block, col, result) && |
333 | !executeInternal<UInt16>(block, col, result) && |
334 | !executeInternal<UInt32>(block, col, result) && |
335 | !executeInternal<UInt64>(block, col, result)) |
336 | { |
337 | throw Exception{"Illegal column " + col->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; |
338 | } |
339 | return; |
340 | } |
341 | |
342 | Columns columns_holder(3); |
343 | ColumnRawPtrs columns(3); |
344 | |
345 | const auto return_type = checkAndGetDataType<DataTypeArray>(block.getByPosition(result).type.get())->getNestedType(); |
346 | |
347 | for (size_t i = 0; i < arguments.size(); ++i) |
348 | { |
349 | if (i == 1) |
350 | columns_holder[i] = castColumn(block.getByPosition(arguments[i]), return_type, context)->convertToFullColumnIfConst(); |
351 | else |
352 | columns_holder[i] = castColumn(block.getByPosition(arguments[i]), return_type, context); |
353 | |
354 | columns[i] = columns_holder[i].get(); |
355 | } |
356 | |
357 | // for step column, defaults to 1 |
358 | if (arguments.size() == 2) |
359 | { |
360 | columns_holder[2] = return_type->createColumnConst(input_rows_count, 1); |
361 | columns[2] = columns_holder[2].get(); |
362 | } |
363 | |
364 | bool is_start_const = isColumnConst(*columns[0]); |
365 | bool is_step_const = isColumnConst(*columns[2]); |
366 | bool ok; |
367 | if (is_start_const && is_step_const) |
368 | { |
369 | UInt64 start = assert_cast<const ColumnConst &>(*columns[0]).getUInt(0); |
370 | UInt64 step = assert_cast<const ColumnConst &>(*columns[2]).getUInt(0); |
371 | |
372 | ok = executeConstStartStep<UInt8>(block, columns[1], start, step, input_rows_count, result) || |
373 | executeConstStartStep<UInt16>(block, columns[1], start, step, input_rows_count, result) || |
374 | executeConstStartStep<UInt32>(block, columns[1], start, step, input_rows_count, result) || |
375 | executeConstStartStep<UInt64>(block, columns[1], start, step, input_rows_count, result); |
376 | } |
377 | else if (is_start_const && !is_step_const) |
378 | { |
379 | UInt64 start = assert_cast<const ColumnConst &>(*columns[0]).getUInt(0); |
380 | |
381 | ok = executeConstStart<UInt8>(block, columns[1], columns[2], start, input_rows_count, result) || |
382 | executeConstStart<UInt16>(block, columns[1], columns[2], start, input_rows_count, result) || |
383 | executeConstStart<UInt32>(block, columns[1], columns[2], start, input_rows_count, result) || |
384 | executeConstStart<UInt64>(block, columns[1], columns[2], start, input_rows_count, result); |
385 | } |
386 | else if (!is_start_const && is_step_const) |
387 | { |
388 | UInt64 step = assert_cast<const ColumnConst &>(*columns[2]).getUInt(0); |
389 | |
390 | ok = executeConstStep<UInt8>(block, columns[0], columns[1], step, input_rows_count, result) || |
391 | executeConstStep<UInt16>(block, columns[0], columns[1], step, input_rows_count, result) || |
392 | executeConstStep<UInt32>(block, columns[0], columns[1], step, input_rows_count, result) || |
393 | executeConstStep<UInt64>(block, columns[0], columns[1], step, input_rows_count, result); |
394 | } |
395 | else |
396 | { |
397 | ok = executeGeneric<UInt8>(block, columns[0], columns[1], columns[2], input_rows_count, result) || |
398 | executeGeneric<UInt16>(block, columns[0], columns[1], columns[2], input_rows_count, result) || |
399 | executeGeneric<UInt32>(block, columns[0], columns[1], columns[2], input_rows_count, result) || |
400 | executeGeneric<UInt64>(block, columns[0], columns[1], columns[2], input_rows_count, result); |
401 | } |
402 | |
403 | if (!ok) |
404 | { |
405 | throw Exception{"Illegal columns " + columns[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; |
406 | } |
407 | } |
408 | |
409 | }; |
410 | |
411 | |
412 | void registerFunctionRange(FunctionFactory & factory) |
413 | { |
414 | factory.registerFunction<FunctionRange>(); |
415 | } |
416 | |
417 | } |
418 | |