1#pragma once
2
3#include <AggregateFunctions/FactoryHelpers.h>
4
5/// These must be exposed in header for the purpose of dynamic compilation.
6#include <AggregateFunctions/QuantileReservoirSampler.h>
7#include <AggregateFunctions/QuantileReservoirSamplerDeterministic.h>
8#include <AggregateFunctions/QuantileExact.h>
9#include <AggregateFunctions/QuantileExactWeighted.h>
10#include <AggregateFunctions/QuantileTiming.h>
11#include <AggregateFunctions/QuantileTDigest.h>
12
13#include <AggregateFunctions/IAggregateFunction.h>
14#include <AggregateFunctions/QuantilesCommon.h>
15#include <Columns/ColumnArray.h>
16#include <Columns/ColumnDecimal.h>
17#include <Columns/ColumnsNumber.h>
18#include <DataTypes/DataTypeArray.h>
19#include <DataTypes/DataTypeDate.h>
20#include <DataTypes/DataTypeDateTime.h>
21#include <DataTypes/DataTypesNumber.h>
22#include <IO/ReadHelpers.h>
23#include <IO/WriteHelpers.h>
24#include <Common/assert_cast.h>
25
26#include <type_traits>
27
28
29namespace DB
30{
31
/// Error codes referenced by the code in this header.
/// They are only declared here (`extern`); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Thrown by AggregateFunctionQuantile::assertSecondArg when the second argument is not an unsigned integer.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
36
/// Forward declaration of the QuantileTiming class template (a single type parameter).
/// It only needs to be nameable here: AggregateFunctionQuantile::add compares its `Data` type against it.
template <typename T>
class QuantileTiming;
38
39
40/** Generic aggregate function for calculation of quantiles.
41 * It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
42 */
43
44template <
45 /// Type of first argument.
46 typename Value,
47 /// Data structure and implementation of calculation. Look at QuantileExact.h for example.
48 typename Data,
49 /// Structure with static member "name", containing the name of the aggregate function.
50 typename Name,
51 /// If true, the function accepts the second argument
52 /// (in can be "weight" to calculate quantiles or "determinator" that is used instead of PRNG).
53 /// Second argument is always obtained through 'getUInt' method.
54 bool has_second_arg,
55 /// If non-void, the function will return float of specified type with possibly interpolated results and NaN if there was no values.
56 /// Otherwise it will return Value type and default value if there was no values.
57 /// As an example, the function cannot return floats, if the SQL type of argument is Date or DateTime.
58 typename FloatReturnType,
59 /// If true, the function will accept multiple parameters with quantile levels
60 /// and return an Array filled with many values of that quantiles.
61 bool returns_many
62>
63class AggregateFunctionQuantile final : public IAggregateFunctionDataHelper<Data,
64 AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>
65{
66private:
67 using ColVecType = std::conditional_t<IsDecimalNumber<Value>, ColumnDecimal<Value>, ColumnVector<Value>>;
68
69 static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
70 static_assert(!IsDecimalNumber<Value> || !returns_float);
71
72 QuantileLevels<Float64> levels;
73
74 /// Used when there are single level to get.
75 Float64 level = 0.5;
76
77 DataTypePtr & argument_type;
78
79public:
80 AggregateFunctionQuantile(const DataTypePtr & argument_type_, const Array & params)
81 : IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>({argument_type_}, params)
82 , levels(params, returns_many), level(levels.levels[0]), argument_type(this->argument_types[0])
83 {
84 if (!returns_many && levels.size() > 1)
85 throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
86 }
87
88 String getName() const override { return Name::name; }
89
90 DataTypePtr getReturnType() const override
91 {
92 DataTypePtr res;
93
94 if constexpr (returns_float)
95 res = std::make_shared<DataTypeNumber<FloatReturnType>>();
96 else
97 res = argument_type;
98
99 if constexpr (returns_many)
100 return std::make_shared<DataTypeArray>(res);
101 else
102 return res;
103 }
104
105 void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
106 {
107 auto value = static_cast<const ColVecType &>(*columns[0]).getData()[row_num];
108
109 if constexpr (std::is_same_v<Data, QuantileTiming<Value>>)
110 {
111 /// QuantileTiming only supports integers.
112 if (isNaN(value) || value > std::numeric_limits<Value>::max() || value < std::numeric_limits<Value>::min())
113 return;
114 }
115
116 if constexpr (has_second_arg)
117 this->data(place).add(
118 value,
119 columns[1]->getUInt(row_num));
120 else
121 this->data(place).add(value);
122 }
123
124 void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
125 {
126 this->data(place).merge(this->data(rhs));
127 }
128
129 void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
130 {
131 /// const_cast is required because some data structures apply finalizaton (like compactization) before serializing.
132 this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
133 }
134
135 void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
136 {
137 this->data(place).deserialize(buf);
138 }
139
140 void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
141 {
142 /// const_cast is required because some data structures apply finalizaton (like sorting) for obtain a result.
143 auto & data = this->data(const_cast<AggregateDataPtr>(place));
144
145 if constexpr (returns_many)
146 {
147 ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
148 ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
149
150 size_t size = levels.size();
151 offsets_to.push_back(offsets_to.back() + size);
152
153 if (!size)
154 return;
155
156 if constexpr (returns_float)
157 {
158 auto & data_to = assert_cast<ColumnVector<FloatReturnType> &>(arr_to.getData()).getData();
159 size_t old_size = data_to.size();
160 data_to.resize(data_to.size() + size);
161
162 data.getManyFloat(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
163 }
164 else
165 {
166 auto & data_to = static_cast<ColVecType &>(arr_to.getData()).getData();
167 size_t old_size = data_to.size();
168 data_to.resize(data_to.size() + size);
169
170 data.getMany(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
171 }
172 }
173 else
174 {
175 if constexpr (returns_float)
176 assert_cast<ColumnVector<FloatReturnType> &>(to).getData().push_back(data.getFloat(level));
177 else
178 static_cast<ColVecType &>(to).getData().push_back(data.get(level));
179 }
180 }
181
182 static void assertSecondArg(const DataTypes & types)
183 {
184 if constexpr (has_second_arg)
185 {
186 assertBinary(Name::name, types);
187 if (!isUnsignedInteger(types[1]))
188 throw Exception("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + types[1]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
189 }
190 else
191 assertUnary(Name::name, types);
192 }
193};
194
/// Name tags: each structure exposes the aggregate function name through its static member `name`.
struct NameQuantile { static constexpr const char * name = "quantile"; };
struct NameQuantiles { static constexpr const char * name = "quantiles"; };
struct NameQuantileDeterministic { static constexpr const char * name = "quantileDeterministic"; };
struct NameQuantilesDeterministic { static constexpr const char * name = "quantilesDeterministic"; };
199
/// Name tags for "quantileExact" and "quantilesExact".
struct NameQuantileExact { static constexpr const char * name = "quantileExact"; };
struct NameQuantilesExact { static constexpr const char * name = "quantilesExact"; };
202
/// Name tags for "quantileExactExclusive" and "quantilesExactExclusive".
struct NameQuantileExactExclusive { static constexpr const char * name = "quantileExactExclusive"; };
struct NameQuantilesExactExclusive { static constexpr const char * name = "quantilesExactExclusive"; };
205
/// Name tags for "quantileExactInclusive" and "quantilesExactInclusive".
struct NameQuantileExactInclusive { static constexpr const char * name = "quantileExactInclusive"; };
struct NameQuantilesExactInclusive { static constexpr const char * name = "quantilesExactInclusive"; };
208
/// Name tags for "quantileExactWeighted" and "quantilesExactWeighted".
struct NameQuantileExactWeighted { static constexpr const char * name = "quantileExactWeighted"; };
struct NameQuantilesExactWeighted { static constexpr const char * name = "quantilesExactWeighted"; };
211
/// Name tags for the "quantileTiming" family (single/many levels, plain/weighted).
struct NameQuantileTiming { static constexpr const char * name = "quantileTiming"; };
struct NameQuantileTimingWeighted { static constexpr const char * name = "quantileTimingWeighted"; };
struct NameQuantilesTiming { static constexpr const char * name = "quantilesTiming"; };
struct NameQuantilesTimingWeighted { static constexpr const char * name = "quantilesTimingWeighted"; };
216
/// Name tags for the "quantileTDigest" family (single/many levels, plain/weighted).
struct NameQuantileTDigest { static constexpr const char * name = "quantileTDigest"; };
struct NameQuantileTDigestWeighted { static constexpr const char * name = "quantileTDigestWeighted"; };
struct NameQuantilesTDigest { static constexpr const char * name = "quantilesTDigest"; };
struct NameQuantilesTDigestWeighted { static constexpr const char * name = "quantilesTDigestWeighted"; };
221
222}
223