AggregateFunctionQuantile.h source code [ClickHouse/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h]

1	#pragma once
2
3	#include <AggregateFunctions/FactoryHelpers.h>
4
5	/// These must be exposed in header for the purpose of dynamic compilation.
6	#include <AggregateFunctions/QuantileReservoirSampler.h>
7	#include <AggregateFunctions/QuantileReservoirSamplerDeterministic.h>
8	#include <AggregateFunctions/QuantileExact.h>
9	#include <AggregateFunctions/QuantileExactWeighted.h>
10	#include <AggregateFunctions/QuantileTiming.h>
11	#include <AggregateFunctions/QuantileTDigest.h>
12
13	#include <AggregateFunctions/IAggregateFunction.h>
14	#include <AggregateFunctions/QuantilesCommon.h>
15	#include <Columns/ColumnArray.h>
16	#include <Columns/ColumnDecimal.h>
17	#include <Columns/ColumnsNumber.h>
18	#include <DataTypes/DataTypeArray.h>
19	#include <DataTypes/DataTypeDate.h>
20	#include <DataTypes/DataTypeDateTime.h>
21	#include <DataTypes/DataTypesNumber.h>
22	#include <IO/ReadHelpers.h>
23	#include <IO/WriteHelpers.h>
24	#include <Common/assert_cast.h>
25
26	#include <type_traits>
27
28
29	namespace DB
30	{
31
/// Error codes referenced by this header. They are only declared here (extern);
/// the definitions live elsewhere in the project.
namespace ErrorCodes
{
    /// Thrown by the AggregateFunctionQuantile constructor when a single-quantile
    /// function is given more than one level parameter.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Thrown by AggregateFunctionQuantile::assertSecondArg when the second argument
    /// is not an unsigned integer. Declared explicitly so this header does not depend
    /// on another include happening to declare it.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
36
37	template <typename> class QuantileTiming;
38
39
40	/* Generic aggregate function for calculation of quantiles.*
41	* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
42	*/
43
44	template <
45	/// Type of first argument.
46	typename Value,
47	/// Data structure and implementation of calculation. Look at QuantileExact.h for example.
48	typename Data,
49	/// Structure with static member "name", containing the name of the aggregate function.
50	typename Name,
51	/// If true, the function accepts the second argument
52	/// (in can be "weight" to calculate quantiles or "determinator" that is used instead of PRNG).
53	/// Second argument is always obtained through 'getUInt' method.
54	bool has_second_arg,
55	/// If non-void, the function will return float of specified type with possibly interpolated results and NaN if there was no values.
56	/// Otherwise it will return Value type and default value if there was no values.
57	/// As an example, the function cannot return floats, if the SQL type of argument is Date or DateTime.
58	typename FloatReturnType,
59	/// If true, the function will accept multiple parameters with quantile levels
60	/// and return an Array filled with many values of that quantiles.
61	bool returns_many
62	>
63	class AggregateFunctionQuantile final : public IAggregateFunctionDataHelper<Data,
64	AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>
65	{
66	private:
67	using ColVecType = std::conditional_t<IsDecimalNumber<Value>, ColumnDecimal<Value>, ColumnVector<Value>>;
68
69	static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
70	static_assert(!IsDecimalNumber<Value> \|\| !returns_float);
71
72	QuantileLevels<Float64> levels;
73
74	/// Used when there are single level to get.
75	Float64 level = `0.5`;
76
77	DataTypePtr & argument_type;
78
79	public:
80	AggregateFunctionQuantile(const DataTypePtr & argument_type_, const Array & params)
81	: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>({argument_type_}, params)
82	, levels (params, returns_many), level(levels.levels[`0`]), argument_type(this->argument_types[`0`])
83	{
84	if (!returns_many && levels.size() > `1`)
85	throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
86	}
87
88	String getName() const override { return Name::name; }
89
90	DataTypePtr getReturnType() const override
91	{
92	DataTypePtr res;
93
94	if constexpr (returns_float)
95	res = std::make_shared<DataTypeNumber<FloatReturnType>>();
96	else
97	res = argument_type;
98
99	if constexpr (returns_many)
100	return std::make_shared<DataTypeArray>(res);
101	else
102	return res;
103	}
104
105	void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena ) const* override
106	{
107	auto value = static_cast<const ColVecType &>(*columns[`0`]).getData()[row_num];
108
109	if constexpr (std::is_same_v<Data, QuantileTiming<Value>>)
110	{
111	/// QuantileTiming only supports integers.
112	if (isNaN(value) \|\| value > std::numeric_limits<Value>::max() \|\| value < std::numeric_limits<Value>::min())
113	return;
114	}
115
116	if constexpr (has_second_arg)
117	this->data(place).add(
118	value,
119	columns[`1`]->getUInt(row_num));
120	else
121	this->data(place).add(value);
122	}
123
124	void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena ) const* override
125	{
126	this->data(place).merge(this->data(rhs));
127	}
128
129	void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
130	{
131	/// const_cast is required because some data structures apply finalizaton (like compactization) before serializing.
132	this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
133	}
134
135	void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena ) const* override
136	{
137	this->data(place).deserialize(buf);
138	}
139
140	void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
141	{
142	/// const_cast is required because some data structures apply finalizaton (like sorting) for obtain a result.
143	auto & data = this->data(const_cast<AggregateDataPtr>(place));
144
145	if constexpr (returns_many)
146	{
147	ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
148	ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
149
150	size_t size = levels.size();
151	offsets_to.push_back(offsets_to.back() + size);
152
153	if (!size)
154	return;
155
156	if constexpr (returns_float)
157	{
158	auto & data_to = assert_cast<ColumnVector<FloatReturnType> &>(arr_to.getData()).getData();
159	size_t old_size = data_to.size();
160	data_to.resize(data_to.size() + size);
161
162	data.getManyFloat(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
163	}
164	else
165	{
166	auto & data_to = static_cast<ColVecType &>(arr_to.getData()).getData();
167	size_t old_size = data_to.size();
168	data_to.resize(data_to.size() + size);
169
170	data.getMany(levels.levels.data(), levels.permutation.data(), size, data_to.data() + old_size);
171	}
172	}
173	else
174	{
175	if constexpr (returns_float)
176	assert_cast<ColumnVector<FloatReturnType> &>(to).getData().push_back(data.getFloat(level));
177	else
178	static_cast<ColVecType &>(to).getData().push_back(data.get(level));
179	}
180	}
181
182	static void assertSecondArg(const DataTypes & types)
183	{
184	if constexpr (has_second_arg)
185	{
186	assertBinary(Name::name, types);
187	if (!isUnsignedInteger(types [`1`]))
188	throw Exception ("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + types [`1`]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
189	}
190	else
191	assertUnary(Name::name, types);
192	}
193	};
194
/// Tag types that carry the name of an aggregate function in a static member "name".
/// Each one is meant to be passed as the Name template parameter of AggregateFunctionQuantile.

struct NameQuantile
{
    static constexpr const char * name = "quantile";
};

struct NameQuantiles
{
    static constexpr const char * name = "quantiles";
};

struct NameQuantileDeterministic
{
    static constexpr const char * name = "quantileDeterministic";
};

struct NameQuantilesDeterministic
{
    static constexpr const char * name = "quantilesDeterministic";
};


/// Names of the "Exact" family.

struct NameQuantileExact
{
    static constexpr const char * name = "quantileExact";
};

struct NameQuantilesExact
{
    static constexpr const char * name = "quantilesExact";
};

struct NameQuantileExactExclusive
{
    static constexpr const char * name = "quantileExactExclusive";
};

struct NameQuantilesExactExclusive
{
    static constexpr const char * name = "quantilesExactExclusive";
};

struct NameQuantileExactInclusive
{
    static constexpr const char * name = "quantileExactInclusive";
};

struct NameQuantilesExactInclusive
{
    static constexpr const char * name = "quantilesExactInclusive";
};

struct NameQuantileExactWeighted
{
    static constexpr const char * name = "quantileExactWeighted";
};

struct NameQuantilesExactWeighted
{
    static constexpr const char * name = "quantilesExactWeighted";
};


/// Names of the "Timing" family.

struct NameQuantileTiming
{
    static constexpr const char * name = "quantileTiming";
};

struct NameQuantileTimingWeighted
{
    static constexpr const char * name = "quantileTimingWeighted";
};

struct NameQuantilesTiming
{
    static constexpr const char * name = "quantilesTiming";
};

struct NameQuantilesTimingWeighted
{
    static constexpr const char * name = "quantilesTimingWeighted";
};


/// Names of the "TDigest" family.

struct NameQuantileTDigest
{
    static constexpr const char * name = "quantileTDigest";
};

struct NameQuantileTDigestWeighted
{
    static constexpr const char * name = "quantileTDigestWeighted";
};

struct NameQuantilesTDigest
{
    static constexpr const char * name = "quantilesTDigest";
};

struct NameQuantilesTDigestWeighted
{
    static constexpr const char * name = "quantilesTDigestWeighted";
};
221
222	}
223

Browse the source code of ClickHouse/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h