1#pragma once
2
3#include <city.h>
4#include <Core/Defines.h>
5#include <Common/SipHash.h>
6#include <Common/UInt128.h>
7#include <Common/assert_cast.h>
8#include <Columns/ColumnTuple.h>
9
10
11namespace DB
12{
13
14/** Hashes a set of arguments to the aggregate function
15 * to calculate the number of unique values
16 * and adds them to the set.
17 *
18 * Four options (2 x 2)
19 *
20 * - for approximate calculation, uses a non-cryptographic 64-bit hash function;
21 * - for an accurate calculation, uses a cryptographic 128-bit hash function;
22 *
23 * - for several arguments passed in the usual way;
24 * - for one argument-tuple.
25 */
26
27template <bool exact, bool for_tuple>
28struct UniqVariadicHash;
29
30
/// If some arguments are not contiguous in memory, we cannot use the simple hash function,
/// because it requires the method IColumn::getDataAt to work.
/// Note that we treat a single tuple argument in the same way as multiple arguments.
34bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types);
35
36
37template <>
38struct UniqVariadicHash<false, false>
39{
40 static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
41 {
42 UInt64 hash;
43
44 const IColumn ** column = columns;
45 const IColumn ** columns_end = column + num_args;
46
47 {
48 StringRef value = (*column)->getDataAt(row_num);
49 hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
50 ++column;
51 }
52
53 while (column < columns_end)
54 {
55 StringRef value = (*column)->getDataAt(row_num);
56 hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash));
57 ++column;
58 }
59
60 return hash;
61 }
62};
63
64template <>
65struct UniqVariadicHash<false, true>
66{
67 static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num)
68 {
69 UInt64 hash;
70
71 const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns();
72
73 const auto * column = tuple_columns.data();
74 const auto * columns_end = column + num_args;
75
76 {
77 StringRef value = column->get()->getDataAt(row_num);
78 hash = CityHash_v1_0_2::CityHash64(value.data, value.size);
79 ++column;
80 }
81
82 while (column < columns_end)
83 {
84 StringRef value = column->get()->getDataAt(row_num);
85 hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash));
86 ++column;
87 }
88
89 return hash;
90 }
91};
92
93template <>
94struct UniqVariadicHash<true, false>
95{
96 static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
97 {
98 const IColumn ** column = columns;
99 const IColumn ** columns_end = column + num_args;
100
101 SipHash hash;
102
103 while (column < columns_end)
104 {
105 (*column)->updateHashWithValue(row_num, hash);
106 ++column;
107 }
108
109 UInt128 key;
110 hash.get128(key.low, key.high);
111 return key;
112 }
113};
114
115template <>
116struct UniqVariadicHash<true, true>
117{
118 static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num)
119 {
120 const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns();
121
122 const auto * column = tuple_columns.data();
123 const auto * columns_end = column + num_args;
124
125 SipHash hash;
126
127 while (column < columns_end)
128 {
129 (*column)->updateHashWithValue(row_num, hash);
130 ++column;
131 }
132
133 UInt128 key;
134 hash.get128(key.low, key.high);
135 return key;
136 }
137};
138
139}
140