1 | #pragma once |
2 | |
3 | #include <city.h> |
4 | #include <Core/Defines.h> |
5 | #include <Common/SipHash.h> |
6 | #include <Common/UInt128.h> |
7 | #include <Common/assert_cast.h> |
8 | #include <Columns/ColumnTuple.h> |
9 | |
10 | |
11 | namespace DB |
12 | { |
13 | |
/** Hashes a set of arguments to the aggregate function
  * to calculate the number of unique values.
  * The resulting hash is what gets added to the set; the insertion itself is not done here.
17 | * |
18 | * Four options (2 x 2) |
19 | * |
20 | * - for approximate calculation, uses a non-cryptographic 64-bit hash function; |
21 | * - for an accurate calculation, uses a cryptographic 128-bit hash function; |
22 | * |
23 | * - for several arguments passed in the usual way; |
24 | * - for one argument-tuple. |
25 | */ |
26 | |
/// Primary template; only the four explicit specializations below are defined.
///   exact     - true: the result is a 128-bit SipHash value (UInt128);
///               false: the result is a 64-bit CityHash value (UInt64).
///   for_tuple - true: the elements of the single ColumnTuple argument (columns[0]) are hashed;
///               false: the columns passed to apply() are hashed directly.
template <bool exact, bool for_tuple>
struct UniqVariadicHash;
29 | |
30 | |
/// If some arguments are not contiguous, we cannot use simple hash function,
/// because it requires method IColumn::getDataAt to work.
/// Note that we treat single tuple argument in the same way as multiple arguments.
/// NOTE(review): presumably returns true only when every argument type is stored contiguously;
/// the definition is not in this header - confirm against the implementation.
bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types);
35 | |
36 | |
37 | template <> |
38 | struct UniqVariadicHash<false, false> |
39 | { |
40 | static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) |
41 | { |
42 | UInt64 hash; |
43 | |
44 | const IColumn ** column = columns; |
45 | const IColumn ** columns_end = column + num_args; |
46 | |
47 | { |
48 | StringRef value = (*column)->getDataAt(row_num); |
49 | hash = CityHash_v1_0_2::CityHash64(value.data, value.size); |
50 | ++column; |
51 | } |
52 | |
53 | while (column < columns_end) |
54 | { |
55 | StringRef value = (*column)->getDataAt(row_num); |
56 | hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash)); |
57 | ++column; |
58 | } |
59 | |
60 | return hash; |
61 | } |
62 | }; |
63 | |
64 | template <> |
65 | struct UniqVariadicHash<false, true> |
66 | { |
67 | static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) |
68 | { |
69 | UInt64 hash; |
70 | |
71 | const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns(); |
72 | |
73 | const auto * column = tuple_columns.data(); |
74 | const auto * columns_end = column + num_args; |
75 | |
76 | { |
77 | StringRef value = column->get()->getDataAt(row_num); |
78 | hash = CityHash_v1_0_2::CityHash64(value.data, value.size); |
79 | ++column; |
80 | } |
81 | |
82 | while (column < columns_end) |
83 | { |
84 | StringRef value = column->get()->getDataAt(row_num); |
85 | hash = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(CityHash_v1_0_2::CityHash64(value.data, value.size), hash)); |
86 | ++column; |
87 | } |
88 | |
89 | return hash; |
90 | } |
91 | }; |
92 | |
93 | template <> |
94 | struct UniqVariadicHash<true, false> |
95 | { |
96 | static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) |
97 | { |
98 | const IColumn ** column = columns; |
99 | const IColumn ** columns_end = column + num_args; |
100 | |
101 | SipHash hash; |
102 | |
103 | while (column < columns_end) |
104 | { |
105 | (*column)->updateHashWithValue(row_num, hash); |
106 | ++column; |
107 | } |
108 | |
109 | UInt128 key; |
110 | hash.get128(key.low, key.high); |
111 | return key; |
112 | } |
113 | }; |
114 | |
115 | template <> |
116 | struct UniqVariadicHash<true, true> |
117 | { |
118 | static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) |
119 | { |
120 | const auto & tuple_columns = assert_cast<const ColumnTuple *>(columns[0])->getColumns(); |
121 | |
122 | const auto * column = tuple_columns.data(); |
123 | const auto * columns_end = column + num_args; |
124 | |
125 | SipHash hash; |
126 | |
127 | while (column < columns_end) |
128 | { |
129 | (*column)->updateHashWithValue(row_num, hash); |
130 | ++column; |
131 | } |
132 | |
133 | UInt128 key; |
134 | hash.get128(key.low, key.high); |
135 | return key; |
136 | } |
137 | }; |
138 | |
139 | } |
140 | |