1 | //===--------------------------------------------------------------------===// |
2 | // hash.cpp |
3 | // Description: This file contains the vectorized hash implementations |
4 | //===--------------------------------------------------------------------===// |
5 | |
6 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
7 | #include "duckdb/common/types/hash.hpp" |
8 | #include "duckdb/common/types/null_value.hpp" |
9 | |
10 | using namespace duckdb; |
11 | using namespace std; |
12 | |
13 | struct HashOp { |
14 | template <class T> static inline hash_t Operation(T input, bool is_null) { |
15 | return duckdb::Hash<T>(is_null ? duckdb::NullValue<T>() : input); |
16 | } |
17 | }; |
18 | |
19 | template <bool HAS_RSEL, class T> |
20 | static inline void tight_loop_hash(T *__restrict ldata, hash_t *__restrict result_data, const SelectionVector *rsel, |
21 | idx_t count, const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) { |
22 | if (nullmask.any()) { |
23 | for (idx_t i = 0; i < count; i++) { |
24 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
25 | auto idx = sel_vector->get_index(ridx); |
26 | result_data[ridx] = HashOp::Operation(ldata[idx], nullmask[idx]); |
27 | } |
28 | } else { |
29 | for (idx_t i = 0; i < count; i++) { |
30 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
31 | auto idx = sel_vector->get_index(ridx); |
32 | result_data[ridx] = duckdb::Hash<T>(ldata[idx]); |
33 | } |
34 | } |
35 | } |
36 | |
37 | template <bool HAS_RSEL, class T> |
38 | static inline void templated_loop_hash(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { |
39 | if (input.vector_type == VectorType::CONSTANT_VECTOR) { |
40 | result.vector_type = VectorType::CONSTANT_VECTOR; |
41 | |
42 | auto ldata = ConstantVector::GetData<T>(input); |
43 | auto result_data = ConstantVector::GetData<hash_t>(result); |
44 | *result_data = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); |
45 | } else { |
46 | result.vector_type = VectorType::FLAT_VECTOR; |
47 | |
48 | VectorData idata; |
49 | input.Orrify(count, idata); |
50 | |
51 | tight_loop_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(result), rsel, count, idata.sel, |
52 | *idata.nullmask); |
53 | } |
54 | } |
55 | |
56 | template <bool HAS_RSEL> |
57 | static inline void hash_type_switch(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { |
58 | assert(result.type == TypeId::HASH); |
59 | switch (input.type) { |
60 | case TypeId::BOOL: |
61 | case TypeId::INT8: |
62 | templated_loop_hash<HAS_RSEL, int8_t>(input, result, rsel, count); |
63 | break; |
64 | case TypeId::INT16: |
65 | templated_loop_hash<HAS_RSEL, int16_t>(input, result, rsel, count); |
66 | break; |
67 | case TypeId::INT32: |
68 | templated_loop_hash<HAS_RSEL, int32_t>(input, result, rsel, count); |
69 | break; |
70 | case TypeId::INT64: |
71 | templated_loop_hash<HAS_RSEL, int64_t>(input, result, rsel, count); |
72 | break; |
73 | case TypeId::FLOAT: |
74 | templated_loop_hash<HAS_RSEL, float>(input, result, rsel, count); |
75 | break; |
76 | case TypeId::DOUBLE: |
77 | templated_loop_hash<HAS_RSEL, double>(input, result, rsel, count); |
78 | break; |
79 | case TypeId::VARCHAR: |
80 | templated_loop_hash<HAS_RSEL, string_t>(input, result, rsel, count); |
81 | break; |
82 | default: |
83 | throw InvalidTypeException(input.type, "Invalid type for hash" ); |
84 | } |
85 | } |
86 | |
87 | void VectorOperations::Hash(Vector &input, Vector &result, idx_t count) { |
88 | hash_type_switch<false>(input, result, nullptr, count); |
89 | } |
90 | |
91 | void VectorOperations::Hash(Vector &input, Vector &result, const SelectionVector &sel, idx_t count) { |
92 | hash_type_switch<true>(input, result, &sel, count); |
93 | } |
94 | |
95 | static inline hash_t combine_hash(hash_t a, hash_t b) { |
96 | return (a * UINT64_C(0xbf58476d1ce4e5b9)) ^ b; |
97 | } |
98 | |
99 | template <bool HAS_RSEL, class T> |
100 | static inline void tight_loop_combine_hash_constant(T *__restrict ldata, hash_t constant_hash, |
101 | hash_t *__restrict hash_data, const SelectionVector *rsel, |
102 | idx_t count, const SelectionVector *__restrict sel_vector, |
103 | nullmask_t &nullmask) { |
104 | if (nullmask.any()) { |
105 | for (idx_t i = 0; i < count; i++) { |
106 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
107 | auto idx = sel_vector->get_index(ridx); |
108 | auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]); |
109 | hash_data[ridx] = combine_hash(constant_hash, other_hash); |
110 | } |
111 | } else { |
112 | for (idx_t i = 0; i < count; i++) { |
113 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
114 | auto idx = sel_vector->get_index(ridx); |
115 | auto other_hash = duckdb::Hash<T>(ldata[idx]); |
116 | hash_data[ridx] = combine_hash(constant_hash, other_hash); |
117 | } |
118 | } |
119 | } |
120 | |
121 | template <bool HAS_RSEL, class T> |
122 | static inline void tight_loop_combine_hash(T *__restrict ldata, hash_t *__restrict hash_data, |
123 | const SelectionVector *rsel, idx_t count, |
124 | const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) { |
125 | if (nullmask.any()) { |
126 | for (idx_t i = 0; i < count; i++) { |
127 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
128 | auto idx = sel_vector->get_index(ridx); |
129 | auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]); |
130 | hash_data[ridx] = combine_hash(hash_data[ridx], other_hash); |
131 | } |
132 | } else { |
133 | for (idx_t i = 0; i < count; i++) { |
134 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; |
135 | auto idx = sel_vector->get_index(ridx); |
136 | auto other_hash = duckdb::Hash<T>(ldata[idx]); |
137 | hash_data[ridx] = combine_hash(hash_data[ridx], other_hash); |
138 | } |
139 | } |
140 | } |
141 | |
142 | template <bool HAS_RSEL, class T> |
143 | void templated_loop_combine_hash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { |
144 | if (input.vector_type == VectorType::CONSTANT_VECTOR && hashes.vector_type == VectorType::CONSTANT_VECTOR) { |
145 | auto ldata = ConstantVector::GetData<T>(input); |
146 | auto hash_data = ConstantVector::GetData<hash_t>(hashes); |
147 | |
148 | auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); |
149 | *hash_data = combine_hash(*hash_data, other_hash); |
150 | } else { |
151 | VectorData idata; |
152 | input.Orrify(count, idata); |
153 | if (hashes.vector_type == VectorType::CONSTANT_VECTOR) { |
154 | // mix constant with non-constant, first get the constant value |
155 | auto constant_hash = *ConstantVector::GetData<hash_t>(hashes); |
156 | // now re-initialize the hashes vector to an empty flat vector |
157 | hashes.Initialize(hashes.type); |
158 | tight_loop_combine_hash_constant<HAS_RSEL, T>((T *)idata.data, constant_hash, |
159 | FlatVector::GetData<hash_t>(hashes), rsel, count, idata.sel, |
160 | *idata.nullmask); |
161 | } else { |
162 | assert(hashes.vector_type == VectorType::FLAT_VECTOR); |
163 | tight_loop_combine_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(hashes), rsel, count, |
164 | idata.sel, *idata.nullmask); |
165 | } |
166 | } |
167 | } |
168 | |
169 | template <bool HAS_RSEL> |
170 | static inline void combine_hash_type_switch(Vector &hashes, Vector &input, const SelectionVector *rsel, idx_t count) { |
171 | assert(hashes.type == TypeId::HASH); |
172 | switch (input.type) { |
173 | case TypeId::BOOL: |
174 | case TypeId::INT8: |
175 | templated_loop_combine_hash<HAS_RSEL, int8_t>(input, hashes, rsel, count); |
176 | break; |
177 | case TypeId::INT16: |
178 | templated_loop_combine_hash<HAS_RSEL, int16_t>(input, hashes, rsel, count); |
179 | break; |
180 | case TypeId::INT32: |
181 | templated_loop_combine_hash<HAS_RSEL, int32_t>(input, hashes, rsel, count); |
182 | break; |
183 | case TypeId::INT64: |
184 | templated_loop_combine_hash<HAS_RSEL, int64_t>(input, hashes, rsel, count); |
185 | break; |
186 | case TypeId::FLOAT: |
187 | templated_loop_combine_hash<HAS_RSEL, float>(input, hashes, rsel, count); |
188 | break; |
189 | case TypeId::DOUBLE: |
190 | templated_loop_combine_hash<HAS_RSEL, double>(input, hashes, rsel, count); |
191 | break; |
192 | case TypeId::VARCHAR: |
193 | templated_loop_combine_hash<HAS_RSEL, string_t>(input, hashes, rsel, count); |
194 | break; |
195 | default: |
196 | throw InvalidTypeException(input.type, "Invalid type for hash" ); |
197 | } |
198 | } |
199 | |
200 | void VectorOperations::CombineHash(Vector &hashes, Vector &input, idx_t count) { |
201 | combine_hash_type_switch<false>(hashes, input, nullptr, count); |
202 | } |
203 | |
204 | void VectorOperations::CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count) { |
205 | combine_hash_type_switch<true>(hashes, input, &rsel, count); |
206 | } |
207 | |