1//===--------------------------------------------------------------------===//
2// hash.cpp
3// Description: This file contains the vectorized hash implementations
4//===--------------------------------------------------------------------===//
5
6#include "duckdb/common/vector_operations/vector_operations.hpp"
7#include "duckdb/common/types/hash.hpp"
8#include "duckdb/common/types/null_value.hpp"
9
10using namespace duckdb;
11using namespace std;
12
13struct HashOp {
14 template <class T> static inline hash_t Operation(T input, bool is_null) {
15 return duckdb::Hash<T>(is_null ? duckdb::NullValue<T>() : input);
16 }
17};
18
19template <bool HAS_RSEL, class T>
20static inline void tight_loop_hash(T *__restrict ldata, hash_t *__restrict result_data, const SelectionVector *rsel,
21 idx_t count, const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) {
22 if (nullmask.any()) {
23 for (idx_t i = 0; i < count; i++) {
24 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
25 auto idx = sel_vector->get_index(ridx);
26 result_data[ridx] = HashOp::Operation(ldata[idx], nullmask[idx]);
27 }
28 } else {
29 for (idx_t i = 0; i < count; i++) {
30 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
31 auto idx = sel_vector->get_index(ridx);
32 result_data[ridx] = duckdb::Hash<T>(ldata[idx]);
33 }
34 }
35}
36
37template <bool HAS_RSEL, class T>
38static inline void templated_loop_hash(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) {
39 if (input.vector_type == VectorType::CONSTANT_VECTOR) {
40 result.vector_type = VectorType::CONSTANT_VECTOR;
41
42 auto ldata = ConstantVector::GetData<T>(input);
43 auto result_data = ConstantVector::GetData<hash_t>(result);
44 *result_data = HashOp::Operation(*ldata, ConstantVector::IsNull(input));
45 } else {
46 result.vector_type = VectorType::FLAT_VECTOR;
47
48 VectorData idata;
49 input.Orrify(count, idata);
50
51 tight_loop_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(result), rsel, count, idata.sel,
52 *idata.nullmask);
53 }
54}
55
56template <bool HAS_RSEL>
57static inline void hash_type_switch(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) {
58 assert(result.type == TypeId::HASH);
59 switch (input.type) {
60 case TypeId::BOOL:
61 case TypeId::INT8:
62 templated_loop_hash<HAS_RSEL, int8_t>(input, result, rsel, count);
63 break;
64 case TypeId::INT16:
65 templated_loop_hash<HAS_RSEL, int16_t>(input, result, rsel, count);
66 break;
67 case TypeId::INT32:
68 templated_loop_hash<HAS_RSEL, int32_t>(input, result, rsel, count);
69 break;
70 case TypeId::INT64:
71 templated_loop_hash<HAS_RSEL, int64_t>(input, result, rsel, count);
72 break;
73 case TypeId::FLOAT:
74 templated_loop_hash<HAS_RSEL, float>(input, result, rsel, count);
75 break;
76 case TypeId::DOUBLE:
77 templated_loop_hash<HAS_RSEL, double>(input, result, rsel, count);
78 break;
79 case TypeId::VARCHAR:
80 templated_loop_hash<HAS_RSEL, string_t>(input, result, rsel, count);
81 break;
82 default:
83 throw InvalidTypeException(input.type, "Invalid type for hash");
84 }
85}
86
87void VectorOperations::Hash(Vector &input, Vector &result, idx_t count) {
88 hash_type_switch<false>(input, result, nullptr, count);
89}
90
91void VectorOperations::Hash(Vector &input, Vector &result, const SelectionVector &sel, idx_t count) {
92 hash_type_switch<true>(input, result, &sel, count);
93}
94
95static inline hash_t combine_hash(hash_t a, hash_t b) {
96 return (a * UINT64_C(0xbf58476d1ce4e5b9)) ^ b;
97}
98
99template <bool HAS_RSEL, class T>
100static inline void tight_loop_combine_hash_constant(T *__restrict ldata, hash_t constant_hash,
101 hash_t *__restrict hash_data, const SelectionVector *rsel,
102 idx_t count, const SelectionVector *__restrict sel_vector,
103 nullmask_t &nullmask) {
104 if (nullmask.any()) {
105 for (idx_t i = 0; i < count; i++) {
106 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
107 auto idx = sel_vector->get_index(ridx);
108 auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]);
109 hash_data[ridx] = combine_hash(constant_hash, other_hash);
110 }
111 } else {
112 for (idx_t i = 0; i < count; i++) {
113 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
114 auto idx = sel_vector->get_index(ridx);
115 auto other_hash = duckdb::Hash<T>(ldata[idx]);
116 hash_data[ridx] = combine_hash(constant_hash, other_hash);
117 }
118 }
119}
120
121template <bool HAS_RSEL, class T>
122static inline void tight_loop_combine_hash(T *__restrict ldata, hash_t *__restrict hash_data,
123 const SelectionVector *rsel, idx_t count,
124 const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) {
125 if (nullmask.any()) {
126 for (idx_t i = 0; i < count; i++) {
127 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
128 auto idx = sel_vector->get_index(ridx);
129 auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]);
130 hash_data[ridx] = combine_hash(hash_data[ridx], other_hash);
131 }
132 } else {
133 for (idx_t i = 0; i < count; i++) {
134 auto ridx = HAS_RSEL ? rsel->get_index(i) : i;
135 auto idx = sel_vector->get_index(ridx);
136 auto other_hash = duckdb::Hash<T>(ldata[idx]);
137 hash_data[ridx] = combine_hash(hash_data[ridx], other_hash);
138 }
139 }
140}
141
142template <bool HAS_RSEL, class T>
143void templated_loop_combine_hash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) {
144 if (input.vector_type == VectorType::CONSTANT_VECTOR && hashes.vector_type == VectorType::CONSTANT_VECTOR) {
145 auto ldata = ConstantVector::GetData<T>(input);
146 auto hash_data = ConstantVector::GetData<hash_t>(hashes);
147
148 auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input));
149 *hash_data = combine_hash(*hash_data, other_hash);
150 } else {
151 VectorData idata;
152 input.Orrify(count, idata);
153 if (hashes.vector_type == VectorType::CONSTANT_VECTOR) {
154 // mix constant with non-constant, first get the constant value
155 auto constant_hash = *ConstantVector::GetData<hash_t>(hashes);
156 // now re-initialize the hashes vector to an empty flat vector
157 hashes.Initialize(hashes.type);
158 tight_loop_combine_hash_constant<HAS_RSEL, T>((T *)idata.data, constant_hash,
159 FlatVector::GetData<hash_t>(hashes), rsel, count, idata.sel,
160 *idata.nullmask);
161 } else {
162 assert(hashes.vector_type == VectorType::FLAT_VECTOR);
163 tight_loop_combine_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(hashes), rsel, count,
164 idata.sel, *idata.nullmask);
165 }
166 }
167}
168
169template <bool HAS_RSEL>
170static inline void combine_hash_type_switch(Vector &hashes, Vector &input, const SelectionVector *rsel, idx_t count) {
171 assert(hashes.type == TypeId::HASH);
172 switch (input.type) {
173 case TypeId::BOOL:
174 case TypeId::INT8:
175 templated_loop_combine_hash<HAS_RSEL, int8_t>(input, hashes, rsel, count);
176 break;
177 case TypeId::INT16:
178 templated_loop_combine_hash<HAS_RSEL, int16_t>(input, hashes, rsel, count);
179 break;
180 case TypeId::INT32:
181 templated_loop_combine_hash<HAS_RSEL, int32_t>(input, hashes, rsel, count);
182 break;
183 case TypeId::INT64:
184 templated_loop_combine_hash<HAS_RSEL, int64_t>(input, hashes, rsel, count);
185 break;
186 case TypeId::FLOAT:
187 templated_loop_combine_hash<HAS_RSEL, float>(input, hashes, rsel, count);
188 break;
189 case TypeId::DOUBLE:
190 templated_loop_combine_hash<HAS_RSEL, double>(input, hashes, rsel, count);
191 break;
192 case TypeId::VARCHAR:
193 templated_loop_combine_hash<HAS_RSEL, string_t>(input, hashes, rsel, count);
194 break;
195 default:
196 throw InvalidTypeException(input.type, "Invalid type for hash");
197 }
198}
199
200void VectorOperations::CombineHash(Vector &hashes, Vector &input, idx_t count) {
201 combine_hash_type_switch<false>(hashes, input, nullptr, count);
202}
203
204void VectorOperations::CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count) {
205 combine_hash_type_switch<true>(hashes, input, &rsel, count);
206}
207