| 1 | //===--------------------------------------------------------------------===// | 
|---|
| 2 | // hash.cpp | 
|---|
| 3 | // Description: This file contains the vectorized hash implementations | 
|---|
| 4 | //===--------------------------------------------------------------------===// | 
|---|
| 5 |  | 
|---|
| 6 | #include "duckdb/common/vector_operations/vector_operations.hpp" | 
|---|
| 7 | #include "duckdb/common/types/hash.hpp" | 
|---|
| 8 | #include "duckdb/common/types/null_value.hpp" | 
|---|
| 9 |  | 
|---|
| 10 | using namespace duckdb; | 
|---|
| 11 | using namespace std; | 
|---|
| 12 |  | 
|---|
| 13 | struct HashOp { | 
|---|
| 14 | template <class T> static inline hash_t Operation(T input, bool is_null) { | 
|---|
| 15 | return duckdb::Hash<T>(is_null ? duckdb::NullValue<T>() : input); | 
|---|
| 16 | } | 
|---|
| 17 | }; | 
|---|
| 18 |  | 
|---|
| 19 | template <bool HAS_RSEL, class T> | 
|---|
| 20 | static inline void tight_loop_hash(T *__restrict ldata, hash_t *__restrict result_data, const SelectionVector *rsel, | 
|---|
| 21 | idx_t count, const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) { | 
|---|
| 22 | if (nullmask.any()) { | 
|---|
| 23 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 24 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 25 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 26 | result_data[ridx] = HashOp::Operation(ldata[idx], nullmask[idx]); | 
|---|
| 27 | } | 
|---|
| 28 | } else { | 
|---|
| 29 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 30 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 31 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 32 | result_data[ridx] = duckdb::Hash<T>(ldata[idx]); | 
|---|
| 33 | } | 
|---|
| 34 | } | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
| 37 | template <bool HAS_RSEL, class T> | 
|---|
| 38 | static inline void templated_loop_hash(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { | 
|---|
| 39 | if (input.vector_type == VectorType::CONSTANT_VECTOR) { | 
|---|
| 40 | result.vector_type = VectorType::CONSTANT_VECTOR; | 
|---|
| 41 |  | 
|---|
| 42 | auto ldata = ConstantVector::GetData<T>(input); | 
|---|
| 43 | auto result_data = ConstantVector::GetData<hash_t>(result); | 
|---|
| 44 | *result_data = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); | 
|---|
| 45 | } else { | 
|---|
| 46 | result.vector_type = VectorType::FLAT_VECTOR; | 
|---|
| 47 |  | 
|---|
| 48 | VectorData idata; | 
|---|
| 49 | input.Orrify(count, idata); | 
|---|
| 50 |  | 
|---|
| 51 | tight_loop_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(result), rsel, count, idata.sel, | 
|---|
| 52 | *idata.nullmask); | 
|---|
| 53 | } | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | template <bool HAS_RSEL> | 
|---|
| 57 | static inline void hash_type_switch(Vector &input, Vector &result, const SelectionVector *rsel, idx_t count) { | 
|---|
| 58 | assert(result.type == TypeId::HASH); | 
|---|
| 59 | switch (input.type) { | 
|---|
| 60 | case TypeId::BOOL: | 
|---|
| 61 | case TypeId::INT8: | 
|---|
| 62 | templated_loop_hash<HAS_RSEL, int8_t>(input, result, rsel, count); | 
|---|
| 63 | break; | 
|---|
| 64 | case TypeId::INT16: | 
|---|
| 65 | templated_loop_hash<HAS_RSEL, int16_t>(input, result, rsel, count); | 
|---|
| 66 | break; | 
|---|
| 67 | case TypeId::INT32: | 
|---|
| 68 | templated_loop_hash<HAS_RSEL, int32_t>(input, result, rsel, count); | 
|---|
| 69 | break; | 
|---|
| 70 | case TypeId::INT64: | 
|---|
| 71 | templated_loop_hash<HAS_RSEL, int64_t>(input, result, rsel, count); | 
|---|
| 72 | break; | 
|---|
| 73 | case TypeId::FLOAT: | 
|---|
| 74 | templated_loop_hash<HAS_RSEL, float>(input, result, rsel, count); | 
|---|
| 75 | break; | 
|---|
| 76 | case TypeId::DOUBLE: | 
|---|
| 77 | templated_loop_hash<HAS_RSEL, double>(input, result, rsel, count); | 
|---|
| 78 | break; | 
|---|
| 79 | case TypeId::VARCHAR: | 
|---|
| 80 | templated_loop_hash<HAS_RSEL, string_t>(input, result, rsel, count); | 
|---|
| 81 | break; | 
|---|
| 82 | default: | 
|---|
| 83 | throw InvalidTypeException(input.type, "Invalid type for hash"); | 
|---|
| 84 | } | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 | void VectorOperations::Hash(Vector &input, Vector &result, idx_t count) { | 
|---|
| 88 | hash_type_switch<false>(input, result, nullptr, count); | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | void VectorOperations::Hash(Vector &input, Vector &result, const SelectionVector &sel, idx_t count) { | 
|---|
| 92 | hash_type_switch<true>(input, result, &sel, count); | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | static inline hash_t combine_hash(hash_t a, hash_t b) { | 
|---|
| 96 | return (a * UINT64_C(0xbf58476d1ce4e5b9)) ^ b; | 
|---|
| 97 | } | 
|---|
| 98 |  | 
|---|
| 99 | template <bool HAS_RSEL, class T> | 
|---|
| 100 | static inline void tight_loop_combine_hash_constant(T *__restrict ldata, hash_t constant_hash, | 
|---|
| 101 | hash_t *__restrict hash_data, const SelectionVector *rsel, | 
|---|
| 102 | idx_t count, const SelectionVector *__restrict sel_vector, | 
|---|
| 103 | nullmask_t &nullmask) { | 
|---|
| 104 | if (nullmask.any()) { | 
|---|
| 105 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 106 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 107 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 108 | auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]); | 
|---|
| 109 | hash_data[ridx] = combine_hash(constant_hash, other_hash); | 
|---|
| 110 | } | 
|---|
| 111 | } else { | 
|---|
| 112 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 113 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 114 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 115 | auto other_hash = duckdb::Hash<T>(ldata[idx]); | 
|---|
| 116 | hash_data[ridx] = combine_hash(constant_hash, other_hash); | 
|---|
| 117 | } | 
|---|
| 118 | } | 
|---|
| 119 | } | 
|---|
| 120 |  | 
|---|
| 121 | template <bool HAS_RSEL, class T> | 
|---|
| 122 | static inline void tight_loop_combine_hash(T *__restrict ldata, hash_t *__restrict hash_data, | 
|---|
| 123 | const SelectionVector *rsel, idx_t count, | 
|---|
| 124 | const SelectionVector *__restrict sel_vector, nullmask_t &nullmask) { | 
|---|
| 125 | if (nullmask.any()) { | 
|---|
| 126 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 127 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 128 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 129 | auto other_hash = HashOp::Operation(ldata[idx], nullmask[idx]); | 
|---|
| 130 | hash_data[ridx] = combine_hash(hash_data[ridx], other_hash); | 
|---|
| 131 | } | 
|---|
| 132 | } else { | 
|---|
| 133 | for (idx_t i = 0; i < count; i++) { | 
|---|
| 134 | auto ridx = HAS_RSEL ? rsel->get_index(i) : i; | 
|---|
| 135 | auto idx = sel_vector->get_index(ridx); | 
|---|
| 136 | auto other_hash = duckdb::Hash<T>(ldata[idx]); | 
|---|
| 137 | hash_data[ridx] = combine_hash(hash_data[ridx], other_hash); | 
|---|
| 138 | } | 
|---|
| 139 | } | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | template <bool HAS_RSEL, class T> | 
|---|
| 143 | void templated_loop_combine_hash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) { | 
|---|
| 144 | if (input.vector_type == VectorType::CONSTANT_VECTOR && hashes.vector_type == VectorType::CONSTANT_VECTOR) { | 
|---|
| 145 | auto ldata = ConstantVector::GetData<T>(input); | 
|---|
| 146 | auto hash_data = ConstantVector::GetData<hash_t>(hashes); | 
|---|
| 147 |  | 
|---|
| 148 | auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input)); | 
|---|
| 149 | *hash_data = combine_hash(*hash_data, other_hash); | 
|---|
| 150 | } else { | 
|---|
| 151 | VectorData idata; | 
|---|
| 152 | input.Orrify(count, idata); | 
|---|
| 153 | if (hashes.vector_type == VectorType::CONSTANT_VECTOR) { | 
|---|
| 154 | // mix constant with non-constant, first get the constant value | 
|---|
| 155 | auto constant_hash = *ConstantVector::GetData<hash_t>(hashes); | 
|---|
| 156 | // now re-initialize the hashes vector to an empty flat vector | 
|---|
| 157 | hashes.Initialize(hashes.type); | 
|---|
| 158 | tight_loop_combine_hash_constant<HAS_RSEL, T>((T *)idata.data, constant_hash, | 
|---|
| 159 | FlatVector::GetData<hash_t>(hashes), rsel, count, idata.sel, | 
|---|
| 160 | *idata.nullmask); | 
|---|
| 161 | } else { | 
|---|
| 162 | assert(hashes.vector_type == VectorType::FLAT_VECTOR); | 
|---|
| 163 | tight_loop_combine_hash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(hashes), rsel, count, | 
|---|
| 164 | idata.sel, *idata.nullmask); | 
|---|
| 165 | } | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | template <bool HAS_RSEL> | 
|---|
| 170 | static inline void combine_hash_type_switch(Vector &hashes, Vector &input, const SelectionVector *rsel, idx_t count) { | 
|---|
| 171 | assert(hashes.type == TypeId::HASH); | 
|---|
| 172 | switch (input.type) { | 
|---|
| 173 | case TypeId::BOOL: | 
|---|
| 174 | case TypeId::INT8: | 
|---|
| 175 | templated_loop_combine_hash<HAS_RSEL, int8_t>(input, hashes, rsel, count); | 
|---|
| 176 | break; | 
|---|
| 177 | case TypeId::INT16: | 
|---|
| 178 | templated_loop_combine_hash<HAS_RSEL, int16_t>(input, hashes, rsel, count); | 
|---|
| 179 | break; | 
|---|
| 180 | case TypeId::INT32: | 
|---|
| 181 | templated_loop_combine_hash<HAS_RSEL, int32_t>(input, hashes, rsel, count); | 
|---|
| 182 | break; | 
|---|
| 183 | case TypeId::INT64: | 
|---|
| 184 | templated_loop_combine_hash<HAS_RSEL, int64_t>(input, hashes, rsel, count); | 
|---|
| 185 | break; | 
|---|
| 186 | case TypeId::FLOAT: | 
|---|
| 187 | templated_loop_combine_hash<HAS_RSEL, float>(input, hashes, rsel, count); | 
|---|
| 188 | break; | 
|---|
| 189 | case TypeId::DOUBLE: | 
|---|
| 190 | templated_loop_combine_hash<HAS_RSEL, double>(input, hashes, rsel, count); | 
|---|
| 191 | break; | 
|---|
| 192 | case TypeId::VARCHAR: | 
|---|
| 193 | templated_loop_combine_hash<HAS_RSEL, string_t>(input, hashes, rsel, count); | 
|---|
| 194 | break; | 
|---|
| 195 | default: | 
|---|
| 196 | throw InvalidTypeException(input.type, "Invalid type for hash"); | 
|---|
| 197 | } | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | void VectorOperations::CombineHash(Vector &hashes, Vector &input, idx_t count) { | 
|---|
| 201 | combine_hash_type_switch<false>(hashes, input, nullptr, count); | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | void VectorOperations::CombineHash(Vector &hashes, Vector &input, const SelectionVector &rsel, idx_t count) { | 
|---|
| 205 | combine_hash_type_switch<true>(hashes, input, &rsel, count); | 
|---|
| 206 | } | 
|---|
| 207 |  | 
|---|