#include "duckdb/common/helper.hpp"
#include "duckdb/common/row_operations/row_operations.hpp"
#include "duckdb/common/types/vector.hpp"

namespace duckdb {

using ValidityBytes = TemplatedValidityMask<uint8_t>;

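// Gathers fixed-size values of type T from the row heap into a flat vector.
// key_locations[i] points at the serialized value for row i and is advanced
// past the value once it has been read.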
template <class T>
static void TemplatedHeapGather(Vector &v, const idx_t count, const SelectionVector &sel, data_ptr_t *key_locations) {
	auto target = FlatVector::GetData<T>(v);

	for (idx_t i = 0; i < count; ++i) {
		const auto col_idx = sel.get_index(i);
		target[col_idx] = Load<T>(key_locations[i]);
		key_locations[i] += sizeof(T);
	}
}

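// Gathers string values from the row heap: each non-NULL entry is stored as a
// uint32_t length followed by the raw bytes. NULL rows carry no payload, so they
// are skipped and their key_locations pointers are left untouched.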
static void HeapGatherStringVector(Vector &v, const idx_t vcount, const SelectionVector &sel,
                                   data_ptr_t *key_locations) {
	const auto &validity = FlatVector::Validity(v);
	auto target = FlatVector::GetData<string_t>(v);

	for (idx_t i = 0; i < vcount; i++) {
		const auto col_idx = sel.get_index(i);
		if (!validity.RowIsValid(col_idx)) {
			continue;
		}
		auto len = Load<uint32_t>(key_locations[i]);
		key_locations[i] += sizeof(uint32_t);
		target[col_idx] = StringVector::AddStringOrBlob(v, string_t(const_char_ptr_cast(key_locations[i]), len));
		key_locations[i] += len;
	}
}

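// Gathers struct values from the row heap. Each row starts with a bit-packed
// validity mask covering the struct's children, followed by the children's data;
// the mask bytes are peeled off here and each child vector is then gathered
// recursively.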
static void HeapGatherStructVector(Vector &v, const idx_t vcount, const SelectionVector &sel,
                                   data_ptr_t *key_locations) {
	// struct must have a validity mask for its fields
	auto &child_types = StructType::GetChildTypes(v.GetType());
	const idx_t struct_validitymask_size = (child_types.size() + 7) / 8;
	data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE];
	for (idx_t i = 0; i < vcount; i++) {
		// the row's heap data starts with the struct validity mask: record its location and skip past it
		struct_validitymask_locations[i] = key_locations[i];
		key_locations[i] += struct_validitymask_size;
	}

	// now deserialize into the struct vectors
	auto &children = StructVector::GetEntries(v);
	for (idx_t i = 0; i < child_types.size(); i++) {
		RowOperations::HeapGather(*children[i], vcount, sel, i, key_locations, struct_validitymask_locations);
	}
}

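// Gathers list values from the row heap. Each non-NULL entry is stored as a
// uint64_t length, a bit-packed validity mask for the child entries, an array of
// per-entry sizes (only for variable-size child types), and finally the child
// data. The child entries are appended to the list's child vector below.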
static void HeapGatherListVector(Vector &v, const idx_t vcount, const SelectionVector &sel, data_ptr_t *key_locations) {
	const auto &validity = FlatVector::Validity(v);

	auto child_type = ListType::GetChildType(v.GetType());
	auto list_data = ListVector::GetData(v);
	data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE];

	uint64_t entry_offset = ListVector::GetListSize(v);
	for (idx_t i = 0; i < vcount; i++) {
		const auto col_idx = sel.get_index(i);
		if (!validity.RowIsValid(col_idx)) {
			continue;
		}
		// read the list length
		auto entry_remaining = Load<uint64_t>(key_locations[i]);
		key_locations[i] += sizeof(uint64_t);
		// set the list entry attributes
		list_data[col_idx].length = entry_remaining;
		list_data[col_idx].offset = entry_offset;
		// remember where the child validity mask starts, then skip over it
		data_ptr_t validitymask_location = key_locations[i];
		idx_t offset_in_byte = 0;
		key_locations[i] += (entry_remaining + 7) / 8;
		// for variable-size child types, an array of per-entry sizes follows the validity mask
		data_ptr_t var_entry_size_ptr = nullptr;
		if (!TypeIsConstantSize(child_type.InternalType())) {
			var_entry_size_ptr = key_locations[i];
			key_locations[i] += entry_remaining * sizeof(idx_t);
		}

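		// the child data may exceed a single vector, so it is deserialized and appended
		// in chunks of at most STANDARD_VECTOR_SIZE entries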
		// now read the list data
		while (entry_remaining > 0) {
			auto next = MinValue(entry_remaining, (idx_t)STANDARD_VECTOR_SIZE);

			// initialize a new vector to append
			Vector append_vector(v.GetType());
			append_vector.SetVectorType(v.GetVectorType());

			auto &list_vec_to_append = ListVector::GetEntry(append_vector);

			// set validity
			//! Since we are constructing the vector, this will always be a flat vector.
			auto &append_validity = FlatVector::Validity(list_vec_to_append);
			for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
				append_validity.Set(entry_idx, *(validitymask_location) & (1 << offset_in_byte));
				if (++offset_in_byte == 8) {
					validitymask_location++;
					offset_in_byte = 0;
				}
			}

			// compute entry sizes and set locations where the list entries are
			if (TypeIsConstantSize(child_type.InternalType())) {
				// constant size list entries
				const idx_t type_size = GetTypeIdSize(child_type.InternalType());
				for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
					list_entry_locations[entry_idx] = key_locations[i];
					key_locations[i] += type_size;
				}
			} else {
				// variable size list entries
				for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
					list_entry_locations[entry_idx] = key_locations[i];
					key_locations[i] += Load<idx_t>(var_entry_size_ptr);
					var_entry_size_ptr += sizeof(idx_t);
				}
			}

			// now deserialize and add to the list vector
			RowOperations::HeapGather(list_vec_to_append, next, *FlatVector::IncrementalSelectionVector(), 0,
			                          list_entry_locations, nullptr);
			ListVector::Append(v, list_vec_to_append, next);

			// update for the next iteration
			entry_remaining -= next;
			entry_offset += next;
		}
	}
}

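// Gathers a single column from the row heap into vector v. When validitymask_locations
// is provided, the column's validity is first restored from the byte-aligned per-row
// mask (col_no selects the bit for this column); otherwise the vector's existing
// validity is left untouched. The payload is then deserialized according to the
// vector's physical type.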
void RowOperations::HeapGather(Vector &v, const idx_t &vcount, const SelectionVector &sel, const idx_t &col_no,
                               data_ptr_t *key_locations, data_ptr_t *validitymask_locations) {
	v.SetVectorType(VectorType::FLAT_VECTOR);

	auto &validity = FlatVector::Validity(v);
	if (validitymask_locations) {
		// precompute the mask indexes for this column
		idx_t entry_idx;
		idx_t idx_in_entry;
		ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);

		for (idx_t i = 0; i < vcount; i++) {
			ValidityBytes row_mask(validitymask_locations[i]);
			const auto valid = row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry);
			const auto col_idx = sel.get_index(i);
			validity.Set(col_idx, valid);
		}
	}

	auto type = v.GetType().InternalType();
	switch (type) {
	case PhysicalType::BOOL:
	case PhysicalType::INT8:
		TemplatedHeapGather<int8_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::INT16:
		TemplatedHeapGather<int16_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::INT32:
		TemplatedHeapGather<int32_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::INT64:
		TemplatedHeapGather<int64_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::UINT8:
		TemplatedHeapGather<uint8_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::UINT16:
		TemplatedHeapGather<uint16_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::UINT32:
		TemplatedHeapGather<uint32_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::UINT64:
		TemplatedHeapGather<uint64_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::INT128:
		TemplatedHeapGather<hugeint_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::FLOAT:
		TemplatedHeapGather<float>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::DOUBLE:
		TemplatedHeapGather<double>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::INTERVAL:
		TemplatedHeapGather<interval_t>(v, vcount, sel, key_locations);
		break;
	case PhysicalType::VARCHAR:
		HeapGatherStringVector(v, vcount, sel, key_locations);
		break;
	case PhysicalType::STRUCT:
		HeapGatherStructVector(v, vcount, sel, key_locations);
		break;
	case PhysicalType::LIST:
		HeapGatherListVector(v, vcount, sel, key_locations);
		break;
	default:
		throw NotImplementedException("Unimplemented deserialize from row-format");
	}
}

} // namespace duckdb