1#include "duckdb/common/helper.hpp"
2#include "duckdb/common/row_operations/row_operations.hpp"
3#include "duckdb/common/types/vector.hpp"
4
5namespace duckdb {
6
7using ValidityBytes = TemplatedValidityMask<uint8_t>;
8
9template <class T>
10static void TemplatedHeapGather(Vector &v, const idx_t count, const SelectionVector &sel, data_ptr_t *key_locations) {
11 auto target = FlatVector::GetData<T>(v);
12
13 for (idx_t i = 0; i < count; ++i) {
14 const auto col_idx = sel.get_index(idx: i);
15 target[col_idx] = Load<T>(key_locations[i]);
16 key_locations[i] += sizeof(T);
17 }
18}
19
20static void HeapGatherStringVector(Vector &v, const idx_t vcount, const SelectionVector &sel,
21 data_ptr_t *key_locations) {
22 const auto &validity = FlatVector::Validity(vector&: v);
23 auto target = FlatVector::GetData<string_t>(vector&: v);
24
25 for (idx_t i = 0; i < vcount; i++) {
26 const auto col_idx = sel.get_index(idx: i);
27 if (!validity.RowIsValid(row_idx: col_idx)) {
28 continue;
29 }
30 auto len = Load<uint32_t>(ptr: key_locations[i]);
31 key_locations[i] += sizeof(uint32_t);
32 target[col_idx] = StringVector::AddStringOrBlob(vector&: v, data: string_t(const_char_ptr_cast(src: key_locations[i]), len));
33 key_locations[i] += len;
34 }
35}
36
37static void HeapGatherStructVector(Vector &v, const idx_t vcount, const SelectionVector &sel,
38 data_ptr_t *key_locations) {
39 // struct must have a validitymask for its fields
40 auto &child_types = StructType::GetChildTypes(type: v.GetType());
41 const idx_t struct_validitymask_size = (child_types.size() + 7) / 8;
42 data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE];
43 for (idx_t i = 0; i < vcount; i++) {
44 // use key_locations as the validitymask, and create struct_key_locations
45 struct_validitymask_locations[i] = key_locations[i];
46 key_locations[i] += struct_validitymask_size;
47 }
48
49 // now deserialize into the struct vectors
50 auto &children = StructVector::GetEntries(vector&: v);
51 for (idx_t i = 0; i < child_types.size(); i++) {
52 RowOperations::HeapGather(v&: *children[i], vcount, sel, col_idx: i, key_locations, validitymask_locations: struct_validitymask_locations);
53 }
54}
55
56static void HeapGatherListVector(Vector &v, const idx_t vcount, const SelectionVector &sel, data_ptr_t *key_locations) {
57 const auto &validity = FlatVector::Validity(vector&: v);
58
59 auto child_type = ListType::GetChildType(type: v.GetType());
60 auto list_data = ListVector::GetData(v);
61 data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE];
62
63 uint64_t entry_offset = ListVector::GetListSize(vector: v);
64 for (idx_t i = 0; i < vcount; i++) {
65 const auto col_idx = sel.get_index(idx: i);
66 if (!validity.RowIsValid(row_idx: col_idx)) {
67 continue;
68 }
69 // read list length
70 auto entry_remaining = Load<uint64_t>(ptr: key_locations[i]);
71 key_locations[i] += sizeof(uint64_t);
72 // set list entry attributes
73 list_data[col_idx].length = entry_remaining;
74 list_data[col_idx].offset = entry_offset;
75 // skip over the validity mask
76 data_ptr_t validitymask_location = key_locations[i];
77 idx_t offset_in_byte = 0;
78 key_locations[i] += (entry_remaining + 7) / 8;
79 // entry sizes
80 data_ptr_t var_entry_size_ptr = nullptr;
81 if (!TypeIsConstantSize(type: child_type.InternalType())) {
82 var_entry_size_ptr = key_locations[i];
83 key_locations[i] += entry_remaining * sizeof(idx_t);
84 }
85
86 // now read the list data
87 while (entry_remaining > 0) {
88 auto next = MinValue(a: entry_remaining, b: (idx_t)STANDARD_VECTOR_SIZE);
89
90 // initialize a new vector to append
91 Vector append_vector(v.GetType());
92 append_vector.SetVectorType(v.GetVectorType());
93
94 auto &list_vec_to_append = ListVector::GetEntry(vector&: append_vector);
95
96 // set validity
97 //! Since we are constructing the vector, this will always be a flat vector.
98 auto &append_validity = FlatVector::Validity(vector&: list_vec_to_append);
99 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
100 append_validity.Set(row_idx: entry_idx, valid: *(validitymask_location) & (1 << offset_in_byte));
101 if (++offset_in_byte == 8) {
102 validitymask_location++;
103 offset_in_byte = 0;
104 }
105 }
106
107 // compute entry sizes and set locations where the list entries are
108 if (TypeIsConstantSize(type: child_type.InternalType())) {
109 // constant size list entries
110 const idx_t type_size = GetTypeIdSize(type: child_type.InternalType());
111 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
112 list_entry_locations[entry_idx] = key_locations[i];
113 key_locations[i] += type_size;
114 }
115 } else {
116 // variable size list entries
117 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
118 list_entry_locations[entry_idx] = key_locations[i];
119 key_locations[i] += Load<idx_t>(ptr: var_entry_size_ptr);
120 var_entry_size_ptr += sizeof(idx_t);
121 }
122 }
123
124 // now deserialize and add to listvector
125 RowOperations::HeapGather(v&: list_vec_to_append, vcount: next, sel: *FlatVector::IncrementalSelectionVector(), col_idx: 0,
126 key_locations: list_entry_locations, validitymask_locations: nullptr);
127 ListVector::Append(target&: v, source: list_vec_to_append, source_size: next);
128
129 // update for next iteration
130 entry_remaining -= next;
131 entry_offset += next;
132 }
133 }
134}
135
136void RowOperations::HeapGather(Vector &v, const idx_t &vcount, const SelectionVector &sel, const idx_t &col_no,
137 data_ptr_t *key_locations, data_ptr_t *validitymask_locations) {
138 v.SetVectorType(VectorType::FLAT_VECTOR);
139
140 auto &validity = FlatVector::Validity(vector&: v);
141 if (validitymask_locations) {
142 // Precompute mask indexes
143 idx_t entry_idx;
144 idx_t idx_in_entry;
145 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
146
147 for (idx_t i = 0; i < vcount; i++) {
148 ValidityBytes row_mask(validitymask_locations[i]);
149 const auto valid = row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry);
150 const auto col_idx = sel.get_index(idx: i);
151 validity.Set(row_idx: col_idx, valid);
152 }
153 }
154
155 auto type = v.GetType().InternalType();
156 switch (type) {
157 case PhysicalType::BOOL:
158 case PhysicalType::INT8:
159 TemplatedHeapGather<int8_t>(v, count: vcount, sel, key_locations);
160 break;
161 case PhysicalType::INT16:
162 TemplatedHeapGather<int16_t>(v, count: vcount, sel, key_locations);
163 break;
164 case PhysicalType::INT32:
165 TemplatedHeapGather<int32_t>(v, count: vcount, sel, key_locations);
166 break;
167 case PhysicalType::INT64:
168 TemplatedHeapGather<int64_t>(v, count: vcount, sel, key_locations);
169 break;
170 case PhysicalType::UINT8:
171 TemplatedHeapGather<uint8_t>(v, count: vcount, sel, key_locations);
172 break;
173 case PhysicalType::UINT16:
174 TemplatedHeapGather<uint16_t>(v, count: vcount, sel, key_locations);
175 break;
176 case PhysicalType::UINT32:
177 TemplatedHeapGather<uint32_t>(v, count: vcount, sel, key_locations);
178 break;
179 case PhysicalType::UINT64:
180 TemplatedHeapGather<uint64_t>(v, count: vcount, sel, key_locations);
181 break;
182 case PhysicalType::INT128:
183 TemplatedHeapGather<hugeint_t>(v, count: vcount, sel, key_locations);
184 break;
185 case PhysicalType::FLOAT:
186 TemplatedHeapGather<float>(v, count: vcount, sel, key_locations);
187 break;
188 case PhysicalType::DOUBLE:
189 TemplatedHeapGather<double>(v, count: vcount, sel, key_locations);
190 break;
191 case PhysicalType::INTERVAL:
192 TemplatedHeapGather<interval_t>(v, count: vcount, sel, key_locations);
193 break;
194 case PhysicalType::VARCHAR:
195 HeapGatherStringVector(v, vcount, sel, key_locations);
196 break;
197 case PhysicalType::STRUCT:
198 HeapGatherStructVector(v, vcount, sel, key_locations);
199 break;
200 case PhysicalType::LIST:
201 HeapGatherListVector(v, vcount, sel, key_locations);
202 break;
203 default:
204 throw NotImplementedException("Unimplemented deserialize from row-format");
205 }
206}
207
208} // namespace duckdb
209