1#include "duckdb/common/helper.hpp"
2#include "duckdb/common/row_operations/row_operations.hpp"
3#include "duckdb/common/types/vector.hpp"
4
5namespace duckdb {
6
7using ValidityBytes = TemplatedValidityMask<uint8_t>;
8
9static void ComputeStringEntrySizes(UnifiedVectorFormat &vdata, idx_t entry_sizes[], const idx_t ser_count,
10 const SelectionVector &sel, const idx_t offset) {
11 auto strings = UnifiedVectorFormat::GetData<string_t>(format: vdata);
12 for (idx_t i = 0; i < ser_count; i++) {
13 auto idx = sel.get_index(idx: i);
14 auto str_idx = vdata.sel->get_index(idx: idx + offset);
15 if (vdata.validity.RowIsValid(row_idx: str_idx)) {
16 entry_sizes[i] += sizeof(uint32_t) + strings[str_idx].GetSize();
17 }
18 }
19}
20
21static void ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
22 const SelectionVector &sel, idx_t offset) {
23 // obtain child vectors
24 idx_t num_children;
25 auto &children = StructVector::GetEntries(vector&: v);
26 num_children = children.size();
27 // add struct validitymask size
28 const idx_t struct_validitymask_size = (num_children + 7) / 8;
29 for (idx_t i = 0; i < ser_count; i++) {
30 entry_sizes[i] += struct_validitymask_size;
31 }
32 // compute size of child vectors
33 for (auto &struct_vector : children) {
34 RowOperations::ComputeEntrySizes(v&: *struct_vector, entry_sizes, vcount, ser_count, sel, offset);
35 }
36}
37
38static void ComputeListEntrySizes(Vector &v, UnifiedVectorFormat &vdata, idx_t entry_sizes[], idx_t ser_count,
39 const SelectionVector &sel, idx_t offset) {
40 auto list_data = ListVector::GetData(v);
41 auto &child_vector = ListVector::GetEntry(vector&: v);
42 idx_t list_entry_sizes[STANDARD_VECTOR_SIZE];
43 for (idx_t i = 0; i < ser_count; i++) {
44 auto idx = sel.get_index(idx: i);
45 auto source_idx = vdata.sel->get_index(idx: idx + offset);
46 if (vdata.validity.RowIsValid(row_idx: source_idx)) {
47 auto list_entry = list_data[source_idx];
48
49 // make room for list length, list validitymask
50 entry_sizes[i] += sizeof(list_entry.length);
51 entry_sizes[i] += (list_entry.length + 7) / 8;
52
53 // serialize size of each entry (if non-constant size)
54 if (!TypeIsConstantSize(type: ListType::GetChildType(type: v.GetType()).InternalType())) {
55 entry_sizes[i] += list_entry.length * sizeof(list_entry.length);
56 }
57
58 // compute size of each the elements in list_entry and sum them
59 auto entry_remaining = list_entry.length;
60 auto entry_offset = list_entry.offset;
61 while (entry_remaining > 0) {
62 // the list entry can span multiple vectors
63 auto next = MinValue(a: (idx_t)STANDARD_VECTOR_SIZE, b: entry_remaining);
64
65 // compute and add to the total
66 std::fill_n(list_entry_sizes, next, 0);
67 RowOperations::ComputeEntrySizes(v&: child_vector, entry_sizes: list_entry_sizes, vcount: next, ser_count: next,
68 sel: *FlatVector::IncrementalSelectionVector(), offset: entry_offset);
69 for (idx_t list_idx = 0; list_idx < next; list_idx++) {
70 entry_sizes[i] += list_entry_sizes[list_idx];
71 }
72
73 // update for next iteration
74 entry_remaining -= next;
75 entry_offset += next;
76 }
77 }
78 }
79}
80
81void RowOperations::ComputeEntrySizes(Vector &v, UnifiedVectorFormat &vdata, idx_t entry_sizes[], idx_t vcount,
82 idx_t ser_count, const SelectionVector &sel, idx_t offset) {
83 const auto physical_type = v.GetType().InternalType();
84 if (TypeIsConstantSize(type: physical_type)) {
85 const auto type_size = GetTypeIdSize(type: physical_type);
86 for (idx_t i = 0; i < ser_count; i++) {
87 entry_sizes[i] += type_size;
88 }
89 } else {
90 switch (physical_type) {
91 case PhysicalType::VARCHAR:
92 ComputeStringEntrySizes(vdata, entry_sizes, ser_count, sel, offset);
93 break;
94 case PhysicalType::STRUCT:
95 ComputeStructEntrySizes(v, entry_sizes, vcount, ser_count, sel, offset);
96 break;
97 case PhysicalType::LIST:
98 ComputeListEntrySizes(v, vdata, entry_sizes, ser_count, sel, offset);
99 break;
100 default:
101 // LCOV_EXCL_START
102 throw NotImplementedException("Column with variable size type %s cannot be serialized to row-format",
103 v.GetType().ToString());
104 // LCOV_EXCL_STOP
105 }
106 }
107}
108
109void RowOperations::ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
110 const SelectionVector &sel, idx_t offset) {
111 UnifiedVectorFormat vdata;
112 v.ToUnifiedFormat(count: vcount, data&: vdata);
113 ComputeEntrySizes(v, vdata, entry_sizes, vcount, ser_count, sel, offset);
114}
115
116template <class T>
117static void TemplatedHeapScatter(UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t count, idx_t col_idx,
118 data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
119 auto source = UnifiedVectorFormat::GetData<T>(vdata);
120 if (!validitymask_locations) {
121 for (idx_t i = 0; i < count; i++) {
122 auto idx = sel.get_index(idx: i);
123 auto source_idx = vdata.sel->get_index(idx: idx + offset);
124
125 auto target = (T *)key_locations[i];
126 Store<T>(source[source_idx], data_ptr_cast(target));
127 key_locations[i] += sizeof(T);
128 }
129 } else {
130 idx_t entry_idx;
131 idx_t idx_in_entry;
132 ValidityBytes::GetEntryIndex(row_idx: col_idx, entry_idx, idx_in_entry);
133 const auto bit = ~(1UL << idx_in_entry);
134 for (idx_t i = 0; i < count; i++) {
135 auto idx = sel.get_index(idx: i);
136 auto source_idx = vdata.sel->get_index(idx: idx + offset);
137
138 auto target = (T *)key_locations[i];
139 Store<T>(source[source_idx], data_ptr_cast(target));
140 key_locations[i] += sizeof(T);
141
142 // set the validitymask
143 if (!vdata.validity.RowIsValid(row_idx: source_idx)) {
144 *(validitymask_locations[i] + entry_idx) &= bit;
145 }
146 }
147 }
148}
149
150static void HeapScatterStringVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
151 data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
152 UnifiedVectorFormat vdata;
153 v.ToUnifiedFormat(count: vcount, data&: vdata);
154
155 auto strings = UnifiedVectorFormat::GetData<string_t>(format: vdata);
156 if (!validitymask_locations) {
157 for (idx_t i = 0; i < ser_count; i++) {
158 auto idx = sel.get_index(idx: i);
159 auto source_idx = vdata.sel->get_index(idx: idx + offset);
160 if (vdata.validity.RowIsValid(row_idx: source_idx)) {
161 auto &string_entry = strings[source_idx];
162 // store string size
163 Store<uint32_t>(val: string_entry.GetSize(), ptr: key_locations[i]);
164 key_locations[i] += sizeof(uint32_t);
165 // store the string
166 memcpy(dest: key_locations[i], src: string_entry.GetData(), n: string_entry.GetSize());
167 key_locations[i] += string_entry.GetSize();
168 }
169 }
170 } else {
171 idx_t entry_idx;
172 idx_t idx_in_entry;
173 ValidityBytes::GetEntryIndex(row_idx: col_idx, entry_idx, idx_in_entry);
174 const auto bit = ~(1UL << idx_in_entry);
175 for (idx_t i = 0; i < ser_count; i++) {
176 auto idx = sel.get_index(idx: i);
177 auto source_idx = vdata.sel->get_index(idx: idx + offset);
178 if (vdata.validity.RowIsValid(row_idx: source_idx)) {
179 auto &string_entry = strings[source_idx];
180 // store string size
181 Store<uint32_t>(val: string_entry.GetSize(), ptr: key_locations[i]);
182 key_locations[i] += sizeof(uint32_t);
183 // store the string
184 memcpy(dest: key_locations[i], src: string_entry.GetData(), n: string_entry.GetSize());
185 key_locations[i] += string_entry.GetSize();
186 } else {
187 // set the validitymask
188 *(validitymask_locations[i] + entry_idx) &= bit;
189 }
190 }
191 }
192}
193
194static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
195 data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
196 UnifiedVectorFormat vdata;
197 v.ToUnifiedFormat(count: vcount, data&: vdata);
198
199 auto &children = StructVector::GetEntries(vector&: v);
200 idx_t num_children = children.size();
201
202 // the whole struct itself can be NULL
203 idx_t entry_idx;
204 idx_t idx_in_entry;
205 ValidityBytes::GetEntryIndex(row_idx: col_idx, entry_idx, idx_in_entry);
206 const auto bit = ~(1UL << idx_in_entry);
207
208 // struct must have a validitymask for its fields
209 const idx_t struct_validitymask_size = (num_children + 7) / 8;
210 data_ptr_t struct_validitymask_locations[STANDARD_VECTOR_SIZE];
211 for (idx_t i = 0; i < ser_count; i++) {
212 // initialize the struct validity mask
213 struct_validitymask_locations[i] = key_locations[i];
214 memset(s: struct_validitymask_locations[i], c: -1, n: struct_validitymask_size);
215 key_locations[i] += struct_validitymask_size;
216
217 // set whether the whole struct is null
218 auto idx = sel.get_index(idx: i);
219 auto source_idx = vdata.sel->get_index(idx) + offset;
220 if (validitymask_locations && !vdata.validity.RowIsValid(row_idx: source_idx)) {
221 *(validitymask_locations[i] + entry_idx) &= bit;
222 }
223 }
224
225 // now serialize the struct vectors
226 for (idx_t i = 0; i < children.size(); i++) {
227 auto &struct_vector = *children[i];
228 RowOperations::HeapScatter(v&: struct_vector, vcount, sel, ser_count, col_idx: i, key_locations,
229 validitymask_locations: struct_validitymask_locations, offset);
230 }
231}
232
233static void HeapScatterListVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_no,
234 data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
235 UnifiedVectorFormat vdata;
236 v.ToUnifiedFormat(count: vcount, data&: vdata);
237
238 idx_t entry_idx;
239 idx_t idx_in_entry;
240 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
241
242 auto list_data = ListVector::GetData(v);
243
244 auto &child_vector = ListVector::GetEntry(vector&: v);
245
246 UnifiedVectorFormat list_vdata;
247 child_vector.ToUnifiedFormat(count: ListVector::GetListSize(vector: v), data&: list_vdata);
248 auto child_type = ListType::GetChildType(type: v.GetType()).InternalType();
249
250 idx_t list_entry_sizes[STANDARD_VECTOR_SIZE];
251 data_ptr_t list_entry_locations[STANDARD_VECTOR_SIZE];
252
253 for (idx_t i = 0; i < ser_count; i++) {
254 auto idx = sel.get_index(idx: i);
255 auto source_idx = vdata.sel->get_index(idx: idx + offset);
256 if (!vdata.validity.RowIsValid(row_idx: source_idx)) {
257 if (validitymask_locations) {
258 // set the row validitymask for this column to invalid
259 ValidityBytes row_mask(validitymask_locations[i]);
260 row_mask.SetInvalidUnsafe(entry_idx, idx_in_entry);
261 }
262 continue;
263 }
264 auto list_entry = list_data[source_idx];
265
266 // store list length
267 Store<uint64_t>(val: list_entry.length, ptr: key_locations[i]);
268 key_locations[i] += sizeof(list_entry.length);
269
270 // make room for the validitymask
271 data_ptr_t list_validitymask_location = key_locations[i];
272 idx_t entry_offset_in_byte = 0;
273 idx_t validitymask_size = (list_entry.length + 7) / 8;
274 memset(s: list_validitymask_location, c: -1, n: validitymask_size);
275 key_locations[i] += validitymask_size;
276
277 // serialize size of each entry (if non-constant size)
278 data_ptr_t var_entry_size_ptr = nullptr;
279 if (!TypeIsConstantSize(type: child_type)) {
280 var_entry_size_ptr = key_locations[i];
281 key_locations[i] += list_entry.length * sizeof(idx_t);
282 }
283
284 auto entry_remaining = list_entry.length;
285 auto entry_offset = list_entry.offset;
286 while (entry_remaining > 0) {
287 // the list entry can span multiple vectors
288 auto next = MinValue(a: (idx_t)STANDARD_VECTOR_SIZE, b: entry_remaining);
289
290 // serialize list validity
291 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
292 auto list_idx = list_vdata.sel->get_index(idx: entry_idx + entry_offset);
293 if (!list_vdata.validity.RowIsValid(row_idx: list_idx)) {
294 *(list_validitymask_location) &= ~(1UL << entry_offset_in_byte);
295 }
296 if (++entry_offset_in_byte == 8) {
297 list_validitymask_location++;
298 entry_offset_in_byte = 0;
299 }
300 }
301
302 if (TypeIsConstantSize(type: child_type)) {
303 // constant size list entries: set list entry locations
304 const idx_t type_size = GetTypeIdSize(type: child_type);
305 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
306 list_entry_locations[entry_idx] = key_locations[i];
307 key_locations[i] += type_size;
308 }
309 } else {
310 // variable size list entries: compute entry sizes and set list entry locations
311 std::fill_n(list_entry_sizes, next, 0);
312 RowOperations::ComputeEntrySizes(v&: child_vector, entry_sizes: list_entry_sizes, vcount: next, ser_count: next,
313 sel: *FlatVector::IncrementalSelectionVector(), offset: entry_offset);
314 for (idx_t entry_idx = 0; entry_idx < next; entry_idx++) {
315 list_entry_locations[entry_idx] = key_locations[i];
316 key_locations[i] += list_entry_sizes[entry_idx];
317 Store<idx_t>(val: list_entry_sizes[entry_idx], ptr: var_entry_size_ptr);
318 var_entry_size_ptr += sizeof(idx_t);
319 }
320 }
321
322 // now serialize to the locations
323 RowOperations::HeapScatter(v&: child_vector, vcount: ListVector::GetListSize(vector: v),
324 sel: *FlatVector::IncrementalSelectionVector(), ser_count: next, col_idx: 0, key_locations: list_entry_locations,
325 validitymask_locations: nullptr, offset: entry_offset);
326
327 // update for next iteration
328 entry_remaining -= next;
329 entry_offset += next;
330 }
331 }
332}
333
334void RowOperations::HeapScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
335 data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
336 if (TypeIsConstantSize(type: v.GetType().InternalType())) {
337 UnifiedVectorFormat vdata;
338 v.ToUnifiedFormat(count: vcount, data&: vdata);
339 RowOperations::HeapScatterVData(vdata, type: v.GetType().InternalType(), sel, ser_count, col_idx, key_locations,
340 validitymask_locations, offset);
341 } else {
342 switch (v.GetType().InternalType()) {
343 case PhysicalType::VARCHAR:
344 HeapScatterStringVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset);
345 break;
346 case PhysicalType::STRUCT:
347 HeapScatterStructVector(v, vcount, sel, ser_count, col_idx, key_locations, validitymask_locations, offset);
348 break;
349 case PhysicalType::LIST:
350 HeapScatterListVector(v, vcount, sel, ser_count, col_no: col_idx, key_locations, validitymask_locations, offset);
351 break;
352 default:
353 // LCOV_EXCL_START
354 throw NotImplementedException("Serialization of variable length vector with type %s",
355 v.GetType().ToString());
356 // LCOV_EXCL_STOP
357 }
358 }
359}
360
361void RowOperations::HeapScatterVData(UnifiedVectorFormat &vdata, PhysicalType type, const SelectionVector &sel,
362 idx_t ser_count, idx_t col_idx, data_ptr_t *key_locations,
363 data_ptr_t *validitymask_locations, idx_t offset) {
364 switch (type) {
365 case PhysicalType::BOOL:
366 case PhysicalType::INT8:
367 TemplatedHeapScatter<int8_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
368 break;
369 case PhysicalType::INT16:
370 TemplatedHeapScatter<int16_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
371 break;
372 case PhysicalType::INT32:
373 TemplatedHeapScatter<int32_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
374 break;
375 case PhysicalType::INT64:
376 TemplatedHeapScatter<int64_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
377 break;
378 case PhysicalType::UINT8:
379 TemplatedHeapScatter<uint8_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
380 break;
381 case PhysicalType::UINT16:
382 TemplatedHeapScatter<uint16_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
383 break;
384 case PhysicalType::UINT32:
385 TemplatedHeapScatter<uint32_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
386 break;
387 case PhysicalType::UINT64:
388 TemplatedHeapScatter<uint64_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
389 break;
390 case PhysicalType::INT128:
391 TemplatedHeapScatter<hugeint_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
392 break;
393 case PhysicalType::FLOAT:
394 TemplatedHeapScatter<float>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
395 break;
396 case PhysicalType::DOUBLE:
397 TemplatedHeapScatter<double>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
398 break;
399 case PhysicalType::INTERVAL:
400 TemplatedHeapScatter<interval_t>(vdata, sel, count: ser_count, col_idx, key_locations, validitymask_locations, offset);
401 break;
402 default:
403 throw NotImplementedException("FIXME: Serialize to of constant type column to row-format");
404 }
405}
406
407} // namespace duckdb
408