1//===--------------------------------------------------------------------===//
2// row_gather.cpp
3// Description: This file contains the implementation of the gather operators
4//===--------------------------------------------------------------------===//
5
6#include "duckdb/common/exception.hpp"
7#include "duckdb/common/operator/constant_operators.hpp"
8#include "duckdb/common/row_operations/row_operations.hpp"
9#include "duckdb/common/types/row/row_data_collection.hpp"
10#include "duckdb/common/types/row/row_layout.hpp"
11#include "duckdb/common/types/row/tuple_data_layout.hpp"
12
13namespace duckdb {
14
15using ValidityBytes = RowLayout::ValidityBytes;
16
17template <class T>
18static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Vector &col,
19 const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no,
20 idx_t build_size) {
21 // Precompute mask indexes
22 const auto &offsets = layout.GetOffsets();
23 const auto col_offset = offsets[col_no];
24 idx_t entry_idx;
25 idx_t idx_in_entry;
26 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
27
28 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
29 auto data = FlatVector::GetData<T>(col);
30 auto &col_mask = FlatVector::Validity(vector&: col);
31
32 for (idx_t i = 0; i < count; i++) {
33 auto row_idx = row_sel.get_index(idx: i);
34 auto row = ptrs[row_idx];
35 auto col_idx = col_sel.get_index(idx: i);
36 data[col_idx] = Load<T>(row + col_offset);
37 ValidityBytes row_mask(row);
38 if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
39 if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) {
40 //! We need to initialize the mask with the vector size.
41 col_mask.Initialize(count: build_size);
42 }
43 col_mask.SetInvalid(col_idx);
44 }
45 }
46}
47
48static void GatherVarchar(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
49 idx_t count, const RowLayout &layout, idx_t col_no, idx_t build_size,
50 data_ptr_t base_heap_ptr) {
51 // Precompute mask indexes
52 const auto &offsets = layout.GetOffsets();
53 const auto col_offset = offsets[col_no];
54 const auto heap_offset = layout.GetHeapOffset();
55 idx_t entry_idx;
56 idx_t idx_in_entry;
57 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
58
59 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
60 auto data = FlatVector::GetData<string_t>(vector&: col);
61 auto &col_mask = FlatVector::Validity(vector&: col);
62
63 for (idx_t i = 0; i < count; i++) {
64 auto row_idx = row_sel.get_index(idx: i);
65 auto row = ptrs[row_idx];
66 auto col_idx = col_sel.get_index(idx: i);
67 auto col_ptr = row + col_offset;
68 data[col_idx] = Load<string_t>(ptr: col_ptr);
69 ValidityBytes row_mask(row);
70 if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
71 if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) {
72 //! We need to initialize the mask with the vector size.
73 col_mask.Initialize(count: build_size);
74 }
75 col_mask.SetInvalid(col_idx);
76 } else if (base_heap_ptr && Load<uint32_t>(ptr: col_ptr) > string_t::INLINE_LENGTH) {
77 // Not inline, so unswizzle the copied pointer the pointer
78 auto heap_ptr_ptr = row + heap_offset;
79 auto heap_row_ptr = base_heap_ptr + Load<idx_t>(ptr: heap_ptr_ptr);
80 auto string_ptr = data_ptr_t(data + col_idx) + string_t::HEADER_SIZE;
81 Store<data_ptr_t>(val: heap_row_ptr + Load<idx_t>(ptr: string_ptr), ptr: string_ptr);
82#ifdef DEBUG
83 data[col_idx].Verify();
84#endif
85 }
86 }
87}
88
89static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vector &col,
90 const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no,
91 data_ptr_t base_heap_ptr) {
92 const auto &offsets = layout.GetOffsets();
93 const auto col_offset = offsets[col_no];
94 const auto heap_offset = layout.GetHeapOffset();
95 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
96
97 // Build the gather locations
98 auto data_locations = make_unsafe_uniq_array<data_ptr_t>(n: count);
99 auto mask_locations = make_unsafe_uniq_array<data_ptr_t>(n: count);
100 for (idx_t i = 0; i < count; i++) {
101 auto row_idx = row_sel.get_index(idx: i);
102 auto row = ptrs[row_idx];
103 mask_locations[i] = row;
104 auto col_ptr = ptrs[row_idx] + col_offset;
105 if (base_heap_ptr) {
106 auto heap_ptr_ptr = row + heap_offset;
107 auto heap_row_ptr = base_heap_ptr + Load<idx_t>(ptr: heap_ptr_ptr);
108 data_locations[i] = heap_row_ptr + Load<idx_t>(ptr: col_ptr);
109 } else {
110 data_locations[i] = Load<data_ptr_t>(ptr: col_ptr);
111 }
112 }
113
114 // Deserialise into the selected locations
115 RowOperations::HeapGather(v&: col, vcount: count, sel: col_sel, col_idx: col_no, key_locations: data_locations.get(), validitymask_locations: mask_locations.get());
116}
117
118void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
119 const idx_t count, const RowLayout &layout, const idx_t col_no, const idx_t build_size,
120 data_ptr_t heap_ptr) {
121 D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR);
122 D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!"
123
124 col.SetVectorType(VectorType::FLAT_VECTOR);
125 switch (col.GetType().InternalType()) {
126 case PhysicalType::UINT8:
127 TemplatedGatherLoop<uint8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
128 break;
129 case PhysicalType::UINT16:
130 TemplatedGatherLoop<uint16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
131 break;
132 case PhysicalType::UINT32:
133 TemplatedGatherLoop<uint32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
134 break;
135 case PhysicalType::UINT64:
136 TemplatedGatherLoop<uint64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
137 break;
138 case PhysicalType::BOOL:
139 case PhysicalType::INT8:
140 TemplatedGatherLoop<int8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
141 break;
142 case PhysicalType::INT16:
143 TemplatedGatherLoop<int16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
144 break;
145 case PhysicalType::INT32:
146 TemplatedGatherLoop<int32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
147 break;
148 case PhysicalType::INT64:
149 TemplatedGatherLoop<int64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
150 break;
151 case PhysicalType::INT128:
152 TemplatedGatherLoop<hugeint_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
153 break;
154 case PhysicalType::FLOAT:
155 TemplatedGatherLoop<float>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
156 break;
157 case PhysicalType::DOUBLE:
158 TemplatedGatherLoop<double>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
159 break;
160 case PhysicalType::INTERVAL:
161 TemplatedGatherLoop<interval_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
162 break;
163 case PhysicalType::VARCHAR:
164 GatherVarchar(rows, row_sel, col, col_sel, count, layout, col_no, build_size, base_heap_ptr: heap_ptr);
165 break;
166 case PhysicalType::LIST:
167 case PhysicalType::STRUCT:
168 GatherNestedVector(rows, row_sel, col, col_sel, count, layout, col_no, base_heap_ptr: heap_ptr);
169 break;
170 default:
171 throw InternalException("Unimplemented type for RowOperations::Gather");
172 }
173}
174
175template <class T>
176static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no) {
177 // Precompute mask indexes
178 idx_t entry_idx;
179 idx_t idx_in_entry;
180 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
181
182 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
183 auto data = FlatVector::GetData<T>(col);
184 // auto &col_mask = FlatVector::Validity(col);
185
186 for (idx_t i = 0; i < count; i++) {
187 auto row = ptrs[i];
188 data[i] = Load<T>(row + col_offset);
189 ValidityBytes row_mask(row);
190 if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
191 throw InternalException("Null value comparisons not implemented for perfect hash table yet");
192 // col_mask.SetInvalid(i);
193 }
194 }
195}
196
197void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count,
198 idx_t col_no) {
199 const auto col_offset = layout.GetOffsets()[col_no];
200 col.SetVectorType(VectorType::FLAT_VECTOR);
201 switch (col.GetType().InternalType()) {
202 case PhysicalType::UINT8:
203 TemplatedFullScanLoop<uint8_t>(rows, col, count, col_offset, col_no);
204 break;
205 case PhysicalType::UINT16:
206 TemplatedFullScanLoop<uint16_t>(rows, col, count, col_offset, col_no);
207 break;
208 case PhysicalType::UINT32:
209 TemplatedFullScanLoop<uint32_t>(rows, col, count, col_offset, col_no);
210 break;
211 case PhysicalType::UINT64:
212 TemplatedFullScanLoop<uint64_t>(rows, col, count, col_offset, col_no);
213 break;
214 case PhysicalType::INT8:
215 TemplatedFullScanLoop<int8_t>(rows, col, count, col_offset, col_no);
216 break;
217 case PhysicalType::INT16:
218 TemplatedFullScanLoop<int16_t>(rows, col, count, col_offset, col_no);
219 break;
220 case PhysicalType::INT32:
221 TemplatedFullScanLoop<int32_t>(rows, col, count, col_offset, col_no);
222 break;
223 case PhysicalType::INT64:
224 TemplatedFullScanLoop<int64_t>(rows, col, count, col_offset, col_no);
225 break;
226 default:
227 throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn");
228 }
229}
230
231} // namespace duckdb
232