1 | //===--------------------------------------------------------------------===// |
2 | // row_gather.cpp |
3 | // Description: This file contains the implementation of the gather operators |
4 | //===--------------------------------------------------------------------===// |
5 | |
6 | #include "duckdb/common/exception.hpp" |
7 | #include "duckdb/common/operator/constant_operators.hpp" |
8 | #include "duckdb/common/row_operations/row_operations.hpp" |
9 | #include "duckdb/common/types/row/row_data_collection.hpp" |
10 | #include "duckdb/common/types/row/row_layout.hpp" |
11 | #include "duckdb/common/types/row/tuple_data_layout.hpp" |
12 | |
13 | namespace duckdb { |
14 | |
15 | using ValidityBytes = RowLayout::ValidityBytes; |
16 | |
17 | template <class T> |
18 | static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Vector &col, |
19 | const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no, |
20 | idx_t build_size) { |
21 | // Precompute mask indexes |
22 | const auto &offsets = layout.GetOffsets(); |
23 | const auto col_offset = offsets[col_no]; |
24 | idx_t entry_idx; |
25 | idx_t idx_in_entry; |
26 | ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry); |
27 | |
28 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
29 | auto data = FlatVector::GetData<T>(col); |
30 | auto &col_mask = FlatVector::Validity(vector&: col); |
31 | |
32 | for (idx_t i = 0; i < count; i++) { |
33 | auto row_idx = row_sel.get_index(idx: i); |
34 | auto row = ptrs[row_idx]; |
35 | auto col_idx = col_sel.get_index(idx: i); |
36 | data[col_idx] = Load<T>(row + col_offset); |
37 | ValidityBytes row_mask(row); |
38 | if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { |
39 | if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) { |
40 | //! We need to initialize the mask with the vector size. |
41 | col_mask.Initialize(count: build_size); |
42 | } |
43 | col_mask.SetInvalid(col_idx); |
44 | } |
45 | } |
46 | } |
47 | |
48 | static void GatherVarchar(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, |
49 | idx_t count, const RowLayout &layout, idx_t col_no, idx_t build_size, |
50 | data_ptr_t base_heap_ptr) { |
51 | // Precompute mask indexes |
52 | const auto &offsets = layout.GetOffsets(); |
53 | const auto col_offset = offsets[col_no]; |
54 | const auto heap_offset = layout.GetHeapOffset(); |
55 | idx_t entry_idx; |
56 | idx_t idx_in_entry; |
57 | ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry); |
58 | |
59 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
60 | auto data = FlatVector::GetData<string_t>(vector&: col); |
61 | auto &col_mask = FlatVector::Validity(vector&: col); |
62 | |
63 | for (idx_t i = 0; i < count; i++) { |
64 | auto row_idx = row_sel.get_index(idx: i); |
65 | auto row = ptrs[row_idx]; |
66 | auto col_idx = col_sel.get_index(idx: i); |
67 | auto col_ptr = row + col_offset; |
68 | data[col_idx] = Load<string_t>(ptr: col_ptr); |
69 | ValidityBytes row_mask(row); |
70 | if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { |
71 | if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) { |
72 | //! We need to initialize the mask with the vector size. |
73 | col_mask.Initialize(count: build_size); |
74 | } |
75 | col_mask.SetInvalid(col_idx); |
76 | } else if (base_heap_ptr && Load<uint32_t>(ptr: col_ptr) > string_t::INLINE_LENGTH) { |
77 | // Not inline, so unswizzle the copied pointer the pointer |
78 | auto heap_ptr_ptr = row + heap_offset; |
79 | auto heap_row_ptr = base_heap_ptr + Load<idx_t>(ptr: heap_ptr_ptr); |
80 | auto string_ptr = data_ptr_t(data + col_idx) + string_t::HEADER_SIZE; |
81 | Store<data_ptr_t>(val: heap_row_ptr + Load<idx_t>(ptr: string_ptr), ptr: string_ptr); |
82 | #ifdef DEBUG |
83 | data[col_idx].Verify(); |
84 | #endif |
85 | } |
86 | } |
87 | } |
88 | |
89 | static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vector &col, |
90 | const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no, |
91 | data_ptr_t base_heap_ptr) { |
92 | const auto &offsets = layout.GetOffsets(); |
93 | const auto col_offset = offsets[col_no]; |
94 | const auto heap_offset = layout.GetHeapOffset(); |
95 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
96 | |
97 | // Build the gather locations |
98 | auto data_locations = make_unsafe_uniq_array<data_ptr_t>(n: count); |
99 | auto mask_locations = make_unsafe_uniq_array<data_ptr_t>(n: count); |
100 | for (idx_t i = 0; i < count; i++) { |
101 | auto row_idx = row_sel.get_index(idx: i); |
102 | auto row = ptrs[row_idx]; |
103 | mask_locations[i] = row; |
104 | auto col_ptr = ptrs[row_idx] + col_offset; |
105 | if (base_heap_ptr) { |
106 | auto heap_ptr_ptr = row + heap_offset; |
107 | auto heap_row_ptr = base_heap_ptr + Load<idx_t>(ptr: heap_ptr_ptr); |
108 | data_locations[i] = heap_row_ptr + Load<idx_t>(ptr: col_ptr); |
109 | } else { |
110 | data_locations[i] = Load<data_ptr_t>(ptr: col_ptr); |
111 | } |
112 | } |
113 | |
114 | // Deserialise into the selected locations |
115 | RowOperations::HeapGather(v&: col, vcount: count, sel: col_sel, col_idx: col_no, key_locations: data_locations.get(), validitymask_locations: mask_locations.get()); |
116 | } |
117 | |
118 | void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, |
119 | const idx_t count, const RowLayout &layout, const idx_t col_no, const idx_t build_size, |
120 | data_ptr_t heap_ptr) { |
121 | D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR); |
122 | D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!" |
123 | |
124 | col.SetVectorType(VectorType::FLAT_VECTOR); |
125 | switch (col.GetType().InternalType()) { |
126 | case PhysicalType::UINT8: |
127 | TemplatedGatherLoop<uint8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
128 | break; |
129 | case PhysicalType::UINT16: |
130 | TemplatedGatherLoop<uint16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
131 | break; |
132 | case PhysicalType::UINT32: |
133 | TemplatedGatherLoop<uint32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
134 | break; |
135 | case PhysicalType::UINT64: |
136 | TemplatedGatherLoop<uint64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
137 | break; |
138 | case PhysicalType::BOOL: |
139 | case PhysicalType::INT8: |
140 | TemplatedGatherLoop<int8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
141 | break; |
142 | case PhysicalType::INT16: |
143 | TemplatedGatherLoop<int16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
144 | break; |
145 | case PhysicalType::INT32: |
146 | TemplatedGatherLoop<int32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
147 | break; |
148 | case PhysicalType::INT64: |
149 | TemplatedGatherLoop<int64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
150 | break; |
151 | case PhysicalType::INT128: |
152 | TemplatedGatherLoop<hugeint_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
153 | break; |
154 | case PhysicalType::FLOAT: |
155 | TemplatedGatherLoop<float>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
156 | break; |
157 | case PhysicalType::DOUBLE: |
158 | TemplatedGatherLoop<double>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
159 | break; |
160 | case PhysicalType::INTERVAL: |
161 | TemplatedGatherLoop<interval_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size); |
162 | break; |
163 | case PhysicalType::VARCHAR: |
164 | GatherVarchar(rows, row_sel, col, col_sel, count, layout, col_no, build_size, base_heap_ptr: heap_ptr); |
165 | break; |
166 | case PhysicalType::LIST: |
167 | case PhysicalType::STRUCT: |
168 | GatherNestedVector(rows, row_sel, col, col_sel, count, layout, col_no, base_heap_ptr: heap_ptr); |
169 | break; |
170 | default: |
171 | throw InternalException("Unimplemented type for RowOperations::Gather" ); |
172 | } |
173 | } |
174 | |
175 | template <class T> |
176 | static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no) { |
177 | // Precompute mask indexes |
178 | idx_t entry_idx; |
179 | idx_t idx_in_entry; |
180 | ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry); |
181 | |
182 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
183 | auto data = FlatVector::GetData<T>(col); |
184 | // auto &col_mask = FlatVector::Validity(col); |
185 | |
186 | for (idx_t i = 0; i < count; i++) { |
187 | auto row = ptrs[i]; |
188 | data[i] = Load<T>(row + col_offset); |
189 | ValidityBytes row_mask(row); |
190 | if (!row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { |
191 | throw InternalException("Null value comparisons not implemented for perfect hash table yet" ); |
192 | // col_mask.SetInvalid(i); |
193 | } |
194 | } |
195 | } |
196 | |
197 | void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count, |
198 | idx_t col_no) { |
199 | const auto col_offset = layout.GetOffsets()[col_no]; |
200 | col.SetVectorType(VectorType::FLAT_VECTOR); |
201 | switch (col.GetType().InternalType()) { |
202 | case PhysicalType::UINT8: |
203 | TemplatedFullScanLoop<uint8_t>(rows, col, count, col_offset, col_no); |
204 | break; |
205 | case PhysicalType::UINT16: |
206 | TemplatedFullScanLoop<uint16_t>(rows, col, count, col_offset, col_no); |
207 | break; |
208 | case PhysicalType::UINT32: |
209 | TemplatedFullScanLoop<uint32_t>(rows, col, count, col_offset, col_no); |
210 | break; |
211 | case PhysicalType::UINT64: |
212 | TemplatedFullScanLoop<uint64_t>(rows, col, count, col_offset, col_no); |
213 | break; |
214 | case PhysicalType::INT8: |
215 | TemplatedFullScanLoop<int8_t>(rows, col, count, col_offset, col_no); |
216 | break; |
217 | case PhysicalType::INT16: |
218 | TemplatedFullScanLoop<int16_t>(rows, col, count, col_offset, col_no); |
219 | break; |
220 | case PhysicalType::INT32: |
221 | TemplatedFullScanLoop<int32_t>(rows, col, count, col_offset, col_no); |
222 | break; |
223 | case PhysicalType::INT64: |
224 | TemplatedFullScanLoop<int64_t>(rows, col, count, col_offset, col_no); |
225 | break; |
226 | default: |
227 | throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn" ); |
228 | } |
229 | } |
230 | |
231 | } // namespace duckdb |
232 | |