//===--------------------------------------------------------------------===//
// row_gather.cpp
// Description: This file contains the implementation of the gather operators
//===--------------------------------------------------------------------===//

#include "duckdb/common/exception.hpp"
#include "duckdb/common/operator/constant_operators.hpp"
#include "duckdb/common/row_operations/row_operations.hpp"
#include "duckdb/common/types/row/row_data_collection.hpp"
#include "duckdb/common/types/row/row_layout.hpp"
#include "duckdb/common/types/row/tuple_data_layout.hpp"

namespace duckdb {

using ValidityBytes = RowLayout::ValidityBytes;

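// Gathers a fixed-size column of type T from the given row pointers into a flat result vector.
// Rows are read through row_sel, values are written to the positions given by col_sel, and the
// per-row validity bits stored at the start of each row are transferred to the result's validity mask.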
template <class T>
static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Vector &col,
                                const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no,
                                idx_t build_size) {
	// Precompute mask indexes
	const auto &offsets = layout.GetOffsets();
	const auto col_offset = offsets[col_no];
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);

	auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
	auto data = FlatVector::GetData<T>(col);
	auto &col_mask = FlatVector::Validity(col);

	for (idx_t i = 0; i < count; i++) {
		auto row_idx = row_sel.get_index(i);
		auto row = ptrs[row_idx];
		auto col_idx = col_sel.get_index(i);
		data[col_idx] = Load<T>(row + col_offset);
		ValidityBytes row_mask(row);
		if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) {
				//! We need to initialize the mask with the build size, not the standard vector size.
				col_mask.Initialize(build_size);
			}
			col_mask.SetInvalid(col_idx);
		}
	}
}

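// Gathers a VARCHAR column from the given row pointers. In addition to the fixed-size gather logic,
// strings that are too long to be inlined are unswizzled: when base_heap_ptr is set, the stored
// heap offset is converted back into an absolute pointer into the row heap.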
static void GatherVarchar(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
                          idx_t count, const RowLayout &layout, idx_t col_no, idx_t build_size,
                          data_ptr_t base_heap_ptr) {
	// Precompute mask indexes
	const auto &offsets = layout.GetOffsets();
	const auto col_offset = offsets[col_no];
	const auto heap_offset = layout.GetHeapOffset();
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);

	auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
	auto data = FlatVector::GetData<string_t>(col);
	auto &col_mask = FlatVector::Validity(col);

	for (idx_t i = 0; i < count; i++) {
		auto row_idx = row_sel.get_index(i);
		auto row = ptrs[row_idx];
		auto col_idx = col_sel.get_index(i);
		auto col_ptr = row + col_offset;
		data[col_idx] = Load<string_t>(col_ptr);
		ValidityBytes row_mask(row);
		if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) {
				//! We need to initialize the mask with the build size, not the standard vector size.
				col_mask.Initialize(build_size);
			}
			col_mask.SetInvalid(col_idx);
		} else if (base_heap_ptr && Load<uint32_t>(col_ptr) > string_t::INLINE_LENGTH) {
			// Not inline, so unswizzle the copied pointer
			auto heap_ptr_ptr = row + heap_offset;
			auto heap_row_ptr = base_heap_ptr + Load<idx_t>(heap_ptr_ptr);
			auto string_ptr = data_ptr_t(data + col_idx) + string_t::HEADER_SIZE;
			Store<data_ptr_t>(heap_row_ptr + Load<idx_t>(string_ptr), string_ptr);
#ifdef DEBUG
			data[col_idx].Verify();
#endif
		}
	}
}

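// Gathers a nested (LIST/STRUCT) column. The row format only stores a pointer (or, when the heap
// has been swizzled, an offset) to the serialized value, so after resolving the data locations the
// actual deserialization is delegated to RowOperations::HeapGather.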
static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vector &col,
                               const SelectionVector &col_sel, idx_t count, const RowLayout &layout, idx_t col_no,
                               data_ptr_t base_heap_ptr) {
	const auto &offsets = layout.GetOffsets();
	const auto col_offset = offsets[col_no];
	const auto heap_offset = layout.GetHeapOffset();
	auto ptrs = FlatVector::GetData<data_ptr_t>(rows);

	// Build the gather locations
	auto data_locations = make_unsafe_uniq_array<data_ptr_t>(count);
	auto mask_locations = make_unsafe_uniq_array<data_ptr_t>(count);
	for (idx_t i = 0; i < count; i++) {
		auto row_idx = row_sel.get_index(i);
		auto row = ptrs[row_idx];
		mask_locations[i] = row;
		auto col_ptr = ptrs[row_idx] + col_offset;
		if (base_heap_ptr) {
			auto heap_ptr_ptr = row + heap_offset;
			auto heap_row_ptr = base_heap_ptr + Load<idx_t>(heap_ptr_ptr);
			data_locations[i] = heap_row_ptr + Load<idx_t>(col_ptr);
		} else {
			data_locations[i] = Load<data_ptr_t>(col_ptr);
		}
	}

	// Deserialise into the selected locations
	RowOperations::HeapGather(col, count, col_sel, col_no, data_locations.get(), mask_locations.get());
}

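// Entry point for gathering a single column from a set of row pointers into a flat output vector.
// Dispatches on the physical type of the column: fixed-size types use the templated loop, while
// strings and nested types take the specialized paths above.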
void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
                           const idx_t count, const RowLayout &layout, const idx_t col_no, const idx_t build_size,
                           data_ptr_t heap_ptr) {
	D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR);
	D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!"

	col.SetVectorType(VectorType::FLAT_VECTOR);
	switch (col.GetType().InternalType()) {
	case PhysicalType::UINT8:
		TemplatedGatherLoop<uint8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::UINT16:
		TemplatedGatherLoop<uint16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::UINT32:
		TemplatedGatherLoop<uint32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::UINT64:
		TemplatedGatherLoop<uint64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::BOOL:
	case PhysicalType::INT8:
		TemplatedGatherLoop<int8_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::INT16:
		TemplatedGatherLoop<int16_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::INT32:
		TemplatedGatherLoop<int32_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::INT64:
		TemplatedGatherLoop<int64_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::INT128:
		TemplatedGatherLoop<hugeint_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::FLOAT:
		TemplatedGatherLoop<float>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::DOUBLE:
		TemplatedGatherLoop<double>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::INTERVAL:
		TemplatedGatherLoop<interval_t>(rows, row_sel, col, col_sel, count, layout, col_no, build_size);
		break;
	case PhysicalType::VARCHAR:
		GatherVarchar(rows, row_sel, col, col_sel, count, layout, col_no, build_size, heap_ptr);
		break;
	case PhysicalType::LIST:
	case PhysicalType::STRUCT:
		GatherNestedVector(rows, row_sel, col, col_sel, count, layout, col_no, heap_ptr);
		break;
	default:
		throw InternalException("Unimplemented type for RowOperations::Gather");
	}
}

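// Scans an entire column of fixed-size type T from the row pointers in order, without a selection
// vector. NULL values are not supported on this path and raise an InternalException.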
template <class T>
static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no) {
	// Precompute mask indexes
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry);

	auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
	auto data = FlatVector::GetData<T>(col);
	// auto &col_mask = FlatVector::Validity(col);

	for (idx_t i = 0; i < count; i++) {
		auto row = ptrs[i];
		data[i] = Load<T>(row + col_offset);
		ValidityBytes row_mask(row);
		if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			throw InternalException("Null value comparisons not implemented for perfect hash table yet");
			// col_mask.SetInvalid(i);
		}
	}
}

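// Full scan of a single column: reads col_no from every row in order, with no selection vector.
// Only fixed-size integer types are handled here (per the exception in the loop above, this path
// appears to serve the perfect hash table and does not yet support NULLs).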
void RowOperations::FullScanColumn(const TupleDataLayout &layout, Vector &rows, Vector &col, idx_t count,
                                   idx_t col_no) {
	const auto col_offset = layout.GetOffsets()[col_no];
	col.SetVectorType(VectorType::FLAT_VECTOR);
	switch (col.GetType().InternalType()) {
	case PhysicalType::UINT8:
		TemplatedFullScanLoop<uint8_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::UINT16:
		TemplatedFullScanLoop<uint16_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::UINT32:
		TemplatedFullScanLoop<uint32_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::UINT64:
		TemplatedFullScanLoop<uint64_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::INT8:
		TemplatedFullScanLoop<int8_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::INT16:
		TemplatedFullScanLoop<int16_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::INT32:
		TemplatedFullScanLoop<int32_t>(rows, col, count, col_offset, col_no);
		break;
	case PhysicalType::INT64:
		TemplatedFullScanLoop<int64_t>(rows, col, count, col_offset, col_no);
		break;
	default:
		throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn");
	}
}

} // namespace duckdb