1 | //===--------------------------------------------------------------------===// |
2 | // copy.cpp |
3 | // Description: This file contains the implementation of the different copy |
4 | // functions |
5 | //===--------------------------------------------------------------------===// |
6 | |
7 | #include "duckdb/common/exception.hpp" |
8 | #include "duckdb/common/types/null_value.hpp" |
9 | #include "duckdb/common/types/chunk_collection.hpp" |
10 | #include "duckdb/storage/segment/uncompressed.hpp" |
11 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
12 | |
13 | namespace duckdb { |
14 | |
15 | template <class T> |
16 | static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vector &target, idx_t source_offset, |
17 | idx_t target_offset, idx_t copy_count) { |
18 | auto ldata = FlatVector::GetData<T>(source); |
19 | auto tdata = FlatVector::GetData<T>(target); |
20 | for (idx_t i = 0; i < copy_count; i++) { |
21 | auto source_idx = sel.get_index(idx: source_offset + i); |
22 | tdata[target_offset + i] = ldata[source_idx]; |
23 | } |
24 | } |
25 | |
26 | static const ValidityMask &CopyValidityMask(const Vector &v) { |
27 | switch (v.GetVectorType()) { |
28 | case VectorType::FLAT_VECTOR: |
29 | return FlatVector::Validity(vector: v); |
30 | case VectorType::FSST_VECTOR: |
31 | return FSSTVector::Validity(vector: v); |
32 | default: |
33 | throw InternalException("Unsupported vector type in vector copy" ); |
34 | } |
35 | } |
36 | |
37 | void VectorOperations::Copy(const Vector &source_p, Vector &target, const SelectionVector &sel_p, idx_t source_count, |
38 | idx_t source_offset, idx_t target_offset) { |
39 | D_ASSERT(source_offset <= source_count); |
40 | D_ASSERT(source_p.GetType() == target.GetType()); |
41 | idx_t copy_count = source_count - source_offset; |
42 | |
43 | SelectionVector owned_sel; |
44 | const SelectionVector *sel = &sel_p; |
45 | |
46 | const Vector *source = &source_p; |
47 | bool finished = false; |
48 | while (!finished) { |
49 | switch (source->GetVectorType()) { |
50 | case VectorType::DICTIONARY_VECTOR: { |
51 | // dictionary vector: merge selection vectors |
52 | auto &child = DictionaryVector::Child(vector: *source); |
53 | auto &dict_sel = DictionaryVector::SelVector(vector: *source); |
54 | // merge the selection vectors and verify the child |
55 | auto new_buffer = dict_sel.Slice(sel: *sel, count: source_count); |
56 | owned_sel.Initialize(data: new_buffer); |
57 | sel = &owned_sel; |
58 | source = &child; |
59 | break; |
60 | } |
61 | case VectorType::SEQUENCE_VECTOR: { |
62 | int64_t start, increment; |
63 | Vector seq(source->GetType()); |
64 | SequenceVector::GetSequence(vector: *source, start, increment); |
65 | VectorOperations::GenerateSequence(result&: seq, count: source_count, sel: *sel, start, increment); |
66 | VectorOperations::Copy(source_p: seq, target, sel_p: *sel, source_count, source_offset, target_offset); |
67 | return; |
68 | } |
69 | case VectorType::CONSTANT_VECTOR: |
70 | sel = ConstantVector::ZeroSelectionVector(count: copy_count, owned_sel); |
71 | finished = true; |
72 | break; |
73 | case VectorType::FSST_VECTOR: |
74 | finished = true; |
75 | break; |
76 | case VectorType::FLAT_VECTOR: |
77 | finished = true; |
78 | break; |
79 | default: |
80 | throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy" ); |
81 | } |
82 | } |
83 | |
84 | if (copy_count == 0) { |
85 | return; |
86 | } |
87 | |
88 | // Allow copying of a single value to constant vectors |
89 | const auto target_vector_type = target.GetVectorType(); |
90 | if (copy_count == 1 && target_vector_type == VectorType::CONSTANT_VECTOR) { |
91 | target_offset = 0; |
92 | target.SetVectorType(VectorType::FLAT_VECTOR); |
93 | } |
94 | D_ASSERT(target.GetVectorType() == VectorType::FLAT_VECTOR); |
95 | |
96 | // first copy the nullmask |
97 | auto &tmask = FlatVector::Validity(vector&: target); |
98 | if (source->GetVectorType() == VectorType::CONSTANT_VECTOR) { |
99 | const bool valid = !ConstantVector::IsNull(vector: *source); |
100 | for (idx_t i = 0; i < copy_count; i++) { |
101 | tmask.Set(row_idx: target_offset + i, valid); |
102 | } |
103 | } else { |
104 | auto &smask = CopyValidityMask(v: *source); |
105 | if (smask.IsMaskSet()) { |
106 | for (idx_t i = 0; i < copy_count; i++) { |
107 | auto idx = sel->get_index(idx: source_offset + i); |
108 | |
109 | if (smask.RowIsValid(row_idx: idx)) { |
110 | // set valid |
111 | if (!tmask.AllValid()) { |
112 | tmask.SetValidUnsafe(target_offset + i); |
113 | } |
114 | } else { |
115 | // set invalid |
116 | if (tmask.AllValid()) { |
117 | auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, b: target_offset + copy_count); |
118 | tmask.Initialize(count: init_size); |
119 | } |
120 | tmask.SetInvalidUnsafe(target_offset + i); |
121 | } |
122 | } |
123 | } |
124 | } |
125 | |
126 | D_ASSERT(sel); |
127 | |
128 | // For FSST Vectors we decompress instead of copying. |
129 | if (source->GetVectorType() == VectorType::FSST_VECTOR) { |
130 | FSSTVector::DecompressVector(src: *source, dst&: target, src_offset: source_offset, dst_offset: target_offset, copy_count, sel); |
131 | return; |
132 | } |
133 | |
134 | // now copy over the data |
135 | switch (source->GetType().InternalType()) { |
136 | case PhysicalType::BOOL: |
137 | case PhysicalType::INT8: |
138 | TemplatedCopy<int8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
139 | break; |
140 | case PhysicalType::INT16: |
141 | TemplatedCopy<int16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
142 | break; |
143 | case PhysicalType::INT32: |
144 | TemplatedCopy<int32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
145 | break; |
146 | case PhysicalType::INT64: |
147 | TemplatedCopy<int64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
148 | break; |
149 | case PhysicalType::UINT8: |
150 | TemplatedCopy<uint8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
151 | break; |
152 | case PhysicalType::UINT16: |
153 | TemplatedCopy<uint16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
154 | break; |
155 | case PhysicalType::UINT32: |
156 | TemplatedCopy<uint32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
157 | break; |
158 | case PhysicalType::UINT64: |
159 | TemplatedCopy<uint64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
160 | break; |
161 | case PhysicalType::INT128: |
162 | TemplatedCopy<hugeint_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
163 | break; |
164 | case PhysicalType::FLOAT: |
165 | TemplatedCopy<float>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
166 | break; |
167 | case PhysicalType::DOUBLE: |
168 | TemplatedCopy<double>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
169 | break; |
170 | case PhysicalType::INTERVAL: |
171 | TemplatedCopy<interval_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
172 | break; |
173 | case PhysicalType::VARCHAR: { |
174 | auto ldata = FlatVector::GetData<string_t>(vector: *source); |
175 | auto tdata = FlatVector::GetData<string_t>(vector&: target); |
176 | for (idx_t i = 0; i < copy_count; i++) { |
177 | auto source_idx = sel->get_index(idx: source_offset + i); |
178 | auto target_idx = target_offset + i; |
179 | if (tmask.RowIsValid(row_idx: target_idx)) { |
180 | tdata[target_idx] = StringVector::AddStringOrBlob(vector&: target, data: ldata[source_idx]); |
181 | } |
182 | } |
183 | break; |
184 | } |
185 | case PhysicalType::STRUCT: { |
186 | auto &source_children = StructVector::GetEntries(vector: *source); |
187 | auto &target_children = StructVector::GetEntries(vector&: target); |
188 | D_ASSERT(source_children.size() == target_children.size()); |
189 | for (idx_t i = 0; i < source_children.size(); i++) { |
190 | VectorOperations::Copy(source_p: *source_children[i], target&: *target_children[i], sel_p, source_count, source_offset, |
191 | target_offset); |
192 | } |
193 | break; |
194 | } |
195 | case PhysicalType::LIST: { |
196 | D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST); |
197 | |
198 | auto &source_child = ListVector::GetEntry(vector: *source); |
199 | auto sdata = FlatVector::GetData<list_entry_t>(vector: *source); |
200 | auto tdata = FlatVector::GetData<list_entry_t>(vector&: target); |
201 | |
202 | if (target_vector_type == VectorType::CONSTANT_VECTOR) { |
203 | // If we are only writing one value, then the copied values (if any) are contiguous |
204 | // and we can just Append from the offset position |
205 | if (!tmask.RowIsValid(row_idx: target_offset)) { |
206 | break; |
207 | } |
208 | auto source_idx = sel->get_index(idx: source_offset); |
209 | auto &source_entry = sdata[source_idx]; |
210 | const idx_t source_child_size = source_entry.length + source_entry.offset; |
211 | |
212 | //! overwrite constant target vectors. |
213 | ListVector::SetListSize(vec&: target, size: 0); |
214 | ListVector::Append(target, source: source_child, source_size: source_child_size, source_offset: source_entry.offset); |
215 | |
216 | auto &target_entry = tdata[target_offset]; |
217 | target_entry.length = source_entry.length; |
218 | target_entry.offset = 0; |
219 | } else { |
220 | //! if the source has list offsets, we need to append them to the target |
221 | //! build a selection vector for the copied child elements |
222 | vector<sel_t> child_rows; |
223 | for (idx_t i = 0; i < copy_count; ++i) { |
224 | if (tmask.RowIsValid(row_idx: target_offset + i)) { |
225 | auto source_idx = sel->get_index(idx: source_offset + i); |
226 | auto &source_entry = sdata[source_idx]; |
227 | for (idx_t j = 0; j < source_entry.length; ++j) { |
228 | child_rows.emplace_back(args: source_entry.offset + j); |
229 | } |
230 | } |
231 | } |
232 | idx_t source_child_size = child_rows.size(); |
233 | SelectionVector child_sel(child_rows.data()); |
234 | |
235 | idx_t old_target_child_len = ListVector::GetListSize(vector: target); |
236 | |
237 | //! append to list itself |
238 | ListVector::Append(target, source: source_child, sel: child_sel, source_size: source_child_size); |
239 | |
240 | //! now write the list offsets |
241 | for (idx_t i = 0; i < copy_count; i++) { |
242 | auto source_idx = sel->get_index(idx: source_offset + i); |
243 | auto &source_entry = sdata[source_idx]; |
244 | auto &target_entry = tdata[target_offset + i]; |
245 | |
246 | target_entry.length = source_entry.length; |
247 | target_entry.offset = old_target_child_len; |
248 | if (tmask.RowIsValid(row_idx: target_offset + i)) { |
249 | old_target_child_len += target_entry.length; |
250 | } |
251 | } |
252 | } |
253 | break; |
254 | } |
255 | default: |
256 | throw NotImplementedException("Unimplemented type '%s' for copy!" , |
257 | TypeIdToString(type: source->GetType().InternalType())); |
258 | } |
259 | |
260 | if (target_vector_type != VectorType::FLAT_VECTOR) { |
261 | target.SetVectorType(target_vector_type); |
262 | } |
263 | } |
264 | |
265 | void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset, |
266 | idx_t target_offset) { |
267 | VectorOperations::Copy(source_p: source, target, sel_p: *FlatVector::IncrementalSelectionVector(), source_count, source_offset, |
268 | target_offset); |
269 | } |
270 | |
271 | } // namespace duckdb |
272 | |