//===--------------------------------------------------------------------===//
// copy.cpp
// Description: This file contains the implementation of the different copy
// functions
//===--------------------------------------------------------------------===//
6
7#include "duckdb/common/exception.hpp"
8#include "duckdb/common/types/null_value.hpp"
9#include "duckdb/common/types/chunk_collection.hpp"
10#include "duckdb/storage/segment/uncompressed.hpp"
11#include "duckdb/common/vector_operations/vector_operations.hpp"
12
13namespace duckdb {
14
15template <class T>
16static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vector &target, idx_t source_offset,
17 idx_t target_offset, idx_t copy_count) {
18 auto ldata = FlatVector::GetData<T>(source);
19 auto tdata = FlatVector::GetData<T>(target);
20 for (idx_t i = 0; i < copy_count; i++) {
21 auto source_idx = sel.get_index(idx: source_offset + i);
22 tdata[target_offset + i] = ldata[source_idx];
23 }
24}
25
26static const ValidityMask &CopyValidityMask(const Vector &v) {
27 switch (v.GetVectorType()) {
28 case VectorType::FLAT_VECTOR:
29 return FlatVector::Validity(vector: v);
30 case VectorType::FSST_VECTOR:
31 return FSSTVector::Validity(vector: v);
32 default:
33 throw InternalException("Unsupported vector type in vector copy");
34 }
35}
36
37void VectorOperations::Copy(const Vector &source_p, Vector &target, const SelectionVector &sel_p, idx_t source_count,
38 idx_t source_offset, idx_t target_offset) {
39 D_ASSERT(source_offset <= source_count);
40 D_ASSERT(source_p.GetType() == target.GetType());
41 idx_t copy_count = source_count - source_offset;
42
43 SelectionVector owned_sel;
44 const SelectionVector *sel = &sel_p;
45
46 const Vector *source = &source_p;
47 bool finished = false;
48 while (!finished) {
49 switch (source->GetVectorType()) {
50 case VectorType::DICTIONARY_VECTOR: {
51 // dictionary vector: merge selection vectors
52 auto &child = DictionaryVector::Child(vector: *source);
53 auto &dict_sel = DictionaryVector::SelVector(vector: *source);
54 // merge the selection vectors and verify the child
55 auto new_buffer = dict_sel.Slice(sel: *sel, count: source_count);
56 owned_sel.Initialize(data: new_buffer);
57 sel = &owned_sel;
58 source = &child;
59 break;
60 }
61 case VectorType::SEQUENCE_VECTOR: {
62 int64_t start, increment;
63 Vector seq(source->GetType());
64 SequenceVector::GetSequence(vector: *source, start, increment);
65 VectorOperations::GenerateSequence(result&: seq, count: source_count, sel: *sel, start, increment);
66 VectorOperations::Copy(source_p: seq, target, sel_p: *sel, source_count, source_offset, target_offset);
67 return;
68 }
69 case VectorType::CONSTANT_VECTOR:
70 sel = ConstantVector::ZeroSelectionVector(count: copy_count, owned_sel);
71 finished = true;
72 break;
73 case VectorType::FSST_VECTOR:
74 finished = true;
75 break;
76 case VectorType::FLAT_VECTOR:
77 finished = true;
78 break;
79 default:
80 throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy");
81 }
82 }
83
84 if (copy_count == 0) {
85 return;
86 }
87
88 // Allow copying of a single value to constant vectors
89 const auto target_vector_type = target.GetVectorType();
90 if (copy_count == 1 && target_vector_type == VectorType::CONSTANT_VECTOR) {
91 target_offset = 0;
92 target.SetVectorType(VectorType::FLAT_VECTOR);
93 }
94 D_ASSERT(target.GetVectorType() == VectorType::FLAT_VECTOR);
95
96 // first copy the nullmask
97 auto &tmask = FlatVector::Validity(vector&: target);
98 if (source->GetVectorType() == VectorType::CONSTANT_VECTOR) {
99 const bool valid = !ConstantVector::IsNull(vector: *source);
100 for (idx_t i = 0; i < copy_count; i++) {
101 tmask.Set(row_idx: target_offset + i, valid);
102 }
103 } else {
104 auto &smask = CopyValidityMask(v: *source);
105 if (smask.IsMaskSet()) {
106 for (idx_t i = 0; i < copy_count; i++) {
107 auto idx = sel->get_index(idx: source_offset + i);
108
109 if (smask.RowIsValid(row_idx: idx)) {
110 // set valid
111 if (!tmask.AllValid()) {
112 tmask.SetValidUnsafe(target_offset + i);
113 }
114 } else {
115 // set invalid
116 if (tmask.AllValid()) {
117 auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, b: target_offset + copy_count);
118 tmask.Initialize(count: init_size);
119 }
120 tmask.SetInvalidUnsafe(target_offset + i);
121 }
122 }
123 }
124 }
125
126 D_ASSERT(sel);
127
128 // For FSST Vectors we decompress instead of copying.
129 if (source->GetVectorType() == VectorType::FSST_VECTOR) {
130 FSSTVector::DecompressVector(src: *source, dst&: target, src_offset: source_offset, dst_offset: target_offset, copy_count, sel);
131 return;
132 }
133
134 // now copy over the data
135 switch (source->GetType().InternalType()) {
136 case PhysicalType::BOOL:
137 case PhysicalType::INT8:
138 TemplatedCopy<int8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
139 break;
140 case PhysicalType::INT16:
141 TemplatedCopy<int16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
142 break;
143 case PhysicalType::INT32:
144 TemplatedCopy<int32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
145 break;
146 case PhysicalType::INT64:
147 TemplatedCopy<int64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
148 break;
149 case PhysicalType::UINT8:
150 TemplatedCopy<uint8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
151 break;
152 case PhysicalType::UINT16:
153 TemplatedCopy<uint16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
154 break;
155 case PhysicalType::UINT32:
156 TemplatedCopy<uint32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
157 break;
158 case PhysicalType::UINT64:
159 TemplatedCopy<uint64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
160 break;
161 case PhysicalType::INT128:
162 TemplatedCopy<hugeint_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
163 break;
164 case PhysicalType::FLOAT:
165 TemplatedCopy<float>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
166 break;
167 case PhysicalType::DOUBLE:
168 TemplatedCopy<double>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
169 break;
170 case PhysicalType::INTERVAL:
171 TemplatedCopy<interval_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count);
172 break;
173 case PhysicalType::VARCHAR: {
174 auto ldata = FlatVector::GetData<string_t>(vector: *source);
175 auto tdata = FlatVector::GetData<string_t>(vector&: target);
176 for (idx_t i = 0; i < copy_count; i++) {
177 auto source_idx = sel->get_index(idx: source_offset + i);
178 auto target_idx = target_offset + i;
179 if (tmask.RowIsValid(row_idx: target_idx)) {
180 tdata[target_idx] = StringVector::AddStringOrBlob(vector&: target, data: ldata[source_idx]);
181 }
182 }
183 break;
184 }
185 case PhysicalType::STRUCT: {
186 auto &source_children = StructVector::GetEntries(vector: *source);
187 auto &target_children = StructVector::GetEntries(vector&: target);
188 D_ASSERT(source_children.size() == target_children.size());
189 for (idx_t i = 0; i < source_children.size(); i++) {
190 VectorOperations::Copy(source_p: *source_children[i], target&: *target_children[i], sel_p, source_count, source_offset,
191 target_offset);
192 }
193 break;
194 }
195 case PhysicalType::LIST: {
196 D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST);
197
198 auto &source_child = ListVector::GetEntry(vector: *source);
199 auto sdata = FlatVector::GetData<list_entry_t>(vector: *source);
200 auto tdata = FlatVector::GetData<list_entry_t>(vector&: target);
201
202 if (target_vector_type == VectorType::CONSTANT_VECTOR) {
203 // If we are only writing one value, then the copied values (if any) are contiguous
204 // and we can just Append from the offset position
205 if (!tmask.RowIsValid(row_idx: target_offset)) {
206 break;
207 }
208 auto source_idx = sel->get_index(idx: source_offset);
209 auto &source_entry = sdata[source_idx];
210 const idx_t source_child_size = source_entry.length + source_entry.offset;
211
212 //! overwrite constant target vectors.
213 ListVector::SetListSize(vec&: target, size: 0);
214 ListVector::Append(target, source: source_child, source_size: source_child_size, source_offset: source_entry.offset);
215
216 auto &target_entry = tdata[target_offset];
217 target_entry.length = source_entry.length;
218 target_entry.offset = 0;
219 } else {
220 //! if the source has list offsets, we need to append them to the target
221 //! build a selection vector for the copied child elements
222 vector<sel_t> child_rows;
223 for (idx_t i = 0; i < copy_count; ++i) {
224 if (tmask.RowIsValid(row_idx: target_offset + i)) {
225 auto source_idx = sel->get_index(idx: source_offset + i);
226 auto &source_entry = sdata[source_idx];
227 for (idx_t j = 0; j < source_entry.length; ++j) {
228 child_rows.emplace_back(args: source_entry.offset + j);
229 }
230 }
231 }
232 idx_t source_child_size = child_rows.size();
233 SelectionVector child_sel(child_rows.data());
234
235 idx_t old_target_child_len = ListVector::GetListSize(vector: target);
236
237 //! append to list itself
238 ListVector::Append(target, source: source_child, sel: child_sel, source_size: source_child_size);
239
240 //! now write the list offsets
241 for (idx_t i = 0; i < copy_count; i++) {
242 auto source_idx = sel->get_index(idx: source_offset + i);
243 auto &source_entry = sdata[source_idx];
244 auto &target_entry = tdata[target_offset + i];
245
246 target_entry.length = source_entry.length;
247 target_entry.offset = old_target_child_len;
248 if (tmask.RowIsValid(row_idx: target_offset + i)) {
249 old_target_child_len += target_entry.length;
250 }
251 }
252 }
253 break;
254 }
255 default:
256 throw NotImplementedException("Unimplemented type '%s' for copy!",
257 TypeIdToString(type: source->GetType().InternalType()));
258 }
259
260 if (target_vector_type != VectorType::FLAT_VECTOR) {
261 target.SetVectorType(target_vector_type);
262 }
263}
264
265void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset,
266 idx_t target_offset) {
267 VectorOperations::Copy(source_p: source, target, sel_p: *FlatVector::IncrementalSelectionVector(), source_count, source_offset,
268 target_offset);
269}
270
271} // namespace duckdb
272