| 1 | //===--------------------------------------------------------------------===// |
| 2 | // copy.cpp |
| 3 | // Description: This file contains the implementation of the different copy |
| 4 | // functions |
| 5 | //===--------------------------------------------------------------------===// |
| 6 | |
| 7 | #include "duckdb/common/exception.hpp" |
| 8 | #include "duckdb/common/types/null_value.hpp" |
| 9 | #include "duckdb/common/types/chunk_collection.hpp" |
| 10 | #include "duckdb/storage/segment/uncompressed.hpp" |
| 11 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
| 12 | |
| 13 | namespace duckdb { |
| 14 | |
| 15 | template <class T> |
| 16 | static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vector &target, idx_t source_offset, |
| 17 | idx_t target_offset, idx_t copy_count) { |
| 18 | auto ldata = FlatVector::GetData<T>(source); |
| 19 | auto tdata = FlatVector::GetData<T>(target); |
| 20 | for (idx_t i = 0; i < copy_count; i++) { |
| 21 | auto source_idx = sel.get_index(idx: source_offset + i); |
| 22 | tdata[target_offset + i] = ldata[source_idx]; |
| 23 | } |
| 24 | } |
| 25 | |
| 26 | static const ValidityMask &CopyValidityMask(const Vector &v) { |
| 27 | switch (v.GetVectorType()) { |
| 28 | case VectorType::FLAT_VECTOR: |
| 29 | return FlatVector::Validity(vector: v); |
| 30 | case VectorType::FSST_VECTOR: |
| 31 | return FSSTVector::Validity(vector: v); |
| 32 | default: |
| 33 | throw InternalException("Unsupported vector type in vector copy" ); |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | void VectorOperations::Copy(const Vector &source_p, Vector &target, const SelectionVector &sel_p, idx_t source_count, |
| 38 | idx_t source_offset, idx_t target_offset) { |
| 39 | D_ASSERT(source_offset <= source_count); |
| 40 | D_ASSERT(source_p.GetType() == target.GetType()); |
| 41 | idx_t copy_count = source_count - source_offset; |
| 42 | |
| 43 | SelectionVector owned_sel; |
| 44 | const SelectionVector *sel = &sel_p; |
| 45 | |
| 46 | const Vector *source = &source_p; |
| 47 | bool finished = false; |
| 48 | while (!finished) { |
| 49 | switch (source->GetVectorType()) { |
| 50 | case VectorType::DICTIONARY_VECTOR: { |
| 51 | // dictionary vector: merge selection vectors |
| 52 | auto &child = DictionaryVector::Child(vector: *source); |
| 53 | auto &dict_sel = DictionaryVector::SelVector(vector: *source); |
| 54 | // merge the selection vectors and verify the child |
| 55 | auto new_buffer = dict_sel.Slice(sel: *sel, count: source_count); |
| 56 | owned_sel.Initialize(data: new_buffer); |
| 57 | sel = &owned_sel; |
| 58 | source = &child; |
| 59 | break; |
| 60 | } |
| 61 | case VectorType::SEQUENCE_VECTOR: { |
| 62 | int64_t start, increment; |
| 63 | Vector seq(source->GetType()); |
| 64 | SequenceVector::GetSequence(vector: *source, start, increment); |
| 65 | VectorOperations::GenerateSequence(result&: seq, count: source_count, sel: *sel, start, increment); |
| 66 | VectorOperations::Copy(source_p: seq, target, sel_p: *sel, source_count, source_offset, target_offset); |
| 67 | return; |
| 68 | } |
| 69 | case VectorType::CONSTANT_VECTOR: |
| 70 | sel = ConstantVector::ZeroSelectionVector(count: copy_count, owned_sel); |
| 71 | finished = true; |
| 72 | break; |
| 73 | case VectorType::FSST_VECTOR: |
| 74 | finished = true; |
| 75 | break; |
| 76 | case VectorType::FLAT_VECTOR: |
| 77 | finished = true; |
| 78 | break; |
| 79 | default: |
| 80 | throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy" ); |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | if (copy_count == 0) { |
| 85 | return; |
| 86 | } |
| 87 | |
| 88 | // Allow copying of a single value to constant vectors |
| 89 | const auto target_vector_type = target.GetVectorType(); |
| 90 | if (copy_count == 1 && target_vector_type == VectorType::CONSTANT_VECTOR) { |
| 91 | target_offset = 0; |
| 92 | target.SetVectorType(VectorType::FLAT_VECTOR); |
| 93 | } |
| 94 | D_ASSERT(target.GetVectorType() == VectorType::FLAT_VECTOR); |
| 95 | |
| 96 | // first copy the nullmask |
| 97 | auto &tmask = FlatVector::Validity(vector&: target); |
| 98 | if (source->GetVectorType() == VectorType::CONSTANT_VECTOR) { |
| 99 | const bool valid = !ConstantVector::IsNull(vector: *source); |
| 100 | for (idx_t i = 0; i < copy_count; i++) { |
| 101 | tmask.Set(row_idx: target_offset + i, valid); |
| 102 | } |
| 103 | } else { |
| 104 | auto &smask = CopyValidityMask(v: *source); |
| 105 | if (smask.IsMaskSet()) { |
| 106 | for (idx_t i = 0; i < copy_count; i++) { |
| 107 | auto idx = sel->get_index(idx: source_offset + i); |
| 108 | |
| 109 | if (smask.RowIsValid(row_idx: idx)) { |
| 110 | // set valid |
| 111 | if (!tmask.AllValid()) { |
| 112 | tmask.SetValidUnsafe(target_offset + i); |
| 113 | } |
| 114 | } else { |
| 115 | // set invalid |
| 116 | if (tmask.AllValid()) { |
| 117 | auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, b: target_offset + copy_count); |
| 118 | tmask.Initialize(count: init_size); |
| 119 | } |
| 120 | tmask.SetInvalidUnsafe(target_offset + i); |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | D_ASSERT(sel); |
| 127 | |
| 128 | // For FSST Vectors we decompress instead of copying. |
| 129 | if (source->GetVectorType() == VectorType::FSST_VECTOR) { |
| 130 | FSSTVector::DecompressVector(src: *source, dst&: target, src_offset: source_offset, dst_offset: target_offset, copy_count, sel); |
| 131 | return; |
| 132 | } |
| 133 | |
| 134 | // now copy over the data |
| 135 | switch (source->GetType().InternalType()) { |
| 136 | case PhysicalType::BOOL: |
| 137 | case PhysicalType::INT8: |
| 138 | TemplatedCopy<int8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 139 | break; |
| 140 | case PhysicalType::INT16: |
| 141 | TemplatedCopy<int16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 142 | break; |
| 143 | case PhysicalType::INT32: |
| 144 | TemplatedCopy<int32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 145 | break; |
| 146 | case PhysicalType::INT64: |
| 147 | TemplatedCopy<int64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 148 | break; |
| 149 | case PhysicalType::UINT8: |
| 150 | TemplatedCopy<uint8_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 151 | break; |
| 152 | case PhysicalType::UINT16: |
| 153 | TemplatedCopy<uint16_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 154 | break; |
| 155 | case PhysicalType::UINT32: |
| 156 | TemplatedCopy<uint32_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 157 | break; |
| 158 | case PhysicalType::UINT64: |
| 159 | TemplatedCopy<uint64_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 160 | break; |
| 161 | case PhysicalType::INT128: |
| 162 | TemplatedCopy<hugeint_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 163 | break; |
| 164 | case PhysicalType::FLOAT: |
| 165 | TemplatedCopy<float>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 166 | break; |
| 167 | case PhysicalType::DOUBLE: |
| 168 | TemplatedCopy<double>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 169 | break; |
| 170 | case PhysicalType::INTERVAL: |
| 171 | TemplatedCopy<interval_t>(source: *source, sel: *sel, target, source_offset, target_offset, copy_count); |
| 172 | break; |
| 173 | case PhysicalType::VARCHAR: { |
| 174 | auto ldata = FlatVector::GetData<string_t>(vector: *source); |
| 175 | auto tdata = FlatVector::GetData<string_t>(vector&: target); |
| 176 | for (idx_t i = 0; i < copy_count; i++) { |
| 177 | auto source_idx = sel->get_index(idx: source_offset + i); |
| 178 | auto target_idx = target_offset + i; |
| 179 | if (tmask.RowIsValid(row_idx: target_idx)) { |
| 180 | tdata[target_idx] = StringVector::AddStringOrBlob(vector&: target, data: ldata[source_idx]); |
| 181 | } |
| 182 | } |
| 183 | break; |
| 184 | } |
| 185 | case PhysicalType::STRUCT: { |
| 186 | auto &source_children = StructVector::GetEntries(vector: *source); |
| 187 | auto &target_children = StructVector::GetEntries(vector&: target); |
| 188 | D_ASSERT(source_children.size() == target_children.size()); |
| 189 | for (idx_t i = 0; i < source_children.size(); i++) { |
| 190 | VectorOperations::Copy(source_p: *source_children[i], target&: *target_children[i], sel_p, source_count, source_offset, |
| 191 | target_offset); |
| 192 | } |
| 193 | break; |
| 194 | } |
| 195 | case PhysicalType::LIST: { |
| 196 | D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST); |
| 197 | |
| 198 | auto &source_child = ListVector::GetEntry(vector: *source); |
| 199 | auto sdata = FlatVector::GetData<list_entry_t>(vector: *source); |
| 200 | auto tdata = FlatVector::GetData<list_entry_t>(vector&: target); |
| 201 | |
| 202 | if (target_vector_type == VectorType::CONSTANT_VECTOR) { |
| 203 | // If we are only writing one value, then the copied values (if any) are contiguous |
| 204 | // and we can just Append from the offset position |
| 205 | if (!tmask.RowIsValid(row_idx: target_offset)) { |
| 206 | break; |
| 207 | } |
| 208 | auto source_idx = sel->get_index(idx: source_offset); |
| 209 | auto &source_entry = sdata[source_idx]; |
| 210 | const idx_t source_child_size = source_entry.length + source_entry.offset; |
| 211 | |
| 212 | //! overwrite constant target vectors. |
| 213 | ListVector::SetListSize(vec&: target, size: 0); |
| 214 | ListVector::Append(target, source: source_child, source_size: source_child_size, source_offset: source_entry.offset); |
| 215 | |
| 216 | auto &target_entry = tdata[target_offset]; |
| 217 | target_entry.length = source_entry.length; |
| 218 | target_entry.offset = 0; |
| 219 | } else { |
| 220 | //! if the source has list offsets, we need to append them to the target |
| 221 | //! build a selection vector for the copied child elements |
| 222 | vector<sel_t> child_rows; |
| 223 | for (idx_t i = 0; i < copy_count; ++i) { |
| 224 | if (tmask.RowIsValid(row_idx: target_offset + i)) { |
| 225 | auto source_idx = sel->get_index(idx: source_offset + i); |
| 226 | auto &source_entry = sdata[source_idx]; |
| 227 | for (idx_t j = 0; j < source_entry.length; ++j) { |
| 228 | child_rows.emplace_back(args: source_entry.offset + j); |
| 229 | } |
| 230 | } |
| 231 | } |
| 232 | idx_t source_child_size = child_rows.size(); |
| 233 | SelectionVector child_sel(child_rows.data()); |
| 234 | |
| 235 | idx_t old_target_child_len = ListVector::GetListSize(vector: target); |
| 236 | |
| 237 | //! append to list itself |
| 238 | ListVector::Append(target, source: source_child, sel: child_sel, source_size: source_child_size); |
| 239 | |
| 240 | //! now write the list offsets |
| 241 | for (idx_t i = 0; i < copy_count; i++) { |
| 242 | auto source_idx = sel->get_index(idx: source_offset + i); |
| 243 | auto &source_entry = sdata[source_idx]; |
| 244 | auto &target_entry = tdata[target_offset + i]; |
| 245 | |
| 246 | target_entry.length = source_entry.length; |
| 247 | target_entry.offset = old_target_child_len; |
| 248 | if (tmask.RowIsValid(row_idx: target_offset + i)) { |
| 249 | old_target_child_len += target_entry.length; |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | break; |
| 254 | } |
| 255 | default: |
| 256 | throw NotImplementedException("Unimplemented type '%s' for copy!" , |
| 257 | TypeIdToString(type: source->GetType().InternalType())); |
| 258 | } |
| 259 | |
| 260 | if (target_vector_type != VectorType::FLAT_VECTOR) { |
| 261 | target.SetVectorType(target_vector_type); |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset, |
| 266 | idx_t target_offset) { |
| 267 | VectorOperations::Copy(source_p: source, target, sel_p: *FlatVector::IncrementalSelectionVector(), source_count, source_offset, |
| 268 | target_offset); |
| 269 | } |
| 270 | |
| 271 | } // namespace duckdb |
| 272 | |