| 1 | //===--------------------------------------------------------------------===// |
| 2 | // row_scatter.cpp |
| 3 | // Description: This file contains the implementation of the row scattering |
| 4 | // operators |
| 5 | //===--------------------------------------------------------------------===// |
| 6 | |
| 7 | #include "duckdb/common/exception.hpp" |
| 8 | #include "duckdb/common/helper.hpp" |
| 9 | #include "duckdb/common/row_operations/row_operations.hpp" |
| 10 | #include "duckdb/common/types/null_value.hpp" |
| 11 | #include "duckdb/common/types/row/row_data_collection.hpp" |
| 12 | #include "duckdb/common/types/row/row_layout.hpp" |
| 13 | #include "duckdb/common/types/selection_vector.hpp" |
| 14 | #include "duckdb/common/types/vector.hpp" |
| 15 | |
| 16 | namespace duckdb { |
| 17 | |
| 18 | using ValidityBytes = RowLayout::ValidityBytes; |
| 19 | |
| 20 | template <class T> |
| 21 | static void TemplatedScatter(UnifiedVectorFormat &col, Vector &rows, const SelectionVector &sel, const idx_t count, |
| 22 | const idx_t col_offset, const idx_t col_no) { |
| 23 | auto data = UnifiedVectorFormat::GetData<T>(col); |
| 24 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
| 25 | |
| 26 | if (!col.validity.AllValid()) { |
| 27 | for (idx_t i = 0; i < count; i++) { |
| 28 | auto idx = sel.get_index(idx: i); |
| 29 | auto col_idx = col.sel->get_index(idx); |
| 30 | auto row = ptrs[idx]; |
| 31 | |
| 32 | auto isnull = !col.validity.RowIsValid(row_idx: col_idx); |
| 33 | T store_value = isnull ? NullValue<T>() : data[col_idx]; |
| 34 | Store<T>(store_value, row + col_offset); |
| 35 | if (isnull) { |
| 36 | ValidityBytes col_mask(ptrs[idx]); |
| 37 | col_mask.SetInvalidUnsafe(col_no); |
| 38 | } |
| 39 | } |
| 40 | } else { |
| 41 | for (idx_t i = 0; i < count; i++) { |
| 42 | auto idx = sel.get_index(idx: i); |
| 43 | auto col_idx = col.sel->get_index(idx); |
| 44 | auto row = ptrs[idx]; |
| 45 | |
| 46 | Store<T>(data[col_idx], row + col_offset); |
| 47 | } |
| 48 | } |
| 49 | } |
| 50 | |
| 51 | static void ComputeStringEntrySizes(const UnifiedVectorFormat &col, idx_t entry_sizes[], const SelectionVector &sel, |
| 52 | const idx_t count, const idx_t offset = 0) { |
| 53 | auto data = UnifiedVectorFormat::GetData<string_t>(format: col); |
| 54 | for (idx_t i = 0; i < count; i++) { |
| 55 | auto idx = sel.get_index(idx: i); |
| 56 | auto col_idx = col.sel->get_index(idx) + offset; |
| 57 | const auto &str = data[col_idx]; |
| 58 | if (col.validity.RowIsValid(row_idx: col_idx) && !str.IsInlined()) { |
| 59 | entry_sizes[i] += str.GetSize(); |
| 60 | } |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | static void ScatterStringVector(UnifiedVectorFormat &col, Vector &rows, data_ptr_t str_locations[], |
| 65 | const SelectionVector &sel, const idx_t count, const idx_t col_offset, |
| 66 | const idx_t col_no) { |
| 67 | auto string_data = UnifiedVectorFormat::GetData<string_t>(format: col); |
| 68 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
| 69 | |
| 70 | // Write out zero length to avoid swizzling problems. |
| 71 | const string_t null(nullptr, 0); |
| 72 | for (idx_t i = 0; i < count; i++) { |
| 73 | auto idx = sel.get_index(idx: i); |
| 74 | auto col_idx = col.sel->get_index(idx); |
| 75 | auto row = ptrs[idx]; |
| 76 | if (!col.validity.RowIsValid(row_idx: col_idx)) { |
| 77 | ValidityBytes col_mask(row); |
| 78 | col_mask.SetInvalidUnsafe(col_no); |
| 79 | Store<string_t>(val: null, ptr: row + col_offset); |
| 80 | } else if (string_data[col_idx].IsInlined()) { |
| 81 | Store<string_t>(val: string_data[col_idx], ptr: row + col_offset); |
| 82 | } else { |
| 83 | const auto &str = string_data[col_idx]; |
| 84 | string_t inserted(const_char_ptr_cast(src: str_locations[i]), str.GetSize()); |
| 85 | memcpy(dest: inserted.GetDataWriteable(), src: str.GetData(), n: str.GetSize()); |
| 86 | str_locations[i] += str.GetSize(); |
| 87 | inserted.Finalize(); |
| 88 | Store<string_t>(val: inserted, ptr: row + col_offset); |
| 89 | } |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | static void ScatterNestedVector(Vector &vec, UnifiedVectorFormat &col, Vector &rows, data_ptr_t data_locations[], |
| 94 | const SelectionVector &sel, const idx_t count, const idx_t col_offset, |
| 95 | const idx_t col_no, const idx_t vcount) { |
| 96 | // Store pointers to the data in the row |
| 97 | // Do this first because SerializeVector destroys the locations |
| 98 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
| 99 | data_ptr_t validitymask_locations[STANDARD_VECTOR_SIZE]; |
| 100 | for (idx_t i = 0; i < count; i++) { |
| 101 | auto idx = sel.get_index(idx: i); |
| 102 | auto row = ptrs[idx]; |
| 103 | validitymask_locations[i] = row; |
| 104 | |
| 105 | Store<data_ptr_t>(val: data_locations[i], ptr: row + col_offset); |
| 106 | } |
| 107 | |
| 108 | // Serialise the data |
| 109 | RowOperations::HeapScatter(v&: vec, vcount, sel, ser_count: count, col_idx: col_no, key_locations: data_locations, validitymask_locations); |
| 110 | } |
| 111 | |
| 112 | void RowOperations::Scatter(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows, |
| 113 | RowDataCollection &string_heap, const SelectionVector &sel, idx_t count) { |
| 114 | if (count == 0) { |
| 115 | return; |
| 116 | } |
| 117 | |
| 118 | // Set the validity mask for each row before inserting data |
| 119 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
| 120 | for (idx_t i = 0; i < count; ++i) { |
| 121 | auto row_idx = sel.get_index(idx: i); |
| 122 | auto row = ptrs[row_idx]; |
| 123 | ValidityBytes(row).SetAllValid(layout.ColumnCount()); |
| 124 | } |
| 125 | |
| 126 | const auto vcount = columns.size(); |
| 127 | auto &offsets = layout.GetOffsets(); |
| 128 | auto &types = layout.GetTypes(); |
| 129 | |
| 130 | // Compute the entry size of the variable size columns |
| 131 | vector<BufferHandle> handles; |
| 132 | data_ptr_t data_locations[STANDARD_VECTOR_SIZE]; |
| 133 | if (!layout.AllConstant()) { |
| 134 | idx_t entry_sizes[STANDARD_VECTOR_SIZE]; |
| 135 | std::fill_n(entry_sizes, count, sizeof(uint32_t)); |
| 136 | for (idx_t col_no = 0; col_no < types.size(); col_no++) { |
| 137 | if (TypeIsConstantSize(type: types[col_no].InternalType())) { |
| 138 | continue; |
| 139 | } |
| 140 | |
| 141 | auto &vec = columns.data[col_no]; |
| 142 | auto &col = col_data[col_no]; |
| 143 | switch (types[col_no].InternalType()) { |
| 144 | case PhysicalType::VARCHAR: |
| 145 | ComputeStringEntrySizes(col, entry_sizes, sel, count); |
| 146 | break; |
| 147 | case PhysicalType::LIST: |
| 148 | case PhysicalType::STRUCT: |
| 149 | RowOperations::ComputeEntrySizes(v&: vec, vdata&: col, entry_sizes, vcount, ser_count: count, sel); |
| 150 | break; |
| 151 | default: |
| 152 | throw InternalException("Unsupported type for RowOperations::Scatter" ); |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | // Build out the buffer space |
| 157 | handles = string_heap.Build(added_count: count, key_locations: data_locations, entry_sizes); |
| 158 | |
| 159 | // Serialize information that is needed for swizzling if the computation goes out-of-core |
| 160 | const idx_t heap_pointer_offset = layout.GetHeapOffset(); |
| 161 | for (idx_t i = 0; i < count; i++) { |
| 162 | auto row_idx = sel.get_index(idx: i); |
| 163 | auto row = ptrs[row_idx]; |
| 164 | // Pointer to this row in the heap block |
| 165 | Store<data_ptr_t>(val: data_locations[i], ptr: row + heap_pointer_offset); |
| 166 | // Row size is stored in the heap in front of each row |
| 167 | Store<uint32_t>(val: entry_sizes[i], ptr: data_locations[i]); |
| 168 | data_locations[i] += sizeof(uint32_t); |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | for (idx_t col_no = 0; col_no < types.size(); col_no++) { |
| 173 | auto &vec = columns.data[col_no]; |
| 174 | auto &col = col_data[col_no]; |
| 175 | auto col_offset = offsets[col_no]; |
| 176 | |
| 177 | switch (types[col_no].InternalType()) { |
| 178 | case PhysicalType::BOOL: |
| 179 | case PhysicalType::INT8: |
| 180 | TemplatedScatter<int8_t>(col, rows, sel, count, col_offset, col_no); |
| 181 | break; |
| 182 | case PhysicalType::INT16: |
| 183 | TemplatedScatter<int16_t>(col, rows, sel, count, col_offset, col_no); |
| 184 | break; |
| 185 | case PhysicalType::INT32: |
| 186 | TemplatedScatter<int32_t>(col, rows, sel, count, col_offset, col_no); |
| 187 | break; |
| 188 | case PhysicalType::INT64: |
| 189 | TemplatedScatter<int64_t>(col, rows, sel, count, col_offset, col_no); |
| 190 | break; |
| 191 | case PhysicalType::UINT8: |
| 192 | TemplatedScatter<uint8_t>(col, rows, sel, count, col_offset, col_no); |
| 193 | break; |
| 194 | case PhysicalType::UINT16: |
| 195 | TemplatedScatter<uint16_t>(col, rows, sel, count, col_offset, col_no); |
| 196 | break; |
| 197 | case PhysicalType::UINT32: |
| 198 | TemplatedScatter<uint32_t>(col, rows, sel, count, col_offset, col_no); |
| 199 | break; |
| 200 | case PhysicalType::UINT64: |
| 201 | TemplatedScatter<uint64_t>(col, rows, sel, count, col_offset, col_no); |
| 202 | break; |
| 203 | case PhysicalType::INT128: |
| 204 | TemplatedScatter<hugeint_t>(col, rows, sel, count, col_offset, col_no); |
| 205 | break; |
| 206 | case PhysicalType::FLOAT: |
| 207 | TemplatedScatter<float>(col, rows, sel, count, col_offset, col_no); |
| 208 | break; |
| 209 | case PhysicalType::DOUBLE: |
| 210 | TemplatedScatter<double>(col, rows, sel, count, col_offset, col_no); |
| 211 | break; |
| 212 | case PhysicalType::INTERVAL: |
| 213 | TemplatedScatter<interval_t>(col, rows, sel, count, col_offset, col_no); |
| 214 | break; |
| 215 | case PhysicalType::VARCHAR: |
| 216 | ScatterStringVector(col, rows, str_locations: data_locations, sel, count, col_offset, col_no); |
| 217 | break; |
| 218 | case PhysicalType::LIST: |
| 219 | case PhysicalType::STRUCT: |
| 220 | ScatterNestedVector(vec, col, rows, data_locations, sel, count, col_offset, col_no, vcount); |
| 221 | break; |
| 222 | default: |
| 223 | throw InternalException("Unsupported type for RowOperations::Scatter" ); |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | } // namespace duckdb |
| 229 | |