#include "duckdb/common/types/data_chunk.hpp"

#include "duckdb/common/exception.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/printer.hpp"
#include "duckdb/common/serializer.hpp"
#include "duckdb/common/types/null_value.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/common/unordered_map.hpp"

using namespace duckdb;
using namespace std;

DataChunk::DataChunk() : count(0) {
}

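// initializes the chunk with vectors of the given types, but does not
// allocate any backing buffers; the vectors can later reference data owned
// elsewhere (e.g. through Reference or Slice)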
void DataChunk::InitializeEmpty(vector<TypeId> &types) {
	assert(types.size() > 0);
	for (idx_t i = 0; i < types.size(); i++) {
		data.emplace_back(types[i], nullptr);
	}
}

void DataChunk::Initialize(vector<TypeId> &types) {
	assert(types.size() > 0);
	InitializeEmpty(types);
	for (idx_t i = 0; i < types.size(); i++) {
		data[i].Initialize();
	}
}
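
// a minimal usage sketch (illustrative only; assumes TypeId::INTEGER and
// Value::INTEGER as defined elsewhere in this codebase):
//
//   vector<TypeId> types = {TypeId::INTEGER};
//   DataChunk chunk;
//   chunk.Initialize(types);
//   chunk.SetValue(0, 0, Value::INTEGER(42));
//   chunk.SetCardinality(1);
//   chunk.Print();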

void DataChunk::Reset() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Initialize();
	}
	SetCardinality(0);
}

void DataChunk::Destroy() {
	data.clear();
	SetCardinality(0);
}

Value DataChunk::GetValue(idx_t col_idx, idx_t index) const {
	assert(index < size());
	return data[col_idx].GetValue(index);
}

void DataChunk::SetValue(idx_t col_idx, idx_t index, Value val) {
	data[col_idx].SetValue(index, move(val));
}

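// makes this chunk's vectors reference the data of another chunk without
// copying it; the source chunk may not have more columns than this chunk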
void DataChunk::Reference(DataChunk &chunk) {
	assert(chunk.column_count() <= column_count());
	SetCardinality(chunk);
	for (idx_t i = 0; i < chunk.column_count(); i++) {
		data[i].Reference(chunk.data[i]);
	}
}

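// copies the rows of this chunk, starting at the given row offset, into
// "other"; the target must be empty and is expected to hold flat vectors of
// the same types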
void DataChunk::Copy(DataChunk &other, idx_t offset) {
	assert(column_count() == other.column_count());
	assert(other.size() == 0);

	for (idx_t i = 0; i < column_count(); i++) {
		assert(other.data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(data[i], other.data[i], size(), offset, 0);
	}
	other.SetCardinality(size() - offset);
}

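// appends the rows of "other" to this chunk; both chunks must have the same
// column count, and the combined cardinality may not exceed
// STANDARD_VECTOR_SIZE (see Verify below)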
void DataChunk::Append(DataChunk &other) {
	if (other.size() == 0) {
		return;
	}
	if (column_count() != other.column_count()) {
		throw OutOfRangeException("Column counts of appending chunks don't match!");
	}
	for (idx_t i = 0; i < column_count(); i++) {
		assert(data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size());
	}
	SetCardinality(size() + other.size());
}

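// flattens every vector in the chunk to VectorType::FLAT_VECTOR, decoding
// constant or dictionary encodings in place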
void DataChunk::Normalify() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Normalify(size());
	}
}

vector<TypeId> DataChunk::GetTypes() {
	vector<TypeId> types;
	for (idx_t i = 0; i < column_count(); i++) {
		types.push_back(data[i].type);
	}
	return types;
}

string DataChunk::ToString() const {
	string retval = "Chunk - [" + to_string(column_count()) + " Columns]\n";
	for (idx_t i = 0; i < column_count(); i++) {
		retval += "- " + data[i].ToString(size()) + "\n";
	}
	return retval;
}

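// serialization layout: row count (sel_t), column count (idx_t), one type
// per column (int), then the data of each column in order; Deserialize
// below reads the fields back in the same order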
void DataChunk::Serialize(Serializer &serializer) {
	// write the count
	serializer.Write<sel_t>(size());
	serializer.Write<idx_t>(column_count());
	// write the types
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		serializer.Write<int>((int)data[col_idx].type);
	}
	// write the data
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Serialize(size(), serializer);
	}
}

void DataChunk::Deserialize(Deserializer &source) {
	auto rows = source.Read<sel_t>();
	idx_t column_count = source.Read<idx_t>();

	vector<TypeId> types;
	for (idx_t i = 0; i < column_count; i++) {
		types.push_back((TypeId)source.Read<int>());
	}
	Initialize(types);
	// now load the column data
	SetCardinality(rows);
	for (idx_t i = 0; i < column_count; i++) {
		data[i].Deserialize(rows, source);
	}
	Verify();
}

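// filters the chunk down to the rows selected by sel_vector by slicing each
// column with the selection; merge_cache is shared across the columns so
// that dictionaries reused between columns are merged only once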
void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count) {
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < column_count(); c++) {
		data[c].Slice(sel_vector, count, merge_cache);
	}
}

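// slices the columns of "other" into this chunk starting at column
// col_offset, merging with existing dictionaries where possible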
void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset) {
	assert(col_offset + other.column_count() <= column_count());
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < other.column_count(); c++) {
		if (other.data[c].vector_type == VectorType::DICTIONARY_VECTOR) {
			// already a dictionary! merge the dictionaries
			data[col_offset + c].Reference(other.data[c]);
			data[col_offset + c].Slice(sel, count, merge_cache);
		} else {
			data[col_offset + c].Slice(other.data[c], sel, count);
		}
	}
}

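// converts every column into the unified VectorData format (a data pointer,
// selection vector and null mask), giving callers a single code path
// regardless of each vector's physical encoding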
unique_ptr<VectorData[]> DataChunk::Orrify() {
	auto orrified_data = unique_ptr<VectorData[]>(new VectorData[column_count()]);
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Orrify(size(), orrified_data[col_idx]);
	}
	return orrified_data;
}

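// hashes the first column into the result vector, then folds each remaining
// column into the hash with CombineHash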
void DataChunk::Hash(Vector &result) {
	assert(result.type == TypeId::HASH);
	VectorOperations::Hash(data[0], result, size());
	for (idx_t i = 1; i < column_count(); i++) {
		VectorOperations::CombineHash(result, data[i], size());
	}
}

void DataChunk::Verify() {
#ifdef DEBUG
	assert(size() <= STANDARD_VECTOR_SIZE);
	// verify that all vectors in this chunk are internally consistent
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Verify(size());
	}
#endif
}

void DataChunk::Print() {
	Printer::Print(ToString());
}