#include "duckdb/common/types/data_chunk.hpp"

#include "duckdb/common/exception.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/printer.hpp"
#include "duckdb/common/serializer.hpp"
#include "duckdb/common/types/null_value.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/common/unordered_map.hpp"

using namespace duckdb;
using namespace std;

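// A DataChunk represents a horizontal slice of a table: a set of Vectors that
// share a single cardinality (the chunk's count). A default-constructed chunk
// holds no columns and has a count of zero.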
DataChunk::DataChunk() : count(0) {
}

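// InitializeEmpty creates one Vector per type without allocating any backing
// data; the resulting vectors can only be used after they are given data,
// e.g. through Reference.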
void DataChunk::InitializeEmpty(vector<TypeId> &types) {
	assert(types.size() > 0);
	for (idx_t i = 0; i < types.size(); i++) {
		data.emplace_back(Vector(types[i], nullptr));
	}
}

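// Initialize creates one Vector per type and allocates an owned backing
// buffer for each of them (see Vector::Initialize).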
void DataChunk::Initialize(vector<TypeId> &types) {
	assert(types.size() > 0);
	InitializeEmpty(types);
	for (idx_t i = 0; i < types.size(); i++) {
		data[i].Initialize();
	}
}

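// Reset re-initializes the buffers of every column and resets the cardinality
// to zero, allowing the chunk to be refilled.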
void DataChunk::Reset() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Initialize();
	}
	SetCardinality(0);
}

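// Destroy releases all column vectors and resets the cardinality to zero.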
void DataChunk::Destroy() {
	data.clear();
	SetCardinality(0);
}

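// GetValue/SetValue access individual entries through the Value abstraction;
// they are convenient but materialize a full Value object, so bulk access
// should go through the vectors directly.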
Value DataChunk::GetValue(idx_t col_idx, idx_t index) const {
	assert(index < size());
	return data[col_idx].GetValue(index);
}

void DataChunk::SetValue(idx_t col_idx, idx_t index, Value val) {
	data[col_idx].SetValue(index, move(val));
}

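// Reference makes the vectors of this chunk point at the data of another
// chunk without copying it.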
void DataChunk::Reference(DataChunk &chunk) {
	assert(chunk.column_count() <= column_count());
	SetCardinality(chunk);
	for (idx_t i = 0; i < chunk.column_count(); i++) {
		data[i].Reference(chunk.data[i]);
	}
}

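// Copy materializes the tuples of this chunk, starting at the given offset,
// into the target chunk, which must be empty and have flat vectors.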
void DataChunk::Copy(DataChunk &other, idx_t offset) {
	assert(column_count() == other.column_count());
	assert(other.size() == 0);

	for (idx_t i = 0; i < column_count(); i++) {
		assert(other.data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(data[i], other.data[i], size(), offset, 0);
	}
	other.SetCardinality(size() - offset);
}

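// Append copies the tuples of the other chunk onto the end of this chunk;
// the caller must ensure the combined count still fits in a single chunk
// (i.e. does not exceed STANDARD_VECTOR_SIZE).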
void DataChunk::Append(DataChunk &other) {
	if (other.size() == 0) {
		return;
	}
	if (column_count() != other.column_count()) {
		throw OutOfRangeException("Column counts of appending chunk don't match!");
	}
	for (idx_t i = 0; i < column_count(); i++) {
		assert(data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size());
	}
	SetCardinality(size() + other.size());
}

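// Normalify decompresses every column into a flat vector
// (VectorType::FLAT_VECTOR).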
void DataChunk::Normalify() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Normalify(size());
	}
}

vector<TypeId> DataChunk::GetTypes() {
	vector<TypeId> types;
	for (idx_t i = 0; i < column_count(); i++) {
		types.push_back(data[i].type);
	}
	return types;
}

string DataChunk::ToString() const {
	string retval = "Chunk - [" + to_string(column_count()) + " Columns]\n";
	for (idx_t i = 0; i < column_count(); i++) {
		retval += "- " + data[i].ToString(size()) + "\n";
	}
	return retval;
}

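// Serialization layout: the chunk count, followed by the column count, the
// type of every column and finally the data of every column.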
void DataChunk::Serialize(Serializer &serializer) {
	// write the count
	serializer.Write<sel_t>(size());
	serializer.Write<idx_t>(column_count());
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		// write the types
		serializer.Write<int>((int)data[col_idx].type);
	}
	// write the data
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Serialize(size(), serializer);
	}
}

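// Deserialize mirrors Serialize: it reads the count and the types, allocates
// the vectors and then reads back the column data.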
void DataChunk::Deserialize(Deserializer &source) {
	auto rows = source.Read<sel_t>();
	idx_t column_count = source.Read<idx_t>();

	vector<TypeId> types;
	for (idx_t i = 0; i < column_count; i++) {
		types.push_back((TypeId)source.Read<int>());
	}
	Initialize(types);
	// now load the column data
	SetCardinality(rows);
	for (idx_t i = 0; i < column_count; i++) {
		data[i].Deserialize(rows, source);
	}
	Verify();
}

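// Slice filters the chunk in place through a selection vector; the shared
// merge_cache ensures that identical dictionary merges across columns are
// performed only once.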
void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count) {
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < column_count(); c++) {
		data[c].Slice(sel_vector, count, merge_cache);
	}
}

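// Slice references the columns of another chunk filtered through a selection
// vector; dictionary vectors are referenced and re-sliced so that their
// dictionaries are merged rather than nested.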
void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset) {
	assert(other.column_count() <= col_offset + column_count());
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < other.column_count(); c++) {
		if (other.data[c].vector_type == VectorType::DICTIONARY_VECTOR) {
			// already a dictionary! merge the dictionaries
			data[col_offset + c].Reference(other.data[c]);
			data[col_offset + c].Slice(sel, count, merge_cache);
		} else {
			data[col_offset + c].Slice(other.data[c], sel, count);
		}
	}
}

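// Orrify converts every column into a unified VectorData representation
// (data pointer plus selection vector), which generic operators can consume
// independently of the underlying vector type.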
unique_ptr<VectorData[]> DataChunk::Orrify() {
	auto orrified_data = unique_ptr<VectorData[]>(new VectorData[column_count()]);
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Orrify(size(), orrified_data[col_idx]);
	}
	return orrified_data;
}

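// Hash computes a hash for the first column and then folds the hashes of the
// remaining columns into the result vector.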
void DataChunk::Hash(Vector &result) {
	assert(result.type == TypeId::HASH);
	VectorOperations::Hash(data[0], result, size());
	for (idx_t i = 1; i < column_count(); i++) {
		VectorOperations::CombineHash(result, data[i], size());
	}
}

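// Verify performs sanity checks on the chunk in debug builds; it is a no-op
// in release builds.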
void DataChunk::Verify() {
#ifdef DEBUG
	assert(size() <= STANDARD_VECTOR_SIZE);
	// verify that all vectors in this chunk are consistent with the chunk's cardinality
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Verify(size());
	}
#endif
}

void DataChunk::Print() {
	Printer::Print(ToString());
}