1 | #include "duckdb/common/types/data_chunk.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/helper.hpp" |
5 | #include "duckdb/common/printer.hpp" |
6 | #include "duckdb/common/serializer.hpp" |
7 | #include "duckdb/common/types/null_value.hpp" |
8 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
9 | #include "duckdb/common/unordered_map.hpp" |
10 | |
11 | using namespace duckdb; |
12 | using namespace std; |
13 | |
14 | DataChunk::DataChunk() : count(0) { |
15 | } |
16 | |
17 | void DataChunk::InitializeEmpty(vector<TypeId> &types) { |
18 | assert(types.size() > 0); |
19 | for (idx_t i = 0; i < types.size(); i++) { |
20 | data.emplace_back(Vector(types[i], nullptr)); |
21 | } |
22 | } |
23 | |
24 | void DataChunk::Initialize(vector<TypeId> &types) { |
25 | assert(types.size() > 0); |
26 | InitializeEmpty(types); |
27 | for (idx_t i = 0; i < types.size(); i++) { |
28 | data[i].Initialize(); |
29 | } |
30 | } |
31 | |
32 | void DataChunk::Reset() { |
33 | for (idx_t i = 0; i < column_count(); i++) { |
34 | data[i].Initialize(); |
35 | } |
36 | SetCardinality(0); |
37 | } |
38 | |
39 | void DataChunk::Destroy() { |
40 | data.clear(); |
41 | SetCardinality(0); |
42 | } |
43 | |
44 | Value DataChunk::GetValue(idx_t col_idx, idx_t index) const { |
45 | assert(index < size()); |
46 | return data[col_idx].GetValue(index); |
47 | } |
48 | |
49 | void DataChunk::SetValue(idx_t col_idx, idx_t index, Value val) { |
50 | data[col_idx].SetValue(index, move(val)); |
51 | } |
52 | |
53 | void DataChunk::Reference(DataChunk &chunk) { |
54 | assert(chunk.column_count() <= column_count()); |
55 | SetCardinality(chunk); |
56 | for (idx_t i = 0; i < chunk.column_count(); i++) { |
57 | data[i].Reference(chunk.data[i]); |
58 | } |
59 | } |
60 | |
61 | void DataChunk::Copy(DataChunk &other, idx_t offset) { |
62 | assert(column_count() == other.column_count()); |
63 | assert(other.size() == 0); |
64 | |
65 | for (idx_t i = 0; i < column_count(); i++) { |
66 | assert(other.data[i].vector_type == VectorType::FLAT_VECTOR); |
67 | VectorOperations::Copy(data[i], other.data[i], size(), offset, 0); |
68 | } |
69 | other.SetCardinality(size() - offset); |
70 | } |
71 | |
72 | void DataChunk::Append(DataChunk &other) { |
73 | if (other.size() == 0) { |
74 | return; |
75 | } |
76 | if (column_count() != other.column_count()) { |
77 | throw OutOfRangeException("Column counts of appending chunk doesn't match!" ); |
78 | } |
79 | for (idx_t i = 0; i < column_count(); i++) { |
80 | assert(data[i].vector_type == VectorType::FLAT_VECTOR); |
81 | VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size()); |
82 | } |
83 | SetCardinality(size() + other.size()); |
84 | } |
85 | |
86 | void DataChunk::Normalify() { |
87 | for (idx_t i = 0; i < column_count(); i++) { |
88 | data[i].Normalify(size()); |
89 | } |
90 | } |
91 | |
92 | vector<TypeId> DataChunk::GetTypes() { |
93 | vector<TypeId> types; |
94 | for (idx_t i = 0; i < column_count(); i++) { |
95 | types.push_back(data[i].type); |
96 | } |
97 | return types; |
98 | } |
99 | |
100 | string DataChunk::ToString() const { |
101 | string retval = "Chunk - [" + to_string(column_count()) + " Columns]\n" ; |
102 | for (idx_t i = 0; i < column_count(); i++) { |
103 | retval += "- " + data[i].ToString(size()) + "\n" ; |
104 | } |
105 | return retval; |
106 | } |
107 | |
108 | void DataChunk::Serialize(Serializer &serializer) { |
109 | // write the count |
110 | serializer.Write<sel_t>(size()); |
111 | serializer.Write<idx_t>(column_count()); |
112 | for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) { |
113 | // write the types |
114 | serializer.Write<int>((int)data[col_idx].type); |
115 | } |
116 | // write the data |
117 | for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) { |
118 | data[col_idx].Serialize(size(), serializer); |
119 | } |
120 | } |
121 | |
122 | void DataChunk::Deserialize(Deserializer &source) { |
123 | auto rows = source.Read<sel_t>(); |
124 | idx_t column_count = source.Read<idx_t>(); |
125 | |
126 | vector<TypeId> types; |
127 | for (idx_t i = 0; i < column_count; i++) { |
128 | types.push_back((TypeId)source.Read<int>()); |
129 | } |
130 | Initialize(types); |
131 | // now load the column data |
132 | SetCardinality(rows); |
133 | for (idx_t i = 0; i < column_count; i++) { |
134 | data[i].Deserialize(rows, source); |
135 | } |
136 | Verify(); |
137 | } |
138 | |
139 | void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count) { |
140 | this->count = count; |
141 | sel_cache_t merge_cache; |
142 | for (idx_t c = 0; c < column_count(); c++) { |
143 | data[c].Slice(sel_vector, count, merge_cache); |
144 | } |
145 | } |
146 | |
147 | void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset) { |
148 | assert(other.column_count() <= col_offset + column_count()); |
149 | this->count = count; |
150 | sel_cache_t merge_cache; |
151 | for (idx_t c = 0; c < other.column_count(); c++) { |
152 | if (other.data[c].vector_type == VectorType::DICTIONARY_VECTOR) { |
153 | // already a dictionary! merge the dictionaries |
154 | data[col_offset + c].Reference(other.data[c]); |
155 | data[col_offset + c].Slice(sel, count, merge_cache); |
156 | } else { |
157 | data[col_offset + c].Slice(other.data[c], sel, count); |
158 | } |
159 | } |
160 | } |
161 | |
162 | unique_ptr<VectorData[]> DataChunk::Orrify() { |
163 | auto orrified_data = unique_ptr<VectorData[]>(new VectorData[column_count()]); |
164 | for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) { |
165 | data[col_idx].Orrify(size(), orrified_data[col_idx]); |
166 | } |
167 | return orrified_data; |
168 | } |
169 | |
170 | void DataChunk::Hash(Vector &result) { |
171 | assert(result.type == TypeId::HASH); |
172 | VectorOperations::Hash(data[0], result, size()); |
173 | for (idx_t i = 1; i < column_count(); i++) { |
174 | VectorOperations::CombineHash(result, data[i], size()); |
175 | } |
176 | } |
177 | |
178 | void DataChunk::Verify() { |
179 | #ifdef DEBUG |
180 | assert(size() <= STANDARD_VECTOR_SIZE); |
181 | // verify that all vectors in this chunk have the chunk selection vector |
182 | for (idx_t i = 0; i < column_count(); i++) { |
183 | data[i].Verify(size()); |
184 | } |
185 | #endif |
186 | } |
187 | |
188 | void DataChunk::Print() { |
189 | Printer::Print(ToString()); |
190 | } |
191 | |