#include "duckdb/common/types/data_chunk.hpp"

#include "duckdb/common/exception.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/printer.hpp"
#include "duckdb/common/serializer.hpp"
#include "duckdb/common/types/null_value.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/common/unordered_map.hpp"

using namespace duckdb;
using namespace std;

DataChunk::DataChunk() : count(0) {
}

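// initializes the chunk with vectors of the given types, but does not
// allocate any backing buffers; the vectors can later reference data owned
// elsewhere (e.g. through Reference or Slice)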
void DataChunk::InitializeEmpty(vector<TypeId> &types) {
	assert(types.size() > 0);
	for (idx_t i = 0; i < types.size(); i++) {
		data.emplace_back(types[i], nullptr);
	}
}

void DataChunk::Initialize(vector<TypeId> &types) {
	assert(types.size() > 0);
	InitializeEmpty(types);
	for (idx_t i = 0; i < types.size(); i++) {
		data[i].Initialize();
	}
}
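
// a minimal usage sketch (illustrative only; assumes TypeId::INTEGER and
// Value::INTEGER as defined elsewhere in this codebase):
//
//   vector<TypeId> types = {TypeId::INTEGER};
//   DataChunk chunk;
//   chunk.Initialize(types);
//   chunk.SetValue(0, 0, Value::INTEGER(42));
//   chunk.SetCardinality(1);
//   chunk.Print();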

void DataChunk::Reset() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Initialize();
	}
	SetCardinality(0);
}

void DataChunk::Destroy() {
	data.clear();
	SetCardinality(0);
}

Value DataChunk::GetValue(idx_t col_idx, idx_t index) const {
	assert(index < size());
	return data[col_idx].GetValue(index);
}

void DataChunk::SetValue(idx_t col_idx, idx_t index, Value val) {
	data[col_idx].SetValue(index, move(val));
}

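// makes this chunk's vectors reference the data of another chunk without
// copying it; the source chunk may not have more columns than this chunk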
void DataChunk::Reference(DataChunk &chunk) {
	assert(chunk.column_count() <= column_count());
	SetCardinality(chunk);
	for (idx_t i = 0; i < chunk.column_count(); i++) {
		data[i].Reference(chunk.data[i]);
	}
}

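// copies the rows of this chunk, starting at the given row offset, into
// "other"; the target must be empty and is expected to hold flat vectors of
// the same types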
void DataChunk::Copy(DataChunk &other, idx_t offset) {
	assert(column_count() == other.column_count());
	assert(other.size() == 0);

	for (idx_t i = 0; i < column_count(); i++) {
		assert(other.data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(data[i], other.data[i], size(), offset, 0);
	}
	other.SetCardinality(size() - offset);
}

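// appends the rows of "other" to this chunk; both chunks must have the same
// column count, and the combined cardinality may not exceed
// STANDARD_VECTOR_SIZE (see Verify below)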
void DataChunk::Append(DataChunk &other) {
	if (other.size() == 0) {
		return;
	}
	if (column_count() != other.column_count()) {
		throw OutOfRangeException("Column counts of appending chunks don't match!");
	}
	for (idx_t i = 0; i < column_count(); i++) {
		assert(data[i].vector_type == VectorType::FLAT_VECTOR);
		VectorOperations::Copy(other.data[i], data[i], other.size(), 0, size());
	}
	SetCardinality(size() + other.size());
}

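// flattens every vector in the chunk to VectorType::FLAT_VECTOR, decoding
// constant or dictionary encodings in place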
void DataChunk::Normalify() {
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Normalify(size());
	}
}

vector<TypeId> DataChunk::GetTypes() {
	vector<TypeId> types;
	for (idx_t i = 0; i < column_count(); i++) {
		types.push_back(data[i].type);
	}
	return types;
}

string DataChunk::ToString() const {
	string retval = "Chunk - [" + to_string(column_count()) + " Columns]\n";
	for (idx_t i = 0; i < column_count(); i++) {
		retval += "- " + data[i].ToString(size()) + "\n";
	}
	return retval;
}

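// serialization layout: row count (sel_t), column count (idx_t), one type
// per column (int), then the data of each column in order; Deserialize
// below reads the fields back in the same order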
void DataChunk::Serialize(Serializer &serializer) {
	// write the count
	serializer.Write<sel_t>(size());
	serializer.Write<idx_t>(column_count());
	// write the types
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		serializer.Write<int>((int)data[col_idx].type);
	}
	// write the data
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Serialize(size(), serializer);
	}
}

void DataChunk::Deserialize(Deserializer &source) {
	auto rows = source.Read<sel_t>();
	idx_t column_count = source.Read<idx_t>();

	vector<TypeId> types;
	for (idx_t i = 0; i < column_count; i++) {
		types.push_back((TypeId)source.Read<int>());
	}
	Initialize(types);
	// now load the column data
	SetCardinality(rows);
	for (idx_t i = 0; i < column_count; i++) {
		data[i].Deserialize(rows, source);
	}
	Verify();
}

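// filters the chunk down to the rows selected by sel_vector by slicing each
// column with the selection; merge_cache is shared across the columns so
// that dictionaries reused between columns are merged only once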
void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count) {
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < column_count(); c++) {
		data[c].Slice(sel_vector, count, merge_cache);
	}
}

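// slices the columns of "other" into this chunk starting at column
// col_offset, merging with existing dictionaries where possible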
void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset) {
	assert(col_offset + other.column_count() <= column_count());
	this->count = count;
	sel_cache_t merge_cache;
	for (idx_t c = 0; c < other.column_count(); c++) {
		if (other.data[c].vector_type == VectorType::DICTIONARY_VECTOR) {
			// already a dictionary! merge the dictionaries
			data[col_offset + c].Reference(other.data[c]);
			data[col_offset + c].Slice(sel, count, merge_cache);
		} else {
			data[col_offset + c].Slice(other.data[c], sel, count);
		}
	}
}

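// converts every column into the unified VectorData format (a data pointer,
// selection vector and null mask), giving callers a single code path
// regardless of each vector's physical encoding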
unique_ptr<VectorData[]> DataChunk::Orrify() {
	auto orrified_data = unique_ptr<VectorData[]>(new VectorData[column_count()]);
	for (idx_t col_idx = 0; col_idx < column_count(); col_idx++) {
		data[col_idx].Orrify(size(), orrified_data[col_idx]);
	}
	return orrified_data;
}

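// hashes the first column into the result vector, then folds each remaining
// column into the hash with CombineHash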
void DataChunk::Hash(Vector &result) {
	assert(result.type == TypeId::HASH);
	VectorOperations::Hash(data[0], result, size());
	for (idx_t i = 1; i < column_count(); i++) {
		VectorOperations::CombineHash(result, data[i], size());
	}
}

void DataChunk::Verify() {
#ifdef DEBUG
	assert(size() <= STANDARD_VECTOR_SIZE);
	// verify that all vectors in this chunk are internally consistent
	for (idx_t i = 0; i < column_count(); i++) {
		data[i].Verify(size());
	}
#endif
}

void DataChunk::Print() {
	Printer::Print(ToString());
}