1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/common/types/data_chunk.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/common.hpp" |
12 | #include "duckdb/common/types/vector.hpp" |
13 | |
14 | #include <vector> |
15 | |
16 | namespace duckdb { |
17 | |
18 | //! A Data Chunk represents a set of vectors. |
19 | /*! |
20 | The data chunk class is the intermediate representation used by the |
21 | execution engine of DuckDB. It effectively represents a subset of a relation. |
22 | It holds a set of vectors that all have the same length. |
23 | |
24 | DataChunk is initialized using the DataChunk::Initialize function by |
25 | providing it with a vector of TypeIds for the Vector members. By default, |
26 | this function will also allocate a chunk of memory in the DataChunk for the |
27 | vectors and all the vectors will be referencing vectors to the data owned by |
28 | the chunk. The reason for this behavior is that the underlying vectors can |
29 | become referencing vectors to other chunks as well (i.e. in the case an |
30 | operator does not alter the data, such as a Filter operator which only adds a |
31 | selection vector). |
32 | |
33 | In addition to holding the data of the vectors, the DataChunk also owns the |
34 | selection vector that underlying vectors can point to. |
35 | */ |
36 | class DataChunk { |
37 | public: |
38 | //! Creates an empty DataChunk |
39 | DataChunk(); |
40 | |
41 | //! The vectors owned by the DataChunk. |
42 | vector<Vector> data; |
43 | |
44 | public: |
45 | idx_t size() const { |
46 | return count; |
47 | } |
48 | idx_t column_count() const { |
49 | return data.size(); |
50 | } |
51 | void SetCardinality(idx_t count) { |
52 | assert(count <= STANDARD_VECTOR_SIZE); |
53 | this->count = count; |
54 | } |
55 | void SetCardinality(const DataChunk &other) { |
56 | this->count = other.size(); |
57 | } |
58 | |
59 | Value GetValue(idx_t col_idx, idx_t index) const; |
60 | void SetValue(idx_t col_idx, idx_t index, Value val); |
61 | |
62 | //! Set the DataChunk to reference another data chunk |
63 | void Reference(DataChunk &chunk); |
64 | |
65 | //! Initializes the DataChunk with the specified types to an empty DataChunk |
66 | //! This will create one vector of the specified type for each TypeId in the |
67 | //! types list. The vector will be referencing vector to the data owned by |
68 | //! the DataChunk. |
69 | void Initialize(vector<TypeId> &types); |
70 | //! Initializes an empty DataChunk with the given types. The vectors will *not* have any data allocated for them. |
71 | void InitializeEmpty(vector<TypeId> &types); |
72 | //! Append the other DataChunk to this one. The column count and types of |
73 | //! the two DataChunks have to match exactly. Throws an exception if there |
74 | //! is not enough space in the chunk. |
75 | void Append(DataChunk &other); |
76 | //! Destroy all data and columns owned by this DataChunk |
77 | void Destroy(); |
78 | |
79 | //! Copies the data from this vector to another vector. |
80 | void Copy(DataChunk &other, idx_t offset = 0); |
81 | |
82 | //! Turn all the vectors from the chunk into flat vectors |
83 | void Normalify(); |
84 | |
85 | unique_ptr<VectorData[]> Orrify(); |
86 | |
87 | void Slice(const SelectionVector &sel_vector, idx_t count); |
88 | void Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0); |
89 | |
90 | //! Resets the DataChunk to its state right after the DataChunk::Initialize |
91 | //! function was called. This sets the count to 0, and resets each member |
92 | //! Vector to point back to the data owned by this DataChunk. |
93 | void Reset(); |
94 | |
95 | //! Serializes a DataChunk to a stand-alone binary blob |
96 | void Serialize(Serializer &serializer); |
97 | //! Deserializes a blob back into a DataChunk |
98 | void Deserialize(Deserializer &source); |
99 | |
100 | //! Hashes the DataChunk to the target vector |
101 | void Hash(Vector &result); |
102 | |
103 | //! Returns a list of types of the vectors of this data chunk |
104 | vector<TypeId> GetTypes(); |
105 | |
106 | //! Converts this DataChunk to a printable string representation |
107 | string ToString() const; |
108 | void Print(); |
109 | |
110 | DataChunk(const DataChunk &) = delete; |
111 | |
112 | //! Verify that the DataChunk is in a consistent, not corrupt state. DEBUG |
113 | //! FUNCTION ONLY! |
114 | void Verify(); |
115 | |
116 | private: |
117 | idx_t count; |
118 | }; |
119 | } // namespace duckdb |
120 | |