1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/common/types/vector.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/bitset.hpp" |
12 | #include "duckdb/common/common.hpp" |
13 | #include "duckdb/common/types/selection_vector.hpp" |
14 | #include "duckdb/common/types/value.hpp" |
15 | #include "duckdb/common/enums/vector_type.hpp" |
16 | #include "duckdb/common/types/vector_buffer.hpp" |
17 | |
18 | namespace duckdb { |
19 | //! Type used for nullmasks |
20 | typedef bitset<STANDARD_VECTOR_SIZE> nullmask_t; |
21 | |
22 | //! Zero NULL mask: filled with the value 0 [READ ONLY] |
23 | extern nullmask_t ZERO_MASK; |
24 | |
//! Plain bundle of three pointers (selection vector, data, nullmask).
//! Vector::Orrify takes a VectorData by reference as its output argument.
struct VectorData {
    //! Selection vector through which the data is accessed (not modified through this pointer)
    const SelectionVector *sel;
    //! Pointer to the data
    data_ptr_t data;
    //! Pointer to the nullmask
    nullmask_t *nullmask;
};
30 | |
// Forward declarations of classes that are only named (not defined) in this part of the header
class VectorStructBuffer;
class VectorListBuffer;
class ChunkCollection;
34 | |
//! Vector of values of a specified TypeId.
//! The member functions are only declared here; their definitions live outside this header.
//! The protected members are accessed through the befriended accessor structs
//! (ConstantVector, DictionaryVector, FlatVector, ...).
class Vector {
    // accessor structs that are granted direct access to the protected members
    friend struct ConstantVector;
    friend struct DictionaryVector;
    friend struct FlatVector;
    friend struct ListVector;
    friend struct StringVector;
    friend struct StructVector;
    friend struct SequenceVector;

    friend class DataChunk;

public:
    //! Default constructor (defined out of line)
    Vector();
    //! Create a vector of size one holding the passed on value
    Vector(Value value);
    //! Create an empty standard vector with a type, equivalent to calling Vector(type, true, false)
    Vector(TypeId type);
    //! Create a non-owning vector that references the specified data
    Vector(TypeId type, data_ptr_t dataptr);
    //! Create an owning vector that holds at most STANDARD_VECTOR_SIZE entries.
    /*!
        Create a new vector
        If create_data is true, the vector will be an owning empty vector.
        If zero_data is true, the allocated data will be zero-initialized.
    */
    Vector(TypeId type, bool create_data, bool zero_data);
    // implicit copying of Vectors is not allowed
    Vector(const Vector &) = delete;
    // but moving of vectors is allowed
    Vector(Vector &&other) noexcept;

    //! The vector type specifies how the data of the vector is physically stored (i.e. if it is a single repeated
    //! constant, if it is compressed)
    VectorType vector_type;
    //! The type of the elements stored in the vector (e.g. integer, float)
    TypeId type;

public:
    //! Create a vector that references the specified value.
    void Reference(Value &value);
    //! Causes this vector to reference the data held by the other vector.
    void Reference(Vector &other);

    //! Creates a reference to a slice of the other vector, starting at the given offset
    void Slice(Vector &other, idx_t offset);
    //! Creates a reference to a slice of the other vector, described by a selection vector and a count
    void Slice(Vector &other, const SelectionVector &sel, idx_t count);
    //! Turns the vector into a dictionary vector with the specified dictionary
    void Slice(const SelectionVector &sel, idx_t count);
    //! Slice the vector, keeping the result around in a cache or potentially using the cache instead of slicing
    void Slice(const SelectionVector &sel, idx_t count, sel_cache_t &cache);

    //! Creates the data of this vector with the specified type. Any data that
    //! is currently in the vector is destroyed.
    //! Both arguments are optional: new_type defaults to TypeId::INVALID and zero_data to false.
    void Initialize(TypeId new_type = TypeId::INVALID, bool zero_data = false);

    //! Converts this Vector to a printable string representation
    string ToString(idx_t count) const;
    //! NOTE(review): presumably prints the result of ToString(count) - confirm in the implementation
    void Print(idx_t count);

    //! Overload of ToString that takes no count
    string ToString() const;
    //! Overload of Print that takes no count
    void Print();

    //! Flatten the vector, removing any compression and turning it into a FLAT_VECTOR
    void Normalify(idx_t count);
    //! Overload of Normalify that additionally takes a selection vector
    void Normalify(const SelectionVector &sel, idx_t count);
    //! Obtains a selection vector and data pointer through which the data of this vector can be accessed
    //! The result is written into the VectorData passed by reference
    void Orrify(idx_t count, VectorData &data);

    //! Turn the vector into a sequence vector
    //! (SequenceVector::GetSequence reads start and increment back from the main buffer)
    void Sequence(int64_t start, int64_t increment);

    //! Verify that the Vector is in a consistent, not corrupt state. DEBUG
    //! FUNCTION ONLY!
    void Verify(idx_t count);
    void Verify(const SelectionVector &sel, idx_t count);
    //! NOTE(review): presumably the UTF-8 counterpart of Verify for string data - confirm in the implementation
    void UTFVerify(idx_t count);
    void UTFVerify(const SelectionVector &sel, idx_t count);

    //! Returns the [index] element of the Vector as a Value.
    Value GetValue(idx_t index) const;
    //! Sets the [index] element of the Vector to the specified Value.
    void SetValue(idx_t index, Value val);

    //! Serializes a Vector to a stand-alone binary blob
    void Serialize(idx_t count, Serializer &serializer);
    //! Deserializes a blob back into a Vector
    void Deserialize(idx_t count, Deserializer &source);

protected:
    //! A pointer to the data.
    data_ptr_t data;
    //! The nullmask of the vector
    nullmask_t nullmask;
    //! The main buffer holding the data of the vector
    buffer_ptr<VectorBuffer> buffer;
    //! The buffer holding auxiliary data of the vector
    //! e.g. a string vector uses this to store strings
    buffer_ptr<VectorBuffer> auxiliary;
};
136 | |
137 | //! The DictionaryBuffer holds a selection vector |
138 | class VectorChildBuffer : public VectorBuffer { |
139 | public: |
140 | VectorChildBuffer() : VectorBuffer(VectorBufferType::VECTOR_CHILD_BUFFER), data() { |
141 | } |
142 | |
143 | public: |
144 | Vector data; |
145 | }; |
146 | |
147 | struct ConstantVector { |
148 | static inline data_ptr_t GetData(Vector &vector) { |
149 | assert(vector.vector_type == VectorType::CONSTANT_VECTOR || vector.vector_type == VectorType::FLAT_VECTOR); |
150 | return vector.data; |
151 | } |
152 | template <class T> static inline T *GetData(Vector &vector) { |
153 | return (T *)ConstantVector::GetData(vector); |
154 | } |
155 | static inline bool IsNull(const Vector &vector) { |
156 | assert(vector.vector_type == VectorType::CONSTANT_VECTOR); |
157 | return vector.nullmask[0]; |
158 | } |
159 | static inline void SetNull(Vector &vector, bool is_null) { |
160 | assert(vector.vector_type == VectorType::CONSTANT_VECTOR); |
161 | vector.nullmask[0] = is_null; |
162 | } |
163 | static inline nullmask_t &Nullmask(Vector &vector) { |
164 | assert(vector.vector_type == VectorType::CONSTANT_VECTOR); |
165 | return vector.nullmask; |
166 | } |
167 | |
168 | static const sel_t zero_vector[STANDARD_VECTOR_SIZE]; |
169 | static const SelectionVector ZeroSelectionVector; |
170 | }; |
171 | |
172 | struct DictionaryVector { |
173 | static inline SelectionVector &SelVector(const Vector &vector) { |
174 | assert(vector.vector_type == VectorType::DICTIONARY_VECTOR); |
175 | return ((DictionaryBuffer &)*vector.buffer).GetSelVector(); |
176 | } |
177 | static inline Vector &Child(const Vector &vector) { |
178 | assert(vector.vector_type == VectorType::DICTIONARY_VECTOR); |
179 | return ((VectorChildBuffer &)*vector.auxiliary).data; |
180 | } |
181 | }; |
182 | |
183 | struct FlatVector { |
184 | static inline data_ptr_t GetData(Vector &vector) { |
185 | return ConstantVector::GetData(vector); |
186 | } |
187 | template <class T> static inline T *GetData(Vector &vector) { |
188 | return ConstantVector::GetData<T>(vector); |
189 | } |
190 | static inline void SetData(Vector &vector, data_ptr_t data) { |
191 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
192 | vector.data = data; |
193 | } |
194 | template <class T> static inline T GetValue(Vector &vector, idx_t idx) { |
195 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
196 | return FlatVector::GetData<T>(vector)[idx]; |
197 | } |
198 | static inline nullmask_t &Nullmask(Vector &vector) { |
199 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
200 | return vector.nullmask; |
201 | } |
202 | static inline void SetNullmask(Vector &vector, nullmask_t new_mask) { |
203 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
204 | vector.nullmask = move(new_mask); |
205 | } |
206 | static inline void SetNull(Vector &vector, idx_t idx, bool value) { |
207 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
208 | vector.nullmask[idx] = value; |
209 | } |
210 | static inline bool IsNull(const Vector &vector, idx_t idx) { |
211 | assert(vector.vector_type == VectorType::FLAT_VECTOR); |
212 | return vector.nullmask[idx]; |
213 | } |
214 | |
215 | static const sel_t incremental_vector[STANDARD_VECTOR_SIZE]; |
216 | static const SelectionVector IncrementalSelectionVector; |
217 | }; |
218 | |
//! Static accessors for the ChunkCollection entry of a vector.
//! Only declared here; the definitions live outside this header.
struct ListVector {
    //! Returns a reference to the ChunkCollection entry of the vector
    static ChunkCollection &GetEntry(const Vector &vector);
    //! Returns whether the vector has an entry
    //! NOTE(review): presumably has to hold before GetEntry may be called - confirm in the implementation
    static bool HasEntry(const Vector &vector);
    //! Sets the entry of the vector; the entry is passed by value as a unique_ptr
    static void SetEntry(Vector &vector, unique_ptr<ChunkCollection> entry);
};
224 | |
//! Static helpers for adding strings and blobs to the string heap (auxiliary data) of a vector.
//! Only declared here; the definitions live outside this header.
struct StringVector {
    //! Add a string to the string heap of the vector (auxiliary data), given a pointer and a length
    static string_t AddString(Vector &vector, const char *data, idx_t len);
    //! Add a string to the string heap of the vector (auxiliary data)
    //! NOTE(review): no length is passed, so data is presumably NUL-terminated - confirm
    static string_t AddString(Vector &vector, const char *data);
    //! Add a string to the string heap of the vector (auxiliary data), given a string_t
    static string_t AddString(Vector &vector, string_t data);
    //! Add a string to the string heap of the vector (auxiliary data), given a string
    static string_t AddString(Vector &vector, const string &data);
    //! Add a blob to the string heap of the vector (auxiliary data)
    static string_t AddBlob(Vector &vector, string_t data);
    //! Allocates an empty string of the specified size, and returns a writable pointer that can be used to store the
    //! result of an operation
    static string_t EmptyString(Vector &vector, idx_t len);

    //! Add a reference from this vector to the string heap of the provided vector
    static void AddHeapReference(Vector &vector, Vector &other);
};
243 | |
//! Static accessors for the named child vectors (entries) of a vector.
//! Only declared here; the definitions live outside this header.
struct StructVector {
    //! Returns whether the vector has entries
    static bool HasEntries(const Vector &vector);
    //! Returns a reference to the list of child vectors of the vector
    static child_list_t<unique_ptr<Vector>> &GetEntries(const Vector &vector);
    //! Adds a child vector under the given name; the entry is passed by value as a unique_ptr
    static void AddEntry(Vector &vector, string name, unique_ptr<Vector> entry);
};
249 | |
250 | struct SequenceVector { |
251 | static void GetSequence(const Vector &vector, int64_t &start, int64_t &increment) { |
252 | assert(vector.vector_type == VectorType::SEQUENCE_VECTOR); |
253 | auto data = (int64_t *)vector.buffer->GetData(); |
254 | start = data[0]; |
255 | increment = data[1]; |
256 | } |
257 | }; |
258 | |
259 | class StandaloneVector : public Vector { |
260 | public: |
261 | StandaloneVector() : Vector() { |
262 | } |
263 | StandaloneVector(TypeId type) : Vector(type) { |
264 | } |
265 | StandaloneVector(TypeId type, data_ptr_t dataptr) : Vector(type, dataptr) { |
266 | } |
267 | |
268 | public: |
269 | idx_t size() { |
270 | return count; |
271 | } |
272 | void SetCount(idx_t count) { |
273 | assert(count <= STANDARD_VECTOR_SIZE); |
274 | this->count = count; |
275 | } |
276 | |
277 | protected: |
278 | idx_t count; |
279 | }; |
280 | |
281 | } // namespace duckdb |
282 | |