1 | #include <cstring> // strlen() on Solaris |
2 | |
3 | #include "duckdb/common/types/vector.hpp" |
4 | |
5 | #include "duckdb/common/assert.hpp" |
6 | #include "duckdb/common/exception.hpp" |
7 | #include "duckdb/common/printer.hpp" |
8 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
9 | #include "duckdb/common/types/chunk_collection.hpp" |
10 | #include "duckdb/common/serializer.hpp" |
11 | #include "duckdb/common/types/null_value.hpp" |
12 | |
13 | using namespace std; |
14 | |
15 | namespace duckdb { |
16 | |
17 | Vector::Vector(TypeId type, bool create_data, bool zero_data) |
18 | : vector_type(VectorType::FLAT_VECTOR), type(type), data(nullptr) { |
19 | if (create_data) { |
20 | Initialize(type, zero_data); |
21 | } |
22 | } |
23 | |
24 | Vector::Vector(TypeId type) : Vector(type, true, false) { |
25 | } |
26 | |
27 | Vector::Vector(TypeId type, data_ptr_t dataptr) : vector_type(VectorType::FLAT_VECTOR), type(type), data(dataptr) { |
28 | if (dataptr && type == TypeId::INVALID) { |
29 | throw InvalidTypeException(type, "Cannot create a vector of type INVALID!" ); |
30 | } |
31 | } |
32 | |
33 | Vector::Vector(Value value) : vector_type(VectorType::CONSTANT_VECTOR) { |
34 | Reference(value); |
35 | } |
36 | |
37 | Vector::Vector() : vector_type(VectorType::FLAT_VECTOR), type(TypeId::INVALID), data(nullptr) { |
38 | } |
39 | |
40 | Vector::Vector(Vector &&other) noexcept |
41 | : vector_type(other.vector_type), type(other.type), data(other.data), nullmask(other.nullmask), |
42 | buffer(move(other.buffer)), auxiliary(move(other.auxiliary)) { |
43 | } |
44 | |
45 | void Vector::Reference(Value &value) { |
46 | vector_type = VectorType::CONSTANT_VECTOR; |
47 | type = value.type; |
48 | buffer = VectorBuffer::CreateConstantVector(type); |
49 | auxiliary.reset(); |
50 | data = buffer->GetData(); |
51 | SetValue(0, value); |
52 | } |
53 | |
54 | void Vector::Reference(Vector &other) { |
55 | vector_type = other.vector_type; |
56 | buffer = other.buffer; |
57 | auxiliary = other.auxiliary; |
58 | data = other.data; |
59 | type = other.type; |
60 | nullmask = other.nullmask; |
61 | } |
62 | |
63 | void Vector::Slice(Vector &other, idx_t offset) { |
64 | if (other.vector_type == VectorType::CONSTANT_VECTOR) { |
65 | Reference(other); |
66 | return; |
67 | } |
68 | assert(other.vector_type == VectorType::FLAT_VECTOR); |
69 | |
70 | // create a reference to the other vector |
71 | Reference(other); |
72 | if (offset > 0) { |
73 | data = data + GetTypeIdSize(type) * offset; |
74 | nullmask <<= offset; |
75 | } |
76 | } |
77 | |
78 | void Vector::Slice(Vector &other, const SelectionVector &sel, idx_t count) { |
79 | Reference(other); |
80 | Slice(sel, count); |
81 | } |
82 | |
83 | void Vector::Slice(const SelectionVector &sel, idx_t count) { |
84 | if (vector_type == VectorType::CONSTANT_VECTOR) { |
85 | // dictionary on a constant is just a constant |
86 | return; |
87 | } |
88 | if (vector_type == VectorType::DICTIONARY_VECTOR) { |
89 | // already a dictionary, slice the current dictionary |
90 | auto ¤t_sel = DictionaryVector::SelVector(*this); |
91 | auto sliced_dictionary = current_sel.Slice(sel, count); |
92 | buffer = make_unique<DictionaryBuffer>(move(sliced_dictionary)); |
93 | return; |
94 | } |
95 | auto child_ref = make_buffer<VectorChildBuffer>(); |
96 | child_ref->data.Reference(*this); |
97 | |
98 | auto dict_buffer = make_unique<DictionaryBuffer>(sel); |
99 | buffer = move(dict_buffer); |
100 | auxiliary = move(child_ref); |
101 | vector_type = VectorType::DICTIONARY_VECTOR; |
102 | } |
103 | |
104 | void Vector::Slice(const SelectionVector &sel, idx_t count, sel_cache_t &cache) { |
105 | if (vector_type == VectorType::DICTIONARY_VECTOR) { |
106 | // dictionary vector: need to merge dictionaries |
107 | // check if we have a cached entry |
108 | auto ¤t_sel = DictionaryVector::SelVector(*this); |
109 | auto target_data = current_sel.data(); |
110 | auto entry = cache.find(target_data); |
111 | if (entry != cache.end()) { |
112 | // cached entry exists: use that |
113 | this->buffer = entry->second; |
114 | } else { |
115 | Slice(sel, count); |
116 | cache[target_data] = this->buffer; |
117 | } |
118 | } else { |
119 | Slice(sel, count); |
120 | } |
121 | } |
122 | |
123 | void Vector::Initialize(TypeId new_type, bool zero_data) { |
124 | if (new_type != TypeId::INVALID) { |
125 | type = new_type; |
126 | } |
127 | vector_type = VectorType::FLAT_VECTOR; |
128 | buffer.reset(); |
129 | auxiliary.reset(); |
130 | nullmask.reset(); |
131 | if (GetTypeIdSize(type) > 0) { |
132 | buffer = VectorBuffer::CreateStandardVector(type); |
133 | data = buffer->GetData(); |
134 | if (zero_data) { |
135 | memset(data, 0, STANDARD_VECTOR_SIZE * GetTypeIdSize(type)); |
136 | } |
137 | } |
138 | } |
139 | |
// Store `val` at position `index` of this vector.
//
// Dictionary vectors forward the call to their child at the index the
// selection vector maps `index` to. For all other vectors the value is cast to
// this vector's type, the null mask bit for `index` is updated, and (for
// non-null values) the payload is written according to the type.
// Throws NotImplementedException for types without a case below.
void Vector::SetValue(idx_t index, Value val) {
    if (vector_type == VectorType::DICTIONARY_VECTOR) {
        // dictionary: apply dictionary and forward to child
        auto &sel_vector = DictionaryVector::SelVector(*this);
        auto &child = DictionaryVector::Child(*this);
        return child.SetValue(sel_vector.get_index(index), move(val));
    }
    Value newVal = val.CastAs(type);

    // the null bit is always written; a NULL value writes no payload
    nullmask[index] = newVal.is_null;
    if (newVal.is_null) {
        return;
    }
    switch (type) {
    case TypeId::BOOL:
        ((bool *)data)[index] = newVal.value_.boolean;
        break;
    case TypeId::INT8:
        ((int8_t *)data)[index] = newVal.value_.tinyint;
        break;
    case TypeId::INT16:
        ((int16_t *)data)[index] = newVal.value_.smallint;
        break;
    case TypeId::INT32:
        ((int32_t *)data)[index] = newVal.value_.integer;
        break;
    case TypeId::INT64:
        ((int64_t *)data)[index] = newVal.value_.bigint;
        break;
    case TypeId::FLOAT:
        ((float *)data)[index] = newVal.value_.float_;
        break;
    case TypeId::DOUBLE:
        ((double *)data)[index] = newVal.value_.double_;
        break;
    case TypeId::POINTER:
        ((uintptr_t *)data)[index] = newVal.value_.pointer;
        break;
    case TypeId::VARCHAR: {
        // the string is added through StringVector::AddBlob and the returned string_t is stored
        ((string_t *)data)[index] = StringVector::AddBlob(*this, newVal.str_value);
        break;
    }
    case TypeId::STRUCT: {
        // NOTE(review): this branch reads the fields from `val`, not from the cast `newVal`
        if (!auxiliary || StructVector::GetEntries(*this).size() == 0) {
            // no child vectors yet: create one per struct field, typed after the field's value
            for (size_t i = 0; i < val.struct_value.size(); i++) {
                auto &struct_child = val.struct_value[i];
                auto cv = make_unique<Vector>(struct_child.second.type);
                cv->vector_type = vector_type;
                StructVector::AddEntry(*this, struct_child.first, move(cv));
            }
        }

        auto &children = StructVector::GetEntries(*this);
        assert(children.size() == val.struct_value.size());

        // write every field into the child vector at the same position
        for (size_t i = 0; i < val.struct_value.size(); i++) {
            auto &struct_child = val.struct_value[i];
            assert(vector_type == VectorType::CONSTANT_VECTOR || vector_type == VectorType::FLAT_VECTOR);
            auto &vec_child = children[i];
            assert(vec_child.first == struct_child.first);
            vec_child.second->SetValue(index, struct_child.second);
        }
    } break;

    case TypeId::LIST: {
        // NOTE(review): this branch reads the elements from `val`, not from the cast `newVal`
        if (!auxiliary) {
            auto cc = make_unique<ChunkCollection>();
            ListVector::SetEntry(*this, move(cc));
        }
        auto &child_cc = ListVector::GetEntry(*this);
        // TODO optimization: in-place update if fits
        // the new elements are appended after the current contents of the child collection
        auto offset = child_cc.count;
        if (val.list_value.size() > 0) {
            idx_t append_idx = 0;
            // append the elements in chunks of at most STANDARD_VECTOR_SIZE values
            while (append_idx < val.list_value.size()) {
                idx_t this_append_len = min((idx_t)STANDARD_VECTOR_SIZE, val.list_value.size() - append_idx);

                DataChunk child_append_chunk;
                child_append_chunk.SetCardinality(this_append_len);
                vector<TypeId> types;
                // the chunk's single column is typed after the first list element
                types.push_back(val.list_value[0].type);
                child_append_chunk.Initialize(types);
                for (idx_t i = 0; i < this_append_len; i++) {
                    child_append_chunk.data[0].SetValue(i, val.list_value[i + append_idx]);
                }
                child_cc.Append(child_append_chunk);
                append_idx += this_append_len;
            }
        }
        // now set the pointer
        auto &entry = ((list_entry_t *)data)[index];
        entry.length = val.list_value.size();
        entry.offset = offset;
    } break;
    default:
        throw NotImplementedException("Unimplemented type for Vector::SetValue" );
    }
}
238 | |
239 | Value Vector::GetValue(idx_t index) const { |
240 | if (vector_type == VectorType::CONSTANT_VECTOR) { |
241 | index = 0; |
242 | } else if (vector_type == VectorType::DICTIONARY_VECTOR) { |
243 | // dictionary: apply dictionary and forward to child |
244 | auto &sel_vector = DictionaryVector::SelVector(*this); |
245 | auto &child = DictionaryVector::Child(*this); |
246 | return child.GetValue(sel_vector.get_index(index)); |
247 | } else { |
248 | assert(vector_type == VectorType::FLAT_VECTOR); |
249 | } |
250 | |
251 | if (nullmask[index]) { |
252 | return Value(type); |
253 | } |
254 | switch (type) { |
255 | case TypeId::BOOL: |
256 | return Value::BOOLEAN(((bool *)data)[index]); |
257 | case TypeId::INT8: |
258 | return Value::TINYINT(((int8_t *)data)[index]); |
259 | case TypeId::INT16: |
260 | return Value::SMALLINT(((int16_t *)data)[index]); |
261 | case TypeId::INT32: |
262 | return Value::INTEGER(((int32_t *)data)[index]); |
263 | case TypeId::INT64: |
264 | return Value::BIGINT(((int64_t *)data)[index]); |
265 | case TypeId::HASH: |
266 | return Value::HASH(((hash_t *)data)[index]); |
267 | case TypeId::POINTER: |
268 | return Value::POINTER(((uintptr_t *)data)[index]); |
269 | case TypeId::FLOAT: |
270 | return Value::FLOAT(((float *)data)[index]); |
271 | case TypeId::DOUBLE: |
272 | return Value::DOUBLE(((double *)data)[index]); |
273 | case TypeId::VARCHAR: { |
274 | auto str = ((string_t *)data)[index]; |
275 | // avoiding implicit cast and double conversion |
276 | return Value::BLOB(str.GetString(), false); |
277 | } |
278 | case TypeId::STRUCT: { |
279 | Value ret(TypeId::STRUCT); |
280 | ret.is_null = false; |
281 | // we can derive the value schema from the vector schema |
282 | for (auto &struct_child : StructVector::GetEntries(*this)) { |
283 | ret.struct_value.push_back(pair<string, Value>(struct_child.first, struct_child.second->GetValue(index))); |
284 | } |
285 | return ret; |
286 | } |
287 | case TypeId::LIST: { |
288 | Value ret(TypeId::LIST); |
289 | ret.is_null = false; |
290 | auto offlen = ((list_entry_t *)data)[index]; |
291 | auto &child_cc = ListVector::GetEntry(*this); |
292 | for (idx_t i = offlen.offset; i < offlen.offset + offlen.length; i++) { |
293 | ret.list_value.push_back(child_cc.GetValue(0, i)); |
294 | } |
295 | return ret; |
296 | } |
297 | default: |
298 | throw NotImplementedException("Unimplemented type for value access" ); |
299 | } |
300 | } |
301 | |
302 | string VectorTypeToString(VectorType type) { |
303 | switch (type) { |
304 | case VectorType::FLAT_VECTOR: |
305 | return "FLAT" ; |
306 | case VectorType::SEQUENCE_VECTOR: |
307 | return "SEQUENCE" ; |
308 | case VectorType::DICTIONARY_VECTOR: |
309 | return "DICTIONARY" ; |
310 | case VectorType::CONSTANT_VECTOR: |
311 | return "CONSTANT" ; |
312 | default: |
313 | return "UNKNOWN" ; |
314 | } |
315 | } |
316 | |
317 | string Vector::ToString(idx_t count) const { |
318 | string retval = VectorTypeToString(vector_type) + " " + TypeIdToString(type) + ": " + to_string(count) + " = [ " ; |
319 | switch (vector_type) { |
320 | case VectorType::FLAT_VECTOR: |
321 | case VectorType::DICTIONARY_VECTOR: |
322 | for (idx_t i = 0; i < count; i++) { |
323 | retval += GetValue(i).ToString() + (i == count - 1 ? "" : ", " ); |
324 | } |
325 | break; |
326 | case VectorType::CONSTANT_VECTOR: |
327 | retval += GetValue(0).ToString(); |
328 | break; |
329 | case VectorType::SEQUENCE_VECTOR: { |
330 | int64_t start, increment; |
331 | SequenceVector::GetSequence(*this, start, increment); |
332 | for (idx_t i = 0; i < count; i++) { |
333 | retval += to_string(start + increment * i) + (i == count - 1 ? "" : ", " ); |
334 | } |
335 | break; |
336 | } |
337 | default: |
338 | retval += "UNKNOWN VECTOR TYPE" ; |
339 | break; |
340 | } |
341 | retval += "]" ; |
342 | return retval; |
343 | } |
344 | |
345 | void Vector::Print(idx_t count) { |
346 | Printer::Print(ToString(count)); |
347 | } |
348 | |
349 | string Vector::ToString() const { |
350 | string retval = VectorTypeToString(vector_type) + " " + TypeIdToString(type) + ": (UNKNOWN COUNT) [ " ; |
351 | switch (vector_type) { |
352 | case VectorType::FLAT_VECTOR: |
353 | case VectorType::DICTIONARY_VECTOR: |
354 | break; |
355 | case VectorType::CONSTANT_VECTOR: |
356 | retval += GetValue(0).ToString(); |
357 | break; |
358 | case VectorType::SEQUENCE_VECTOR: { |
359 | break; |
360 | } |
361 | default: |
362 | retval += "UNKNOWN VECTOR TYPE" ; |
363 | break; |
364 | } |
365 | retval += "]" ; |
366 | return retval; |
367 | } |
368 | |
369 | void Vector::Print() { |
370 | Printer::Print(ToString()); |
371 | } |
372 | |
373 | template <class T> static void flatten_constant_vector_loop(data_ptr_t data, data_ptr_t old_data, idx_t count) { |
374 | auto constant = *((T *)old_data); |
375 | auto output = (T *)data; |
376 | for (idx_t i = 0; i < count; i++) { |
377 | output[i] = constant; |
378 | } |
379 | } |
380 | |
// Convert this vector into a flat vector holding `count` rows.
//  - flat: nothing to do
//  - dictionary: the selected rows are copied into a new flat vector, which is
//    then referenced
//  - constant: a standard buffer is allocated and the constant is repeated
//    `count` times (or the whole null mask is set for a constant NULL)
//  - sequence: the sequence is generated into a new standard buffer
// Throws NotImplementedException for unsupported vector types or, in the
// constant case, unsupported value types.
void Vector::Normalify(idx_t count) {
    switch (vector_type) {
    case VectorType::FLAT_VECTOR:
        // already a flat vector
        break;
    case VectorType::DICTIONARY_VECTOR: {
        // create a new flat vector of this type
        Vector other(type);
        // now copy the data of this vector to the other vector, removing the selection vector in the process
        VectorOperations::Copy(*this, other, count, 0, 0);
        // create a reference to the data in the other vector
        this->Reference(other);
        break;
    }
    case VectorType::CONSTANT_VECTOR: {
        vector_type = VectorType::FLAT_VECTOR;
        // allocate a new buffer for the vector
        // old_buffer holds on to the previous buffer for the rest of this scope;
        // presumably old_data points into it, so it must outlive the copy loops below
        auto old_buffer = move(buffer);
        auto old_data = data;
        buffer = VectorBuffer::CreateStandardVector(type);
        data = buffer->GetData();
        if (nullmask[0]) {
            // constant NULL, set nullmask
            nullmask.set();
            return;
        }
        // non-null constant: have to repeat the constant
        switch (type) {
        case TypeId::BOOL:
        case TypeId::INT8:
            // BOOL shares the int8_t loop
            flatten_constant_vector_loop<int8_t>(data, old_data, count);
            break;
        case TypeId::INT16:
            flatten_constant_vector_loop<int16_t>(data, old_data, count);
            break;
        case TypeId::INT32:
            flatten_constant_vector_loop<int32_t>(data, old_data, count);
            break;
        case TypeId::INT64:
            flatten_constant_vector_loop<int64_t>(data, old_data, count);
            break;
        case TypeId::FLOAT:
            flatten_constant_vector_loop<float>(data, old_data, count);
            break;
        case TypeId::DOUBLE:
            flatten_constant_vector_loop<double>(data, old_data, count);
            break;
        case TypeId::HASH:
            flatten_constant_vector_loop<hash_t>(data, old_data, count);
            break;
        case TypeId::POINTER:
            flatten_constant_vector_loop<uintptr_t>(data, old_data, count);
            break;
        case TypeId::VARCHAR:
            // only the string_t entries are repeated; the auxiliary buffer is left as is
            flatten_constant_vector_loop<string_t>(data, old_data, count);
            break;
        case TypeId::LIST: {
            // every row gets the same offset/length entry
            flatten_constant_vector_loop<list_entry_t>(data, old_data, count);
            break;
        }
        case TypeId::STRUCT: {
            // flatten each child vector in turn
            for (auto &child : StructVector::GetEntries(*this)) {
                assert(child.second->vector_type == VectorType::CONSTANT_VECTOR);
                child.second->Normalify(count);
            }
        } break;
        default:
            throw NotImplementedException("Unimplemented type for VectorOperations::Normalify" );
        }
        break;
    }
    case VectorType::SEQUENCE_VECTOR: {
        // read start/increment before the buffer is replaced below
        int64_t start, increment;
        SequenceVector::GetSequence(*this, start, increment);

        vector_type = VectorType::FLAT_VECTOR;
        buffer = VectorBuffer::CreateStandardVector(type);
        data = buffer->GetData();
        VectorOperations::GenerateSequence(*this, count, start, increment);
        break;
    }
    default:
        throw NotImplementedException("FIXME: unimplemented type for normalify" );
    }
}
466 | |
467 | void Vector::Normalify(const SelectionVector &sel, idx_t count) { |
468 | switch (vector_type) { |
469 | case VectorType::FLAT_VECTOR: |
470 | // already a flat vector |
471 | break; |
472 | case VectorType::SEQUENCE_VECTOR: { |
473 | int64_t start, increment; |
474 | SequenceVector::GetSequence(*this, start, increment); |
475 | |
476 | vector_type = VectorType::FLAT_VECTOR; |
477 | buffer = VectorBuffer::CreateStandardVector(type); |
478 | data = buffer->GetData(); |
479 | VectorOperations::GenerateSequence(*this, count, sel, start, increment); |
480 | break; |
481 | } |
482 | default: |
483 | throw NotImplementedException("Unimplemented type for normalify with selection vector" ); |
484 | } |
485 | } |
486 | |
487 | void Vector::Orrify(idx_t count, VectorData &data) { |
488 | switch (vector_type) { |
489 | case VectorType::DICTIONARY_VECTOR: { |
490 | auto &sel = DictionaryVector::SelVector(*this); |
491 | auto &child = DictionaryVector::Child(*this); |
492 | if (child.vector_type == VectorType::FLAT_VECTOR) { |
493 | data.sel = &sel; |
494 | data.data = FlatVector::GetData(child); |
495 | data.nullmask = &FlatVector::Nullmask(child); |
496 | } else { |
497 | // dictionary with non-flat child: create a new reference to the child and normalify it |
498 | auto new_aux = make_unique<VectorChildBuffer>(); |
499 | new_aux->data.Reference(child); |
500 | new_aux->data.Normalify(sel, count); |
501 | |
502 | data.sel = &sel; |
503 | data.data = FlatVector::GetData(new_aux->data); |
504 | data.nullmask = &FlatVector::Nullmask(new_aux->data); |
505 | this->auxiliary = move(new_aux); |
506 | } |
507 | break; |
508 | } |
509 | case VectorType::CONSTANT_VECTOR: |
510 | data.sel = &ConstantVector::ZeroSelectionVector; |
511 | data.data = ConstantVector::GetData(*this); |
512 | data.nullmask = &nullmask; |
513 | break; |
514 | default: |
515 | Normalify(count); |
516 | data.sel = &FlatVector::IncrementalSelectionVector; |
517 | data.data = FlatVector::GetData(*this); |
518 | data.nullmask = &nullmask; |
519 | break; |
520 | } |
521 | } |
522 | |
523 | void Vector::Sequence(int64_t start, int64_t increment) { |
524 | vector_type = VectorType::SEQUENCE_VECTOR; |
525 | this->buffer = make_buffer<VectorBuffer>(sizeof(int64_t) * 2); |
526 | auto data = (int64_t *)buffer->GetData(); |
527 | data[0] = start; |
528 | data[1] = increment; |
529 | nullmask.reset(); |
530 | auxiliary.reset(); |
531 | } |
532 | |
533 | void Vector::Serialize(idx_t count, Serializer &serializer) { |
534 | if (TypeIsConstantSize(type)) { |
535 | // constant size type: simple copy |
536 | idx_t write_size = GetTypeIdSize(type) * count; |
537 | auto ptr = unique_ptr<data_t[]>(new data_t[write_size]); |
538 | VectorOperations::WriteToStorage(*this, count, ptr.get()); |
539 | serializer.WriteData(ptr.get(), write_size); |
540 | } else { |
541 | VectorData vdata; |
542 | Orrify(count, vdata); |
543 | |
544 | switch (type) { |
545 | case TypeId::VARCHAR: { |
546 | auto strings = (string_t *)vdata.data; |
547 | for (idx_t i = 0; i < count; i++) { |
548 | auto idx = vdata.sel->get_index(i); |
549 | auto source = (*vdata.nullmask)[idx] ? NullValue<const char *>() : strings[idx].GetData(); |
550 | serializer.WriteString(source); |
551 | } |
552 | break; |
553 | } |
554 | default: |
555 | throw NotImplementedException("Unimplemented type for Vector::Serialize!" ); |
556 | } |
557 | } |
558 | } |
559 | |
560 | void Vector::Deserialize(idx_t count, Deserializer &source) { |
561 | if (TypeIsConstantSize(type)) { |
562 | // constant size type: read fixed amount of data from |
563 | auto column_size = GetTypeIdSize(type) * count; |
564 | auto ptr = unique_ptr<data_t[]>(new data_t[column_size]); |
565 | source.ReadData(ptr.get(), column_size); |
566 | |
567 | VectorOperations::ReadFromStorage(ptr.get(), count, *this); |
568 | } else { |
569 | auto strings = FlatVector::GetData<string_t>(*this); |
570 | auto &nullmask = FlatVector::Nullmask(*this); |
571 | for (idx_t i = 0; i < count; i++) { |
572 | // read the strings |
573 | auto str = source.Read<string>(); |
574 | // now add the string to the StringHeap of the vector |
575 | // and write the pointer into the vector |
576 | if (IsNullValue<const char *>((const char *)str.c_str())) { |
577 | nullmask[i] = true; |
578 | } else { |
579 | strings[i] = StringVector::AddString(*this, str); |
580 | } |
581 | } |
582 | } |
583 | } |
584 | |
585 | void Vector::UTFVerify(const SelectionVector &sel, idx_t count) { |
586 | #ifdef DEBUG |
587 | if (count == 0) { |
588 | return; |
589 | } |
590 | if (type == TypeId::VARCHAR) { |
591 | // we just touch all the strings and let the sanitizer figure out if any |
592 | // of them are deallocated/corrupt |
593 | switch (vector_type) { |
594 | case VectorType::CONSTANT_VECTOR: { |
595 | auto string = ConstantVector::GetData<string_t>(*this); |
596 | if (!ConstantVector::IsNull(*this)) { |
597 | string->Verify(); |
598 | } |
599 | break; |
600 | } |
601 | case VectorType::FLAT_VECTOR: { |
602 | auto strings = FlatVector::GetData<string_t>(*this); |
603 | for (idx_t i = 0; i < count; i++) { |
604 | auto oidx = sel.get_index(i); |
605 | if (!nullmask[oidx]) { |
606 | strings[oidx].Verify(); |
607 | } |
608 | } |
609 | break; |
610 | } |
611 | default: |
612 | break; |
613 | } |
614 | } |
615 | #endif |
616 | } |
617 | |
618 | void Vector::UTFVerify(idx_t count) { |
619 | UTFVerify(FlatVector::IncrementalSelectionVector, count); |
620 | } |
621 | |
// Debug-only consistency checks for the rows selected by `sel`.
//  - dictionary vectors: every resolved index must be below
//    STANDARD_VECTOR_SIZE; the child is then verified with the merged selection
//  - DOUBLE: non-null values must pass Value::DoubleIsValid
//  - STRUCT: there must be at least one child, and every child is verified
//  - LIST: the child collection is verified and every non-null entry must lie
//    within it
// Compiles to a no-op without DEBUG.
void Vector::Verify(const SelectionVector &sel, idx_t count) {
#ifdef DEBUG
    if (count == 0) {
        return;
    }
    if (vector_type == VectorType::DICTIONARY_VECTOR) {
        auto &child = DictionaryVector::Child(*this);
        auto &dict_sel = DictionaryVector::SelVector(*this);
        // resolve every selected row through the dictionary and bounds-check the result
        for (idx_t i = 0; i < count; i++) {
            auto oidx = sel.get_index(i);
            auto idx = dict_sel.get_index(oidx);
            assert(idx < STANDARD_VECTOR_SIZE);
        }
        // merge the selection vectors and verify the child
        auto new_buffer = dict_sel.Slice(sel, count);
        SelectionVector new_sel(new_buffer);
        child.Verify(new_sel, count);
        return;
    }
    if (type == TypeId::DOUBLE) {
        // verify that there are no INF or NAN values
        switch (vector_type) {
        case VectorType::CONSTANT_VECTOR: {
            auto dbl = ConstantVector::GetData<double>(*this);
            if (!ConstantVector::IsNull(*this)) {
                assert(Value::DoubleIsValid(*dbl));
            }
            break;
        }
        case VectorType::FLAT_VECTOR: {
            auto doubles = FlatVector::GetData<double>(*this);
            for (idx_t i = 0; i < count; i++) {
                auto oidx = sel.get_index(i);
                // NULL rows are skipped
                if (!nullmask[oidx]) {
                    assert(Value::DoubleIsValid(doubles[oidx]));
                }
            }
            break;
        }
        default:
            break;
        }
    }

    if (type == TypeId::STRUCT) {
        if (vector_type == VectorType::FLAT_VECTOR || vector_type == VectorType::CONSTANT_VECTOR) {
            auto &children = StructVector::GetEntries(*this);
            assert(children.size() > 0);
            // every child is checked with the same selection
            for (auto &child : children) {
                child.second->Verify(sel, count);
            }
        }
    }

    if (type == TypeId::LIST) {
        if (vector_type == VectorType::CONSTANT_VECTOR) {
            if (!ConstantVector::IsNull(*this)) {
                ListVector::GetEntry(*this).Verify();
                auto le = ConstantVector::GetData<list_entry_t>(*this);
                // the entry must not reach past the end of the child collection
                assert(le->offset + le->length <= ListVector::GetEntry(*this).count);
            }
        } else if (vector_type == VectorType::FLAT_VECTOR) {
            // a flat list vector may not have a child collection yet
            if (ListVector::HasEntry(*this)) {
                ListVector::GetEntry(*this).Verify();
            }
            auto list_data = FlatVector::GetData<list_entry_t>(*this);
            for (idx_t i = 0; i < count; i++) {
                auto idx = sel.get_index(i);
                auto &le = list_data[idx];
                if (!nullmask[idx]) {
                    assert(le.offset + le.length <= ListVector::GetEntry(*this).count);
                }
            }
        }
    }
// TODO verify list and struct
#endif
}
700 | |
701 | void Vector::Verify(idx_t count) { |
702 | Verify(FlatVector::IncrementalSelectionVector, count); |
703 | } |
704 | |
705 | string_t StringVector::AddString(Vector &vector, const char *data, idx_t len) { |
706 | return StringVector::AddString(vector, string_t(data, len)); |
707 | } |
708 | |
709 | string_t StringVector::AddString(Vector &vector, const char *data) { |
710 | return StringVector::AddString(vector, string_t(data, strlen(data))); |
711 | } |
712 | |
713 | string_t StringVector::AddString(Vector &vector, const string &data) { |
714 | return StringVector::AddString(vector, string_t(data.c_str(), data.size())); |
715 | } |
716 | |
717 | string_t StringVector::AddString(Vector &vector, string_t data) { |
718 | assert(vector.type == TypeId::VARCHAR); |
719 | if (data.IsInlined()) { |
720 | // string will be inlined: no need to store in string heap |
721 | return data; |
722 | } |
723 | if (!vector.auxiliary) { |
724 | vector.auxiliary = make_buffer<VectorStringBuffer>(); |
725 | } |
726 | assert(vector.auxiliary->type == VectorBufferType::STRING_BUFFER); |
727 | auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; |
728 | return string_buffer.AddString(data); |
729 | } |
730 | |
731 | string_t StringVector::AddBlob(Vector &vector, string_t data) { |
732 | assert(vector.type == TypeId::VARCHAR); |
733 | if (data.IsInlined()) { |
734 | // string will be inlined: no need to store in string heap |
735 | return data; |
736 | } |
737 | if (!vector.auxiliary) { |
738 | vector.auxiliary = make_buffer<VectorStringBuffer>(); |
739 | } |
740 | assert(vector.auxiliary->type == VectorBufferType::STRING_BUFFER); |
741 | auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; |
742 | return string_buffer.AddBlob(data); |
743 | } |
744 | |
745 | string_t StringVector::EmptyString(Vector &vector, idx_t len) { |
746 | assert(vector.type == TypeId::VARCHAR); |
747 | if (len < string_t::INLINE_LENGTH) { |
748 | return string_t(len); |
749 | } |
750 | if (!vector.auxiliary) { |
751 | vector.auxiliary = make_buffer<VectorStringBuffer>(); |
752 | } |
753 | assert(vector.auxiliary->type == VectorBufferType::STRING_BUFFER); |
754 | auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; |
755 | return string_buffer.EmptyString(len); |
756 | } |
757 | |
758 | void StringVector::AddHeapReference(Vector &vector, Vector &other) { |
759 | assert(vector.type == TypeId::VARCHAR); |
760 | assert(other.type == TypeId::VARCHAR); |
761 | |
762 | if (other.vector_type == VectorType::DICTIONARY_VECTOR) { |
763 | StringVector::AddHeapReference(vector, DictionaryVector::Child(other)); |
764 | return; |
765 | } |
766 | if (!other.auxiliary) { |
767 | return; |
768 | } |
769 | if (!vector.auxiliary) { |
770 | vector.auxiliary = make_buffer<VectorStringBuffer>(); |
771 | } |
772 | assert(vector.auxiliary->type == VectorBufferType::STRING_BUFFER); |
773 | assert(other.auxiliary->type == VectorBufferType::STRING_BUFFER); |
774 | auto &string_buffer = (VectorStringBuffer &)*vector.auxiliary; |
775 | string_buffer.AddHeapReference(other.auxiliary); |
776 | } |
777 | |
778 | bool StructVector::HasEntries(const Vector &vector) { |
779 | assert(vector.type == TypeId::STRUCT); |
780 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
781 | assert(vector.auxiliary == nullptr || vector.auxiliary->type == VectorBufferType::STRUCT_BUFFER); |
782 | return vector.auxiliary != nullptr; |
783 | } |
784 | |
785 | child_list_t<unique_ptr<Vector>> &StructVector::GetEntries(const Vector &vector) { |
786 | assert(vector.type == TypeId::STRUCT); |
787 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
788 | assert(vector.auxiliary); |
789 | assert(vector.auxiliary->type == VectorBufferType::STRUCT_BUFFER); |
790 | return ((VectorStructBuffer *)vector.auxiliary.get())->GetChildren(); |
791 | } |
792 | |
793 | void StructVector::AddEntry(Vector &vector, string name, unique_ptr<Vector> entry) { |
794 | // TODO asser that an entry with this name does not already exist |
795 | assert(vector.type == TypeId::STRUCT); |
796 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
797 | if (!vector.auxiliary) { |
798 | vector.auxiliary = make_buffer<VectorStructBuffer>(); |
799 | } |
800 | assert(vector.auxiliary); |
801 | assert(vector.auxiliary->type == VectorBufferType::STRUCT_BUFFER); |
802 | ((VectorStructBuffer *)vector.auxiliary.get())->AddChild(name, move(entry)); |
803 | } |
804 | |
805 | bool ListVector::HasEntry(const Vector &vector) { |
806 | assert(vector.type == TypeId::LIST); |
807 | if (vector.vector_type == VectorType::DICTIONARY_VECTOR) { |
808 | auto &child = DictionaryVector::Child(vector); |
809 | return ListVector::HasEntry(child); |
810 | } |
811 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
812 | return vector.auxiliary != nullptr; |
813 | } |
814 | |
815 | ChunkCollection &ListVector::GetEntry(const Vector &vector) { |
816 | assert(vector.type == TypeId::LIST); |
817 | if (vector.vector_type == VectorType::DICTIONARY_VECTOR) { |
818 | auto &child = DictionaryVector::Child(vector); |
819 | return ListVector::GetEntry(child); |
820 | } |
821 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
822 | assert(vector.auxiliary); |
823 | assert(vector.auxiliary->type == VectorBufferType::LIST_BUFFER); |
824 | return ((VectorListBuffer *)vector.auxiliary.get())->GetChild(); |
825 | } |
826 | |
827 | void ListVector::SetEntry(Vector &vector, unique_ptr<ChunkCollection> cc) { |
828 | assert(vector.type == TypeId::LIST); |
829 | assert(vector.vector_type == VectorType::FLAT_VECTOR || vector.vector_type == VectorType::CONSTANT_VECTOR); |
830 | if (!vector.auxiliary) { |
831 | vector.auxiliary = make_buffer<VectorListBuffer>(); |
832 | } |
833 | assert(vector.auxiliary); |
834 | assert(vector.auxiliary->type == VectorBufferType::LIST_BUFFER); |
835 | ((VectorListBuffer *)vector.auxiliary.get())->SetChild(move(cc)); |
836 | } |
837 | |
838 | } // namespace duckdb |
839 | |