| 1 | #include "duckdb/common/types/string_heap.hpp" |
| 2 | |
| 3 | #include "duckdb/common/types/string_type.hpp" |
| 4 | #include "duckdb/common/exception.hpp" |
| 5 | #include "utf8proc_wrapper.hpp" |
| 6 | |
| 7 | #include <cstring> |
| 8 | |
| 9 | using namespace duckdb; |
| 10 | using namespace std; |
| 11 | |
| 12 | #define MINIMUM_HEAP_SIZE 4096 |
| 13 | |
| 14 | StringHeap::StringHeap() : tail(nullptr) { |
| 15 | } |
| 16 | |
| 17 | string_t StringHeap::AddString(const char *data, idx_t len) { |
| 18 | assert(Utf8Proc::Analyze(data, len) != UnicodeType::INVALID); |
| 19 | return AddBlob(data, len); |
| 20 | } |
| 21 | |
| 22 | string_t StringHeap::AddString(const char *data) { |
| 23 | return AddString(data, strlen(data)); |
| 24 | } |
| 25 | |
| 26 | string_t StringHeap::AddString(const string &data) { |
| 27 | return AddString(data.c_str(), data.size()); |
| 28 | } |
| 29 | |
| 30 | string_t StringHeap::AddString(const string_t &data) { |
| 31 | return AddString(data.GetData(), data.GetSize()); |
| 32 | } |
| 33 | |
| 34 | string_t StringHeap::AddBlob(const char *data, idx_t len) { |
| 35 | auto insert_string = EmptyString(len); |
| 36 | auto insert_pos = insert_string.GetData(); |
| 37 | memcpy(insert_pos, data, len); |
| 38 | insert_string.Finalize(); |
| 39 | return insert_string; |
| 40 | } |
| 41 | |
| 42 | string_t StringHeap::EmptyString(idx_t len) { |
| 43 | assert(len >= string_t::INLINE_LENGTH); |
| 44 | if (!chunk || chunk->current_position + len >= chunk->maximum_size) { |
| 45 | // have to make a new entry |
| 46 | auto new_chunk = make_unique<StringChunk>(std::max(len + 1, (idx_t)MINIMUM_HEAP_SIZE)); |
| 47 | new_chunk->prev = move(chunk); |
| 48 | chunk = move(new_chunk); |
| 49 | if (!tail) { |
| 50 | tail = chunk.get(); |
| 51 | } |
| 52 | } |
| 53 | auto insert_pos = chunk->data.get() + chunk->current_position; |
| 54 | chunk->current_position += len + 1; |
| 55 | return string_t(insert_pos, len); |
| 56 | } |
| 57 | |
| 58 | void StringHeap::MergeHeap(StringHeap &other) { |
| 59 | if (!other.tail) { |
| 60 | return; |
| 61 | } |
| 62 | other.tail->prev = move(chunk); |
| 63 | this->chunk = move(other.chunk); |
| 64 | if (!tail) { |
| 65 | tail = this->chunk.get(); |
| 66 | } |
| 67 | other.tail = nullptr; |
| 68 | } |
| 69 | |