| 1 | #include "duckdb/storage/single_file_block_manager.hpp" | 
|---|
| 2 | #include "duckdb/storage/meta_block_writer.hpp" | 
|---|
| 3 | #include "duckdb/storage/meta_block_reader.hpp" | 
|---|
| 4 | #include "duckdb/common/exception.hpp" | 
|---|
| 5 |  | 
|---|
| 6 | using namespace duckdb; | 
|---|
| 7 | using namespace std; | 
|---|
| 8 |  | 
|---|
| 9 | SingleFileBlockManager::SingleFileBlockManager(FileSystem &fs, string path, bool read_only, bool create_new, | 
|---|
| 10 | bool use_direct_io) | 
|---|
| 11 | : path(path), header_buffer(FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), read_only(read_only), | 
|---|
| 12 | use_direct_io(use_direct_io) { | 
|---|
| 13 |  | 
|---|
| 14 | uint8_t flags; | 
|---|
| 15 | FileLockType lock; | 
|---|
| 16 | if (read_only) { | 
|---|
| 17 | assert(!create_new); | 
|---|
| 18 | flags = FileFlags::READ; | 
|---|
| 19 | lock = FileLockType::READ_LOCK; | 
|---|
| 20 | } else { | 
|---|
| 21 | flags = FileFlags::WRITE; | 
|---|
| 22 | lock = FileLockType::WRITE_LOCK; | 
|---|
| 23 | if (create_new) { | 
|---|
| 24 | flags |= FileFlags::CREATE; | 
|---|
| 25 | } | 
|---|
| 26 | } | 
|---|
| 27 | if (use_direct_io) { | 
|---|
| 28 | flags |= FileFlags::DIRECT_IO; | 
|---|
| 29 | } | 
|---|
| 30 | // open the RDBMS handle | 
|---|
| 31 | handle = fs.OpenFile(path, flags, lock); | 
|---|
| 32 | if (create_new) { | 
|---|
| 33 | // if we create a new file, we fill the metadata of the file | 
|---|
| 34 | // first fill in the new header | 
|---|
| 35 | header_buffer.Clear(); | 
|---|
| 36 | MainHeader *main_header = (MainHeader *)header_buffer.buffer; | 
|---|
| 37 | main_header->version_number = VERSION_NUMBER; | 
|---|
| 38 | // now write the header to the file | 
|---|
| 39 | header_buffer.Write(*handle, 0); | 
|---|
| 40 | header_buffer.Clear(); | 
|---|
| 41 |  | 
|---|
| 42 | // write the database headers | 
|---|
| 43 | // initialize meta_block and free_list to INVALID_BLOCK because the database file does not contain any actual | 
|---|
| 44 | // content yet | 
|---|
| 45 | DatabaseHeader * = (DatabaseHeader *)header_buffer.buffer; | 
|---|
| 46 | // header 1 | 
|---|
| 47 | header->iteration = 0; | 
|---|
| 48 | header->meta_block = INVALID_BLOCK; | 
|---|
| 49 | header->free_list = INVALID_BLOCK; | 
|---|
| 50 | header->block_count = 0; | 
|---|
| 51 | header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE); | 
|---|
| 52 | // header 2 | 
|---|
| 53 | header->iteration = 1; | 
|---|
| 54 | header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE * 2); | 
|---|
| 55 | // ensure that writing to disk is completed before returning | 
|---|
| 56 | handle->Sync(); | 
|---|
| 57 | // we start with h2 as active_header, this way our initial write will be in h1 | 
|---|
| 58 | active_header = 1; | 
|---|
| 59 | max_block = 0; | 
|---|
| 60 | } else { | 
|---|
| 61 | MainHeader ; | 
|---|
| 62 | // otherwise, we check the metadata of the file | 
|---|
| 63 | header_buffer.Read(*handle, 0); | 
|---|
| 64 | header = *((MainHeader *)header_buffer.buffer); | 
|---|
| 65 | // check the version number | 
|---|
| 66 | if (header.version_number != VERSION_NUMBER) { | 
|---|
| 67 | throw IOException( | 
|---|
| 68 | "Trying to read a database file with version number %lld, but we can only read version %lld", | 
|---|
| 69 | header.version_number, VERSION_NUMBER); | 
|---|
| 70 | } | 
|---|
| 71 | // read the database headers from disk | 
|---|
| 72 | DatabaseHeader h1, h2; | 
|---|
| 73 | header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE); | 
|---|
| 74 | h1 = *((DatabaseHeader *)header_buffer.buffer); | 
|---|
| 75 | header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE * 2); | 
|---|
| 76 | h2 = *((DatabaseHeader *)header_buffer.buffer); | 
|---|
| 77 | // check the header with the highest iteration count | 
|---|
| 78 | if (h1.iteration > h2.iteration) { | 
|---|
| 79 | // h1 is active header | 
|---|
| 80 | active_header = 0; | 
|---|
| 81 | Initialize(h1); | 
|---|
| 82 | } else { | 
|---|
| 83 | // h2 is active header | 
|---|
| 84 | active_header = 1; | 
|---|
| 85 | Initialize(h2); | 
|---|
| 86 | } | 
|---|
| 87 | } | 
|---|
| 88 | } | 
|---|
| 89 |  | 
|---|
| 90 | void SingleFileBlockManager::(DatabaseHeader &) { | 
|---|
| 91 | free_list_id = header.free_list; | 
|---|
| 92 | meta_block = header.meta_block; | 
|---|
| 93 | iteration_count = header.iteration; | 
|---|
| 94 | max_block = header.block_count; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | void SingleFileBlockManager::LoadFreeList(BufferManager &manager) { | 
|---|
| 98 | if (read_only) { | 
|---|
| 99 | // no need to load free list for read only db | 
|---|
| 100 | return; | 
|---|
| 101 | } | 
|---|
| 102 | if (free_list_id == INVALID_BLOCK) { | 
|---|
| 103 | // no free list | 
|---|
| 104 | return; | 
|---|
| 105 | } | 
|---|
| 106 | MetaBlockReader reader(manager, free_list_id); | 
|---|
| 107 | auto free_list_count = reader.Read<uint64_t>(); | 
|---|
| 108 | free_list.clear(); | 
|---|
| 109 | free_list.reserve(free_list_count); | 
|---|
| 110 | for (idx_t i = 0; i < free_list_count; i++) { | 
|---|
| 111 | free_list.push_back(reader.Read<block_id_t>()); | 
|---|
| 112 | } | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | void SingleFileBlockManager::StartCheckpoint() { | 
|---|
| 116 | used_blocks.clear(); | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | block_id_t SingleFileBlockManager::GetFreeBlockId() { | 
|---|
| 120 | block_id_t block; | 
|---|
| 121 | if (free_list.size() > 0) { | 
|---|
| 122 | // free list is non empty | 
|---|
| 123 | // take an entry from the free list | 
|---|
| 124 | block = free_list.back(); | 
|---|
| 125 | // erase the entry from the free list again | 
|---|
| 126 | free_list.pop_back(); | 
|---|
| 127 | } else { | 
|---|
| 128 | block = max_block++; | 
|---|
| 129 | } | 
|---|
| 130 | used_blocks.insert(block); | 
|---|
| 131 | return block; | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | block_id_t SingleFileBlockManager::GetMetaBlock() { | 
|---|
| 135 | return meta_block; | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | unique_ptr<Block> SingleFileBlockManager::CreateBlock() { | 
|---|
| 139 | return make_unique<Block>(GetFreeBlockId()); | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | void SingleFileBlockManager::Read(Block &block) { | 
|---|
| 143 | assert(block.id >= 0); | 
|---|
| 144 | assert(std::find(free_list.begin(), free_list.end(), block.id) == free_list.end()); | 
|---|
| 145 | block.Read(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE); | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) { | 
|---|
| 149 | assert(block_id >= 0); | 
|---|
| 150 | buffer.Write(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE); | 
|---|
| 151 | } | 
|---|
| 152 |  | 
|---|
| 153 | void SingleFileBlockManager::(DatabaseHeader ) { | 
|---|
| 154 | // set the iteration count | 
|---|
| 155 | header.iteration = ++iteration_count; | 
|---|
| 156 | header.block_count = max_block; | 
|---|
| 157 | // now handle the free list | 
|---|
| 158 | free_list.clear(); | 
|---|
| 159 | for (block_id_t i = 0; i < max_block; i++) { | 
|---|
| 160 | if (used_blocks.find(i) == used_blocks.end()) { | 
|---|
| 161 | free_list.push_back(i); | 
|---|
| 162 | } | 
|---|
| 163 | } | 
|---|
| 164 | if (free_list.size() > 0) { | 
|---|
| 165 | // there are blocks in the free list | 
|---|
| 166 | // write them to the file | 
|---|
| 167 | MetaBlockWriter writer(*this); | 
|---|
| 168 | auto entry = std::find(free_list.begin(), free_list.end(), writer.block->id); | 
|---|
| 169 | if (entry != free_list.end()) { | 
|---|
| 170 | free_list.erase(entry); | 
|---|
| 171 | } | 
|---|
| 172 | header.free_list = writer.block->id; | 
|---|
| 173 |  | 
|---|
| 174 | writer.Write<uint64_t>(free_list.size()); | 
|---|
| 175 | for (auto &block_id : free_list) { | 
|---|
| 176 | writer.Write<block_id_t>(block_id); | 
|---|
| 177 | } | 
|---|
| 178 | writer.Flush(); | 
|---|
| 179 | } else { | 
|---|
| 180 | // no blocks in the free list | 
|---|
| 181 | header.free_list = INVALID_BLOCK; | 
|---|
| 182 | } | 
|---|
| 183 | if (!use_direct_io) { | 
|---|
| 184 | // if we are not using Direct IO we need to fsync BEFORE we write the header to ensure that all the previous | 
|---|
| 185 | // blocks are written as well | 
|---|
| 186 | handle->Sync(); | 
|---|
| 187 | } | 
|---|
| 188 | // set the header inside the buffer | 
|---|
| 189 | header_buffer.Clear(); | 
|---|
| 190 | *((DatabaseHeader *)header_buffer.buffer) = header; | 
|---|
| 191 | // now write the header to the file, active_header determines whether we write to h1 or h2 | 
|---|
| 192 | // note that if active_header is h1 we write to h2, and vice versa | 
|---|
| 193 | header_buffer.Write(*handle, active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2); | 
|---|
| 194 | // switch active header to the other header | 
|---|
| 195 | active_header = 1 - active_header; | 
|---|
| 196 | //! Ensure the header write ends up on disk | 
|---|
| 197 | handle->Sync(); | 
|---|
| 198 |  | 
|---|
| 199 | // the free list is now equal to the blocks that were used by the previous iteration | 
|---|
| 200 | free_list.clear(); | 
|---|
| 201 | for (auto &block_id : used_blocks) { | 
|---|
| 202 | free_list.push_back(block_id); | 
|---|
| 203 | } | 
|---|
| 204 | used_blocks.clear(); | 
|---|
| 205 | } | 
|---|
| 206 |  | 
|---|