1 | #include "duckdb/storage/single_file_block_manager.hpp" |
2 | #include "duckdb/storage/meta_block_writer.hpp" |
3 | #include "duckdb/storage/meta_block_reader.hpp" |
4 | #include "duckdb/common/exception.hpp" |
5 | |
6 | using namespace duckdb; |
7 | using namespace std; |
8 | |
9 | SingleFileBlockManager::SingleFileBlockManager(FileSystem &fs, string path, bool read_only, bool create_new, |
10 | bool use_direct_io) |
11 | : path(path), header_buffer(FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), read_only(read_only), |
12 | use_direct_io(use_direct_io) { |
13 | |
14 | uint8_t flags; |
15 | FileLockType lock; |
16 | if (read_only) { |
17 | assert(!create_new); |
18 | flags = FileFlags::READ; |
19 | lock = FileLockType::READ_LOCK; |
20 | } else { |
21 | flags = FileFlags::WRITE; |
22 | lock = FileLockType::WRITE_LOCK; |
23 | if (create_new) { |
24 | flags |= FileFlags::CREATE; |
25 | } |
26 | } |
27 | if (use_direct_io) { |
28 | flags |= FileFlags::DIRECT_IO; |
29 | } |
30 | // open the RDBMS handle |
31 | handle = fs.OpenFile(path, flags, lock); |
32 | if (create_new) { |
33 | // if we create a new file, we fill the metadata of the file |
34 | // first fill in the new header |
35 | header_buffer.Clear(); |
36 | MainHeader *main_header = (MainHeader *)header_buffer.buffer; |
37 | main_header->version_number = VERSION_NUMBER; |
38 | // now write the header to the file |
39 | header_buffer.Write(*handle, 0); |
40 | header_buffer.Clear(); |
41 | |
42 | // write the database headers |
43 | // initialize meta_block and free_list to INVALID_BLOCK because the database file does not contain any actual |
44 | // content yet |
45 | DatabaseHeader * = (DatabaseHeader *)header_buffer.buffer; |
46 | // header 1 |
47 | header->iteration = 0; |
48 | header->meta_block = INVALID_BLOCK; |
49 | header->free_list = INVALID_BLOCK; |
50 | header->block_count = 0; |
51 | header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE); |
52 | // header 2 |
53 | header->iteration = 1; |
54 | header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE * 2); |
55 | // ensure that writing to disk is completed before returning |
56 | handle->Sync(); |
57 | // we start with h2 as active_header, this way our initial write will be in h1 |
58 | active_header = 1; |
59 | max_block = 0; |
60 | } else { |
61 | MainHeader ; |
62 | // otherwise, we check the metadata of the file |
63 | header_buffer.Read(*handle, 0); |
64 | header = *((MainHeader *)header_buffer.buffer); |
65 | // check the version number |
66 | if (header.version_number != VERSION_NUMBER) { |
67 | throw IOException( |
68 | "Trying to read a database file with version number %lld, but we can only read version %lld" , |
69 | header.version_number, VERSION_NUMBER); |
70 | } |
71 | // read the database headers from disk |
72 | DatabaseHeader h1, h2; |
73 | header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE); |
74 | h1 = *((DatabaseHeader *)header_buffer.buffer); |
75 | header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE * 2); |
76 | h2 = *((DatabaseHeader *)header_buffer.buffer); |
77 | // check the header with the highest iteration count |
78 | if (h1.iteration > h2.iteration) { |
79 | // h1 is active header |
80 | active_header = 0; |
81 | Initialize(h1); |
82 | } else { |
83 | // h2 is active header |
84 | active_header = 1; |
85 | Initialize(h2); |
86 | } |
87 | } |
88 | } |
89 | |
90 | void SingleFileBlockManager::(DatabaseHeader &) { |
91 | free_list_id = header.free_list; |
92 | meta_block = header.meta_block; |
93 | iteration_count = header.iteration; |
94 | max_block = header.block_count; |
95 | } |
96 | |
97 | void SingleFileBlockManager::LoadFreeList(BufferManager &manager) { |
98 | if (read_only) { |
99 | // no need to load free list for read only db |
100 | return; |
101 | } |
102 | if (free_list_id == INVALID_BLOCK) { |
103 | // no free list |
104 | return; |
105 | } |
106 | MetaBlockReader reader(manager, free_list_id); |
107 | auto free_list_count = reader.Read<uint64_t>(); |
108 | free_list.clear(); |
109 | free_list.reserve(free_list_count); |
110 | for (idx_t i = 0; i < free_list_count; i++) { |
111 | free_list.push_back(reader.Read<block_id_t>()); |
112 | } |
113 | } |
114 | |
115 | void SingleFileBlockManager::StartCheckpoint() { |
116 | used_blocks.clear(); |
117 | } |
118 | |
119 | block_id_t SingleFileBlockManager::GetFreeBlockId() { |
120 | block_id_t block; |
121 | if (free_list.size() > 0) { |
122 | // free list is non empty |
123 | // take an entry from the free list |
124 | block = free_list.back(); |
125 | // erase the entry from the free list again |
126 | free_list.pop_back(); |
127 | } else { |
128 | block = max_block++; |
129 | } |
130 | used_blocks.insert(block); |
131 | return block; |
132 | } |
133 | |
134 | block_id_t SingleFileBlockManager::GetMetaBlock() { |
135 | return meta_block; |
136 | } |
137 | |
138 | unique_ptr<Block> SingleFileBlockManager::CreateBlock() { |
139 | return make_unique<Block>(GetFreeBlockId()); |
140 | } |
141 | |
142 | void SingleFileBlockManager::Read(Block &block) { |
143 | assert(block.id >= 0); |
144 | assert(std::find(free_list.begin(), free_list.end(), block.id) == free_list.end()); |
145 | block.Read(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE); |
146 | } |
147 | |
148 | void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) { |
149 | assert(block_id >= 0); |
150 | buffer.Write(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE); |
151 | } |
152 | |
153 | void SingleFileBlockManager::(DatabaseHeader ) { |
154 | // set the iteration count |
155 | header.iteration = ++iteration_count; |
156 | header.block_count = max_block; |
157 | // now handle the free list |
158 | free_list.clear(); |
159 | for (block_id_t i = 0; i < max_block; i++) { |
160 | if (used_blocks.find(i) == used_blocks.end()) { |
161 | free_list.push_back(i); |
162 | } |
163 | } |
164 | if (free_list.size() > 0) { |
165 | // there are blocks in the free list |
166 | // write them to the file |
167 | MetaBlockWriter writer(*this); |
168 | auto entry = std::find(free_list.begin(), free_list.end(), writer.block->id); |
169 | if (entry != free_list.end()) { |
170 | free_list.erase(entry); |
171 | } |
172 | header.free_list = writer.block->id; |
173 | |
174 | writer.Write<uint64_t>(free_list.size()); |
175 | for (auto &block_id : free_list) { |
176 | writer.Write<block_id_t>(block_id); |
177 | } |
178 | writer.Flush(); |
179 | } else { |
180 | // no blocks in the free list |
181 | header.free_list = INVALID_BLOCK; |
182 | } |
183 | if (!use_direct_io) { |
184 | // if we are not using Direct IO we need to fsync BEFORE we write the header to ensure that all the previous |
185 | // blocks are written as well |
186 | handle->Sync(); |
187 | } |
188 | // set the header inside the buffer |
189 | header_buffer.Clear(); |
190 | *((DatabaseHeader *)header_buffer.buffer) = header; |
191 | // now write the header to the file, active_header determines whether we write to h1 or h2 |
192 | // note that if active_header is h1 we write to h2, and vice versa |
193 | header_buffer.Write(*handle, active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2); |
194 | // switch active header to the other header |
195 | active_header = 1 - active_header; |
196 | //! Ensure the header write ends up on disk |
197 | handle->Sync(); |
198 | |
199 | // the free list is now equal to the blocks that were used by the previous iteration |
200 | free_list.clear(); |
201 | for (auto &block_id : used_blocks) { |
202 | free_list.push_back(block_id); |
203 | } |
204 | used_blocks.clear(); |
205 | } |
206 | |