1#include "duckdb/storage/single_file_block_manager.hpp"
2#include "duckdb/storage/meta_block_writer.hpp"
3#include "duckdb/storage/meta_block_reader.hpp"
4#include "duckdb/common/exception.hpp"
5
6using namespace duckdb;
7using namespace std;
8
9SingleFileBlockManager::SingleFileBlockManager(FileSystem &fs, string path, bool read_only, bool create_new,
10 bool use_direct_io)
11 : path(path), header_buffer(FileBufferType::MANAGED_BUFFER, Storage::FILE_HEADER_SIZE), read_only(read_only),
12 use_direct_io(use_direct_io) {
13
14 uint8_t flags;
15 FileLockType lock;
16 if (read_only) {
17 assert(!create_new);
18 flags = FileFlags::READ;
19 lock = FileLockType::READ_LOCK;
20 } else {
21 flags = FileFlags::WRITE;
22 lock = FileLockType::WRITE_LOCK;
23 if (create_new) {
24 flags |= FileFlags::CREATE;
25 }
26 }
27 if (use_direct_io) {
28 flags |= FileFlags::DIRECT_IO;
29 }
30 // open the RDBMS handle
31 handle = fs.OpenFile(path, flags, lock);
32 if (create_new) {
33 // if we create a new file, we fill the metadata of the file
34 // first fill in the new header
35 header_buffer.Clear();
36 MainHeader *main_header = (MainHeader *)header_buffer.buffer;
37 main_header->version_number = VERSION_NUMBER;
38 // now write the header to the file
39 header_buffer.Write(*handle, 0);
40 header_buffer.Clear();
41
42 // write the database headers
43 // initialize meta_block and free_list to INVALID_BLOCK because the database file does not contain any actual
44 // content yet
45 DatabaseHeader *header = (DatabaseHeader *)header_buffer.buffer;
46 // header 1
47 header->iteration = 0;
48 header->meta_block = INVALID_BLOCK;
49 header->free_list = INVALID_BLOCK;
50 header->block_count = 0;
51 header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE);
52 // header 2
53 header->iteration = 1;
54 header_buffer.Write(*handle, Storage::FILE_HEADER_SIZE * 2);
55 // ensure that writing to disk is completed before returning
56 handle->Sync();
57 // we start with h2 as active_header, this way our initial write will be in h1
58 active_header = 1;
59 max_block = 0;
60 } else {
61 MainHeader header;
62 // otherwise, we check the metadata of the file
63 header_buffer.Read(*handle, 0);
64 header = *((MainHeader *)header_buffer.buffer);
65 // check the version number
66 if (header.version_number != VERSION_NUMBER) {
67 throw IOException(
68 "Trying to read a database file with version number %lld, but we can only read version %lld",
69 header.version_number, VERSION_NUMBER);
70 }
71 // read the database headers from disk
72 DatabaseHeader h1, h2;
73 header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE);
74 h1 = *((DatabaseHeader *)header_buffer.buffer);
75 header_buffer.Read(*handle, Storage::FILE_HEADER_SIZE * 2);
76 h2 = *((DatabaseHeader *)header_buffer.buffer);
77 // check the header with the highest iteration count
78 if (h1.iteration > h2.iteration) {
79 // h1 is active header
80 active_header = 0;
81 Initialize(h1);
82 } else {
83 // h2 is active header
84 active_header = 1;
85 Initialize(h2);
86 }
87 }
88}
89
90void SingleFileBlockManager::Initialize(DatabaseHeader &header) {
91 free_list_id = header.free_list;
92 meta_block = header.meta_block;
93 iteration_count = header.iteration;
94 max_block = header.block_count;
95}
96
97void SingleFileBlockManager::LoadFreeList(BufferManager &manager) {
98 if (read_only) {
99 // no need to load free list for read only db
100 return;
101 }
102 if (free_list_id == INVALID_BLOCK) {
103 // no free list
104 return;
105 }
106 MetaBlockReader reader(manager, free_list_id);
107 auto free_list_count = reader.Read<uint64_t>();
108 free_list.clear();
109 free_list.reserve(free_list_count);
110 for (idx_t i = 0; i < free_list_count; i++) {
111 free_list.push_back(reader.Read<block_id_t>());
112 }
113}
114
115void SingleFileBlockManager::StartCheckpoint() {
116 used_blocks.clear();
117}
118
119block_id_t SingleFileBlockManager::GetFreeBlockId() {
120 block_id_t block;
121 if (free_list.size() > 0) {
122 // free list is non empty
123 // take an entry from the free list
124 block = free_list.back();
125 // erase the entry from the free list again
126 free_list.pop_back();
127 } else {
128 block = max_block++;
129 }
130 used_blocks.insert(block);
131 return block;
132}
133
134block_id_t SingleFileBlockManager::GetMetaBlock() {
135 return meta_block;
136}
137
138unique_ptr<Block> SingleFileBlockManager::CreateBlock() {
139 return make_unique<Block>(GetFreeBlockId());
140}
141
142void SingleFileBlockManager::Read(Block &block) {
143 assert(block.id >= 0);
144 assert(std::find(free_list.begin(), free_list.end(), block.id) == free_list.end());
145 block.Read(*handle, BLOCK_START + block.id * Storage::BLOCK_ALLOC_SIZE);
146}
147
148void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) {
149 assert(block_id >= 0);
150 buffer.Write(*handle, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE);
151}
152
153void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
154 // set the iteration count
155 header.iteration = ++iteration_count;
156 header.block_count = max_block;
157 // now handle the free list
158 free_list.clear();
159 for (block_id_t i = 0; i < max_block; i++) {
160 if (used_blocks.find(i) == used_blocks.end()) {
161 free_list.push_back(i);
162 }
163 }
164 if (free_list.size() > 0) {
165 // there are blocks in the free list
166 // write them to the file
167 MetaBlockWriter writer(*this);
168 auto entry = std::find(free_list.begin(), free_list.end(), writer.block->id);
169 if (entry != free_list.end()) {
170 free_list.erase(entry);
171 }
172 header.free_list = writer.block->id;
173
174 writer.Write<uint64_t>(free_list.size());
175 for (auto &block_id : free_list) {
176 writer.Write<block_id_t>(block_id);
177 }
178 writer.Flush();
179 } else {
180 // no blocks in the free list
181 header.free_list = INVALID_BLOCK;
182 }
183 if (!use_direct_io) {
184 // if we are not using Direct IO we need to fsync BEFORE we write the header to ensure that all the previous
185 // blocks are written as well
186 handle->Sync();
187 }
188 // set the header inside the buffer
189 header_buffer.Clear();
190 *((DatabaseHeader *)header_buffer.buffer) = header;
191 // now write the header to the file, active_header determines whether we write to h1 or h2
192 // note that if active_header is h1 we write to h2, and vice versa
193 header_buffer.Write(*handle, active_header == 1 ? Storage::FILE_HEADER_SIZE : Storage::FILE_HEADER_SIZE * 2);
194 // switch active header to the other header
195 active_header = 1 - active_header;
196 //! Ensure the header write ends up on disk
197 handle->Sync();
198
199 // the free list is now equal to the blocks that were used by the previous iteration
200 free_list.clear();
201 for (auto &block_id : used_blocks) {
202 free_list.push_back(block_id);
203 }
204 used_blocks.clear();
205}
206