1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#include <algorithm>
40
41#include <string.h>
42
43#include "toku_portability.h"
44#include "portability/memory.h"
45#include "portability/toku_assert.h"
46#include "portability/toku_stdint.h"
47#include "portability/toku_stdlib.h"
48
49#include "ft/serialize/block_allocator.h"
50#include "ft/serialize/rbtree_mhs.h"
51
52#ifdef TOKU_DEBUG_PARANOID
53#define VALIDATE() Validate()
54#else
55#define VALIDATE()
56#endif
57
58void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
59 uint64_t alignment) {
60 // the alignment must be at least 512 and aligned with 512 to work with
61 // direct I/O
62 invariant(alignment >= 512 && (alignment % 512) == 0);
63
64 _reserve_at_beginning = reserve_at_beginning;
65 _alignment = alignment;
66 _n_blocks = 0;
67 _n_bytes_in_use = reserve_at_beginning;
68 _tree = new MhsRbTree::Tree(alignment);
69}
70
71void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
72 CreateInternal(reserve_at_beginning, alignment);
73 _tree->Insert({reserve_at_beginning, MAX_BYTE});
74 VALIDATE();
75}
76
77void BlockAllocator::Destroy() {
78 delete _tree;
79}
80
81void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
82 uint64_t alignment,
83 struct BlockPair *translation_pairs,
84 uint64_t n_blocks) {
85 CreateInternal(reserve_at_beginning, alignment);
86 _n_blocks = n_blocks;
87
88 struct BlockPair *XMALLOC_N(n_blocks, pairs);
89 memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
90 std::sort(pairs, pairs + n_blocks);
91
92 if (pairs[0]._offset > reserve_at_beginning) {
93 _tree->Insert(
94 {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
95 }
96 for (uint64_t i = 0; i < _n_blocks; i++) {
97 // Allocator does not support size 0 blocks. See
98 // block_allocator_free_block.
99 invariant(pairs[i]._size > 0);
100 invariant(pairs[i]._offset >= _reserve_at_beginning);
101 invariant(pairs[i]._offset % _alignment == 0);
102
103 _n_bytes_in_use += pairs[i]._size;
104
105 MhsRbTree::OUUInt64 free_size(MAX_BYTE);
106 MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
107 if (i < n_blocks - 1) {
108 MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
109 invariant(next_offset >= free_offset);
110 free_size = next_offset - free_offset;
111 if (free_size == 0)
112 continue;
113 }
114 _tree->Insert({free_offset, free_size});
115 }
116 toku_free(pairs);
117 VALIDATE();
118}
119
// Effect: round `value` up to the next multiple of `ba_alignment`.
// Values that are already aligned are returned unchanged.
// Requires: ba_alignment > 0.
static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
    const uint64_t remainder = value % ba_alignment;
    return (remainder == 0) ? value : value + (ba_alignment - remainder);
}
124
125// Effect: Allocate a block. The resulting block must be aligned on the
126// ba->alignment (which to make direct_io happy must be a positive multiple of
127// 512).
128void BlockAllocator::AllocBlock(uint64_t size,
129 uint64_t *offset) {
130 // Allocator does not support size 0 blocks. See block_allocator_free_block.
131 invariant(size > 0);
132
133 _n_bytes_in_use += size;
134 *offset = _tree->Remove(size);
135
136 _n_blocks++;
137 VALIDATE();
138}
139
140// To support 0-sized blocks, we need to include size as an input to this
141// function.
142// All 0-sized blocks at the same offset can be considered identical, but
143// a 0-sized block can share offset with a non-zero sized block.
144// The non-zero sized block is not exchangable with a zero sized block (or vice
145// versa), so inserting 0-sized blocks can cause corruption here.
146void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
147 VALIDATE();
148 _n_bytes_in_use -= size;
149 _tree->Insert({offset, size});
150 _n_blocks--;
151 VALIDATE();
152}
153
154uint64_t BlockAllocator::AllocatedLimit() const {
155 MhsRbTree::Node *max_node = _tree->MaxNode();
156 return rbn_offset(max_node).ToInt();
157}
158
159// Effect: Consider the blocks in sorted order. The reserved block at the
160// beginning is number 0. The next one is number 1 and so forth.
161// Return the offset and size of the block with that number.
162// Return 0 if there is a block that big, return nonzero if b is too big.
163int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
164 uint64_t *offset,
165 uint64_t *size) {
166 MhsRbTree::Node *x, *y;
167 if (b == 0) {
168 *offset = 0;
169 *size = _reserve_at_beginning;
170 return 0;
171 } else if (b > _n_blocks) {
172 return -1;
173 } else {
174 x = _tree->MinNode();
175 for (uint64_t i = 1; i <= b; i++) {
176 y = x;
177 x = _tree->Successor(x);
178 }
179 *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
180 *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
181 return 0;
182 }
183}
184
// Context threaded through the free-tree visitor when collecting
// fragmentation statistics (see VisUnusedCollector).
struct VisUnusedExtra {
    TOKU_DB_FRAGMENTATION _report;  // report whose unused_* fields are being accumulated
    uint64_t _align;                // allocator alignment, used to round hole offsets up
};
189
190static void VisUnusedCollector(void *extra,
191 MhsRbTree::Node *node,
192 uint64_t UU(depth)) {
193 struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
194 TOKU_DB_FRAGMENTATION report = v_e->_report;
195 uint64_t alignm = v_e->_align;
196
197 MhsRbTree::OUUInt64 offset = rbn_offset(node);
198 MhsRbTree::OUUInt64 size = rbn_size(node);
199 MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
200 uint64_t free_space = (offset + size - answer_offset).ToInt();
201 if (free_space > 0) {
202 report->unused_bytes += free_space;
203 report->unused_blocks++;
204 if (free_space > report->largest_unused_block) {
205 report->largest_unused_block = free_space;
206 }
207 }
208}
209// Requires: report->file_size_bytes is filled in
210// Requires: report->data_bytes is filled in
211// Requires: report->checkpoint_bytes_additional is filled in
212void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
213 invariant(_n_bytes_in_use ==
214 report->data_bytes + report->checkpoint_bytes_additional);
215
216 report->unused_bytes = 0;
217 report->unused_blocks = 0;
218 report->largest_unused_block = 0;
219 struct VisUnusedExtra extra = {report, _alignment};
220 _tree->InOrderVisitor(VisUnusedCollector, &extra);
221}
222
223void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
224 report->data_bytes = _n_bytes_in_use;
225 report->data_blocks = _n_blocks;
226 report->file_size_bytes = 0;
227 report->checkpoint_bytes_additional = 0;
228 UnusedStatistics(report);
229}
230
// Context threaded through the free-tree visitor during Validate()
// (see VisUsedBlocksInOrder).
struct ValidateExtra {
    uint64_t _bytes;             // running total of in-use bytes between holes
    MhsRbTree::Node *_pre_node;  // previously visited hole; nullptr on first visit
};
235static void VisUsedBlocksInOrder(void *extra,
236 MhsRbTree::Node *cur_node,
237 uint64_t UU(depth)) {
238 struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
239 MhsRbTree::Node *pre_node = v_e->_pre_node;
240 // verify no overlaps
241 if (pre_node) {
242 invariant(rbn_size(pre_node) > 0);
243 invariant(rbn_offset(cur_node) >
244 rbn_offset(pre_node) + rbn_size(pre_node));
245 MhsRbTree::OUUInt64 used_space =
246 rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
247 v_e->_bytes += used_space.ToInt();
248 } else {
249 v_e->_bytes += rbn_offset(cur_node).ToInt();
250 }
251 v_e->_pre_node = cur_node;
252}
253
254void BlockAllocator::Validate() const {
255 _tree->ValidateBalance();
256 _tree->ValidateMhs();
257 struct ValidateExtra extra = {0, nullptr};
258 _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
259 invariant(extra._bytes == _n_bytes_in_use);
260}
261