| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #pragma once |
| 40 | |
| 41 | #include <db.h> |
| 42 | |
| 43 | #include "portability/toku_pthread.h" |
| 44 | #include "portability/toku_stdint.h" |
| 45 | #include "portability/toku_stdlib.h" |
| 46 | |
| 47 | // RBTree(Red-black tree) with max hole sizes for subtrees. |
| 48 | |
| 49 | // This is a tentative data struct to improve the block allocation time |
| 50 | // complexity from the linear time to the log time. Please be noted this DS only |
| 51 | // supports first-fit for now. It is actually easier to do it with |
| 52 | // best-fit.(just |
| 53 | // sort by size). |
| 54 | |
| 55 | // RBTree is a classic data struct with O(log(n)) for insertion, deletion and |
| 56 | // search. Many years have seen its efficiency. |
| 57 | |
| 58 | // a *hole* is the representation of an available BlockPair for allocation. |
| 59 | // defined as (start_address,size) or (offset, size) interchangably. |
| 60 | |
| 61 | // each node has a *label* to indicate a pair of the max hole sizes for its |
| 62 | // subtree. |
| 63 | |
| 64 | // We are implementing a RBTree with max hole sizes for subtree. It is a red |
| 65 | // black tree that is sorted by the start_address but also labeld with the max |
| 66 | // hole sizes of the subtrees. |
| 67 | |
| 68 | // [(6,3)] -> [(offset, size)], the hole |
| 69 | // [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label |
| 70 | /* / \ */ |
| 71 | // [(0, 1)] [(10, 5)] |
| 72 | // [{0, 2}] [{0, 0}] |
| 73 | /* \ */ |
| 74 | // [(3, 2)] |
| 75 | // [{0, 0}] |
| 76 | // request of allocation size=2 goes from root to [(3,2)]. |
| 77 | |
| 78 | // above example shows a simplified RBTree_max_holes. |
| 79 | // it is easier to tell the search time is O(log(n)) as we can make a decision |
| 80 | // on each descent until we get to the target. |
| 81 | |
| 82 | // the only question is if we can keep the maintenance cost low -- and i think |
| 83 | // it is not a problem becoz an insertion/deletion is only going to update the |
| 84 | // max_hole_sizes of the nodes along the path from the root to the node to be |
| 85 | // deleted/inserted. The path can be cached and search is anyway O(log(n)). |
| 86 | |
| 87 | // unlike the typical rbtree, Tree has to handle the inserts and deletes |
| 88 | // with more care: an allocation that triggers the delete might leave some |
| 89 | // unused space which we can simply update the start_addr and size without |
| 90 | // worrying overlapping. An free might not only mean the insertion but also |
| 91 | // *merging* with the adjacent holes. |
| 92 | |
| 93 | namespace MhsRbTree { |
| 94 | |
| 95 | #define offset_t uint64_t |
| 96 | enum class EColor { RED, BLACK }; |
| 97 | enum class EDirection { NONE = 0, LEFT, RIGHT }; |
| 98 | |
| 99 | // I am a bit tired of fixing overflow/underflow, just quickly craft some |
| 100 | // int |
| 101 | // class that has an infinity-like max value and prevents overflow and |
| 102 | // underflow. If you got a file offset larger than MHS_MAX_VAL, it is not |
| 103 | // a problem here. :-/ - JYM |
| 104 | class OUUInt64 { |
| 105 | public: |
| 106 | static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff; |
| 107 | OUUInt64() : _value(0) {} |
| 108 | OUUInt64(uint64_t s) : _value(s) {} |
| 109 | OUUInt64(const OUUInt64& o) : _value(o._value) {} |
| 110 | bool operator<(const OUUInt64 &r) const { |
| 111 | invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); |
| 112 | return _value < r.ToInt(); |
| 113 | } |
| 114 | bool operator>(const OUUInt64 &r) const { |
| 115 | invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); |
| 116 | return _value > r.ToInt(); |
| 117 | } |
| 118 | bool operator<=(const OUUInt64 &r) const { |
| 119 | invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); |
| 120 | return _value <= r.ToInt(); |
| 121 | } |
| 122 | bool operator>=(const OUUInt64 &r) const { |
| 123 | invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); |
| 124 | return _value >= r.ToInt(); |
| 125 | } |
| 126 | OUUInt64 operator+(const OUUInt64 &r) const { |
| 127 | if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) { |
| 128 | OUUInt64 tmp(MHS_MAX_VAL); |
| 129 | return tmp; |
| 130 | } else { |
| 131 | // detecting overflow |
| 132 | invariant((MHS_MAX_VAL - _value) >= r.ToInt()); |
| 133 | uint64_t plus = _value + r.ToInt(); |
| 134 | OUUInt64 tmp(plus); |
| 135 | return tmp; |
| 136 | } |
| 137 | } |
| 138 | OUUInt64 operator-(const OUUInt64 &r) const { |
| 139 | invariant(r.ToInt() != MHS_MAX_VAL); |
| 140 | if (_value == MHS_MAX_VAL) { |
| 141 | return *this; |
| 142 | } else { |
| 143 | invariant(_value >= r.ToInt()); |
| 144 | uint64_t minus = _value - r.ToInt(); |
| 145 | OUUInt64 tmp(minus); |
| 146 | return tmp; |
| 147 | } |
| 148 | } |
| 149 | OUUInt64 operator-=(const OUUInt64 &r) { |
| 150 | if (_value != MHS_MAX_VAL) { |
| 151 | invariant(r.ToInt() != MHS_MAX_VAL); |
| 152 | invariant(_value >= r.ToInt()); |
| 153 | _value -= r.ToInt(); |
| 154 | } |
| 155 | return *this; |
| 156 | } |
| 157 | OUUInt64 operator+=(const OUUInt64 &r) { |
| 158 | if (_value != MHS_MAX_VAL) { |
| 159 | if (r.ToInt() == MHS_MAX_VAL) { |
| 160 | _value = MHS_MAX_VAL; |
| 161 | } else { |
| 162 | invariant((MHS_MAX_VAL - _value) >= r.ToInt()); |
| 163 | this->_value += r.ToInt(); |
| 164 | } |
| 165 | } |
| 166 | return *this; |
| 167 | } |
| 168 | bool operator==(const OUUInt64 &r) const { |
| 169 | return _value == r.ToInt(); |
| 170 | } |
| 171 | bool operator!=(const OUUInt64 &r) const { |
| 172 | return _value != r.ToInt(); |
| 173 | } |
| 174 | OUUInt64 operator=(const OUUInt64 &r) { |
| 175 | _value = r.ToInt(); |
| 176 | return *this; |
| 177 | } |
| 178 | uint64_t ToInt() const { return _value; } |
| 179 | |
| 180 | private: |
| 181 | uint64_t _value; |
| 182 | }; |
| 183 | |
| 184 | class Node { |
| 185 | public: |
| 186 | class BlockPair { |
| 187 | public: |
| 188 | OUUInt64 _offset; |
| 189 | OUUInt64 _size; |
| 190 | |
| 191 | BlockPair() : _offset(0), _size(0) {} |
| 192 | BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} |
| 193 | BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} |
| 194 | BlockPair(const BlockPair &o) |
| 195 | : _offset(o._offset), _size(o._size) {} |
| 196 | |
| 197 | int operator<(const BlockPair &rhs) const { |
| 198 | return _offset < rhs._offset; |
| 199 | } |
| 200 | int operator<(const uint64_t &o) const { return _offset < o; } |
| 201 | }; |
| 202 | |
| 203 | struct Pair { |
| 204 | uint64_t _left; |
| 205 | uint64_t _right; |
| 206 | Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {} |
| 207 | }; |
| 208 | |
| 209 | EColor _color; |
| 210 | BlockPair _hole; |
| 211 | Pair _label; |
| 212 | Node *_left; |
| 213 | Node *_right; |
| 214 | Node *_parent; |
| 215 | |
| 216 | Node(EColor c, |
| 217 | Node::BlockPair h, |
| 218 | Pair lb, |
| 219 | Node *l, |
| 220 | Node *r, |
| 221 | Node *p) |
| 222 | : _color(c), |
| 223 | _hole(h), |
| 224 | _label(lb), |
| 225 | _left(l), |
| 226 | _right(r), |
| 227 | _parent(p) {} |
| 228 | }; |
| 229 | |
| 230 | class Tree { |
| 231 | private: |
| 232 | Node *_root; |
| 233 | uint64_t _align; |
| 234 | |
| 235 | public: |
| 236 | Tree(); |
| 237 | Tree(uint64_t); |
| 238 | ~Tree(); |
| 239 | |
| 240 | void PreOrder(); |
| 241 | void InOrder(); |
| 242 | void PostOrder(); |
| 243 | // immutable operations |
| 244 | Node *SearchByOffset(uint64_t addr); |
| 245 | Node *SearchFirstFitBySize(uint64_t size); |
| 246 | |
| 247 | Node *MinNode(); |
| 248 | Node *MaxNode(); |
| 249 | |
| 250 | Node *Successor(Node *); |
| 251 | Node *Predecessor(Node *); |
| 252 | |
| 253 | // mapped from tree_allocator::free_block |
| 254 | int Insert(Node::BlockPair pair); |
| 255 | // mapped from tree_allocator::alloc_block |
| 256 | uint64_t Remove(size_t size); |
| 257 | // mapped from tree_allocator::alloc_block_after |
| 258 | |
| 259 | void RawRemove(uint64_t offset); |
| 260 | void Destroy(); |
| 261 | // print the tree |
| 262 | void Dump(); |
| 263 | // validation |
| 264 | // balance |
| 265 | void ValidateBalance(); |
| 266 | void ValidateInOrder(Node::BlockPair *); |
| 267 | void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *); |
| 268 | void ValidateMhs(); |
| 269 | |
| 270 | private: |
| 271 | void PreOrder(Node *node) const; |
| 272 | void InOrder(Node *node) const; |
| 273 | void PostOrder(Node *node) const; |
| 274 | Node *SearchByOffset(Node *node, offset_t addr) const; |
| 275 | Node *SearchFirstFitBySize(Node *node, size_t size) const; |
| 276 | |
| 277 | Node *MinNode(Node *node); |
| 278 | Node *MaxNode(Node *node); |
| 279 | |
| 280 | // rotations to fix up. we will have to update the labels too. |
| 281 | void LeftRotate(Node *&root, Node *x); |
| 282 | void RightRotate(Node *&root, Node *y); |
| 283 | |
| 284 | int Insert(Node *&root, Node::BlockPair pair); |
| 285 | int InsertFixup(Node *&root, Node *node); |
| 286 | |
| 287 | void RawRemove(Node *&root, Node *node); |
| 288 | uint64_t Remove(Node *&root, Node *node, size_t size); |
| 289 | void RawRemoveFixup(Node *&root, Node *node, Node *parent); |
| 290 | |
| 291 | void Destroy(Node *&tree); |
| 292 | void Dump(Node *tree, Node::BlockPair pair, EDirection dir); |
| 293 | void RecalculateMhs(Node *node); |
| 294 | void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *); |
| 295 | void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool); |
| 296 | Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size); |
| 297 | |
| 298 | Node *SuccessorHelper(Node *y, Node *x); |
| 299 | |
| 300 | Node *PredecessorHelper(Node *y, Node *x); |
| 301 | |
| 302 | void InOrderVisitor(Node *, |
| 303 | void (*f)(void *, Node *, uint64_t), |
| 304 | void *, |
| 305 | uint64_t); |
| 306 | uint64_t ValidateMhs(Node *); |
| 307 | |
| 308 | uint64_t EffectiveSize(Node *); |
| 309 | // mixed with some macros..... |
| 310 | #define rbn_parent(r) ((r)->_parent) |
| 311 | #define rbn_color(r) ((r)->_color) |
| 312 | #define rbn_is_red(r) ((r)->_color == EColor::RED) |
| 313 | #define rbn_is_black(r) ((r)->_color == EColor::BLACK) |
| 314 | #define rbn_set_black(r) \ |
| 315 | do { \ |
| 316 | (r)->_color = EColor::BLACK; \ |
| 317 | } while (0) |
| 318 | #define rbn_set_red(r) \ |
| 319 | do { \ |
| 320 | (r)->_color = EColor::RED; \ |
| 321 | } while (0) |
| 322 | #define rbn_set_parent(r, p) \ |
| 323 | do { \ |
| 324 | (r)->_parent = (p); \ |
| 325 | } while (0) |
| 326 | #define rbn_set_color(r, c) \ |
| 327 | do { \ |
| 328 | (r)->_color = (c); \ |
| 329 | } while (0) |
| 330 | #define rbn_set_offset(r) \ |
| 331 | do { \ |
| 332 | (r)->_hole._offset = (c); \ |
| 333 | } while (0) |
| 334 | #define rbn_set_size(r, c) \ |
| 335 | do { \ |
| 336 | (r)->_hole._size = (c); \ |
| 337 | } while (0) |
| 338 | #define rbn_set_left_mhs(r, c) \ |
| 339 | do { \ |
| 340 | (r)->_label._left = (c); \ |
| 341 | } while (0) |
| 342 | #define rbn_set_right_mhs(r, c) \ |
| 343 | do { \ |
| 344 | (r)->_label._right = (c); \ |
| 345 | } while (0) |
| 346 | #define rbn_size(r) ((r)->_hole._size) |
| 347 | #define rbn_offset(r) ((r)->_hole._offset) |
| 348 | #define rbn_key(r) ((r)->_hole._offset) |
| 349 | #define rbn_left_mhs(r) ((r)->_label._left) |
| 350 | #define rbn_right_mhs(r) ((r)->_label._right) |
| 351 | #define mhs_of_subtree(y) \ |
| 352 | (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y))) |
| 353 | }; |
| 354 | |
| 355 | } // namespace MhsRbTree |
| 356 | |