| 1 | /* -*- c-basic-offset: 2 -*- */ |
| 2 | /* |
| 3 | Copyright(C) 2011-2016 Brazil |
| 4 | |
| 5 | This library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License version 2.1 as published by the Free Software Foundation. |
| 8 | |
| 9 | This library is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | Lesser General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU Lesser General Public |
| 15 | License along with this library; if not, write to the Free Software |
| 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | */ |
| 18 | |
| 19 | #pragma once |
| 20 | |
| 21 | #include "array.hpp" |
| 22 | #include "header.hpp" |
| 23 | #include "node.hpp" |
| 24 | #include "block.hpp" |
| 25 | #include "entry.hpp" |
| 26 | #include "key.hpp" |
| 27 | #include "file.hpp" |
| 28 | |
| 29 | namespace grn { |
| 30 | namespace dat { |
| 31 | |
| 32 | class GRN_DAT_API Trie { |
| 33 | public: |
| 34 | Trie(); |
| 35 | ~Trie(); |
| 36 | |
| 37 | void create(const char *file_name = NULL, |
| 38 | UInt64 file_size = 0, |
| 39 | UInt32 max_num_keys = 0, |
| 40 | double num_nodes_per_key = 0.0, |
| 41 | double average_key_length = 0.0); |
| 42 | |
| 43 | void create(const Trie &trie, |
| 44 | const char *file_name = NULL, |
| 45 | UInt64 file_size = 0, |
| 46 | UInt32 max_num_keys = 0, |
| 47 | double num_nodes_per_key = 0.0, |
| 48 | double average_key_length = 0.0); |
| 49 | |
| 50 | void repair(const Trie &trie, const char *file_name = NULL); |
| 51 | |
| 52 | void open(const char *file_name); |
| 53 | void close(); |
| 54 | |
| 55 | void swap(Trie *trie); |
| 56 | |
| 57 | // Users can access a key by its position or ID. |
| 58 | const Key &get_key(UInt32 key_pos) const { |
| 59 | GRN_DAT_DEBUG_THROW_IF(key_pos >= next_key_pos()); |
| 60 | return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos); |
| 61 | } |
| 62 | // If a specified ID is invalid, e.g. the key is already deleted, this |
| 63 | // function returns a reference to an invalid key object whose id() returns |
| 64 | // INVALID_KEY_ID. |
| 65 | const Key &ith_key(UInt32 key_id) const { |
| 66 | if ((key_id >= min_key_id()) && (key_id <= max_key_id()) && |
| 67 | ith_entry(key_id).is_valid()) { |
| 68 | return get_key(ith_entry(key_id).key_pos()); |
| 69 | } |
| 70 | return Key::invalid_key(); |
| 71 | } |
| 72 | |
| 73 | bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const { |
| 74 | return search_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
| 75 | } |
| 76 | // Longest prefix match search. |
| 77 | bool lcp_search(const void *ptr, UInt32 length, |
| 78 | UInt32 *key_pos = NULL) const { |
| 79 | return lcp_search_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
| 80 | } |
| 81 | |
| 82 | bool remove(UInt32 key_id) { |
| 83 | const Key &key = ith_key(key_id); |
| 84 | if (key.is_valid()) { |
| 85 | return remove(key.ptr(), key.length()); |
| 86 | } |
| 87 | return false; |
| 88 | } |
| 89 | bool remove(const void *ptr, UInt32 length) { |
| 90 | return remove_key(static_cast<const UInt8 *>(ptr), length); |
| 91 | } |
| 92 | |
| 93 | bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) { |
| 94 | return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
| 95 | } |
| 96 | |
| 97 | bool update(UInt32 key_id, const void *ptr, UInt32 length, |
| 98 | UInt32 *key_pos = NULL) { |
| 99 | return update_key(ith_key(key_id), static_cast<const UInt8 *>(ptr), |
| 100 | length, key_pos); |
| 101 | } |
| 102 | bool update(const void *src_ptr, UInt32 src_length, |
| 103 | const void *dest_ptr, UInt32 dest_length, |
| 104 | UInt32 *key_pos = NULL) { |
| 105 | UInt32 src_key_pos; |
| 106 | if (!search(src_ptr, src_length, &src_key_pos)) { |
| 107 | return false; |
| 108 | } |
| 109 | const Key &src_key = get_key(src_key_pos); |
| 110 | return update_key(src_key, static_cast<const UInt8 *>(dest_ptr), |
| 111 | dest_length, key_pos); |
| 112 | } |
| 113 | |
| 114 | const Node &ith_node(UInt32 i) const { |
| 115 | GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); |
| 116 | return nodes_[i]; |
| 117 | } |
| 118 | const Block &ith_block(UInt32 i) const { |
| 119 | GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); |
| 120 | return blocks_[i]; |
| 121 | } |
| 122 | const Entry &ith_entry(UInt32 i) const { |
| 123 | GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); |
| 124 | GRN_DAT_DEBUG_THROW_IF(i > max_key_id()); |
| 125 | return entries_[i]; |
| 126 | } |
| 127 | |
| 128 | const Header &() const { |
| 129 | return *header_; |
| 130 | } |
| 131 | |
| 132 | UInt64 file_size() const { |
| 133 | return header_->file_size(); |
| 134 | } |
| 135 | UInt64 virtual_size() const { |
| 136 | return sizeof(Header) |
| 137 | + (sizeof(Entry) * num_keys()) |
| 138 | + (sizeof(Block) * num_blocks()) |
| 139 | + (sizeof(Node) * num_nodes()) |
| 140 | + total_key_length(); |
| 141 | } |
| 142 | UInt32 total_key_length() const { |
| 143 | return header_->total_key_length(); |
| 144 | } |
| 145 | UInt32 num_keys() const { |
| 146 | return header_->num_keys(); |
| 147 | } |
| 148 | UInt32 min_key_id() const { |
| 149 | return header_->min_key_id(); |
| 150 | } |
| 151 | UInt32 next_key_id() const { |
| 152 | return header_->next_key_id(); |
| 153 | } |
| 154 | UInt32 max_key_id() const { |
| 155 | return header_->max_key_id(); |
| 156 | } |
| 157 | UInt32 max_num_keys() const { |
| 158 | return header_->max_num_keys(); |
| 159 | } |
| 160 | UInt32 num_nodes() const { |
| 161 | return header_->num_nodes(); |
| 162 | } |
| 163 | UInt32 num_phantoms() const { |
| 164 | return header_->num_phantoms(); |
| 165 | } |
| 166 | UInt32 num_zombies() const { |
| 167 | return header_->num_zombies(); |
| 168 | } |
| 169 | UInt32 max_num_nodes() const { |
| 170 | return header_->max_num_nodes(); |
| 171 | } |
| 172 | UInt32 num_blocks() const { |
| 173 | return header_->num_blocks(); |
| 174 | } |
| 175 | UInt32 max_num_blocks() const { |
| 176 | return header_->max_num_blocks(); |
| 177 | } |
| 178 | UInt32 next_key_pos() const { |
| 179 | return header_->next_key_pos(); |
| 180 | } |
| 181 | UInt32 key_buf_size() const { |
| 182 | return header_->key_buf_size(); |
| 183 | } |
| 184 | UInt32 status_flags() const { |
| 185 | return header_->status_flags(); |
| 186 | } |
| 187 | |
| 188 | void clear_status_flags() { |
| 189 | header_->set_status_flags(status_flags() & ~CHANGING_MASK); |
| 190 | } |
| 191 | |
| 192 | void flush(); |
| 193 | |
| 194 | private: |
| 195 | File file_; |
| 196 | Header *; |
| 197 | Array<Node> nodes_; |
| 198 | Array<Block> blocks_; |
| 199 | Array<Entry> entries_; |
| 200 | Array<UInt32> key_buf_; |
| 201 | |
| 202 | void create_file(const char *file_name, |
| 203 | UInt64 file_size, |
| 204 | UInt32 max_num_keys, |
| 205 | double num_nodes_per_key, |
| 206 | double average_key_length); |
| 207 | void create_file(const char *file_name, |
| 208 | UInt64 file_size, |
| 209 | UInt32 max_num_keys, |
| 210 | UInt32 max_num_blocks, |
| 211 | UInt32 key_buf_size); |
| 212 | |
| 213 | void open_file(const char *file_name); |
| 214 | |
| 215 | void map_address(void *address); |
| 216 | |
| 217 | void build_from_trie(const Trie &trie); |
| 218 | void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest); |
| 219 | |
| 220 | void repair_trie(const Trie &trie); |
| 221 | void build_from_keys(const UInt32 *begin, const UInt32 *end, |
| 222 | UInt32 depth, UInt32 node_id); |
| 223 | |
| 224 | void mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth); |
| 225 | void insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth); |
| 226 | |
| 227 | inline int get_label(UInt32 key_id, UInt32 depth) const; |
| 228 | inline int get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const; |
| 229 | inline bool less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const; |
| 230 | inline static void swap_ids(UInt32 *lhs, UInt32 *rhs); |
| 231 | |
| 232 | bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; |
| 233 | bool search_linker(const UInt8 *ptr, UInt32 length, |
| 234 | UInt32 &node_id, UInt32 &query_pos) const; |
| 235 | |
| 236 | bool lcp_search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; |
| 237 | |
| 238 | bool remove_key(const UInt8 *ptr, UInt32 length); |
| 239 | |
| 240 | bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos); |
| 241 | bool insert_linker(const UInt8 *ptr, UInt32 length, |
| 242 | UInt32 &node_id, UInt32 query_pos); |
| 243 | |
| 244 | bool update_key(const Key &key, const UInt8 *ptr, UInt32 length, |
| 245 | UInt32 *key_pos); |
| 246 | |
| 247 | UInt32 insert_node(UInt32 node_id, UInt16 label); |
| 248 | UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id); |
| 249 | |
| 250 | UInt32 separate(const UInt8 *ptr, UInt32 length, |
| 251 | UInt32 node_id, UInt32 i); |
| 252 | void resolve(UInt32 node_id, UInt16 label); |
| 253 | void migrate_nodes(UInt32 node_id, UInt32 dest_offset, |
| 254 | const UInt16 *labels, UInt32 num_labels); |
| 255 | |
| 256 | UInt32 find_offset(const UInt16 *labels, UInt32 num_labels); |
| 257 | |
| 258 | void reserve_node(UInt32 node_id); |
| 259 | void reserve_block(UInt32 block_id); |
| 260 | |
| 261 | void update_block_level(UInt32 block_id, UInt32 level); |
| 262 | void set_block_level(UInt32 block_id, UInt32 level); |
| 263 | void unset_block_level(UInt32 block_id); |
| 264 | |
| 265 | Node &ith_node(UInt32 i) { |
| 266 | GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); |
| 267 | return nodes_[i]; |
| 268 | } |
| 269 | Block &ith_block(UInt32 i) { |
| 270 | GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); |
| 271 | return blocks_[i]; |
| 272 | } |
| 273 | Entry &ith_entry(UInt32 i) { |
| 274 | GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); |
| 275 | GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1)); |
| 276 | return entries_[i]; |
| 277 | } |
| 278 | |
| 279 | // Disallows copy and assignment. |
| 280 | Trie(const Trie &); |
| 281 | Trie &operator=(const Trie &); |
| 282 | }; |
| 283 | |
| 284 | } // namespace dat |
| 285 | } // namespace grn |
| 286 | |