1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2011-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #pragma once |
20 | |
21 | #include "array.hpp" |
22 | #include "header.hpp" |
23 | #include "node.hpp" |
24 | #include "block.hpp" |
25 | #include "entry.hpp" |
26 | #include "key.hpp" |
27 | #include "file.hpp" |
28 | |
29 | namespace grn { |
30 | namespace dat { |
31 | |
32 | class GRN_DAT_API Trie { |
33 | public: |
34 | Trie(); |
35 | ~Trie(); |
36 | |
37 | void create(const char *file_name = NULL, |
38 | UInt64 file_size = 0, |
39 | UInt32 max_num_keys = 0, |
40 | double num_nodes_per_key = 0.0, |
41 | double average_key_length = 0.0); |
42 | |
43 | void create(const Trie &trie, |
44 | const char *file_name = NULL, |
45 | UInt64 file_size = 0, |
46 | UInt32 max_num_keys = 0, |
47 | double num_nodes_per_key = 0.0, |
48 | double average_key_length = 0.0); |
49 | |
50 | void repair(const Trie &trie, const char *file_name = NULL); |
51 | |
52 | void open(const char *file_name); |
53 | void close(); |
54 | |
55 | void swap(Trie *trie); |
56 | |
57 | // Users can access a key by its position or ID. |
58 | const Key &get_key(UInt32 key_pos) const { |
59 | GRN_DAT_DEBUG_THROW_IF(key_pos >= next_key_pos()); |
60 | return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos); |
61 | } |
62 | // If a specified ID is invalid, e.g. the key is already deleted, this |
63 | // function returns a reference to an invalid key object whose id() returns |
64 | // INVALID_KEY_ID. |
65 | const Key &ith_key(UInt32 key_id) const { |
66 | if ((key_id >= min_key_id()) && (key_id <= max_key_id()) && |
67 | ith_entry(key_id).is_valid()) { |
68 | return get_key(ith_entry(key_id).key_pos()); |
69 | } |
70 | return Key::invalid_key(); |
71 | } |
72 | |
73 | bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const { |
74 | return search_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
75 | } |
76 | // Longest prefix match search. |
77 | bool lcp_search(const void *ptr, UInt32 length, |
78 | UInt32 *key_pos = NULL) const { |
79 | return lcp_search_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
80 | } |
81 | |
82 | bool remove(UInt32 key_id) { |
83 | const Key &key = ith_key(key_id); |
84 | if (key.is_valid()) { |
85 | return remove(key.ptr(), key.length()); |
86 | } |
87 | return false; |
88 | } |
89 | bool remove(const void *ptr, UInt32 length) { |
90 | return remove_key(static_cast<const UInt8 *>(ptr), length); |
91 | } |
92 | |
93 | bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) { |
94 | return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos); |
95 | } |
96 | |
97 | bool update(UInt32 key_id, const void *ptr, UInt32 length, |
98 | UInt32 *key_pos = NULL) { |
99 | return update_key(ith_key(key_id), static_cast<const UInt8 *>(ptr), |
100 | length, key_pos); |
101 | } |
102 | bool update(const void *src_ptr, UInt32 src_length, |
103 | const void *dest_ptr, UInt32 dest_length, |
104 | UInt32 *key_pos = NULL) { |
105 | UInt32 src_key_pos; |
106 | if (!search(src_ptr, src_length, &src_key_pos)) { |
107 | return false; |
108 | } |
109 | const Key &src_key = get_key(src_key_pos); |
110 | return update_key(src_key, static_cast<const UInt8 *>(dest_ptr), |
111 | dest_length, key_pos); |
112 | } |
113 | |
114 | const Node &ith_node(UInt32 i) const { |
115 | GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); |
116 | return nodes_[i]; |
117 | } |
118 | const Block &ith_block(UInt32 i) const { |
119 | GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); |
120 | return blocks_[i]; |
121 | } |
122 | const Entry &ith_entry(UInt32 i) const { |
123 | GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); |
124 | GRN_DAT_DEBUG_THROW_IF(i > max_key_id()); |
125 | return entries_[i]; |
126 | } |
127 | |
128 | const Header &() const { |
129 | return *header_; |
130 | } |
131 | |
132 | UInt64 file_size() const { |
133 | return header_->file_size(); |
134 | } |
135 | UInt64 virtual_size() const { |
136 | return sizeof(Header) |
137 | + (sizeof(Entry) * num_keys()) |
138 | + (sizeof(Block) * num_blocks()) |
139 | + (sizeof(Node) * num_nodes()) |
140 | + total_key_length(); |
141 | } |
142 | UInt32 total_key_length() const { |
143 | return header_->total_key_length(); |
144 | } |
145 | UInt32 num_keys() const { |
146 | return header_->num_keys(); |
147 | } |
148 | UInt32 min_key_id() const { |
149 | return header_->min_key_id(); |
150 | } |
151 | UInt32 next_key_id() const { |
152 | return header_->next_key_id(); |
153 | } |
154 | UInt32 max_key_id() const { |
155 | return header_->max_key_id(); |
156 | } |
157 | UInt32 max_num_keys() const { |
158 | return header_->max_num_keys(); |
159 | } |
160 | UInt32 num_nodes() const { |
161 | return header_->num_nodes(); |
162 | } |
163 | UInt32 num_phantoms() const { |
164 | return header_->num_phantoms(); |
165 | } |
166 | UInt32 num_zombies() const { |
167 | return header_->num_zombies(); |
168 | } |
169 | UInt32 max_num_nodes() const { |
170 | return header_->max_num_nodes(); |
171 | } |
172 | UInt32 num_blocks() const { |
173 | return header_->num_blocks(); |
174 | } |
175 | UInt32 max_num_blocks() const { |
176 | return header_->max_num_blocks(); |
177 | } |
178 | UInt32 next_key_pos() const { |
179 | return header_->next_key_pos(); |
180 | } |
181 | UInt32 key_buf_size() const { |
182 | return header_->key_buf_size(); |
183 | } |
184 | UInt32 status_flags() const { |
185 | return header_->status_flags(); |
186 | } |
187 | |
188 | void clear_status_flags() { |
189 | header_->set_status_flags(status_flags() & ~CHANGING_MASK); |
190 | } |
191 | |
192 | void flush(); |
193 | |
194 | private: |
195 | File file_; |
196 | Header *; |
197 | Array<Node> nodes_; |
198 | Array<Block> blocks_; |
199 | Array<Entry> entries_; |
200 | Array<UInt32> key_buf_; |
201 | |
202 | void create_file(const char *file_name, |
203 | UInt64 file_size, |
204 | UInt32 max_num_keys, |
205 | double num_nodes_per_key, |
206 | double average_key_length); |
207 | void create_file(const char *file_name, |
208 | UInt64 file_size, |
209 | UInt32 max_num_keys, |
210 | UInt32 max_num_blocks, |
211 | UInt32 key_buf_size); |
212 | |
213 | void open_file(const char *file_name); |
214 | |
215 | void map_address(void *address); |
216 | |
217 | void build_from_trie(const Trie &trie); |
218 | void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest); |
219 | |
220 | void repair_trie(const Trie &trie); |
221 | void build_from_keys(const UInt32 *begin, const UInt32 *end, |
222 | UInt32 depth, UInt32 node_id); |
223 | |
224 | void mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth); |
225 | void insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth); |
226 | |
227 | inline int get_label(UInt32 key_id, UInt32 depth) const; |
228 | inline int get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const; |
229 | inline bool less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const; |
230 | inline static void swap_ids(UInt32 *lhs, UInt32 *rhs); |
231 | |
232 | bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; |
233 | bool search_linker(const UInt8 *ptr, UInt32 length, |
234 | UInt32 &node_id, UInt32 &query_pos) const; |
235 | |
236 | bool lcp_search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const; |
237 | |
238 | bool remove_key(const UInt8 *ptr, UInt32 length); |
239 | |
240 | bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos); |
241 | bool insert_linker(const UInt8 *ptr, UInt32 length, |
242 | UInt32 &node_id, UInt32 query_pos); |
243 | |
244 | bool update_key(const Key &key, const UInt8 *ptr, UInt32 length, |
245 | UInt32 *key_pos); |
246 | |
247 | UInt32 insert_node(UInt32 node_id, UInt16 label); |
248 | UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id); |
249 | |
250 | UInt32 separate(const UInt8 *ptr, UInt32 length, |
251 | UInt32 node_id, UInt32 i); |
252 | void resolve(UInt32 node_id, UInt16 label); |
253 | void migrate_nodes(UInt32 node_id, UInt32 dest_offset, |
254 | const UInt16 *labels, UInt32 num_labels); |
255 | |
256 | UInt32 find_offset(const UInt16 *labels, UInt32 num_labels); |
257 | |
258 | void reserve_node(UInt32 node_id); |
259 | void reserve_block(UInt32 block_id); |
260 | |
261 | void update_block_level(UInt32 block_id, UInt32 level); |
262 | void set_block_level(UInt32 block_id, UInt32 level); |
263 | void unset_block_level(UInt32 block_id); |
264 | |
265 | Node &ith_node(UInt32 i) { |
266 | GRN_DAT_DEBUG_THROW_IF(i >= num_nodes()); |
267 | return nodes_[i]; |
268 | } |
269 | Block &ith_block(UInt32 i) { |
270 | GRN_DAT_DEBUG_THROW_IF(i >= num_blocks()); |
271 | return blocks_[i]; |
272 | } |
273 | Entry &ith_entry(UInt32 i) { |
274 | GRN_DAT_DEBUG_THROW_IF(i < min_key_id()); |
275 | GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1)); |
276 | return entries_[i]; |
277 | } |
278 | |
279 | // Disallows copy and assignment. |
280 | Trie(const Trie &); |
281 | Trie &operator=(const Trie &); |
282 | }; |
283 | |
284 | } // namespace dat |
285 | } // namespace grn |
286 | |