1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* Copyright(C) 2011-2015 Brazil |
3 | |
4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License version 2.1 as published by the Free Software Foundation. |
7 | |
8 | This library is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | Lesser General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU Lesser General Public |
14 | License along with this library; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | */ |
17 | |
18 | #include "trie.hpp" |
19 | |
20 | #include <algorithm> |
21 | #include <cstring> |
22 | |
23 | #include "vector.hpp" |
24 | |
25 | namespace grn { |
26 | namespace dat { |
27 | namespace { |
28 | |
29 | class StatusFlagManager { |
30 | public: |
31 | (Header *, UInt32 status_flag) |
32 | : header_(header), status_flag_(status_flag) { |
33 | header_->set_status_flags(header_->status_flags() | status_flag_); |
34 | } |
35 | ~StatusFlagManager() { |
36 | header_->set_status_flags(header_->status_flags() & ~status_flag_); |
37 | } |
38 | |
39 | private: |
40 | Header *; |
41 | UInt32 status_flag_; |
42 | |
43 | // Disallows copy and assignment. |
44 | StatusFlagManager(const StatusFlagManager &); |
45 | StatusFlagManager &operator=(const StatusFlagManager &); |
46 | }; |
47 | |
48 | } // namespace |
49 | |
50 | Trie::Trie() |
51 | : file_(), |
52 | header_(NULL), |
53 | nodes_(), |
54 | blocks_(), |
55 | entries_(), |
56 | key_buf_() {} |
57 | |
58 | Trie::~Trie() {} |
59 | |
60 | void Trie::create(const char *file_name, |
61 | UInt64 file_size, |
62 | UInt32 max_num_keys, |
63 | double num_nodes_per_key, |
64 | double average_key_length) { |
65 | GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); |
66 | |
67 | if (num_nodes_per_key < 1.0) { |
68 | num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY; |
69 | } |
70 | if (num_nodes_per_key > MAX_NUM_NODES_PER_KEY) { |
71 | num_nodes_per_key = MAX_NUM_NODES_PER_KEY; |
72 | } |
73 | GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key < 1.0); |
74 | GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key > MAX_NUM_NODES_PER_KEY); |
75 | |
76 | if (average_key_length < 1.0) { |
77 | average_key_length = DEFAULT_AVERAGE_KEY_LENGTH; |
78 | } |
79 | GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length < 1.0); |
80 | GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length > MAX_KEY_LENGTH); |
81 | |
82 | if (max_num_keys == 0) { |
83 | if (file_size == 0) { |
84 | file_size = DEFAULT_FILE_SIZE; |
85 | } else { |
86 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); |
87 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); |
88 | } |
89 | } else { |
90 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); |
91 | } |
92 | |
93 | Trie new_trie; |
94 | new_trie.create_file(file_name, file_size, max_num_keys, |
95 | num_nodes_per_key, average_key_length); |
96 | new_trie.swap(this); |
97 | } |
98 | |
99 | void Trie::create(const Trie &trie, |
100 | const char *file_name, |
101 | UInt64 file_size, |
102 | UInt32 max_num_keys, |
103 | double num_nodes_per_key, |
104 | double average_key_length) { |
105 | GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); |
106 | |
107 | if (num_nodes_per_key < 1.0) { |
108 | if (trie.num_keys() == 0) { |
109 | num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY; |
110 | } else { |
111 | num_nodes_per_key = 1.0 * trie.num_nodes() / trie.num_keys(); |
112 | if (num_nodes_per_key > MAX_NUM_NODES_PER_KEY) { |
113 | num_nodes_per_key = MAX_NUM_NODES_PER_KEY; |
114 | } |
115 | } |
116 | } |
117 | GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key < 1.0); |
118 | GRN_DAT_THROW_IF(PARAM_ERROR, num_nodes_per_key > MAX_NUM_NODES_PER_KEY); |
119 | |
120 | if (average_key_length < 1.0) { |
121 | if (trie.num_keys() == 0) { |
122 | average_key_length = DEFAULT_AVERAGE_KEY_LENGTH; |
123 | } else { |
124 | average_key_length = 1.0 * trie.total_key_length() / trie.num_keys(); |
125 | } |
126 | } |
127 | GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length < 1.0); |
128 | GRN_DAT_THROW_IF(PARAM_ERROR, average_key_length > MAX_KEY_LENGTH); |
129 | |
130 | if (max_num_keys == 0) { |
131 | if (file_size == 0) { |
132 | file_size = trie.file_size(); |
133 | } |
134 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size < MIN_FILE_SIZE); |
135 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size > MAX_FILE_SIZE); |
136 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size < trie.virtual_size()); |
137 | } else { |
138 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.num_keys()); |
139 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys < trie.max_key_id()); |
140 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys > MAX_NUM_KEYS); |
141 | } |
142 | |
143 | Trie new_trie; |
144 | new_trie.create_file(file_name, file_size, max_num_keys, |
145 | num_nodes_per_key, average_key_length); |
146 | new_trie.build_from_trie(trie); |
147 | new_trie.swap(this); |
148 | } |
149 | |
150 | void Trie::repair(const Trie &trie, const char *file_name) { |
151 | Trie new_trie; |
152 | new_trie.create_file(file_name, trie.file_size(), trie.max_num_keys(), |
153 | trie.max_num_blocks(), trie.key_buf_size()); |
154 | new_trie.repair_trie(trie); |
155 | new_trie.swap(this); |
156 | } |
157 | |
158 | void Trie::open(const char *file_name) { |
159 | GRN_DAT_THROW_IF(PARAM_ERROR, file_name == NULL); |
160 | |
161 | Trie new_trie; |
162 | new_trie.open_file(file_name); |
163 | new_trie.swap(this); |
164 | } |
165 | |
166 | void Trie::close() { |
167 | Trie().swap(this); |
168 | } |
169 | |
170 | void Trie::swap(Trie *trie) { |
171 | file_.swap(&trie->file_); |
172 | std::swap(header_, trie->header_); |
173 | nodes_.swap(&trie->nodes_); |
174 | blocks_.swap(&trie->blocks_); |
175 | entries_.swap(&trie->entries_); |
176 | key_buf_.swap(&trie->key_buf_); |
177 | } |
178 | |
179 | void Trie::flush() { |
180 | file_.flush(); |
181 | } |
182 | |
183 | void Trie::create_file(const char *file_name, |
184 | UInt64 file_size, |
185 | UInt32 max_num_keys, |
186 | double num_nodes_per_key, |
187 | double average_key_length) { |
188 | GRN_DAT_THROW_IF(PARAM_ERROR, (file_size == 0) && (max_num_keys == 0)); |
189 | GRN_DAT_THROW_IF(PARAM_ERROR, (file_size != 0) && (max_num_keys != 0)); |
190 | if (max_num_keys == 0) { |
191 | const UInt64 avail = file_size - sizeof(Header); |
192 | const double num_bytes_per_key = (sizeof(Node) * num_nodes_per_key) |
193 | + (1.0 * sizeof(Block) / BLOCK_SIZE * num_nodes_per_key) |
194 | + sizeof(Entry) |
195 | + sizeof(UInt32) + sizeof(UInt8) + average_key_length + 1.5; |
196 | if ((avail / num_bytes_per_key) > MAX_NUM_KEYS) { |
197 | max_num_keys = MAX_NUM_KEYS; |
198 | } else { |
199 | max_num_keys = (UInt32)(avail / num_bytes_per_key); |
200 | } |
201 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_keys == 0); |
202 | } |
203 | |
204 | UInt32 max_num_blocks; |
205 | { |
206 | const double max_num_nodes = num_nodes_per_key * max_num_keys; |
207 | GRN_DAT_THROW_IF(PARAM_ERROR, (max_num_nodes - 1.0) >= MAX_NUM_NODES); |
208 | max_num_blocks = ((UInt32)max_num_nodes + BLOCK_SIZE - 1) / BLOCK_SIZE; |
209 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks == 0); |
210 | GRN_DAT_THROW_IF(PARAM_ERROR, max_num_blocks > MAX_NUM_BLOCKS); |
211 | } |
212 | |
213 | UInt32 key_buf_size; |
214 | if (file_size == 0) { |
215 | const double total_key_length = average_key_length * max_num_keys; |
216 | GRN_DAT_THROW_IF(PARAM_ERROR, |
217 | (total_key_length - 1.0) >= MAX_TOTAL_KEY_LENGTH); |
218 | |
219 | // The last term is the estimated number of bytes that will be used for |
220 | // 32-bit alignment. |
221 | const UInt64 total_num_bytes = (UInt64)(total_key_length) |
222 | + (UInt64)(sizeof(UInt32) + sizeof(UInt8)) * max_num_keys |
223 | + (UInt32)(max_num_keys * 1.5); |
224 | GRN_DAT_THROW_IF(PARAM_ERROR, |
225 | (total_num_bytes / sizeof(UInt32)) >= MAX_KEY_BUF_SIZE); |
226 | key_buf_size = (UInt32)(total_num_bytes / sizeof(UInt32)); |
227 | |
228 | file_size = sizeof(Header) |
229 | + (sizeof(Block) * max_num_blocks) |
230 | + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) |
231 | + (sizeof(Entry) * max_num_keys) |
232 | + (sizeof(UInt32) * key_buf_size); |
233 | } else { |
234 | const UInt64 avail = file_size - sizeof(Header) |
235 | - (sizeof(Block) * max_num_blocks) |
236 | - (sizeof(Node) * BLOCK_SIZE * max_num_blocks) |
237 | - (sizeof(Entry) * max_num_keys); |
238 | GRN_DAT_THROW_IF(PARAM_ERROR, (avail / sizeof(UInt32)) > MAX_KEY_BUF_SIZE); |
239 | key_buf_size = (UInt32)(avail / sizeof(UInt32)); |
240 | } |
241 | |
242 | create_file(file_name, file_size, max_num_keys, |
243 | max_num_blocks, key_buf_size); |
244 | } |
245 | |
246 | void Trie::create_file(const char *file_name, |
247 | UInt64 file_size, |
248 | UInt32 max_num_keys, |
249 | UInt32 max_num_blocks, |
250 | UInt32 key_buf_size) { |
251 | GRN_DAT_THROW_IF(PARAM_ERROR, file_size < (sizeof(Header) |
252 | + (sizeof(Block) * max_num_blocks) |
253 | + (sizeof(Node) * BLOCK_SIZE * max_num_blocks) |
254 | + (sizeof(Entry) * max_num_keys) |
255 | + (sizeof(UInt32) * key_buf_size))); |
256 | |
257 | file_.create(file_name, file_size); |
258 | |
259 | Header * const = static_cast<Header *>(file_.ptr()); |
260 | *header = Header(); |
261 | header->set_file_size(file_size); |
262 | header->set_max_num_keys(max_num_keys); |
263 | header->set_max_num_blocks(max_num_blocks); |
264 | header->set_key_buf_size(key_buf_size); |
265 | |
266 | map_address(file_.ptr()); |
267 | |
268 | reserve_node(ROOT_NODE_ID); |
269 | ith_node(INVALID_OFFSET).set_is_offset(true); |
270 | } |
271 | |
272 | void Trie::open_file(const char *file_name) { |
273 | GRN_DAT_THROW_IF(PARAM_ERROR, file_name == NULL); |
274 | |
275 | file_.open(file_name); |
276 | map_address(file_.ptr()); |
277 | GRN_DAT_THROW_IF(FORMAT_ERROR, file_size() != file_.size()); |
278 | } |
279 | |
280 | void Trie::map_address(void *address) { |
281 | GRN_DAT_THROW_IF(PARAM_ERROR, address == NULL); |
282 | |
283 | header_ = static_cast<Header *>(address); |
284 | nodes_.assign(header_ + 1, max_num_nodes()); |
285 | blocks_.assign(nodes_.end(), max_num_blocks()); |
286 | entries_.assign(reinterpret_cast<Entry *>(blocks_.end()) - 1, |
287 | max_num_keys() + 1); |
288 | key_buf_.assign(entries_.end(), key_buf_size()); |
289 | |
290 | GRN_DAT_THROW_IF(UNEXPECTED_ERROR, static_cast<void *>(key_buf_.end()) |
291 | > static_cast<void *>(static_cast<char *>(address) + file_size())); |
292 | } |
293 | |
294 | void Trie::build_from_trie(const Trie &trie) { |
295 | GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.num_keys()); |
296 | GRN_DAT_THROW_IF(SIZE_ERROR, max_num_keys() < trie.max_key_id()); |
297 | |
298 | header_->set_total_key_length(trie.total_key_length()); |
299 | header_->set_num_keys(trie.num_keys()); |
300 | header_->set_max_key_id(trie.max_key_id()); |
301 | header_->set_next_key_id(trie.next_key_id()); |
302 | for (UInt32 i = min_key_id(); i <= max_key_id(); ++i) { |
303 | ith_entry(i) = trie.ith_entry(i); |
304 | } |
305 | build_from_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID); |
306 | } |
307 | |
308 | void Trie::build_from_trie(const Trie &trie, UInt32 src, UInt32 dest) { |
309 | // Keys are sorted in lexicographic order. |
310 | if (trie.ith_node(src).is_linker()) { |
311 | const Key &key = trie.get_key(trie.ith_node(src).key_pos()); |
312 | Key::create(key_buf_.ptr() + next_key_pos(), |
313 | key.id(), key.ptr(), key.length()); |
314 | ith_node(dest).set_key_pos(next_key_pos()); |
315 | ith_entry(key.id()).set_key_pos(next_key_pos()); |
316 | header_->set_next_key_pos( |
317 | next_key_pos() + Key::estimate_size(key.length())); |
318 | return; |
319 | } |
320 | |
321 | const UInt32 src_offset = trie.ith_node(src).offset(); |
322 | UInt32 dest_offset; |
323 | { |
324 | UInt16 labels[MAX_LABEL + 1]; |
325 | UInt32 num_labels = 0; |
326 | |
327 | UInt32 label = trie.ith_node(src).child(); |
328 | while (label != INVALID_LABEL) { |
329 | GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); |
330 | const UInt32 child = src_offset ^ label; |
331 | if (trie.ith_node(child).is_linker() || |
332 | (trie.ith_node(child).child() != INVALID_LABEL)) { |
333 | labels[num_labels++] = label; |
334 | } |
335 | label = trie.ith_node(child).sibling(); |
336 | } |
337 | if (num_labels == 0) { |
338 | return; |
339 | } |
340 | |
341 | dest_offset = find_offset(labels, num_labels); |
342 | for (UInt32 i = 0; i < num_labels; ++i) { |
343 | const UInt32 child = dest_offset ^ labels[i]; |
344 | reserve_node(child); |
345 | ith_node(child).set_label(labels[i]); |
346 | if ((i + 1) < num_labels) { |
347 | ith_node(child).set_sibling(labels[i + 1]); |
348 | } |
349 | } |
350 | |
351 | GRN_DAT_DEBUG_THROW_IF(ith_node(dest_offset).is_offset()); |
352 | ith_node(dest_offset).set_is_offset(true); |
353 | ith_node(dest).set_offset(dest_offset); |
354 | ith_node(dest).set_child(labels[0]); |
355 | } |
356 | |
357 | UInt32 label = ith_node(dest).child(); |
358 | while (label != INVALID_LABEL) { |
359 | build_from_trie(trie, src_offset ^ label, dest_offset ^ label); |
360 | label = ith_node(dest_offset ^ label).sibling(); |
361 | } |
362 | } |
363 | |
364 | void Trie::repair_trie(const Trie &trie) { |
365 | Vector<UInt32> valid_ids; |
366 | header_->set_max_key_id(trie.max_key_id()); |
367 | header_->set_next_key_id(trie.max_key_id() + 1); |
368 | UInt32 prev_invalid_key_id = INVALID_KEY_ID; |
369 | for (UInt32 i = min_key_id(); i <= max_key_id(); ++i) { |
370 | const Entry &entry = trie.ith_entry(i); |
371 | if (entry.is_valid()) { |
372 | valid_ids.push_back(i); |
373 | ith_entry(i) = entry; |
374 | const Key &key = trie.get_key(entry.key_pos()); |
375 | Key::create(key_buf_.ptr() + next_key_pos(), |
376 | key.id(), key.ptr(), key.length()); |
377 | ith_entry(i).set_key_pos(next_key_pos()); |
378 | header_->set_next_key_pos( |
379 | next_key_pos() + Key::estimate_size(key.length())); |
380 | header_->set_total_key_length( |
381 | total_key_length() + key.length()); |
382 | header_->set_num_keys(num_keys() + 1); |
383 | } else { |
384 | if (prev_invalid_key_id == INVALID_KEY_ID) { |
385 | header_->set_next_key_id(i); |
386 | } else { |
387 | ith_entry(prev_invalid_key_id).set_next(i); |
388 | } |
389 | prev_invalid_key_id = i; |
390 | } |
391 | } |
392 | if (prev_invalid_key_id != INVALID_KEY_ID) { |
393 | ith_entry(prev_invalid_key_id).set_next(max_key_id() + 1); |
394 | } |
395 | mkq_sort(valid_ids.begin(), valid_ids.end(), 0); |
396 | build_from_keys(valid_ids.begin(), valid_ids.end(), 0, ROOT_NODE_ID); |
397 | } |
398 | |
399 | void Trie::build_from_keys(const UInt32 *begin, const UInt32 *end, |
400 | UInt32 depth, UInt32 node_id) { |
401 | if ((end - begin) == 1) { |
402 | ith_node(node_id).set_key_pos(ith_entry(*begin).key_pos()); |
403 | return; |
404 | } |
405 | |
406 | UInt32 offset; |
407 | { |
408 | UInt16 labels[MAX_LABEL + 2]; |
409 | UInt32 num_labels = 0; |
410 | |
411 | const UInt32 *it = begin; |
412 | if (ith_key(*it).length() == depth) { |
413 | labels[num_labels++] = TERMINAL_LABEL; |
414 | ++it; |
415 | } |
416 | |
417 | labels[num_labels++] = (UInt8)ith_key(*it)[depth]; |
418 | for (++it; it < end; ++it) { |
419 | const Key &key = ith_key(*it); |
420 | if ((UInt8)key[depth] != labels[num_labels - 1]) { |
421 | labels[num_labels++] = (UInt8)key[depth]; |
422 | } |
423 | } |
424 | labels[num_labels] = INVALID_LABEL; |
425 | |
426 | offset = find_offset(labels, num_labels); |
427 | ith_node(node_id).set_child(labels[0]); |
428 | for (UInt32 i = 0; i < num_labels; ++i) { |
429 | const UInt32 next = offset ^ labels[i]; |
430 | reserve_node(next); |
431 | ith_node(next).set_label(labels[i]); |
432 | ith_node(next).set_sibling(labels[i + 1]); |
433 | } |
434 | |
435 | if (offset >= num_nodes()) { |
436 | reserve_block(num_blocks()); |
437 | } |
438 | ith_node(offset).set_is_offset(true); |
439 | ith_node(node_id).set_offset(offset); |
440 | } |
441 | |
442 | if (ith_key(*begin).length() == depth) { |
443 | build_from_keys(begin, begin + 1, depth + 1, offset ^ TERMINAL_LABEL); |
444 | ++begin; |
445 | } |
446 | |
447 | UInt16 label = ith_key(*begin)[depth]; |
448 | for (const UInt32 *it = begin + 1; it < end; ++it) { |
449 | const Key &key = ith_key(*it); |
450 | if ((UInt8)key[depth] != label) { |
451 | build_from_keys(begin, it, depth + 1, offset ^ label); |
452 | label = (UInt8)key[depth]; |
453 | begin = it; |
454 | } |
455 | } |
456 | build_from_keys(begin, end, depth + 1, offset ^ label); |
457 | } |
458 | |
459 | void Trie::mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth) { |
460 | while ((r - l) >= MKQ_SORT_THRESHOLD) { |
461 | UInt32 *pl = l; |
462 | UInt32 *pr = r; |
463 | UInt32 *pivot_l = l; |
464 | UInt32 *pivot_r = r; |
465 | |
466 | const int pivot = get_median(*l, *(l + (r - l) / 2), *(r - 1), depth); |
467 | for ( ; ; ) { |
468 | while (pl < pr) { |
469 | const int label = get_label(*pl, depth); |
470 | if (label > pivot) { |
471 | break; |
472 | } else if (label == pivot) { |
473 | swap_ids(pl, pivot_l); |
474 | ++pivot_l; |
475 | } |
476 | ++pl; |
477 | } |
478 | while (pl < pr) { |
479 | const int label = get_label(*--pr, depth); |
480 | if (label < pivot) { |
481 | break; |
482 | } else if (label == pivot) { |
483 | swap_ids(pr, --pivot_r); |
484 | } |
485 | } |
486 | if (pl >= pr) { |
487 | break; |
488 | } |
489 | swap_ids(pl, pr); |
490 | ++pl; |
491 | } |
492 | while (pivot_l > l) { |
493 | swap_ids(--pivot_l, --pl); |
494 | } |
495 | while (pivot_r < r) { |
496 | swap_ids(pivot_r, pr); |
497 | ++pivot_r; |
498 | ++pr; |
499 | } |
500 | |
501 | if (((pl - l) > (pr - pl)) || ((r - pr) > (pr - pl))) { |
502 | if ((pr - pl) > 1) { |
503 | mkq_sort(pl, pr, depth + 1); |
504 | } |
505 | |
506 | if ((pl - l) < (r - pr)) { |
507 | if ((pl - l) > 1) { |
508 | mkq_sort(l, pl, depth); |
509 | } |
510 | l = pr; |
511 | } else { |
512 | if ((r - pr) > 1) { |
513 | mkq_sort(pr, r, depth); |
514 | } |
515 | r = pl; |
516 | } |
517 | } else { |
518 | if ((pl - l) > 1) { |
519 | mkq_sort(l, pl, depth); |
520 | } |
521 | |
522 | if ((r - pr) > 1) { |
523 | mkq_sort(pr, r, depth); |
524 | } |
525 | |
526 | l = pl, r = pr; |
527 | if ((pr - pl) > 1) { |
528 | ++depth; |
529 | } |
530 | } |
531 | } |
532 | |
533 | if ((r - l) > 1) { |
534 | insertion_sort(l, r, depth); |
535 | } |
536 | } |
537 | |
538 | void Trie::insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth) { |
539 | for (UInt32 *i = l + 1; i < r; ++i) { |
540 | for (UInt32 *j = i; j > l; --j) { |
541 | if (less_than(*(j - 1), *j, depth)) { |
542 | break; |
543 | } |
544 | swap_ids(j - 1, j); |
545 | } |
546 | } |
547 | } |
548 | |
549 | int Trie::get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const { |
550 | const int x = get_label(a, depth); |
551 | const int y = get_label(b, depth); |
552 | const int z = get_label(c, depth); |
553 | if (x < y) { |
554 | if (y < z) { |
555 | return y; |
556 | } else if (x < z) { |
557 | return z; |
558 | } |
559 | return x; |
560 | } else if (x < z) { |
561 | return x; |
562 | } else if (y < z) { |
563 | return z; |
564 | } |
565 | return y; |
566 | } |
567 | |
568 | int Trie::get_label(UInt32 key_id, UInt32 depth) const { |
569 | const Key &key = ith_key(key_id); |
570 | if (depth == key.length()) { |
571 | return -1; |
572 | } else { |
573 | return (UInt8)key[depth]; |
574 | } |
575 | } |
576 | |
577 | bool Trie::less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const { |
578 | const Key &lhs_key = ith_key(lhs); |
579 | const Key &rhs_key = ith_key(rhs); |
580 | const UInt32 length = (lhs_key.length() < rhs_key.length()) ? |
581 | lhs_key.length() : rhs_key.length(); |
582 | for (UInt32 i = depth; i < length; ++i) { |
583 | if (lhs_key[i] != rhs_key[i]) { |
584 | return (UInt8)lhs_key[i] < (UInt8)rhs_key[i]; |
585 | } |
586 | } |
587 | return lhs_key.length() < rhs_key.length(); |
588 | } |
589 | |
590 | void Trie::swap_ids(UInt32 *lhs, UInt32 *rhs) { |
591 | UInt32 temp = *lhs; |
592 | *lhs = *rhs; |
593 | *rhs = temp; |
594 | } |
595 | |
596 | bool Trie::search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const { |
597 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
598 | |
599 | UInt32 node_id = ROOT_NODE_ID; |
600 | UInt32 query_pos = 0; |
601 | if (!search_linker(ptr, length, node_id, query_pos)) { |
602 | return false; |
603 | } |
604 | |
605 | const Base base = ith_node(node_id).base(); |
606 | if (!base.is_linker()) { |
607 | return false; |
608 | } |
609 | |
610 | if (get_key(base.key_pos()).equals_to(ptr, length, query_pos)) { |
611 | if (key_pos != NULL) { |
612 | *key_pos = base.key_pos(); |
613 | } |
614 | return true; |
615 | } |
616 | return false; |
617 | } |
618 | |
619 | bool Trie::search_linker(const UInt8 *ptr, UInt32 length, |
620 | UInt32 &node_id, UInt32 &query_pos) const { |
621 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
622 | |
623 | for ( ; query_pos < length; ++query_pos) { |
624 | const Base base = ith_node(node_id).base(); |
625 | if (base.is_linker()) { |
626 | return true; |
627 | } |
628 | |
629 | const UInt32 next = base.offset() ^ ptr[query_pos]; |
630 | if (ith_node(next).label() != ptr[query_pos]) { |
631 | return false; |
632 | } |
633 | node_id = next; |
634 | } |
635 | |
636 | const Base base = ith_node(node_id).base(); |
637 | if (base.is_linker()) { |
638 | return true; |
639 | } |
640 | |
641 | const UInt32 next = base.offset() ^ TERMINAL_LABEL; |
642 | if (ith_node(next).label() != TERMINAL_LABEL) { |
643 | return false; |
644 | } |
645 | node_id = next; |
646 | return ith_node(next).is_linker(); |
647 | } |
648 | |
649 | bool Trie::lcp_search_key(const UInt8 *ptr, UInt32 length, |
650 | UInt32 *key_pos) const { |
651 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
652 | |
653 | bool found = false; |
654 | UInt32 node_id = ROOT_NODE_ID; |
655 | UInt32 query_pos = 0; |
656 | |
657 | for ( ; query_pos < length; ++query_pos) { |
658 | const Base base = ith_node(node_id).base(); |
659 | if (base.is_linker()) { |
660 | const Key &key = get_key(base.key_pos()); |
661 | if ((key.length() <= length) && |
662 | key.equals_to(ptr, key.length(), query_pos)) { |
663 | if (key_pos != NULL) { |
664 | *key_pos = base.key_pos(); |
665 | } |
666 | found = true; |
667 | } |
668 | return found; |
669 | } |
670 | |
671 | if (ith_node(node_id).child() == TERMINAL_LABEL) { |
672 | const Base linker_base = |
673 | ith_node(base.offset() ^ TERMINAL_LABEL).base(); |
674 | if (linker_base.is_linker()) { |
675 | if (key_pos != NULL) { |
676 | *key_pos = linker_base.key_pos(); |
677 | } |
678 | found = true; |
679 | } |
680 | } |
681 | |
682 | node_id = base.offset() ^ ptr[query_pos]; |
683 | if (ith_node(node_id).label() != ptr[query_pos]) { |
684 | return found; |
685 | } |
686 | } |
687 | |
688 | const Base base = ith_node(node_id).base(); |
689 | if (base.is_linker()) { |
690 | const Key &key = get_key(base.key_pos()); |
691 | if (key.length() <= length) { |
692 | if (key_pos != NULL) { |
693 | *key_pos = base.key_pos(); |
694 | } |
695 | found = true; |
696 | } |
697 | } else if (ith_node(node_id).child() == TERMINAL_LABEL) { |
698 | const Base linker_base = ith_node(base.offset() ^ TERMINAL_LABEL).base(); |
699 | if (linker_base.is_linker()) { |
700 | if (key_pos != NULL) { |
701 | *key_pos = linker_base.key_pos(); |
702 | } |
703 | found = true; |
704 | } |
705 | } |
706 | return found; |
707 | } |
708 | |
709 | bool Trie::remove_key(const UInt8 *ptr, UInt32 length) { |
710 | GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); |
711 | StatusFlagManager status_flag_manager(header_, REMOVING_FLAG); |
712 | |
713 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
714 | |
715 | UInt32 node_id = ROOT_NODE_ID; |
716 | UInt32 query_pos = 0; |
717 | if (!search_linker(ptr, length, node_id, query_pos)) { |
718 | return false; |
719 | } |
720 | |
721 | const UInt32 key_pos = ith_node(node_id).key_pos(); |
722 | if (!get_key(key_pos).equals_to(ptr, length, query_pos)) { |
723 | return false; |
724 | } |
725 | |
726 | const UInt32 key_id = get_key(key_pos).id(); |
727 | ith_node(node_id).set_offset(INVALID_OFFSET); |
728 | ith_entry(key_id).set_next(next_key_id()); |
729 | |
730 | header_->set_next_key_id(key_id); |
731 | header_->set_total_key_length(total_key_length() - length); |
732 | header_->set_num_keys(num_keys() - 1); |
733 | return true; |
734 | } |
735 | |
736 | bool Trie::insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) { |
737 | GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); |
738 | StatusFlagManager status_flag_manager(header_, INSERTING_FLAG); |
739 | |
740 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
741 | |
742 | UInt32 node_id = ROOT_NODE_ID; |
743 | UInt32 query_pos = 0; |
744 | |
745 | search_linker(ptr, length, node_id, query_pos); |
746 | if (!insert_linker(ptr, length, node_id, query_pos)) { |
747 | if (key_pos != NULL) { |
748 | *key_pos = ith_node(node_id).key_pos(); |
749 | } |
750 | return false; |
751 | } |
752 | |
753 | const UInt32 new_key_id = next_key_id(); |
754 | const UInt32 new_key_pos = append_key(ptr, length, new_key_id); |
755 | |
756 | header_->set_total_key_length(total_key_length() + length); |
757 | header_->set_num_keys(num_keys() + 1); |
758 | if (new_key_id > max_key_id()) { |
759 | header_->set_max_key_id(new_key_id); |
760 | header_->set_next_key_id(new_key_id + 1); |
761 | } else { |
762 | header_->set_next_key_id(ith_entry(new_key_id).next()); |
763 | } |
764 | |
765 | ith_entry(new_key_id).set_key_pos(new_key_pos); |
766 | ith_node(node_id).set_key_pos(new_key_pos); |
767 | if (key_pos != NULL) { |
768 | *key_pos = new_key_pos; |
769 | } |
770 | return true; |
771 | } |
772 | |
773 | bool Trie::insert_linker(const UInt8 *ptr, UInt32 length, |
774 | UInt32 &node_id, UInt32 query_pos) { |
775 | if (ith_node(node_id).is_linker()) { |
776 | const Key &key = get_key(ith_node(node_id).key_pos()); |
777 | UInt32 i = query_pos; |
778 | while ((i < length) && (i < key.length())) { |
779 | if (ptr[i] != key[i]) { |
780 | break; |
781 | } |
782 | ++i; |
783 | } |
784 | if ((i == length) && (i == key.length())) { |
785 | return false; |
786 | } |
787 | GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); |
788 | GRN_DAT_DEBUG_THROW_IF(next_key_id() > max_num_keys()); |
789 | |
790 | for (UInt32 j = query_pos; j < i; ++j) { |
791 | node_id = insert_node(node_id, ptr[j]); |
792 | } |
793 | node_id = separate(ptr, length, node_id, i); |
794 | return true; |
795 | } else if (ith_node(node_id).label() == TERMINAL_LABEL) { |
796 | return true; |
797 | } else { |
798 | GRN_DAT_THROW_IF(SIZE_ERROR, num_keys() >= max_num_keys()); |
799 | const UInt16 label = (query_pos < length) ? |
800 | (UInt16)ptr[query_pos] : (UInt16)TERMINAL_LABEL; |
801 | const Base base = ith_node(node_id).base(); |
802 | if ((base.offset() == INVALID_OFFSET) || |
803 | !ith_node(base.offset() ^ label).is_phantom()) { |
804 | resolve(node_id, label); |
805 | } |
806 | node_id = insert_node(node_id, label); |
807 | return true; |
808 | } |
809 | } |
810 | |
811 | bool Trie::update_key(const Key &key, const UInt8 *ptr, UInt32 length, |
812 | UInt32 *key_pos) { |
813 | GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0); |
814 | StatusFlagManager status_flag_manager(header_, UPDATING_FLAG); |
815 | |
816 | GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (length != 0)); |
817 | |
818 | if (!key.is_valid()) { |
819 | return false; |
820 | } |
821 | |
822 | UInt32 node_id = ROOT_NODE_ID; |
823 | UInt32 query_pos = 0; |
824 | |
825 | search_linker(ptr, length, node_id, query_pos); |
826 | if (!insert_linker(ptr, length, node_id, query_pos)) { |
827 | if (key_pos != NULL) { |
828 | *key_pos = ith_node(node_id).key_pos(); |
829 | } |
830 | return false; |
831 | } |
832 | |
833 | const UInt32 new_key_pos = append_key(ptr, length, key.id()); |
834 | header_->set_total_key_length(total_key_length() + length - key.length()); |
835 | ith_entry(key.id()).set_key_pos(new_key_pos); |
836 | ith_node(node_id).set_key_pos(new_key_pos); |
837 | if (key_pos != NULL) { |
838 | *key_pos = new_key_pos; |
839 | } |
840 | |
841 | node_id = ROOT_NODE_ID; |
842 | query_pos = 0; |
843 | GRN_DAT_THROW_IF(UNEXPECTED_ERROR, |
844 | !search_linker(static_cast<const UInt8 *>(key.ptr()), key.length(), |
845 | node_id, query_pos)); |
846 | ith_node(node_id).set_offset(INVALID_OFFSET); |
847 | return true; |
848 | } |
849 | |
850 | UInt32 Trie::insert_node(UInt32 node_id, UInt16 label) { |
851 | GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); |
852 | GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); |
853 | |
854 | const Base base = ith_node(node_id).base(); |
855 | UInt32 offset; |
856 | if (base.is_linker() || (base.offset() == INVALID_OFFSET)) { |
857 | offset = find_offset(&label, 1); |
858 | } else { |
859 | offset = base.offset(); |
860 | } |
861 | |
862 | const UInt32 next = offset ^ label; |
863 | reserve_node(next); |
864 | |
865 | ith_node(next).set_label(label); |
866 | if (base.is_linker()) { |
867 | GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); |
868 | ith_node(offset).set_is_offset(true); |
869 | ith_node(next).set_key_pos(base.key_pos()); |
870 | } else if (base.offset() == INVALID_OFFSET) { |
871 | GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); |
872 | ith_node(offset).set_is_offset(true); |
873 | } else { |
874 | GRN_DAT_DEBUG_THROW_IF(!ith_node(offset).is_offset()); |
875 | } |
876 | ith_node(node_id).set_offset(offset); |
877 | |
878 | const UInt32 child_label = ith_node(node_id).child(); |
879 | GRN_DAT_DEBUG_THROW_IF(child_label == label); |
880 | if (child_label == INVALID_LABEL) { |
881 | ith_node(node_id).set_child(label); |
882 | } else if ((label == TERMINAL_LABEL) || |
883 | ((child_label != TERMINAL_LABEL) && (label < child_label))) { |
884 | GRN_DAT_DEBUG_THROW_IF(ith_node(offset ^ child_label).is_phantom()); |
885 | GRN_DAT_DEBUG_THROW_IF(ith_node(offset ^ child_label).label() != child_label); |
886 | ith_node(next).set_sibling(child_label); |
887 | ith_node(node_id).set_child(label); |
888 | } else { |
889 | UInt32 prev = offset ^ child_label; |
890 | GRN_DAT_DEBUG_THROW_IF(ith_node(prev).label() != child_label); |
891 | UInt32 sibling_label = ith_node(prev).sibling(); |
892 | while (label > sibling_label) { |
893 | prev = offset ^ sibling_label; |
894 | GRN_DAT_DEBUG_THROW_IF(ith_node(prev).label() != sibling_label); |
895 | sibling_label = ith_node(prev).sibling(); |
896 | } |
897 | GRN_DAT_DEBUG_THROW_IF(label == sibling_label); |
898 | ith_node(next).set_sibling(ith_node(prev).sibling()); |
899 | ith_node(prev).set_sibling(label); |
900 | } |
901 | return next; |
902 | } |
903 | |
904 | UInt32 Trie::append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id) { |
905 | GRN_DAT_THROW_IF(SIZE_ERROR, key_id > max_num_keys()); |
906 | |
907 | const UInt32 key_pos = next_key_pos(); |
908 | const UInt32 key_size = Key::estimate_size(length); |
909 | |
910 | GRN_DAT_THROW_IF(SIZE_ERROR, key_size > (key_buf_size() - key_pos)); |
911 | Key::create(key_buf_.ptr() + key_pos, key_id, ptr, length); |
912 | |
913 | header_->set_next_key_pos(key_pos + key_size); |
914 | return key_pos; |
915 | } |
916 | |
917 | UInt32 Trie::separate(const UInt8 *ptr, UInt32 length, |
918 | UInt32 node_id, UInt32 i) { |
919 | GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); |
920 | GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_linker()); |
921 | GRN_DAT_DEBUG_THROW_IF(i > length); |
922 | |
923 | const UInt32 key_pos = ith_node(node_id).key_pos(); |
924 | const Key &key = get_key(key_pos); |
925 | |
926 | UInt16 labels[2]; |
927 | labels[0] = (i < key.length()) ? (UInt16)key[i] : (UInt16)TERMINAL_LABEL; |
928 | labels[1] = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL; |
929 | GRN_DAT_DEBUG_THROW_IF(labels[0] == labels[1]); |
930 | |
931 | const UInt32 offset = find_offset(labels, 2); |
932 | |
933 | UInt32 next = offset ^ labels[0]; |
934 | reserve_node(next); |
935 | GRN_DAT_DEBUG_THROW_IF(ith_node(offset).is_offset()); |
936 | |
937 | ith_node(next).set_label(labels[0]); |
938 | ith_node(next).set_key_pos(key_pos); |
939 | |
940 | next = offset ^ labels[1]; |
941 | reserve_node(next); |
942 | |
943 | ith_node(next).set_label(labels[1]); |
944 | |
945 | ith_node(offset).set_is_offset(true); |
946 | ith_node(node_id).set_offset(offset); |
947 | |
948 | if ((labels[0] == TERMINAL_LABEL) || |
949 | ((labels[1] != TERMINAL_LABEL) && (labels[0] < labels[1]))) { |
950 | ith_node(node_id).set_child(labels[0]); |
951 | ith_node(offset ^ labels[0]).set_sibling(labels[1]); |
952 | } else { |
953 | ith_node(node_id).set_child(labels[1]); |
954 | ith_node(offset ^ labels[1]).set_sibling(labels[0]); |
955 | } |
956 | return next; |
957 | } |
958 | |
959 | void Trie::resolve(UInt32 node_id, UInt16 label) { |
960 | GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); |
961 | GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); |
962 | GRN_DAT_DEBUG_THROW_IF(label > MAX_LABEL); |
963 | |
964 | UInt32 offset = ith_node(node_id).offset(); |
965 | if (offset != INVALID_OFFSET) { |
966 | UInt16 labels[MAX_LABEL + 1]; |
967 | UInt32 num_labels = 0; |
968 | |
969 | UInt32 next_label = ith_node(node_id).child(); |
970 | GRN_DAT_DEBUG_THROW_IF(next_label == INVALID_LABEL); |
971 | while (next_label != INVALID_LABEL) { |
972 | GRN_DAT_DEBUG_THROW_IF(next_label > MAX_LABEL); |
973 | labels[num_labels++] = next_label; |
974 | next_label = ith_node(offset ^ next_label).sibling(); |
975 | } |
976 | GRN_DAT_DEBUG_THROW_IF(num_labels == 0); |
977 | |
978 | labels[num_labels] = label; |
979 | offset = find_offset(labels, num_labels + 1); |
980 | migrate_nodes(node_id, offset, labels, num_labels); |
981 | } else { |
982 | offset = find_offset(&label, 1); |
983 | if (offset >= num_nodes()) { |
984 | GRN_DAT_DEBUG_THROW_IF((offset / BLOCK_SIZE) != num_blocks()); |
985 | reserve_block(num_blocks()); |
986 | } |
987 | ith_node(offset).set_is_offset(true); |
988 | ith_node(node_id).set_offset(offset); |
989 | } |
990 | } |
991 | |
992 | void Trie::migrate_nodes(UInt32 node_id, UInt32 dest_offset, |
993 | const UInt16 *labels, UInt32 num_labels) { |
994 | GRN_DAT_DEBUG_THROW_IF(node_id >= num_nodes()); |
995 | GRN_DAT_DEBUG_THROW_IF(ith_node(node_id).is_linker()); |
996 | GRN_DAT_DEBUG_THROW_IF(labels == NULL); |
997 | GRN_DAT_DEBUG_THROW_IF(num_labels == 0); |
998 | GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); |
999 | |
1000 | const UInt32 src_offset = ith_node(node_id).offset(); |
1001 | GRN_DAT_DEBUG_THROW_IF(src_offset == INVALID_OFFSET); |
1002 | GRN_DAT_DEBUG_THROW_IF(!ith_node(src_offset).is_offset()); |
1003 | |
1004 | for (UInt32 i = 0; i < num_labels; ++i) { |
1005 | const UInt32 src_node_id = src_offset ^ labels[i]; |
1006 | const UInt32 dest_node_id = dest_offset ^ labels[i]; |
1007 | GRN_DAT_DEBUG_THROW_IF(ith_node(src_node_id).is_phantom()); |
1008 | GRN_DAT_DEBUG_THROW_IF(ith_node(src_node_id).label() != labels[i]); |
1009 | |
1010 | reserve_node(dest_node_id); |
1011 | ith_node(dest_node_id).set_except_is_offset( |
1012 | ith_node(src_node_id).except_is_offset()); |
1013 | ith_node(dest_node_id).set_base(ith_node(src_node_id).base()); |
1014 | } |
1015 | header_->set_num_zombies(num_zombies() + num_labels); |
1016 | |
1017 | GRN_DAT_DEBUG_THROW_IF(ith_node(dest_offset).is_offset()); |
1018 | ith_node(dest_offset).set_is_offset(true); |
1019 | ith_node(node_id).set_offset(dest_offset); |
1020 | } |
1021 | |
1022 | UInt32 Trie::find_offset(const UInt16 *labels, UInt32 num_labels) { |
1023 | GRN_DAT_DEBUG_THROW_IF(labels == NULL); |
1024 | GRN_DAT_DEBUG_THROW_IF(num_labels == 0); |
1025 | GRN_DAT_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1)); |
1026 | |
1027 | // Blocks are tested in descending order of level. Basically, a lower level |
1028 | // block contains more phantom nodes. |
1029 | UInt32 level = 1; |
1030 | while (num_labels >= (1U << level)) { |
1031 | ++level; |
1032 | } |
1033 | level = (level < MAX_BLOCK_LEVEL) ? (MAX_BLOCK_LEVEL - level) : 0; |
1034 | |
1035 | UInt32 block_count = 0; |
1036 | do { |
1037 | UInt32 leader = header_->ith_leader(level); |
1038 | if (leader == INVALID_LEADER) { |
1039 | // This level has no blocks and it is thus skipped. |
1040 | continue; |
1041 | } |
1042 | |
1043 | UInt32 block_id = leader; |
1044 | do { |
1045 | const Block &block = ith_block(block_id); |
1046 | GRN_DAT_DEBUG_THROW_IF(block.level() != level); |
1047 | |
1048 | const UInt32 first = (block_id * BLOCK_SIZE) | block.first_phantom(); |
1049 | UInt32 node_id = first; |
1050 | do { |
1051 | GRN_DAT_DEBUG_THROW_IF(!ith_node(node_id).is_phantom()); |
1052 | const UInt32 offset = node_id ^ labels[0]; |
1053 | if (!ith_node(offset).is_offset()) { |
1054 | UInt32 i = 1; |
1055 | for ( ; i < num_labels; ++i) { |
1056 | if (!ith_node(offset ^ labels[i]).is_phantom()) { |
1057 | break; |
1058 | } |
1059 | } |
1060 | if (i >= num_labels) { |
1061 | return offset; |
1062 | } |
1063 | } |
1064 | node_id = (block_id * BLOCK_SIZE) | ith_node(node_id).next(); |
1065 | } while (node_id != first); |
1066 | |
1067 | const UInt32 prev = block_id; |
1068 | const UInt32 next = block.next(); |
1069 | block_id = next; |
1070 | ith_block(prev).set_failure_count(ith_block(prev).failure_count() + 1); |
1071 | |
1072 | // The level of a block is updated when this function fails many times, |
1073 | // actually MAX_FAILURE_COUNT times, in that block. |
1074 | if (ith_block(prev).failure_count() == MAX_FAILURE_COUNT) { |
1075 | update_block_level(prev, level + 1); |
1076 | if (next == leader) { |
1077 | break; |
1078 | } else { |
1079 | // Note that the leader might be updated in the level update. |
1080 | leader = header_->ith_leader(level); |
1081 | continue; |
1082 | } |
1083 | } |
1084 | } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != leader)); |
1085 | } while ((block_count < MAX_BLOCK_COUNT) && (level-- != 0)); |
1086 | |
1087 | return num_nodes() ^ labels[0]; |
1088 | } |
1089 | |
1090 | void Trie::reserve_node(UInt32 node_id) { |
1091 | GRN_DAT_DEBUG_THROW_IF(node_id > num_nodes()); |
1092 | if (node_id >= num_nodes()) { |
1093 | reserve_block(node_id / BLOCK_SIZE); |
1094 | } |
1095 | |
1096 | Node &node = ith_node(node_id); |
1097 | GRN_DAT_DEBUG_THROW_IF(!node.is_phantom()); |
1098 | |
1099 | const UInt32 block_id = node_id / BLOCK_SIZE; |
1100 | Block &block = ith_block(block_id); |
1101 | GRN_DAT_DEBUG_THROW_IF(block.num_phantoms() == 0); |
1102 | |
1103 | const UInt32 next = (block_id * BLOCK_SIZE) | node.next(); |
1104 | const UInt32 prev = (block_id * BLOCK_SIZE) | node.prev(); |
1105 | GRN_DAT_DEBUG_THROW_IF(next >= num_nodes()); |
1106 | GRN_DAT_DEBUG_THROW_IF(prev >= num_nodes()); |
1107 | |
1108 | if ((node_id & BLOCK_MASK) == block.first_phantom()) { |
1109 | // The first phantom node is removed from the block and the second phantom |
1110 | // node comes first. |
1111 | block.set_first_phantom(next & BLOCK_MASK); |
1112 | } |
1113 | |
1114 | ith_node(next).set_prev(prev & BLOCK_MASK); |
1115 | ith_node(prev).set_next(next & BLOCK_MASK); |
1116 | |
1117 | if (block.level() != MAX_BLOCK_LEVEL) { |
1118 | const UInt32 threshold = 1U << ((MAX_BLOCK_LEVEL - block.level() - 1) * 2); |
1119 | if (block.num_phantoms() == threshold) { |
1120 | update_block_level(block_id, block.level() + 1); |
1121 | } |
1122 | } |
1123 | block.set_num_phantoms(block.num_phantoms() - 1); |
1124 | |
1125 | node.set_is_phantom(false); |
1126 | |
1127 | GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET); |
1128 | GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL); |
1129 | |
1130 | header_->set_num_phantoms(num_phantoms() - 1); |
1131 | } |
1132 | |
1133 | void Trie::reserve_block(UInt32 block_id) { |
1134 | GRN_DAT_DEBUG_THROW_IF(block_id != num_blocks()); |
1135 | GRN_DAT_THROW_IF(SIZE_ERROR, block_id >= max_num_blocks()); |
1136 | |
1137 | header_->set_num_blocks(block_id + 1); |
1138 | ith_block(block_id).set_failure_count(0); |
1139 | ith_block(block_id).set_first_phantom(0); |
1140 | ith_block(block_id).set_num_phantoms(BLOCK_SIZE); |
1141 | |
1142 | const UInt32 begin = block_id * BLOCK_SIZE; |
1143 | const UInt32 end = begin + BLOCK_SIZE; |
1144 | GRN_DAT_DEBUG_THROW_IF(end != num_nodes()); |
1145 | |
1146 | Base base; |
1147 | base.set_offset(INVALID_OFFSET); |
1148 | |
1149 | Check check; |
1150 | check.set_is_phantom(true); |
1151 | |
1152 | for (UInt32 i = begin; i < end; ++i) { |
1153 | check.set_prev((i - 1) & BLOCK_MASK); |
1154 | check.set_next((i + 1) & BLOCK_MASK); |
1155 | ith_node(i).set_base(base); |
1156 | ith_node(i).set_check(check); |
1157 | } |
1158 | |
1159 | // The level of the new block is 0. |
1160 | set_block_level(block_id, 0); |
1161 | header_->set_num_phantoms(num_phantoms() + BLOCK_SIZE); |
1162 | } |
1163 | |
1164 | void Trie::update_block_level(UInt32 block_id, UInt32 level) { |
1165 | GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); |
1166 | GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); |
1167 | |
1168 | unset_block_level(block_id); |
1169 | set_block_level(block_id, level); |
1170 | } |
1171 | |
1172 | void Trie::set_block_level(UInt32 block_id, UInt32 level) { |
1173 | GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); |
1174 | GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); |
1175 | |
1176 | const UInt32 leader = header_->ith_leader(level); |
1177 | if (leader == INVALID_LEADER) { |
1178 | // The new block becomes the only one block of the linked list. |
1179 | ith_block(block_id).set_next(block_id); |
1180 | ith_block(block_id).set_prev(block_id); |
1181 | header_->set_ith_leader(level, block_id); |
1182 | } else { |
1183 | // The new block is added to the end of the list. |
1184 | const UInt32 next = leader; |
1185 | const UInt32 prev = ith_block(leader).prev(); |
1186 | GRN_DAT_DEBUG_THROW_IF(next >= num_blocks()); |
1187 | GRN_DAT_DEBUG_THROW_IF(prev >= num_blocks()); |
1188 | ith_block(block_id).set_next(next); |
1189 | ith_block(block_id).set_prev(prev); |
1190 | ith_block(next).set_prev(block_id); |
1191 | ith_block(prev).set_next(block_id); |
1192 | } |
1193 | ith_block(block_id).set_level(level); |
1194 | ith_block(block_id).set_failure_count(0); |
1195 | } |
1196 | |
1197 | void Trie::unset_block_level(UInt32 block_id) { |
1198 | GRN_DAT_DEBUG_THROW_IF(block_id >= num_blocks()); |
1199 | |
1200 | const UInt32 level = ith_block(block_id).level(); |
1201 | GRN_DAT_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL); |
1202 | |
1203 | const UInt32 leader = header_->ith_leader(level); |
1204 | GRN_DAT_DEBUG_THROW_IF(leader == INVALID_LEADER); |
1205 | |
1206 | const UInt32 next = ith_block(block_id).next(); |
1207 | const UInt32 prev = ith_block(block_id).prev(); |
1208 | GRN_DAT_DEBUG_THROW_IF(next >= num_blocks()); |
1209 | GRN_DAT_DEBUG_THROW_IF(prev >= num_blocks()); |
1210 | |
1211 | if (next == block_id) { |
1212 | // The linked list becomes empty. |
1213 | header_->set_ith_leader(level, INVALID_LEADER); |
1214 | } else { |
1215 | ith_block(next).set_prev(prev); |
1216 | ith_block(prev).set_next(next); |
1217 | if (block_id == leader) { |
1218 | // The second block becomes the leader of the linked list. |
1219 | header_->set_ith_leader(level, next); |
1220 | } |
1221 | } |
1222 | } |
1223 | |
1224 | } // namespace dat |
1225 | } // namespace grn |
1226 | |