1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* Copyright(C) 2011 Brazil |
3 | |
4 | This library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License version 2.1 as published by the Free Software Foundation. |
7 | |
8 | This library is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
11 | Lesser General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU Lesser General Public |
14 | License along with this library; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
16 | */ |
17 | |
18 | #include "predictive-cursor.hpp" |
19 | |
20 | #include <algorithm> |
21 | #include <cstring> |
22 | |
23 | #include "trie.hpp" |
24 | |
25 | namespace grn { |
26 | namespace dat { |
27 | |
28 | PredictiveCursor::PredictiveCursor() |
29 | : trie_(NULL), |
30 | offset_(0), |
31 | limit_(MAX_UINT32), |
32 | flags_(PREDICTIVE_CURSOR), |
33 | buf_(), |
34 | cur_(0), |
35 | end_(0), |
36 | min_length_(0) {} |
37 | |
38 | PredictiveCursor::~PredictiveCursor() {} |
39 | |
40 | void PredictiveCursor::open(const Trie &trie, |
41 | const String &str, |
42 | UInt32 offset, |
43 | UInt32 limit, |
44 | UInt32 flags) { |
45 | GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0)); |
46 | |
47 | flags = fix_flags(flags); |
48 | PredictiveCursor new_cursor(trie, offset, limit, flags); |
49 | new_cursor.init(str); |
50 | new_cursor.swap(this); |
51 | } |
52 | |
53 | void PredictiveCursor::close() { |
54 | PredictiveCursor new_cursor; |
55 | new_cursor.swap(this); |
56 | } |
57 | |
58 | const Key &PredictiveCursor::next() { |
59 | if (cur_ == end_) { |
60 | return Key::invalid_key(); |
61 | } |
62 | |
63 | if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { |
64 | return ascending_next(); |
65 | } else { |
66 | return descending_next(); |
67 | } |
68 | } |
69 | |
70 | PredictiveCursor::PredictiveCursor(const Trie &trie, |
71 | UInt32 offset, UInt32 limit, UInt32 flags) |
72 | : trie_(&trie), |
73 | offset_(offset), |
74 | limit_(limit), |
75 | flags_(flags), |
76 | buf_(), |
77 | cur_(0), |
78 | end_(0), |
79 | min_length_(0) {} |
80 | |
81 | UInt32 PredictiveCursor::fix_flags(UInt32 flags) const { |
82 | const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; |
83 | GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) && |
84 | (cursor_type != PREDICTIVE_CURSOR)); |
85 | flags |= PREDICTIVE_CURSOR; |
86 | |
87 | const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; |
88 | GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) && |
89 | (cursor_order != ASCENDING_CURSOR) && |
90 | (cursor_order != DESCENDING_CURSOR)); |
91 | if (cursor_order == 0) { |
92 | flags |= ASCENDING_CURSOR; |
93 | } |
94 | |
95 | const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; |
96 | GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~(EXCEPT_EXACT_MATCH)); |
97 | |
98 | return flags; |
99 | } |
100 | |
101 | void PredictiveCursor::init(const String &str) { |
102 | if (limit_ == 0) { |
103 | return; |
104 | } |
105 | |
106 | min_length_ = str.length(); |
107 | if ((flags_ & EXCEPT_EXACT_MATCH) == EXCEPT_EXACT_MATCH) { |
108 | ++min_length_; |
109 | } |
110 | end_ = (offset_ > (MAX_UINT32 - limit_)) ? MAX_UINT32 : (offset_ + limit_); |
111 | |
112 | UInt32 node_id = ROOT_NODE_ID; |
113 | for (UInt32 i = 0; i < str.length(); ++i) { |
114 | const Base base = trie_->ith_node(node_id).base(); |
115 | if (base.is_linker()) { |
116 | if (offset_ == 0) { |
117 | const Key &key = trie_->get_key(base.key_pos()); |
118 | if ((key.length() >= str.length()) && |
119 | (key.str().substr(0, str.length()).compare(str, i) == 0)) { |
120 | if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { |
121 | node_id |= IS_ROOT_FLAG; |
122 | } |
123 | buf_.push_back(node_id); |
124 | } |
125 | } |
126 | return; |
127 | } |
128 | |
129 | node_id = base.offset() ^ str[i]; |
130 | if (trie_->ith_node(node_id).label() != str[i]) { |
131 | return; |
132 | } |
133 | } |
134 | |
135 | if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { |
136 | node_id |= IS_ROOT_FLAG; |
137 | } |
138 | buf_.push_back(node_id); |
139 | } |
140 | |
141 | void PredictiveCursor::swap(PredictiveCursor *cursor) { |
142 | std::swap(trie_, cursor->trie_); |
143 | std::swap(offset_, cursor->offset_); |
144 | std::swap(limit_, cursor->limit_); |
145 | std::swap(flags_, cursor->flags_); |
146 | buf_.swap(&cursor->buf_); |
147 | std::swap(cur_, cursor->cur_); |
148 | std::swap(end_, cursor->end_); |
149 | std::swap(min_length_, cursor->min_length_); |
150 | } |
151 | |
152 | const Key &PredictiveCursor::ascending_next() { |
153 | while (!buf_.empty()) { |
154 | const bool is_root = (buf_.back() & IS_ROOT_FLAG) == IS_ROOT_FLAG; |
155 | const UInt32 node_id = buf_.back() & ~IS_ROOT_FLAG; |
156 | buf_.pop_back(); |
157 | |
158 | const Node node = trie_->ith_node(node_id); |
159 | if (!is_root && (node.sibling() != INVALID_LABEL)) { |
160 | buf_.push_back(node_id ^ node.label() ^ node.sibling()); |
161 | } |
162 | |
163 | if (node.is_linker()) { |
164 | const Key &key = trie_->get_key(node.key_pos()); |
165 | if (key.length() >= min_length_) { |
166 | if (cur_++ >= offset_) { |
167 | return key; |
168 | } |
169 | } |
170 | } else if (node.child() != INVALID_LABEL) { |
171 | buf_.push_back(node.offset() ^ node.child()); |
172 | } |
173 | } |
174 | return Key::invalid_key(); |
175 | } |
176 | |
177 | const Key &PredictiveCursor::descending_next() { |
178 | while (!buf_.empty()) { |
179 | const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; |
180 | const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; |
181 | |
182 | const Base base = trie_->ith_node(node_id).base(); |
183 | if (post_order) { |
184 | buf_.pop_back(); |
185 | if (base.is_linker()) { |
186 | const Key &key = trie_->get_key(base.key_pos()); |
187 | if (key.length() >= min_length_) { |
188 | if (cur_++ >= offset_) { |
189 | return key; |
190 | } |
191 | } |
192 | } |
193 | } else { |
194 | buf_.back() |= POST_ORDER_FLAG; |
195 | UInt16 label = trie_->ith_node(node_id).child(); |
196 | while (label != INVALID_LABEL) { |
197 | buf_.push_back(base.offset() ^ label); |
198 | label = trie_->ith_node(base.offset() ^ label).sibling(); |
199 | } |
200 | } |
201 | } |
202 | return Key::invalid_key(); |
203 | } |
204 | |
205 | } // namespace dat |
206 | } // namespace grn |
207 | |