1 | #ifndef DAWGDIC_DICTIONARY_H |
2 | #define DAWGDIC_DICTIONARY_H |
3 | |
4 | #include <iostream> |
5 | #include <vector> |
6 | |
7 | #include "base-types.h" |
8 | #include "dictionary-unit.h" |
9 | |
10 | namespace dawgdic { |
11 | |
12 | // Dictionary class for retrieval and binary I/O. |
13 | class Dictionary { |
14 | public: |
15 | Dictionary() : units_(NULL), size_(0), units_buf_() {} |
16 | |
17 | const DictionaryUnit *units() const { |
18 | return units_; |
19 | } |
20 | SizeType size() const { |
21 | return size_; |
22 | } |
23 | SizeType total_size() const { |
24 | return sizeof(DictionaryUnit) * size_; |
25 | } |
26 | SizeType file_size() const { |
27 | return sizeof(BaseType) + total_size(); |
28 | } |
29 | |
30 | // Root index. |
31 | BaseType root() const { |
32 | return 0; |
33 | } |
34 | |
35 | // Checks if a given index is related to the end of a key. |
36 | bool has_value(BaseType index) const { |
37 | return units_[index].has_leaf(); |
38 | } |
39 | // Gets a value from a given index. |
40 | ValueType value(BaseType index) const { |
41 | return units_[index ^ units_[index].offset()].value(); |
42 | } |
43 | |
44 | // Reads a dictionary from an input stream. |
45 | bool Read(std::istream *input) { |
46 | BaseType base_size; |
47 | if (!input->read(reinterpret_cast<char *>(&base_size), sizeof(BaseType))) { |
48 | return false; |
49 | } |
50 | |
51 | SizeType size = static_cast<SizeType>(base_size); |
52 | std::vector<DictionaryUnit> units_buf(size); |
53 | if (!input->read(reinterpret_cast<char *>(&units_buf[0]), |
54 | sizeof(DictionaryUnit) * size)) { |
55 | return false; |
56 | } |
57 | |
58 | SwapUnitsBuf(&units_buf); |
59 | return true; |
60 | } |
61 | |
62 | // Writes a dictionry to an output stream. |
63 | bool Write(std::ostream *output) const { |
64 | BaseType base_size = static_cast<BaseType>(size_); |
65 | if (!output->write(reinterpret_cast<const char *>(&base_size), |
66 | sizeof(BaseType))) { |
67 | return false; |
68 | } |
69 | |
70 | if (!output->write(reinterpret_cast<const char *>(units_), |
71 | sizeof(DictionaryUnit) * size_)) { |
72 | return false; |
73 | } |
74 | |
75 | return true; |
76 | } |
77 | |
78 | // Exact matching. |
79 | bool Contains(const CharType *key) const { |
80 | BaseType index = root(); |
81 | if (!Follow(key, &index)) { |
82 | return false; |
83 | } |
84 | return has_value(index); |
85 | } |
86 | bool Contains(const CharType *key, SizeType length) const { |
87 | BaseType index = root(); |
88 | if (!Follow(key, length, &index)) { |
89 | return false; |
90 | } |
91 | return has_value(index); |
92 | } |
93 | |
94 | // Exact matching. |
95 | ValueType Find(const CharType *key) const { |
96 | BaseType index = root(); |
97 | if (!Follow(key, &index)) { |
98 | return -1; |
99 | } |
100 | return has_value(index) ? value(index) : -1; |
101 | } |
102 | ValueType Find(const CharType *key, SizeType length) const { |
103 | BaseType index = root(); |
104 | if (!Follow(key, length, &index)) { |
105 | return -1; |
106 | } |
107 | return has_value(index) ? value(index) : -1; |
108 | } |
109 | bool Find(const CharType *key, ValueType *value) const { |
110 | BaseType index = root(); |
111 | if (!Follow(key, &index) || !has_value(index)) { |
112 | return false; |
113 | } |
114 | *value = this->value(index); |
115 | return true; |
116 | } |
117 | bool Find(const CharType *key, SizeType length, ValueType *value) const { |
118 | BaseType index = root(); |
119 | if (!Follow(key, length, &index) || !has_value(index)) { |
120 | return false; |
121 | } |
122 | *value = this->value(index); |
123 | return true; |
124 | } |
125 | |
126 | // Follows a transition. |
127 | bool Follow(CharType label, BaseType *index) const { |
128 | BaseType next_index = |
129 | *index ^ units_[*index].offset() ^ static_cast<UCharType>(label); |
130 | if (units_[next_index].label() != static_cast<UCharType>(label)) { |
131 | return false; |
132 | } |
133 | *index = next_index; |
134 | return true; |
135 | } |
136 | |
137 | // Follows transitions. |
138 | bool Follow(const CharType *s, BaseType *index) const { |
139 | while (*s != '\0' && Follow(*s, index)) { |
140 | ++s; |
141 | } |
142 | return *s == '\0'; |
143 | } |
144 | bool Follow(const CharType *s, BaseType *index, SizeType *count) const { |
145 | while (*s != '\0' && Follow(*s, index)) { |
146 | ++s, ++*count; |
147 | } |
148 | return *s == '\0'; |
149 | } |
150 | |
151 | // Follows transitions. |
152 | bool Follow(const CharType *s, SizeType length, BaseType *index) const { |
153 | for (SizeType i = 0; i < length; ++i) { |
154 | if (!Follow(s[i], index)) { |
155 | return false; |
156 | } |
157 | } |
158 | return true; |
159 | } |
160 | bool Follow(const CharType *s, SizeType length, BaseType *index, |
161 | SizeType *count) const { |
162 | for (SizeType i = 0; i < length; ++i, ++*count) { |
163 | if (!Follow(s[i], index)) { |
164 | return false; |
165 | } |
166 | } |
167 | return true; |
168 | } |
169 | |
170 | // Maps memory with its size. |
171 | void Map(const void *address) { |
172 | Clear(); |
173 | units_ = reinterpret_cast<const DictionaryUnit *>( |
174 | static_cast<const BaseType *>(address) + 1); |
175 | size_ = *static_cast<const BaseType *>(address); |
176 | } |
177 | void Map(const void *address, SizeType size) { |
178 | Clear(); |
179 | units_ = static_cast<const DictionaryUnit *>(address); |
180 | size_ = size; |
181 | } |
182 | |
183 | // Initializes a dictionary. |
184 | void Clear() { |
185 | units_ = NULL; |
186 | size_ = 0; |
187 | std::vector<DictionaryUnit>(0).swap(units_buf_); |
188 | } |
189 | |
190 | // Swaps dictionaries. |
191 | void Swap(Dictionary *dic) { |
192 | std::swap(units_, dic->units_); |
193 | std::swap(size_, dic->size_); |
194 | units_buf_.swap(dic->units_buf_); |
195 | } |
196 | |
197 | // Shrinks a vector. |
198 | void Shrink() { |
199 | if (units_buf_.size() == units_buf_.capacity()) { |
200 | return; |
201 | } |
202 | |
203 | std::vector<DictionaryUnit> units_buf(units_buf_); |
204 | SwapUnitsBuf(&units_buf); |
205 | } |
206 | |
207 | public: |
208 | // Following member function is called from DawgBuilder. |
209 | |
210 | // Swaps buffers for units. |
211 | void SwapUnitsBuf(std::vector<DictionaryUnit> *units_buf) { |
212 | units_ = &(*units_buf)[0]; |
213 | size_ = static_cast<BaseType>(units_buf->size()); |
214 | units_buf_.swap(*units_buf); |
215 | } |
216 | |
217 | private: |
218 | const DictionaryUnit *units_; |
219 | SizeType size_; |
220 | std::vector<DictionaryUnit> units_buf_; |
221 | |
222 | // Disallows copies. |
223 | Dictionary(const Dictionary &); |
224 | Dictionary &operator=(const Dictionary &); |
225 | }; |
226 | |
227 | } // namespace dawgdic |
228 | |
229 | #endif // DAWGDIC_DICTIONARY_H |
230 | |