1#ifndef DAWGDIC_DICTIONARY_H
2#define DAWGDIC_DICTIONARY_H
3
4#include <iostream>
5#include <vector>
6
7#include "base-types.h"
8#include "dictionary-unit.h"
9
10namespace dawgdic {
11
12// Dictionary class for retrieval and binary I/O.
13class Dictionary {
14 public:
15 Dictionary() : units_(NULL), size_(0), units_buf_() {}
16
17 const DictionaryUnit *units() const {
18 return units_;
19 }
20 SizeType size() const {
21 return size_;
22 }
23 SizeType total_size() const {
24 return sizeof(DictionaryUnit) * size_;
25 }
26 SizeType file_size() const {
27 return sizeof(BaseType) + total_size();
28 }
29
30 // Root index.
31 BaseType root() const {
32 return 0;
33 }
34
35 // Checks if a given index is related to the end of a key.
36 bool has_value(BaseType index) const {
37 return units_[index].has_leaf();
38 }
39 // Gets a value from a given index.
40 ValueType value(BaseType index) const {
41 return units_[index ^ units_[index].offset()].value();
42 }
43
44 // Reads a dictionary from an input stream.
45 bool Read(std::istream *input) {
46 BaseType base_size;
47 if (!input->read(reinterpret_cast<char *>(&base_size), sizeof(BaseType))) {
48 return false;
49 }
50
51 SizeType size = static_cast<SizeType>(base_size);
52 std::vector<DictionaryUnit> units_buf(size);
53 if (!input->read(reinterpret_cast<char *>(&units_buf[0]),
54 sizeof(DictionaryUnit) * size)) {
55 return false;
56 }
57
58 SwapUnitsBuf(&units_buf);
59 return true;
60 }
61
62 // Writes a dictionry to an output stream.
63 bool Write(std::ostream *output) const {
64 BaseType base_size = static_cast<BaseType>(size_);
65 if (!output->write(reinterpret_cast<const char *>(&base_size),
66 sizeof(BaseType))) {
67 return false;
68 }
69
70 if (!output->write(reinterpret_cast<const char *>(units_),
71 sizeof(DictionaryUnit) * size_)) {
72 return false;
73 }
74
75 return true;
76 }
77
78 // Exact matching.
79 bool Contains(const CharType *key) const {
80 BaseType index = root();
81 if (!Follow(key, &index)) {
82 return false;
83 }
84 return has_value(index);
85 }
86 bool Contains(const CharType *key, SizeType length) const {
87 BaseType index = root();
88 if (!Follow(key, length, &index)) {
89 return false;
90 }
91 return has_value(index);
92 }
93
94 // Exact matching.
95 ValueType Find(const CharType *key) const {
96 BaseType index = root();
97 if (!Follow(key, &index)) {
98 return -1;
99 }
100 return has_value(index) ? value(index) : -1;
101 }
102 ValueType Find(const CharType *key, SizeType length) const {
103 BaseType index = root();
104 if (!Follow(key, length, &index)) {
105 return -1;
106 }
107 return has_value(index) ? value(index) : -1;
108 }
109 bool Find(const CharType *key, ValueType *value) const {
110 BaseType index = root();
111 if (!Follow(key, &index) || !has_value(index)) {
112 return false;
113 }
114 *value = this->value(index);
115 return true;
116 }
117 bool Find(const CharType *key, SizeType length, ValueType *value) const {
118 BaseType index = root();
119 if (!Follow(key, length, &index) || !has_value(index)) {
120 return false;
121 }
122 *value = this->value(index);
123 return true;
124 }
125
126 // Follows a transition.
127 bool Follow(CharType label, BaseType *index) const {
128 BaseType next_index =
129 *index ^ units_[*index].offset() ^ static_cast<UCharType>(label);
130 if (units_[next_index].label() != static_cast<UCharType>(label)) {
131 return false;
132 }
133 *index = next_index;
134 return true;
135 }
136
137 // Follows transitions.
138 bool Follow(const CharType *s, BaseType *index) const {
139 while (*s != '\0' && Follow(*s, index)) {
140 ++s;
141 }
142 return *s == '\0';
143 }
144 bool Follow(const CharType *s, BaseType *index, SizeType *count) const {
145 while (*s != '\0' && Follow(*s, index)) {
146 ++s, ++*count;
147 }
148 return *s == '\0';
149 }
150
151 // Follows transitions.
152 bool Follow(const CharType *s, SizeType length, BaseType *index) const {
153 for (SizeType i = 0; i < length; ++i) {
154 if (!Follow(s[i], index)) {
155 return false;
156 }
157 }
158 return true;
159 }
160 bool Follow(const CharType *s, SizeType length, BaseType *index,
161 SizeType *count) const {
162 for (SizeType i = 0; i < length; ++i, ++*count) {
163 if (!Follow(s[i], index)) {
164 return false;
165 }
166 }
167 return true;
168 }
169
170 // Maps memory with its size.
171 void Map(const void *address) {
172 Clear();
173 units_ = reinterpret_cast<const DictionaryUnit *>(
174 static_cast<const BaseType *>(address) + 1);
175 size_ = *static_cast<const BaseType *>(address);
176 }
177 void Map(const void *address, SizeType size) {
178 Clear();
179 units_ = static_cast<const DictionaryUnit *>(address);
180 size_ = size;
181 }
182
183 // Initializes a dictionary.
184 void Clear() {
185 units_ = NULL;
186 size_ = 0;
187 std::vector<DictionaryUnit>(0).swap(units_buf_);
188 }
189
190 // Swaps dictionaries.
191 void Swap(Dictionary *dic) {
192 std::swap(units_, dic->units_);
193 std::swap(size_, dic->size_);
194 units_buf_.swap(dic->units_buf_);
195 }
196
197 // Shrinks a vector.
198 void Shrink() {
199 if (units_buf_.size() == units_buf_.capacity()) {
200 return;
201 }
202
203 std::vector<DictionaryUnit> units_buf(units_buf_);
204 SwapUnitsBuf(&units_buf);
205 }
206
207public:
208 // Following member function is called from DawgBuilder.
209
210 // Swaps buffers for units.
211 void SwapUnitsBuf(std::vector<DictionaryUnit> *units_buf) {
212 units_ = &(*units_buf)[0];
213 size_ = static_cast<BaseType>(units_buf->size());
214 units_buf_.swap(*units_buf);
215 }
216
217 private:
218 const DictionaryUnit *units_;
219 SizeType size_;
220 std::vector<DictionaryUnit> units_buf_;
221
222 // Disallows copies.
223 Dictionary(const Dictionary &);
224 Dictionary &operator=(const Dictionary &);
225};
226
227} // namespace dawgdic
228
229#endif // DAWGDIC_DICTIONARY_H
230