1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2011-2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #pragma once |
20 | |
21 | #ifndef _MSC_VER |
22 | # include <stddef.h> |
23 | # include <stdint.h> |
24 | #endif // _MSC_VER |
25 | |
26 | #include <cstddef> |
27 | #include <exception> |
28 | |
29 | #ifdef _DEBUG |
30 | # include <iostream> |
31 | #endif // _DEBUG |
32 | |
33 | #ifndef GRN_DAT_API |
34 | # ifdef WIN32 |
35 | # ifdef GRN_DAT_EXPORT |
36 | # define GRN_DAT_API __declspec(dllexport) |
37 | # else // GRN_DAT_EXPORT |
38 | # define GRN_DAT_API __declspec(dllimport) |
39 | # endif // GRN_DAT_EXPORT |
40 | # else // WIN32 |
41 | # define GRN_DAT_API |
42 | # endif // WIN32 |
43 | #endif // GRN_DAT_API |
44 | |
45 | #ifdef WIN32 |
46 | # define grn_memcpy(dest, src, n) ::memcpy_s((dest), (n), (src), (n)) |
47 | #else // WIN32 |
48 | # define grn_memcpy(dest, src, n) std::memcpy((dest), (src), (n)) |
49 | #endif // WIN32 |
50 | |
51 | namespace grn { |
52 | namespace dat { |
53 | |
54 | #ifdef _MSC_VER |
55 | typedef unsigned __int8 UInt8; |
56 | typedef unsigned __int16 UInt16; |
57 | typedef unsigned __int32 UInt32; |
58 | typedef unsigned __int64 UInt64; |
59 | #else // _MSC_VER |
60 | typedef ::uint8_t UInt8; |
61 | typedef ::uint16_t UInt16; |
62 | typedef ::uint32_t UInt32; |
63 | typedef ::uint64_t UInt64; |
64 | #endif // _MSC_VER |
65 | |
66 | const UInt8 MAX_UINT8 = static_cast<UInt8>(0xFFU); |
67 | const UInt16 MAX_UINT16 = static_cast<UInt16>(0xFFFFU); |
68 | const UInt32 MAX_UINT32 = static_cast<UInt32>(0xFFFFFFFFU); |
69 | const UInt64 MAX_UINT64 = static_cast<UInt64>(0xFFFFFFFFFFFFFFFFULL); |
70 | |
71 | // If a key is a prefix of another key, such a key is associated with a special |
72 | // terminal node which has TERMINAL_LABEL. |
73 | const UInt16 TERMINAL_LABEL = 0x100; |
74 | const UInt16 MIN_LABEL = '\0'; |
75 | const UInt16 MAX_LABEL = TERMINAL_LABEL; |
76 | const UInt32 INVALID_LABEL = 0x1FF; |
77 | const UInt32 LABEL_MASK = 0x1FF; |
78 | |
79 | // The MSB of BASE is used to represent whether the node is a linker node or |
80 | // not and the other 31 bits represent the offset to its child nodes. So, the |
81 | // number of nodes is limited to 2^31. |
82 | const UInt32 ROOT_NODE_ID = 0; |
83 | const UInt32 MAX_NODE_ID = 0x7FFFFFFF; |
84 | const UInt32 MAX_NUM_NODES = MAX_NODE_ID + 1; |
85 | const UInt32 INVALID_NODE_ID = MAX_NODE_ID + 1; |
86 | |
87 | // 0 is reserved for non-linker leaf nodes. For example, the root node of an |
88 | // initial double-array is a non-linker leaf node. |
89 | const UInt32 MAX_OFFSET = MAX_NODE_ID; |
90 | const UInt32 INVALID_OFFSET = 0; |
91 | |
92 | // Phantom nodes are managed in each block because siblings are always put in |
93 | // the same block. |
94 | const UInt32 BLOCK_SIZE = 0x200; |
95 | const UInt32 BLOCK_MASK = 0x1FF; |
96 | const UInt32 MAX_BLOCK_ID = MAX_NODE_ID / BLOCK_SIZE; |
97 | const UInt32 MAX_NUM_BLOCKS = MAX_BLOCK_ID + 1; |
98 | |
99 | // Blocks are divided by their levels, which indicate how easily update |
100 | // operations can find a good offset in them. The level of a block rises when |
101 | // find_offset() fails in that block many times. MAX_FAILURE_COUNT is the |
102 | // threshold. Also, in order to limit the time cost, find_offset() scans at |
103 | // most MAX_BLOCK_COUNT blocks. |
104 | // Larger parameters bring more chances of finding good offsets but it leads to |
105 | // more node renumberings, which are costly operations, and thus results in |
106 | // a degradation of space/time efficiencies. |
107 | const UInt32 MAX_FAILURE_COUNT = 4; |
108 | const UInt32 MAX_BLOCK_COUNT = 16; |
109 | const UInt32 MAX_BLOCK_LEVEL = 5; |
110 | |
111 | // Blocks in the same level compose a doubly linked list. The entry block of |
112 | // a linked list is called a leader. INVALID_LEADER means that a linked list is |
113 | // empty and there exists no leader. |
114 | const UInt32 INVALID_LEADER = 0x7FFFFFFF; |
115 | |
116 | const UInt32 MIN_KEY_ID = 1; |
117 | const UInt32 MAX_KEY_ID = MAX_NODE_ID; |
118 | const UInt32 INVALID_KEY_ID = 0; |
119 | |
120 | // A key length is represented as a 12-bit unsigned integer in Key. |
121 | // A key ID is represented as a 28-bit unsigned integer in Key. |
122 | const UInt32 MAX_KEY_LENGTH = (1U << 12) - 1; |
123 | const UInt32 MAX_NUM_KEYS = (1U << 28) - 1; |
124 | |
125 | const UInt64 MIN_FILE_SIZE = 1 << 16; |
126 | const UInt64 DEFAULT_FILE_SIZE = 1 << 20; |
127 | const UInt64 MAX_FILE_SIZE = (UInt64)1 << 40; |
128 | const double DEFAULT_NUM_NODES_PER_KEY = 4.0; |
129 | const double MAX_NUM_NODES_PER_KEY = 16.0; |
130 | const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0; |
131 | const UInt32 MAX_KEY_BUF_SIZE = 0x80000000U; |
132 | const UInt32 MAX_TOTAL_KEY_LENGTH = 0xFFFFFFFFU; |
133 | |
134 | const UInt32 ID_RANGE_CURSOR = 0x00001; |
135 | const UInt32 KEY_RANGE_CURSOR = 0x00002; |
136 | const UInt32 PREFIX_CURSOR = 0x00004; |
137 | const UInt32 PREDICTIVE_CURSOR = 0x00008; |
138 | const UInt32 CURSOR_TYPE_MASK = 0x000FF; |
139 | |
140 | const UInt32 ASCENDING_CURSOR = 0x00100; |
141 | const UInt32 DESCENDING_CURSOR = 0x00200; |
142 | const UInt32 CURSOR_ORDER_MASK = 0x00F00; |
143 | |
144 | const UInt32 EXCEPT_LOWER_BOUND = 0x01000; |
145 | const UInt32 EXCEPT_UPPER_BOUND = 0x02000; |
146 | const UInt32 EXCEPT_EXACT_MATCH = 0x04000; |
147 | const UInt32 CURSOR_OPTIONS_MASK = 0xFF000; |
148 | |
149 | const UInt32 REMOVING_FLAG = 1U << 0; |
150 | const UInt32 INSERTING_FLAG = 1U << 1; |
151 | const UInt32 UPDATING_FLAG = 1U << 2; |
152 | const UInt32 CHANGING_MASK = REMOVING_FLAG | INSERTING_FLAG | UPDATING_FLAG; |
153 | |
154 | const UInt32 MKQ_SORT_THRESHOLD = 10; |
155 | |
156 | enum ErrorCode { |
157 | PARAM_ERROR = -1, |
158 | IO_ERROR = -2, |
159 | FORMAT_ERROR = -3, |
160 | MEMORY_ERROR = -4, |
161 | SIZE_ERROR = -5, |
162 | UNEXPECTED_ERROR = -6, |
163 | STATUS_ERROR = -7 |
164 | }; |
165 | |
166 | class Exception : public std::exception { |
167 | public: |
168 | Exception() throw() |
169 | : std::exception(), |
170 | file_("" ), |
171 | line_(-1), |
172 | what_("" ) {} |
173 | Exception(const char *file, int line, const char *what) throw() |
174 | : std::exception(), |
175 | file_(file), |
176 | line_(line), |
177 | what_((what != NULL) ? what : "" ) {} |
178 | Exception(const Exception &ex) throw() |
179 | : std::exception(ex), |
180 | file_(ex.file_), |
181 | line_(ex.line_), |
182 | what_(ex.what_) {} |
183 | virtual ~Exception() throw() {} |
184 | |
185 | virtual ErrorCode code() const throw() = 0; |
186 | virtual const char *file() const throw() { |
187 | return file_; |
188 | } |
189 | virtual int line() const throw() { |
190 | return line_; |
191 | } |
192 | virtual const char *what() const throw() { |
193 | return what_; |
194 | } |
195 | |
196 | private: |
197 | const char *file_; |
198 | int line_; |
199 | const char *what_; |
200 | }; |
201 | |
202 | template <ErrorCode T> |
203 | class Error : public Exception { |
204 | public: |
205 | Error() throw() |
206 | : Exception() {} |
207 | Error(const char *file, int line, const char *what) throw() |
208 | : Exception(file, line, what) {} |
209 | Error(const Error &ex) throw() |
210 | : Exception(ex) {} |
211 | virtual ~Error() throw() {} |
212 | |
213 | virtual ErrorCode code() const throw() { |
214 | return T; |
215 | } |
216 | }; |
217 | |
218 | typedef Error<PARAM_ERROR> ParamError; |
219 | typedef Error<IO_ERROR> IOError; |
220 | typedef Error<FORMAT_ERROR> FormatError; |
221 | typedef Error<MEMORY_ERROR> MemoryError; |
222 | typedef Error<SIZE_ERROR> SizeError; |
223 | typedef Error<UNEXPECTED_ERROR> UnexpectedError; |
224 | typedef Error<STATUS_ERROR> StatusError; |
225 | |
226 | #define GRN_DAT_INT_TO_STR(value) #value |
227 | #define GRN_DAT_LINE_TO_STR(line) GRN_DAT_INT_TO_STR(line) |
228 | #define GRN_DAT_LINE_STR GRN_DAT_LINE_TO_STR(__LINE__) |
229 | |
230 | #define GRN_DAT_THROW(code, msg)\ |
231 | (throw grn::dat::Error<code>(__FILE__, __LINE__,\ |
232 | __FILE__ ":" GRN_DAT_LINE_STR ": " #code ": " msg)) |
233 | #define GRN_DAT_THROW_IF(code, cond)\ |
234 | (void)((!(cond)) || (GRN_DAT_THROW(code, #cond), 0)) |
235 | |
236 | #ifdef _DEBUG |
237 | #define GRN_DAT_DEBUG_THROW_IF(cond)\ |
238 | GRN_DAT_THROW_IF(grn::dat::UNEXPECTED_ERROR, cond) |
239 | #define GRN_DAT_DEBUG_LOG(var)\ |
240 | (std::clog << __FILE__ ":" GRN_DAT_LINE_STR ": " #var ": "\ |
241 | << (var) << std::endl) |
242 | #else // _DEBUG |
243 | #define GRN_DAT_DEBUG_THROW_IF(cond) |
244 | #define GRN_DAT_DEBUG_LOG(var) |
245 | #endif // _DEBUG |
246 | |
247 | } // namespace dat |
248 | } // namespace grn |
249 | |