1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief API for handling bounded repeats. |
31 | * |
32 | * This file provides an internal API for handling bounded repeats of character |
33 | * classes. It is used by the Large Bounded Repeat (LBR) engine and by the |
34 | * bounded repeat handling in the LimEx NFA engine as well. |
35 | * |
36 | * The state required by these functions is split into two regions: |
37 | * |
38 | * 1. Control block. This is a small structure (size varies with repeat mode) |
39 | * that may be copied around or compressed into stream state. |
40 | * 2. Repeat state. This is a larger structure that can be quite big for large |
41 | * repeats, often containing a multibit ring or large vector of indices. |
42 | * This generally lives in stream state and is not copied. |
43 | */ |
44 | |
45 | #ifndef REPEAT_H |
46 | #define REPEAT_H |
47 | |
48 | #include "ue2common.h" |
49 | #include "repeat_internal.h" |
50 | #include "util/bitutils.h" |
51 | |
52 | #ifdef __cplusplus |
53 | extern "C" |
54 | { |
55 | #endif |
56 | |
57 | /** Returns the offset of the most recent 'top' offset set in the repeat. */ |
58 | static really_inline |
59 | u64a repeatLastTop(const struct RepeatInfo *info, |
60 | const union RepeatControl *ctrl, const void *state); |
61 | |
62 | /** Returns the offset of the next match after 'offset', or zero if no further |
63 | * matches are possible. */ |
64 | static really_inline |
65 | u64a repeatNextMatch(const struct RepeatInfo *info, |
66 | const union RepeatControl *ctrl, const void *state, |
67 | u64a offset); |
68 | |
69 | /** Stores a new top in the repeat. If is_alive is false, the repeat will be |
70 | * initialised first and this top will become the first (and only) one. */ |
71 | static really_inline |
72 | void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, |
73 | void *state, u64a offset, char is_alive); |
74 | |
75 | /** Return type for repeatHasMatch. */ |
76 | enum RepeatMatch { |
77 | REPEAT_NOMATCH, /**< This offset is not a valid match. */ |
78 | REPEAT_MATCH, /**< This offset is a valid match. */ |
79 | REPEAT_STALE /**< This offset is not a valid match and no greater |
80 | offset will be (unless another top is stored). */ |
81 | }; |
82 | |
83 | /** Query whether the repeat has a match at the given offset. Returns |
84 | * ::REPEAT_STALE if it does not have a match at that offset _and_ |
85 | * no further matches are possible. */ |
86 | static really_inline |
87 | enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, |
88 | const union RepeatControl *ctrl, |
89 | const void *state, u64a offset); |
90 | |
91 | /** \brief Serialize a packed version of the repeat control block into stream |
92 | * state. */ |
93 | void repeatPack(char *dest, const struct RepeatInfo *info, |
94 | const union RepeatControl *ctrl, u64a offset); |
95 | |
96 | /** \brief Deserialize a packed version of the repeat control block. */ |
97 | void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, |
98 | union RepeatControl *ctrl); |
99 | |
100 | //// |
101 | //// IMPLEMENTATION. |
102 | //// |
103 | |
104 | u64a repeatLastTopRing(const struct RepeatInfo *info, |
105 | const union RepeatControl *ctrl); |
106 | |
107 | u64a repeatLastTopRange(const union RepeatControl *ctrl, |
108 | const void *state); |
109 | |
110 | u64a repeatLastTopBitmap(const union RepeatControl *ctrl); |
111 | |
112 | u64a repeatLastTopTrailer(const struct RepeatInfo *info, |
113 | const union RepeatControl *ctrl); |
114 | |
115 | u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, |
116 | const union RepeatControl *ctrl, |
117 | const void *state); |
118 | |
119 | static really_inline |
120 | u64a repeatLastTop(const struct RepeatInfo *info, |
121 | const union RepeatControl *ctrl, const void *state) { |
122 | assert(info && ctrl && state); |
123 | |
124 | switch ((enum RepeatType)info->type) { |
125 | case REPEAT_RING: |
126 | return repeatLastTopRing(info, ctrl); |
127 | case REPEAT_FIRST: |
128 | case REPEAT_LAST: |
129 | return ctrl->offset.offset; |
130 | case REPEAT_RANGE: |
131 | return repeatLastTopRange(ctrl, state); |
132 | case REPEAT_BITMAP: |
133 | return repeatLastTopBitmap(ctrl); |
134 | case REPEAT_SPARSE_OPTIMAL_P: |
135 | return repeatLastTopSparseOptimalP(info, ctrl, state); |
136 | case REPEAT_TRAILER: |
137 | return repeatLastTopTrailer(info, ctrl); |
138 | case REPEAT_ALWAYS: |
139 | return 0; |
140 | } |
141 | |
142 | DEBUG_PRINTF("bad repeat type %u\n" , info->type); |
143 | assert(0); |
144 | return 0; |
145 | } |
146 | |
147 | // Used for both FIRST and LAST models. |
148 | static really_inline |
149 | u64a repeatNextMatchOffset(const struct RepeatInfo *info, |
150 | const union RepeatControl *ctrl, u64a offset) { |
151 | u64a first = ctrl->offset.offset + info->repeatMin; |
152 | if (offset < first) { |
153 | return first; |
154 | } |
155 | |
156 | if (info->repeatMax == REPEAT_INF || |
157 | offset < ctrl->offset.offset + info->repeatMax) { |
158 | return offset + 1; |
159 | } |
160 | |
161 | return 0; // No more matches. |
162 | } |
163 | |
164 | u64a repeatNextMatchRing(const struct RepeatInfo *info, |
165 | const union RepeatControl *ctrl, |
166 | const void *state, u64a offset); |
167 | |
168 | u64a repeatNextMatchRange(const struct RepeatInfo *info, |
169 | const union RepeatControl *ctrl, |
170 | const void *state, u64a offset); |
171 | |
172 | u64a repeatNextMatchBitmap(const struct RepeatInfo *info, |
173 | const union RepeatControl *ctrl, u64a offset); |
174 | |
175 | u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, |
176 | const union RepeatControl *ctrl, |
177 | const void *state, u64a offset); |
178 | |
179 | u64a repeatNextMatchTrailer(const struct RepeatInfo *info, |
180 | const union RepeatControl *ctrl, u64a offset); |
181 | |
182 | static really_inline |
183 | u64a repeatNextMatch(const struct RepeatInfo *info, |
184 | const union RepeatControl *ctrl, const void *state, |
185 | u64a offset) { |
186 | assert(info && ctrl && state); |
187 | assert(ISALIGNED(info)); |
188 | assert(ISALIGNED(ctrl)); |
189 | |
190 | switch ((enum RepeatType)info->type) { |
191 | case REPEAT_RING: |
192 | return repeatNextMatchRing(info, ctrl, state, offset); |
193 | case REPEAT_FIRST: |
194 | // fall through |
195 | case REPEAT_LAST: |
196 | return repeatNextMatchOffset(info, ctrl, offset); |
197 | case REPEAT_RANGE: |
198 | return repeatNextMatchRange(info, ctrl, state, offset); |
199 | case REPEAT_BITMAP: |
200 | return repeatNextMatchBitmap(info, ctrl, offset); |
201 | case REPEAT_SPARSE_OPTIMAL_P: |
202 | return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); |
203 | case REPEAT_TRAILER: |
204 | return repeatNextMatchTrailer(info, ctrl, offset); |
205 | case REPEAT_ALWAYS: |
206 | return offset + 1; |
207 | } |
208 | |
209 | DEBUG_PRINTF("bad repeat type %u\n" , info->type); |
210 | assert(0); |
211 | return 0; |
212 | } |
213 | |
214 | static really_inline |
215 | void repeatStoreFirst(union RepeatControl *ctrl, u64a offset, |
216 | char is_alive) { |
217 | if (is_alive) { |
218 | return; |
219 | } |
220 | ctrl->offset.offset = offset; |
221 | } |
222 | |
223 | static really_inline |
224 | void repeatStoreLast(union RepeatControl *ctrl, u64a offset, |
225 | UNUSED char is_alive) { |
226 | assert(!is_alive || offset >= ctrl->offset.offset); |
227 | ctrl->offset.offset = offset; |
228 | } |
229 | |
230 | void repeatStoreRing(const struct RepeatInfo *info, |
231 | union RepeatControl *ctrl, void *state, u64a offset, |
232 | char is_alive); |
233 | |
234 | void repeatStoreRange(const struct RepeatInfo *info, |
235 | union RepeatControl *ctrl, void *state, u64a offset, |
236 | char is_alive); |
237 | |
238 | void repeatStoreBitmap(const struct RepeatInfo *info, |
239 | union RepeatControl *ctrl, u64a offset, |
240 | char is_alive); |
241 | |
242 | void repeatStoreSparseOptimalP(const struct RepeatInfo *info, |
243 | union RepeatControl *ctrl, void *state, |
244 | u64a offset, char is_alive); |
245 | |
246 | void repeatStoreTrailer(const struct RepeatInfo *info, |
247 | union RepeatControl *ctrl, u64a offset, |
248 | char is_alive); |
249 | |
250 | static really_inline |
251 | void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, |
252 | void *state, u64a offset, char is_alive) { |
253 | assert(info && ctrl && state); |
254 | assert(ISALIGNED(info)); |
255 | assert(ISALIGNED(ctrl)); |
256 | |
257 | assert(info->repeatMin <= info->repeatMax); |
258 | assert(info->repeatMax <= REPEAT_INF); |
259 | |
260 | switch ((enum RepeatType)info->type) { |
261 | case REPEAT_RING: |
262 | repeatStoreRing(info, ctrl, state, offset, is_alive); |
263 | break; |
264 | case REPEAT_FIRST: |
265 | repeatStoreFirst(ctrl, offset, is_alive); |
266 | break; |
267 | case REPEAT_LAST: |
268 | repeatStoreLast(ctrl, offset, is_alive); |
269 | break; |
270 | case REPEAT_RANGE: |
271 | repeatStoreRange(info, ctrl, state, offset, is_alive); |
272 | break; |
273 | case REPEAT_BITMAP: |
274 | repeatStoreBitmap(info, ctrl, offset, is_alive); |
275 | break; |
276 | case REPEAT_SPARSE_OPTIMAL_P: |
277 | repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive); |
278 | break; |
279 | case REPEAT_TRAILER: |
280 | repeatStoreTrailer(info, ctrl, offset, is_alive); |
281 | break; |
282 | case REPEAT_ALWAYS: |
283 | /* nothing to do - no state */ |
284 | break; |
285 | } |
286 | } |
287 | |
288 | static really_inline |
289 | enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info, |
290 | const union RepeatControl *ctrl, |
291 | u64a offset) { |
292 | if (offset < ctrl->offset.offset + info->repeatMin) { |
293 | return REPEAT_NOMATCH; |
294 | } |
295 | |
296 | // FIRST models are {N,} repeats, i.e. they always have inf max depth. |
297 | assert(info->repeatMax == REPEAT_INF); |
298 | return REPEAT_MATCH; |
299 | } |
300 | |
301 | static really_inline |
302 | enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info, |
303 | const union RepeatControl *ctrl, |
304 | u64a offset) { |
305 | if (offset < ctrl->offset.offset + info->repeatMin) { |
306 | return REPEAT_NOMATCH; |
307 | } |
308 | assert(info->repeatMax < REPEAT_INF); |
309 | if (offset <= ctrl->offset.offset + info->repeatMax) { |
310 | return REPEAT_MATCH; |
311 | } |
312 | return REPEAT_STALE; |
313 | } |
314 | |
315 | enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, |
316 | const union RepeatControl *ctrl, |
317 | const void *state, u64a offset); |
318 | |
319 | enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, |
320 | const union RepeatControl *ctrl, |
321 | const void *state, u64a offset); |
322 | |
323 | enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, |
324 | const union RepeatControl *ctrl, |
325 | const void *state, u64a offset); |
326 | |
327 | enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, |
328 | const union RepeatControl *ctrl, |
329 | u64a offset); |
330 | |
331 | enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, |
332 | const union RepeatControl *ctrl, |
333 | u64a offset); |
334 | |
335 | static really_inline |
336 | enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, |
337 | const union RepeatControl *ctrl, |
338 | const void *state, u64a offset) { |
339 | assert(info && ctrl && state); |
340 | assert(ISALIGNED(info)); |
341 | assert(ISALIGNED(ctrl)); |
342 | |
343 | switch ((enum RepeatType)info->type) { |
344 | case REPEAT_RING: |
345 | return repeatHasMatchRing(info, ctrl, state, offset); |
346 | case REPEAT_FIRST: |
347 | return repeatHasMatchFirst(info, ctrl, offset); |
348 | case REPEAT_LAST: |
349 | return repeatHasMatchLast(info, ctrl, offset); |
350 | case REPEAT_RANGE: |
351 | return repeatHasMatchRange(info, ctrl, state, offset); |
352 | case REPEAT_BITMAP: |
353 | return repeatHasMatchBitmap(info, ctrl, offset); |
354 | case REPEAT_SPARSE_OPTIMAL_P: |
355 | return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); |
356 | case REPEAT_TRAILER: |
357 | return repeatHasMatchTrailer(info, ctrl, offset); |
358 | case REPEAT_ALWAYS: |
359 | return REPEAT_MATCH; |
360 | } |
361 | |
362 | assert(0); |
363 | return REPEAT_NOMATCH; |
364 | } |
365 | |
366 | #ifdef __cplusplus |
367 | } |
368 | #endif |
369 | |
370 | #endif // REPEAT_H |
371 | |