1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief API for handling bounded repeats.
31 *
32 * This file provides an internal API for handling bounded repeats of character
33 * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
34 * bounded repeat handling in the LimEx NFA engine as well.
35 *
36 * The state required by these functions is split into two regions:
37 *
38 * 1. Control block. This is a small structure (size varies with repeat mode)
39 * that may be copied around or compressed into stream state.
40 * 2. Repeat state. This is a larger structure that can be quite big for large
41 * repeats, often containing a multibit ring or large vector of indices.
42 * This generally lives in stream state and is not copied.
43 */
44
45#ifndef REPEAT_H
46#define REPEAT_H
47
48#include "ue2common.h"
49#include "repeat_internal.h"
50#include "util/bitutils.h"
51
52#ifdef __cplusplus
53extern "C"
54{
55#endif
56
57/** Returns the offset of the most recent 'top' offset set in the repeat. */
58static really_inline
59u64a repeatLastTop(const struct RepeatInfo *info,
60 const union RepeatControl *ctrl, const void *state);
61
62/** Returns the offset of the next match after 'offset', or zero if no further
63 * matches are possible. */
64static really_inline
65u64a repeatNextMatch(const struct RepeatInfo *info,
66 const union RepeatControl *ctrl, const void *state,
67 u64a offset);
68
69/** Stores a new top in the repeat. If is_alive is false, the repeat will be
70 * initialised first and this top will become the first (and only) one. */
71static really_inline
72void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
73 void *state, u64a offset, char is_alive);
74
75/** Return type for repeatHasMatch. */
76enum RepeatMatch {
77 REPEAT_NOMATCH, /**< This offset is not a valid match. */
78 REPEAT_MATCH, /**< This offset is a valid match. */
79 REPEAT_STALE /**< This offset is not a valid match and no greater
80 offset will be (unless another top is stored). */
81};
82
83/** Query whether the repeat has a match at the given offset. Returns
84 * ::REPEAT_STALE if it does not have a match at that offset _and_
85 * no further matches are possible. */
86static really_inline
87enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
88 const union RepeatControl *ctrl,
89 const void *state, u64a offset);
90
91/** \brief Serialize a packed version of the repeat control block into stream
92 * state. */
93void repeatPack(char *dest, const struct RepeatInfo *info,
94 const union RepeatControl *ctrl, u64a offset);
95
96/** \brief Deserialize a packed version of the repeat control block. */
97void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
98 union RepeatControl *ctrl);
99
100////
101//// IMPLEMENTATION.
102////
103
104u64a repeatLastTopRing(const struct RepeatInfo *info,
105 const union RepeatControl *ctrl);
106
107u64a repeatLastTopRange(const union RepeatControl *ctrl,
108 const void *state);
109
110u64a repeatLastTopBitmap(const union RepeatControl *ctrl);
111
112u64a repeatLastTopTrailer(const struct RepeatInfo *info,
113 const union RepeatControl *ctrl);
114
115u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
116 const union RepeatControl *ctrl,
117 const void *state);
118
119static really_inline
120u64a repeatLastTop(const struct RepeatInfo *info,
121 const union RepeatControl *ctrl, const void *state) {
122 assert(info && ctrl && state);
123
124 switch ((enum RepeatType)info->type) {
125 case REPEAT_RING:
126 return repeatLastTopRing(info, ctrl);
127 case REPEAT_FIRST:
128 case REPEAT_LAST:
129 return ctrl->offset.offset;
130 case REPEAT_RANGE:
131 return repeatLastTopRange(ctrl, state);
132 case REPEAT_BITMAP:
133 return repeatLastTopBitmap(ctrl);
134 case REPEAT_SPARSE_OPTIMAL_P:
135 return repeatLastTopSparseOptimalP(info, ctrl, state);
136 case REPEAT_TRAILER:
137 return repeatLastTopTrailer(info, ctrl);
138 case REPEAT_ALWAYS:
139 return 0;
140 }
141
142 DEBUG_PRINTF("bad repeat type %u\n", info->type);
143 assert(0);
144 return 0;
145}
146
147// Used for both FIRST and LAST models.
148static really_inline
149u64a repeatNextMatchOffset(const struct RepeatInfo *info,
150 const union RepeatControl *ctrl, u64a offset) {
151 u64a first = ctrl->offset.offset + info->repeatMin;
152 if (offset < first) {
153 return first;
154 }
155
156 if (info->repeatMax == REPEAT_INF ||
157 offset < ctrl->offset.offset + info->repeatMax) {
158 return offset + 1;
159 }
160
161 return 0; // No more matches.
162}
163
164u64a repeatNextMatchRing(const struct RepeatInfo *info,
165 const union RepeatControl *ctrl,
166 const void *state, u64a offset);
167
168u64a repeatNextMatchRange(const struct RepeatInfo *info,
169 const union RepeatControl *ctrl,
170 const void *state, u64a offset);
171
172u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
173 const union RepeatControl *ctrl, u64a offset);
174
175u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
176 const union RepeatControl *ctrl,
177 const void *state, u64a offset);
178
179u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
180 const union RepeatControl *ctrl, u64a offset);
181
182static really_inline
183u64a repeatNextMatch(const struct RepeatInfo *info,
184 const union RepeatControl *ctrl, const void *state,
185 u64a offset) {
186 assert(info && ctrl && state);
187 assert(ISALIGNED(info));
188 assert(ISALIGNED(ctrl));
189
190 switch ((enum RepeatType)info->type) {
191 case REPEAT_RING:
192 return repeatNextMatchRing(info, ctrl, state, offset);
193 case REPEAT_FIRST:
194 // fall through
195 case REPEAT_LAST:
196 return repeatNextMatchOffset(info, ctrl, offset);
197 case REPEAT_RANGE:
198 return repeatNextMatchRange(info, ctrl, state, offset);
199 case REPEAT_BITMAP:
200 return repeatNextMatchBitmap(info, ctrl, offset);
201 case REPEAT_SPARSE_OPTIMAL_P:
202 return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
203 case REPEAT_TRAILER:
204 return repeatNextMatchTrailer(info, ctrl, offset);
205 case REPEAT_ALWAYS:
206 return offset + 1;
207 }
208
209 DEBUG_PRINTF("bad repeat type %u\n", info->type);
210 assert(0);
211 return 0;
212}
213
214static really_inline
215void repeatStoreFirst(union RepeatControl *ctrl, u64a offset,
216 char is_alive) {
217 if (is_alive) {
218 return;
219 }
220 ctrl->offset.offset = offset;
221}
222
223static really_inline
224void repeatStoreLast(union RepeatControl *ctrl, u64a offset,
225 UNUSED char is_alive) {
226 assert(!is_alive || offset >= ctrl->offset.offset);
227 ctrl->offset.offset = offset;
228}
229
230void repeatStoreRing(const struct RepeatInfo *info,
231 union RepeatControl *ctrl, void *state, u64a offset,
232 char is_alive);
233
234void repeatStoreRange(const struct RepeatInfo *info,
235 union RepeatControl *ctrl, void *state, u64a offset,
236 char is_alive);
237
238void repeatStoreBitmap(const struct RepeatInfo *info,
239 union RepeatControl *ctrl, u64a offset,
240 char is_alive);
241
242void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
243 union RepeatControl *ctrl, void *state,
244 u64a offset, char is_alive);
245
246void repeatStoreTrailer(const struct RepeatInfo *info,
247 union RepeatControl *ctrl, u64a offset,
248 char is_alive);
249
250static really_inline
251void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
252 void *state, u64a offset, char is_alive) {
253 assert(info && ctrl && state);
254 assert(ISALIGNED(info));
255 assert(ISALIGNED(ctrl));
256
257 assert(info->repeatMin <= info->repeatMax);
258 assert(info->repeatMax <= REPEAT_INF);
259
260 switch ((enum RepeatType)info->type) {
261 case REPEAT_RING:
262 repeatStoreRing(info, ctrl, state, offset, is_alive);
263 break;
264 case REPEAT_FIRST:
265 repeatStoreFirst(ctrl, offset, is_alive);
266 break;
267 case REPEAT_LAST:
268 repeatStoreLast(ctrl, offset, is_alive);
269 break;
270 case REPEAT_RANGE:
271 repeatStoreRange(info, ctrl, state, offset, is_alive);
272 break;
273 case REPEAT_BITMAP:
274 repeatStoreBitmap(info, ctrl, offset, is_alive);
275 break;
276 case REPEAT_SPARSE_OPTIMAL_P:
277 repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive);
278 break;
279 case REPEAT_TRAILER:
280 repeatStoreTrailer(info, ctrl, offset, is_alive);
281 break;
282 case REPEAT_ALWAYS:
283 /* nothing to do - no state */
284 break;
285 }
286}
287
288static really_inline
289enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info,
290 const union RepeatControl *ctrl,
291 u64a offset) {
292 if (offset < ctrl->offset.offset + info->repeatMin) {
293 return REPEAT_NOMATCH;
294 }
295
296 // FIRST models are {N,} repeats, i.e. they always have inf max depth.
297 assert(info->repeatMax == REPEAT_INF);
298 return REPEAT_MATCH;
299}
300
301static really_inline
302enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info,
303 const union RepeatControl *ctrl,
304 u64a offset) {
305 if (offset < ctrl->offset.offset + info->repeatMin) {
306 return REPEAT_NOMATCH;
307 }
308 assert(info->repeatMax < REPEAT_INF);
309 if (offset <= ctrl->offset.offset + info->repeatMax) {
310 return REPEAT_MATCH;
311 }
312 return REPEAT_STALE;
313}
314
315enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
316 const union RepeatControl *ctrl,
317 const void *state, u64a offset);
318
319enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
320 const union RepeatControl *ctrl,
321 const void *state, u64a offset);
322
323enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
324 const union RepeatControl *ctrl,
325 const void *state, u64a offset);
326
327enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
328 const union RepeatControl *ctrl,
329 u64a offset);
330
331enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
332 const union RepeatControl *ctrl,
333 u64a offset);
334
335static really_inline
336enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
337 const union RepeatControl *ctrl,
338 const void *state, u64a offset) {
339 assert(info && ctrl && state);
340 assert(ISALIGNED(info));
341 assert(ISALIGNED(ctrl));
342
343 switch ((enum RepeatType)info->type) {
344 case REPEAT_RING:
345 return repeatHasMatchRing(info, ctrl, state, offset);
346 case REPEAT_FIRST:
347 return repeatHasMatchFirst(info, ctrl, offset);
348 case REPEAT_LAST:
349 return repeatHasMatchLast(info, ctrl, offset);
350 case REPEAT_RANGE:
351 return repeatHasMatchRange(info, ctrl, state, offset);
352 case REPEAT_BITMAP:
353 return repeatHasMatchBitmap(info, ctrl, offset);
354 case REPEAT_SPARSE_OPTIMAL_P:
355 return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
356 case REPEAT_TRAILER:
357 return repeatHasMatchTrailer(info, ctrl, offset);
358 case REPEAT_ALWAYS:
359 return REPEAT_MATCH;
360 }
361
362 assert(0);
363 return REPEAT_NOMATCH;
364}
365
366#ifdef __cplusplus
367}
368#endif
369
370#endif // REPEAT_H
371