1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef REPEAT_INTERNAL_H
30#define REPEAT_INTERNAL_H
31
32#include "ue2common.h"
33
34/** \file
35 * \brief Bounded Repeat models.
36 *
37 * Used by the NFA, to represent bounded repeats managed via special POS and
38 * TUG exceptions, and by the LBR (limited bounded repeat) and Castle
39 * specialist engines.
40 *
41 * We currently have a number of different kinds of bounded repeat model, for
42 * different kinds of {N,M} repeats, described by ::RepeatType.
43 */
44
45/** Different types of bounded repeats. */
46enum RepeatType {
47 /** General mechanism for tracking {N,M} repeats. Stores the first top as
48 * an absolute offset, then subsequent tops in the {N,M} range as a ring of
49 * relative top indices stored in a multibit. */
50 REPEAT_RING,
51
52 /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
53 * since only the first top encountered needs to be stored. */
54 REPEAT_FIRST,
55
56 /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
57 * store the most recent top encountered. */
58 REPEAT_LAST,
59
60 /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
61 * where there is a large difference between N and M, and developed to
62 * reduce the state requirements of this case (relative to the RING model).
63 * Uses a small ordered array of top indices relative to \ref
64 * RepeatRangeControl::offset. */
65 REPEAT_RANGE,
66
67 /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
68 * RepeatBitmapControl structure at runtime. */
69 REPEAT_BITMAP,
70
71 /** Optimal mechanism for tracking {N,M} repeats when there is a bound on
72 * how frequently they can be retriggered.
73 * Assume f(repeat, min) representing the number of possible bit patterns
74 * we can have for repeat size = repeat, minimum period = min
75 * We will have the following recurrence relation:
76 * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min);
77 * We use this recurrence to encode bit patterns with 64-bit values by
78 * referencing a table that stores values from f(0, min) to f(repeat, min)
79 * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
80 * We search the optimal patch size between min and repeat in advance and
81 * use the scheme above to do encoding and decoding to reduce stream state
82 * size. */
83 REPEAT_SPARSE_OPTIMAL_P,
84
85 /** Used for {N,M} repeats where 0 < N < 64. Uses the
86 * \ref RepeatTrailerControl structure at runtime. */
87 REPEAT_TRAILER,
88
89 /** Degenerate repeat that always returns true. Used by castle for pseudo
90 * [^X]* repeats. */
91 REPEAT_ALWAYS,
92};
93
94/**
95 * \brief Value used to represent an unbounded max repeat.
96 *
97 * Note that we do not support \ref RepeatInfo::repeatMax values larger than
98 * this.
99 */
100#define REPEAT_INF 65535
101
102/** Max slots used by ::REPEAT_RANGE repeat model. */
103#define REPEAT_RANGE_MAX_SLOTS 16
104
105/** Structure describing a bounded repeat in the bytecode */
106struct RepeatInfo {
107 u8 type; //!< from enum RepeatType.
108 u32 repeatMin; //!< minimum number of repeats.
109 u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded.
110
111 /** Maximum value that is required to be stored in the control block
112 * counters. Any value greater than this will be capped at the horizon.
113 */
114 u32 horizon;
115
116 /** Size of the compressed control block in bytes. This is what is written
117 * out to stream state at stream boundaries. */
118 u32 packedCtrlSize;
119
120 /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE
121 * vector and REPEAT_RING multibit are stored, in stream state, and they
122 * are manipulated directly (i.e. not copied at stream boundaries). */
123 u32 stateSize;
124
125 /** How soon after one trigger we can see the next trigger.
126 * Used by REPEAT_SPARSE_OPTIMAL_P. */
127 u32 minPeriod;
128
129 /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */
130 u32 packedFieldSizes[2];
131
132 /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */
133 u32 patchCount;
134
135 /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */
136 u32 patchSize;
137
138 /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */
139 u32 encodingSize;
140
141 /* RepeatInfo struct length including table size. */
142 u32 length;
143
144 /** Offset of patches relative to the start of repeat stream state,
145 * used by REPEAT_SPARSE_OPTIMAL_P. */
146 u32 patchesOffset;
147};
148
149/** Runtime control block structure for ::REPEAT_RING and
150 * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed
151 * (may not be aligned). */
152struct RepeatRingControl {
153 u64a offset; //!< index of first top.
154 u16 first; //!< start index in ring.
155 u16 last; //!< end index in ring.
156};
157
158/** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note
159 * that this struct is packed (may not be aligned). */
160struct RepeatRangeControl {
161 u64a offset; //!< index of first top.
162 u8 num; //!< number of elements in array.
163};
164
165/** Runtime control block structure for cases where only a single offset is
166 * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that
167 * this struct is packed (may not be aligned). */
168struct RepeatOffsetControl {
169 u64a offset; //!< index of a top.
170};
171
172/** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */
173struct RepeatBitmapControl {
174 u64a offset; //!< index of first top.
175 u64a bitmap; //!< forward bitmap of tops relative to base offset.
176};
177
178/** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */
179struct RepeatTrailerControl {
180 u64a offset; //!< min extent of most recent match window.
181 u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset.
182};
183
184/** \brief Union of control block types, used at runtime. */
185union RepeatControl {
186 struct RepeatRingControl ring;
187 struct RepeatRangeControl range;
188 struct RepeatOffsetControl offset;
189 struct RepeatBitmapControl bitmap;
190 struct RepeatTrailerControl trailer;
191};
192
193/** For debugging, returns the name of a repeat model. */
194static really_inline UNUSED
195const char *repeatTypeName(u8 type) {
196 switch ((enum RepeatType)type) {
197 case REPEAT_RING:
198 return "RING";
199 case REPEAT_FIRST:
200 return "FIRST";
201 case REPEAT_LAST:
202 return "LAST";
203 case REPEAT_RANGE:
204 return "RANGE";
205 case REPEAT_BITMAP:
206 return "BITMAP";
207 case REPEAT_SPARSE_OPTIMAL_P:
208 return "SPARSE_OPTIMAL_P";
209 case REPEAT_TRAILER:
210 return "TRAILER";
211 case REPEAT_ALWAYS:
212 return "ALWAYS";
213 }
214 assert(0);
215 return "UNKNOWN";
216}
217
218#endif // REPEAT_INTERNAL_H
219