1 | /* |
2 | * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
3 | * |
4 | * This is NOT the original regular expression code as written by Henry |
5 | * Spencer. This code has been modified specifically for use with Vim, and |
6 | * should not be used apart from compiling Vim. If you want a good regular |
7 | * expression library, get the original code. |
8 | * |
9 | * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE |
10 | */ |
11 | |
12 | #ifndef NVIM_REGEXP_DEFS_H |
13 | #define NVIM_REGEXP_DEFS_H |
14 | |
15 | #include <stdbool.h> |
16 | |
17 | #include "nvim/pos.h" |
18 | #include "nvim/types.h" |
19 | #include "nvim/profile.h" |
20 | |
// Upper bound on the number of sub-matches: 10.
//   index 0 - the whole match, referenced with "\0"
//   index 1 - the first sub-match, referenced with "\1"
//   ...
//   index 9 - the last sub-match, referenced with "\9"
#define NSUBEXP 10
28 | |
// In the NFA engine: how many braces are allowed.
// TODO(RE): Use dynamic memory allocation instead of static, like here
#define NFA_MAX_BRACES 20

// In the NFA engine: how many states are allowed.
#define NFA_MAX_STATES 100000

// Negative sentinel value.  The replacement list is parenthesized so the
// macro always expands to a single primary expression, whatever tokens
// surround it at the point of use.
// NOTE(review): presumably reported when the NFA engine considers a pattern
// too costly to run -- confirm against the use sites in the NFA code.
#define NFA_TOO_EXPENSIVE (-1)
38 | |
// Selects the regexp engine; needed for vim_regcomp().
// The numeric values must stay in sync with the 'regexpengine' option.
#define AUTOMATIC_ENGINE    0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE          2
44 | |
// Forward declarations, so the structures in this header can point at each
// other before their bodies have been seen.
typedef struct regengine regengine_T;        // table of engine entry points
typedef struct regprog regprog_T;            // generic compiled pattern
typedef struct reg_extmatch reg_extmatch_T;  // external "\z" references
48 | |
49 | /// Structure to be used for multi-line matching. |
50 | /// Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" |
51 | /// and ends in line "endpos[no].lnum" just before column "endpos[no].col". |
52 | /// The line numbers are relative to the first line, thus startpos[0].lnum is |
53 | /// always 0. |
54 | /// When there is no match, the line number is -1. |
55 | typedef struct { |
56 | regprog_T *regprog; |
57 | lpos_T startpos[NSUBEXP]; |
58 | lpos_T endpos[NSUBEXP]; |
59 | int rmm_ic; |
60 | colnr_T rmm_maxcol; /// when not zero: maximum column |
61 | } regmmatch_T; |
62 | |
63 | #include "nvim/buffer_defs.h" |
64 | |
65 | /* |
66 | * Structure returned by vim_regcomp() to pass on to vim_regexec(). |
67 | * This is the general structure. For the actual matcher, two specific |
68 | * structures are used. See code below. |
69 | */ |
70 | struct regprog { |
71 | regengine_T *engine; |
72 | unsigned regflags; |
73 | unsigned re_engine; ///< Automatic, backtracking or NFA engine. |
74 | unsigned re_flags; ///< Second argument for vim_regcomp(). |
75 | }; |
76 | |
77 | /* |
78 | * Structure used by the back track matcher. |
79 | * These fields are only to be used in regexp.c! |
80 | * See regexp.c for an explanation. |
81 | */ |
82 | typedef struct { |
83 | // These four members implement regprog_T. |
84 | regengine_T *engine; |
85 | unsigned regflags; |
86 | unsigned re_engine; |
87 | unsigned re_flags; ///< Second argument for vim_regcomp(). |
88 | |
89 | int regstart; |
90 | char_u reganch; |
91 | char_u *regmust; |
92 | int regmlen; |
93 | char_u reghasz; |
94 | char_u program[1]; /* actually longer.. */ |
95 | } bt_regprog_T; |
96 | |
/// One state of the NFA.
///
/// A state that is a NFA_MATCH state may have no outgoing edge at all.
typedef struct nfa_state {
  int c;
  struct nfa_state *out;
  struct nfa_state *out1;
  int id;
  int lastlist[2];  ///< [0]: normal, [1]: recursive
  int val;
} nfa_state_T;
108 | |
109 | /* |
110 | * Structure used by the NFA matcher. |
111 | */ |
112 | typedef struct { |
113 | // These four members implement regprog_T. |
114 | regengine_T *engine; |
115 | unsigned regflags; |
116 | unsigned re_engine; |
117 | unsigned re_flags; ///< Second argument for vim_regcomp(). |
118 | |
119 | nfa_state_T *start; /* points into state[] */ |
120 | |
121 | int reganch; /* pattern starts with ^ */ |
122 | int regstart; /* char at start of pattern */ |
123 | char_u *match_text; /* plain text to match with */ |
124 | |
125 | int has_zend; /* pattern contains \ze */ |
126 | int has_backref; /* pattern contains \1 .. \9 */ |
127 | int reghasz; |
128 | char_u *pattern; |
129 | int nsubexp; /* number of () */ |
130 | int nstate; |
131 | nfa_state_T state[1]; /* actually longer.. */ |
132 | } nfa_regprog_T; |
133 | |
134 | /* |
135 | * Structure to be used for single-line matching. |
136 | * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". |
137 | * When there is no match, the pointer is NULL. |
138 | */ |
139 | typedef struct { |
140 | regprog_T *regprog; |
141 | char_u *startp[NSUBEXP]; |
142 | char_u *endp[NSUBEXP]; |
143 | bool rm_ic; |
144 | } regmatch_T; |
145 | |
146 | /* |
147 | * Structure used to store external references: "\z\(\)" to "\z\1". |
148 | * Use a reference count to avoid the need to copy this around. When it goes |
149 | * from 1 to zero the matches need to be freed. |
150 | */ |
151 | struct reg_extmatch { |
152 | int16_t refcnt; |
153 | char_u *matches[NSUBEXP]; |
154 | }; |
155 | |
156 | struct regengine { |
157 | regprog_T *(*regcomp)(char_u *, int); |
158 | void (*regfree)(regprog_T *); |
159 | int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool); |
160 | long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, |
161 | proftime_T *, int *); |
162 | char_u *expr; |
163 | }; |
164 | |
165 | #endif // NVIM_REGEXP_DEFS_H |
166 | |