1/*
2 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3 *
4 * This is NOT the original regular expression code as written by Henry
5 * Spencer. This code has been modified specifically for use with Vim, and
6 * should not be used apart from compiling Vim. If you want a good regular
7 * expression library, get the original code.
8 *
9 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
10 */
11
12#ifndef NVIM_REGEXP_DEFS_H
13#define NVIM_REGEXP_DEFS_H
14
15#include <stdbool.h>
16
17#include "nvim/pos.h"
18#include "nvim/types.h"
19#include "nvim/profile.h"
20
// Maximum number of sub-matches: 10.
// Index 0 is the whole match, referenced with "\0".
// Index 1 is the first sub-match, referenced with "\1",
// and so on up to index 9 (the tenth entry), referenced with "\9".
#define NSUBEXP 10
28
// NFA engine: the number of braces that are allowed.
// TODO(RE): replace this static limit with dynamic memory allocation.
#define NFA_MAX_BRACES 20
34
// NFA engine: the number of states that are allowed.
#define NFA_MAX_STATES 100000

// Negative constant, parenthesized so that the expansion always stays a single
// operand regardless of the expression the macro is used in.
// NOTE(review): presumably a sentinel for "the NFA engine is too costly for
// this pattern" -- confirm at the use sites.
#define NFA_TOO_EXPENSIVE (-1)
38
// Selects which regexp engine to use; needed for vim_regcomp().
// The values must match with 'regexpengine'.
#define AUTOMATIC_ENGINE    0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE          2
44
// Forward declarations: the struct bodies are defined further down in this
// header, these typedefs only introduce the names.
typedef struct regprog      regprog_T;
typedef struct regengine    regengine_T;
typedef struct reg_extmatch reg_extmatch_T;
48
49/// Structure to be used for multi-line matching.
50/// Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
51/// and ends in line "endpos[no].lnum" just before column "endpos[no].col".
52/// The line numbers are relative to the first line, thus startpos[0].lnum is
53/// always 0.
54/// When there is no match, the line number is -1.
55typedef struct {
56 regprog_T *regprog;
57 lpos_T startpos[NSUBEXP];
58 lpos_T endpos[NSUBEXP];
59 int rmm_ic;
60 colnr_T rmm_maxcol; /// when not zero: maximum column
61} regmmatch_T;
62
63#include "nvim/buffer_defs.h"
64
65/*
66 * Structure returned by vim_regcomp() to pass on to vim_regexec().
67 * This is the general structure. For the actual matcher, two specific
68 * structures are used. See code below.
69 */
70struct regprog {
71 regengine_T *engine;
72 unsigned regflags;
73 unsigned re_engine; ///< Automatic, backtracking or NFA engine.
74 unsigned re_flags; ///< Second argument for vim_regcomp().
75};
76
77/*
78 * Structure used by the back track matcher.
79 * These fields are only to be used in regexp.c!
80 * See regexp.c for an explanation.
81 */
82typedef struct {
83 // These four members implement regprog_T.
84 regengine_T *engine;
85 unsigned regflags;
86 unsigned re_engine;
87 unsigned re_flags; ///< Second argument for vim_regcomp().
88
89 int regstart;
90 char_u reganch;
91 char_u *regmust;
92 int regmlen;
93 char_u reghasz;
94 char_u program[1]; /* actually longer.. */
95} bt_regprog_T;
96
/// A single NFA state.
///
/// A state may have no outgoing edge, when it is a NFA_MATCH state.
struct nfa_state {
  int c;
  struct nfa_state *out;
  struct nfa_state *out1;
  int id;
  int lastlist[2];  ///< 0: normal, 1: recursive
  int val;
};
typedef struct nfa_state nfa_state_T;
108
109/*
110 * Structure used by the NFA matcher.
111 */
112typedef struct {
113 // These four members implement regprog_T.
114 regengine_T *engine;
115 unsigned regflags;
116 unsigned re_engine;
117 unsigned re_flags; ///< Second argument for vim_regcomp().
118
119 nfa_state_T *start; /* points into state[] */
120
121 int reganch; /* pattern starts with ^ */
122 int regstart; /* char at start of pattern */
123 char_u *match_text; /* plain text to match with */
124
125 int has_zend; /* pattern contains \ze */
126 int has_backref; /* pattern contains \1 .. \9 */
127 int reghasz;
128 char_u *pattern;
129 int nsubexp; /* number of () */
130 int nstate;
131 nfa_state_T state[1]; /* actually longer.. */
132} nfa_regprog_T;
133
134/*
135 * Structure to be used for single-line matching.
136 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
137 * When there is no match, the pointer is NULL.
138 */
139typedef struct {
140 regprog_T *regprog;
141 char_u *startp[NSUBEXP];
142 char_u *endp[NSUBEXP];
143 bool rm_ic;
144} regmatch_T;
145
146/*
147 * Structure used to store external references: "\z\(\)" to "\z\1".
148 * Use a reference count to avoid the need to copy this around. When it goes
149 * from 1 to zero the matches need to be freed.
150 */
151struct reg_extmatch {
152 int16_t refcnt;
153 char_u *matches[NSUBEXP];
154};
155
156struct regengine {
157 regprog_T *(*regcomp)(char_u *, int);
158 void (*regfree)(regprog_T *);
159 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool);
160 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T,
161 proftime_T *, int *);
162 char_u *expr;
163};
164
165#endif // NVIM_REGEXP_DEFS_H
166