// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//---------------------------------------------------------------------------------
//
// Generated Header File. Do not edit by hand.
// This file contains the state table for the ICU Regular Expression Pattern Parser.
// It is generated by the Perl script "regexcst.pl" from
// the rule parser state definitions file "regexcst.txt".
//
// Copyright (C) 2002-2016 International Business Machines Corporation
// and others. All rights reserved.
//
//---------------------------------------------------------------------------------
14#ifndef RBBIRPT_H
15#define RBBIRPT_H
16
17#include "unicode/utypes.h"
18
19U_NAMESPACE_BEGIN
//
// Character classes for regex pattern scanning.
//
// These are the values stored in RegexTableEl::fCharClass for rows that match a
// class of characters rather than one literal ASCII character (fCharClass
// values 0-127 are literal characters, 128 and up are class indices).
//
    static const uint8_t kRuleSet_digit_char   = 128;
    static const uint8_t kRuleSet_ascii_letter = 129;
    static const uint8_t kRuleSet_rule_char    = 130;

    // Number of character classes declared above: (last index + 1) - first index.
    constexpr uint32_t   kRuleSet_count        = (kRuleSet_rule_char + 1) - kRuleSet_digit_char;

//
// Action codes for the regex pattern parser state machine.
//
// Each row of the state table (RegexTableEl::fAction) names one of these.
// Values are assigned implicitly by declaration order, starting at 0, so the
// order below must not be changed by hand; the trailing comments mark every
// tenth value. rbbiLastAction is the final enumerator.
//
// NOTE: "doIntevalLowerDigit" is misspelled (sic). The state table refers to it
// by this exact name, so the spelling is kept.
//
enum Regex_PatternParseAction {
    doSetBackslash_D,               //   0
    doBackslashh,
    doBackslashH,
    doSetLiteralEscaped,
    doOpenLookAheadNeg,
    doCompleteNamedBackRef,
    doPatStart,
    doBackslashS,
    doBackslashD,
    doNGStar,
    doNOP,                          //  10
    doBackslashX,
    doSetLiteral,
    doContinueNamedCapture,
    doBackslashG,
    doBackslashR,
    doSetBegin,
    doSetBackslash_v,
    doPossessivePlus,
    doPerlInline,
    doBackslashZ,                   //  20
    doSetAddAmp,
    doSetBeginDifference1,
    doIntervalError,
    doSetNegate,
    doIntervalInit,
    doSetIntersection2,
    doPossessiveInterval,
    doRuleError,
    doBackslashW,
    doContinueNamedBackRef,         //  30
    doOpenNonCaptureParen,
    doExit,
    doSetNamedChar,
    doSetBackslash_V,
    doConditionalExpr,
    doEscapeError,
    doBadOpenParenType,
    doPossessiveStar,
    doSetAddDash,
    doEscapedLiteralChar,           //  40
    doSetBackslash_w,
    doIntervalUpperDigit,
    doBackslashv,
    doSetBackslash_S,
    doSetNoCloseError,
    doSetProp,
    doBackslashB,
    doSetEnd,
    doSetRange,
    doMatchModeParen,               //  50
    doPlus,
    doBackslashV,
    doSetMatchMode,
    doBackslashz,
    doSetNamedRange,
    doOpenLookBehindNeg,
    doInterval,
    doBadNamedCapture,
    doBeginMatchMode,
    doBackslashd,                   //  60
    doPatFinish,
    doNamedChar,
    doNGPlus,
    doSetDifference2,
    doSetBackslash_H,
    doCloseParen,
    doDotAny,
    doOpenCaptureParen,
    doEnterQuoteMode,
    doOpenAtomicParen,              //  70
    doBadModeFlag,
    doSetBackslash_d,
    doSetFinish,
    doProperty,
    doBeginNamedBackRef,
    doBackRef,
    doOpt,
    doDollar,
    doBeginNamedCapture,
    doNGInterval,                   //  80
    doSetOpError,
    doSetPosixProp,
    doSetBeginIntersection1,
    doBackslashb,
    doSetBeginUnion,
    doIntevalLowerDigit,
    doSetBackslash_h,
    doStar,
    doMatchMode,
    doBackslashA,                   //  90
    doOpenLookBehind,
    doPossessiveOpt,
    doOrOperator,
    doBackslashw,
    doBackslashs,
    doLiteralChar,
    doSuppressComments,
    doCaret,
    doIntervalSame,
    doNGOpt,                        // 100
    doOpenLookAhead,
    doSetBackslash_W,
    doMismatchedParenErr,
    doSetBackslash_s,
    rbbiLastAction};                // 105

136//-------------------------------------------------------------------------------
137//
138// RegexTableEl represents the structure of a row in the transition table
139// for the pattern parser state machine.
140//-------------------------------------------------------------------------------
141struct RegexTableEl {
142 Regex_PatternParseAction fAction;
143 uint8_t fCharClass; // 0-127: an individual ASCII character
144 // 128-255: character class index
145 uint8_t fNextState; // 0-250: normal next-state numbers
146 // 255: pop next-state from stack.
147 uint8_t fPushState;
148 UBool fNextChar;
149};
150
151static const struct RegexTableEl gRuleParseStateTable[] = {
152 {doNOP, 0, 0, 0, TRUE}
153 , {doPatStart, 255, 2,0, FALSE} // 1 start
154 , {doLiteralChar, 254, 14,0, TRUE} // 2 term
155 , {doLiteralChar, 130, 14,0, TRUE} // 3
156 , {doSetBegin, 91 /* [ */, 123, 205, TRUE} // 4
157 , {doNOP, 40 /* ( */, 27,0, TRUE} // 5
158 , {doDotAny, 46 /* . */, 14,0, TRUE} // 6
159 , {doCaret, 94 /* ^ */, 14,0, TRUE} // 7
160 , {doDollar, 36 /* $ */, 14,0, TRUE} // 8
161 , {doNOP, 92 /* \ */, 89,0, TRUE} // 9
162 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
163 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
164 , {doPatFinish, 253, 2,0, FALSE} // 12
165 , {doRuleError, 255, 206,0, FALSE} // 13
166 , {doNOP, 42 /* * */, 68,0, TRUE} // 14 expr-quant
167 , {doNOP, 43 /* + */, 71,0, TRUE} // 15
168 , {doNOP, 63 /* ? */, 74,0, TRUE} // 16
169 , {doIntervalInit, 123 /* { */, 77,0, TRUE} // 17
170 , {doNOP, 40 /* ( */, 23,0, TRUE} // 18
171 , {doNOP, 255, 20,0, FALSE} // 19
172 , {doOrOperator, 124 /* | */, 2,0, TRUE} // 20 expr-cont
173 , {doCloseParen, 41 /* ) */, 255,0, TRUE} // 21
174 , {doNOP, 255, 2,0, FALSE} // 22
175 , {doSuppressComments, 63 /* ? */, 25,0, TRUE} // 23 open-paren-quant
176 , {doNOP, 255, 27,0, FALSE} // 24
177 , {doNOP, 35 /* # */, 50, 14, TRUE} // 25 open-paren-quant2
178 , {doNOP, 255, 29,0, FALSE} // 26
179 , {doSuppressComments, 63 /* ? */, 29,0, TRUE} // 27 open-paren
180 , {doOpenCaptureParen, 255, 2, 14, FALSE} // 28
181 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, TRUE} // 29 open-paren-extended
182 , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE} // 30
183 , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE} // 31
184 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE} // 32
185 , {doNOP, 60 /* < */, 46,0, TRUE} // 33
186 , {doNOP, 35 /* # */, 50, 2, TRUE} // 34
187 , {doBeginMatchMode, 105 /* i */, 53,0, FALSE} // 35
188 , {doBeginMatchMode, 100 /* d */, 53,0, FALSE} // 36
189 , {doBeginMatchMode, 109 /* m */, 53,0, FALSE} // 37
190 , {doBeginMatchMode, 115 /* s */, 53,0, FALSE} // 38
191 , {doBeginMatchMode, 117 /* u */, 53,0, FALSE} // 39
192 , {doBeginMatchMode, 119 /* w */, 53,0, FALSE} // 40
193 , {doBeginMatchMode, 120 /* x */, 53,0, FALSE} // 41
194 , {doBeginMatchMode, 45 /* - */, 53,0, FALSE} // 42
195 , {doConditionalExpr, 40 /* ( */, 206,0, TRUE} // 43
196 , {doPerlInline, 123 /* { */, 206,0, TRUE} // 44
197 , {doBadOpenParenType, 255, 206,0, FALSE} // 45
198 , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 46 open-paren-lookbehind
199 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 47
200 , {doBeginNamedCapture, 129, 64,0, FALSE} // 48
201 , {doBadOpenParenType, 255, 206,0, FALSE} // 49
202 , {doNOP, 41 /* ) */, 255,0, TRUE} // 50 paren-comment
203 , {doMismatchedParenErr, 253, 206,0, FALSE} // 51
204 , {doNOP, 255, 50,0, TRUE} // 52
205 , {doMatchMode, 105 /* i */, 53,0, TRUE} // 53 paren-flag
206 , {doMatchMode, 100 /* d */, 53,0, TRUE} // 54
207 , {doMatchMode, 109 /* m */, 53,0, TRUE} // 55
208 , {doMatchMode, 115 /* s */, 53,0, TRUE} // 56
209 , {doMatchMode, 117 /* u */, 53,0, TRUE} // 57
210 , {doMatchMode, 119 /* w */, 53,0, TRUE} // 58
211 , {doMatchMode, 120 /* x */, 53,0, TRUE} // 59
212 , {doMatchMode, 45 /* - */, 53,0, TRUE} // 60
213 , {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 61
214 , {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 62
215 , {doBadModeFlag, 255, 206,0, FALSE} // 63
216 , {doContinueNamedCapture, 129, 64,0, TRUE} // 64 named-capture
217 , {doContinueNamedCapture, 128, 64,0, TRUE} // 65
218 , {doOpenCaptureParen, 62 /* > */, 2, 14, TRUE} // 66
219 , {doBadNamedCapture, 255, 206,0, FALSE} // 67
220 , {doNGStar, 63 /* ? */, 20,0, TRUE} // 68 quant-star
221 , {doPossessiveStar, 43 /* + */, 20,0, TRUE} // 69
222 , {doStar, 255, 20,0, FALSE} // 70
223 , {doNGPlus, 63 /* ? */, 20,0, TRUE} // 71 quant-plus
224 , {doPossessivePlus, 43 /* + */, 20,0, TRUE} // 72
225 , {doPlus, 255, 20,0, FALSE} // 73
226 , {doNGOpt, 63 /* ? */, 20,0, TRUE} // 74 quant-opt
227 , {doPossessiveOpt, 43 /* + */, 20,0, TRUE} // 75
228 , {doOpt, 255, 20,0, FALSE} // 76
229 , {doNOP, 128, 79,0, FALSE} // 77 interval-open
230 , {doIntervalError, 255, 206,0, FALSE} // 78
231 , {doIntevalLowerDigit, 128, 79,0, TRUE} // 79 interval-lower
232 , {doNOP, 44 /* , */, 83,0, TRUE} // 80
233 , {doIntervalSame, 125 /* } */, 86,0, TRUE} // 81
234 , {doIntervalError, 255, 206,0, FALSE} // 82
235 , {doIntervalUpperDigit, 128, 83,0, TRUE} // 83 interval-upper
236 , {doNOP, 125 /* } */, 86,0, TRUE} // 84
237 , {doIntervalError, 255, 206,0, FALSE} // 85
238 , {doNGInterval, 63 /* ? */, 20,0, TRUE} // 86 interval-type
239 , {doPossessiveInterval, 43 /* + */, 20,0, TRUE} // 87
240 , {doInterval, 255, 20,0, FALSE} // 88
241 , {doBackslashA, 65 /* A */, 2,0, TRUE} // 89 backslash
242 , {doBackslashB, 66 /* B */, 2,0, TRUE} // 90
243 , {doBackslashb, 98 /* b */, 2,0, TRUE} // 91
244 , {doBackslashd, 100 /* d */, 14,0, TRUE} // 92
245 , {doBackslashD, 68 /* D */, 14,0, TRUE} // 93
246 , {doBackslashG, 71 /* G */, 2,0, TRUE} // 94
247 , {doBackslashh, 104 /* h */, 14,0, TRUE} // 95
248 , {doBackslashH, 72 /* H */, 14,0, TRUE} // 96
249 , {doNOP, 107 /* k */, 115,0, TRUE} // 97
250 , {doNamedChar, 78 /* N */, 14,0, FALSE} // 98
251 , {doProperty, 112 /* p */, 14,0, FALSE} // 99
252 , {doProperty, 80 /* P */, 14,0, FALSE} // 100
253 , {doBackslashR, 82 /* R */, 14,0, TRUE} // 101
254 , {doEnterQuoteMode, 81 /* Q */, 2,0, TRUE} // 102
255 , {doBackslashS, 83 /* S */, 14,0, TRUE} // 103
256 , {doBackslashs, 115 /* s */, 14,0, TRUE} // 104
257 , {doBackslashv, 118 /* v */, 14,0, TRUE} // 105
258 , {doBackslashV, 86 /* V */, 14,0, TRUE} // 106
259 , {doBackslashW, 87 /* W */, 14,0, TRUE} // 107
260 , {doBackslashw, 119 /* w */, 14,0, TRUE} // 108
261 , {doBackslashX, 88 /* X */, 14,0, TRUE} // 109
262 , {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 110
263 , {doBackslashz, 122 /* z */, 2,0, TRUE} // 111
264 , {doBackRef, 128, 14,0, TRUE} // 112
265 , {doEscapeError, 253, 206,0, FALSE} // 113
266 , {doEscapedLiteralChar, 255, 14,0, TRUE} // 114
267 , {doBeginNamedBackRef, 60 /* < */, 117,0, TRUE} // 115 named-backref
268 , {doBadNamedCapture, 255, 206,0, FALSE} // 116
269 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 117 named-backref-2
270 , {doBadNamedCapture, 255, 206,0, FALSE} // 118
271 , {doContinueNamedBackRef, 129, 119,0, TRUE} // 119 named-backref-3
272 , {doContinueNamedBackRef, 128, 119,0, TRUE} // 120
273 , {doCompleteNamedBackRef, 62 /* > */, 14,0, TRUE} // 121
274 , {doBadNamedCapture, 255, 206,0, FALSE} // 122
275 , {doSetNegate, 94 /* ^ */, 126,0, TRUE} // 123 set-open
276 , {doSetPosixProp, 58 /* : */, 128,0, FALSE} // 124
277 , {doNOP, 255, 126,0, FALSE} // 125
278 , {doSetLiteral, 93 /* ] */, 141,0, TRUE} // 126 set-open2
279 , {doNOP, 255, 131,0, FALSE} // 127
280 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 128 set-posix
281 , {doNOP, 58 /* : */, 131,0, FALSE} // 129
282 , {doRuleError, 255, 206,0, FALSE} // 130
283 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 131 set-start
284 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 132
285 , {doNOP, 92 /* \ */, 191,0, TRUE} // 133
286 , {doNOP, 45 /* - */, 137,0, TRUE} // 134
287 , {doNOP, 38 /* & */, 139,0, TRUE} // 135
288 , {doSetLiteral, 255, 141,0, TRUE} // 136
289 , {doRuleError, 45 /* - */, 206,0, FALSE} // 137 set-start-dash
290 , {doSetAddDash, 255, 141,0, FALSE} // 138
291 , {doRuleError, 38 /* & */, 206,0, FALSE} // 139 set-start-amp
292 , {doSetAddAmp, 255, 141,0, FALSE} // 140
293 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 141 set-after-lit
294 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 142
295 , {doNOP, 45 /* - */, 178,0, TRUE} // 143
296 , {doNOP, 38 /* & */, 169,0, TRUE} // 144
297 , {doNOP, 92 /* \ */, 191,0, TRUE} // 145
298 , {doSetNoCloseError, 253, 206,0, FALSE} // 146
299 , {doSetLiteral, 255, 141,0, TRUE} // 147
300 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 148 set-after-set
301 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 149
302 , {doNOP, 45 /* - */, 171,0, TRUE} // 150
303 , {doNOP, 38 /* & */, 166,0, TRUE} // 151
304 , {doNOP, 92 /* \ */, 191,0, TRUE} // 152
305 , {doSetNoCloseError, 253, 206,0, FALSE} // 153
306 , {doSetLiteral, 255, 141,0, TRUE} // 154
307 , {doSetEnd, 93 /* ] */, 255,0, TRUE} // 155 set-after-range
308 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 156
309 , {doNOP, 45 /* - */, 174,0, TRUE} // 157
310 , {doNOP, 38 /* & */, 176,0, TRUE} // 158
311 , {doNOP, 92 /* \ */, 191,0, TRUE} // 159
312 , {doSetNoCloseError, 253, 206,0, FALSE} // 160
313 , {doSetLiteral, 255, 141,0, TRUE} // 161
314 , {doSetBeginUnion, 91 /* [ */, 123, 148, TRUE} // 162 set-after-op
315 , {doSetOpError, 93 /* ] */, 206,0, FALSE} // 163
316 , {doNOP, 92 /* \ */, 191,0, TRUE} // 164
317 , {doSetLiteral, 255, 141,0, TRUE} // 165
318 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, TRUE} // 166 set-set-amp
319 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 167
320 , {doSetAddAmp, 255, 141,0, FALSE} // 168
321 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 169 set-lit-amp
322 , {doSetAddAmp, 255, 141,0, FALSE} // 170
323 , {doSetBeginDifference1, 91 /* [ */, 123, 148, TRUE} // 171 set-set-dash
324 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 172
325 , {doSetAddDash, 255, 141,0, FALSE} // 173
326 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 174 set-range-dash
327 , {doSetAddDash, 255, 141,0, FALSE} // 175
328 , {doSetIntersection2, 38 /* & */, 162,0, TRUE} // 176 set-range-amp
329 , {doSetAddAmp, 255, 141,0, FALSE} // 177
330 , {doSetDifference2, 45 /* - */, 162,0, TRUE} // 178 set-lit-dash
331 , {doSetAddDash, 91 /* [ */, 141,0, FALSE} // 179
332 , {doSetAddDash, 93 /* ] */, 141,0, FALSE} // 180
333 , {doNOP, 92 /* \ */, 183,0, TRUE} // 181
334 , {doSetRange, 255, 155,0, TRUE} // 182
335 , {doSetOpError, 115 /* s */, 206,0, FALSE} // 183 set-lit-dash-escape
336 , {doSetOpError, 83 /* S */, 206,0, FALSE} // 184
337 , {doSetOpError, 119 /* w */, 206,0, FALSE} // 185
338 , {doSetOpError, 87 /* W */, 206,0, FALSE} // 186
339 , {doSetOpError, 100 /* d */, 206,0, FALSE} // 187
340 , {doSetOpError, 68 /* D */, 206,0, FALSE} // 188
341 , {doSetNamedRange, 78 /* N */, 155,0, FALSE} // 189
342 , {doSetRange, 255, 155,0, TRUE} // 190
343 , {doSetProp, 112 /* p */, 148,0, FALSE} // 191 set-escape
344 , {doSetProp, 80 /* P */, 148,0, FALSE} // 192
345 , {doSetNamedChar, 78 /* N */, 141,0, FALSE} // 193
346 , {doSetBackslash_s, 115 /* s */, 155,0, TRUE} // 194
347 , {doSetBackslash_S, 83 /* S */, 155,0, TRUE} // 195
348 , {doSetBackslash_w, 119 /* w */, 155,0, TRUE} // 196
349 , {doSetBackslash_W, 87 /* W */, 155,0, TRUE} // 197
350 , {doSetBackslash_d, 100 /* d */, 155,0, TRUE} // 198
351 , {doSetBackslash_D, 68 /* D */, 155,0, TRUE} // 199
352 , {doSetBackslash_h, 104 /* h */, 155,0, TRUE} // 200
353 , {doSetBackslash_H, 72 /* H */, 155,0, TRUE} // 201
354 , {doSetBackslash_v, 118 /* v */, 155,0, TRUE} // 202
355 , {doSetBackslash_V, 86 /* V */, 155,0, TRUE} // 203
356 , {doSetLiteralEscaped, 255, 141,0, TRUE} // 204
357 , {doSetFinish, 255, 14,0, FALSE} // 205 set-finish
358 , {doExit, 255, 206,0, TRUE} // 206 errorDeath
359 };
//-------------------------------------------------------------------------------
//
//  RegexStateNames    names of the parser states, indexed by state-table row.
//
//  Entry i is the name of the state whose first row is row i of the state
//  table, or 0 (null) when row i does not start a named state. The array has
//  one entry per table row (0-206) plus a trailing 0, 208 entries in all.
//  NOTE(review): presumably used only for debug/trace output -- confirm.
//-------------------------------------------------------------------------------
static const char * const RegexStateNames[] = {
    0,                                   //   0
    "start",                             //   1
    "term",                              //   2
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //   3-13
    "expr-quant",                        //  14
    0, 0, 0, 0, 0,                       //  15-19
    "expr-cont",                         //  20
    0, 0,                                //  21-22
    "open-paren-quant",                  //  23
    0,                                   //  24
    "open-paren-quant2",                 //  25
    0,                                   //  26
    "open-paren",                        //  27
    0,                                   //  28
    "open-paren-extended",               //  29
    0, 0, 0, 0, 0, 0, 0, 0,              //  30-37
    0, 0, 0, 0, 0, 0, 0, 0,              //  38-45
    "open-paren-lookbehind",             //  46
    0, 0, 0,                             //  47-49
    "paren-comment",                     //  50
    0, 0,                                //  51-52
    "paren-flag",                        //  53
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        //  54-63
    "named-capture",                     //  64
    0, 0, 0,                             //  65-67
    "quant-star",                        //  68
    0, 0,                                //  69-70
    "quant-plus",                        //  71
    0, 0,                                //  72-73
    "quant-opt",                         //  74
    0, 0,                                //  75-76
    "interval-open",                     //  77
    0,                                   //  78
    "interval-lower",                    //  79
    0, 0, 0,                             //  80-82
    "interval-upper",                    //  83
    0, 0,                                //  84-85
    "interval-type",                     //  86
    0, 0,                                //  87-88
    "backslash",                         //  89
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        //  90-99
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // 100-109
    0, 0, 0, 0, 0,                       // 110-114
    "named-backref",                     // 115
    0,                                   // 116
    "named-backref-2",                   // 117
    0,                                   // 118
    "named-backref-3",                   // 119
    0, 0, 0,                             // 120-122
    "set-open",                          // 123
    0, 0,                                // 124-125
    "set-open2",                         // 126
    0,                                   // 127
    "set-posix",                         // 128
    0, 0,                                // 129-130
    "set-start",                         // 131
    0, 0, 0, 0, 0,                       // 132-136
    "set-start-dash",                    // 137
    0,                                   // 138
    "set-start-amp",                     // 139
    0,                                   // 140
    "set-after-lit",                     // 141
    0, 0, 0, 0, 0, 0,                    // 142-147
    "set-after-set",                     // 148
    0, 0, 0, 0, 0, 0,                    // 149-154
    "set-after-range",                   // 155
    0, 0, 0, 0, 0, 0,                    // 156-161
    "set-after-op",                      // 162
    0, 0, 0,                             // 163-165
    "set-set-amp",                       // 166
    0, 0,                                // 167-168
    "set-lit-amp",                       // 169
    0,                                   // 170
    "set-set-dash",                      // 171
    0, 0,                                // 172-173
    "set-range-dash",                    // 174
    0,                                   // 175
    "set-range-amp",                     // 176
    0,                                   // 177
    "set-lit-dash",                      // 178
    0, 0, 0, 0,                          // 179-182
    "set-lit-dash-escape",               // 183
    0, 0, 0, 0, 0, 0, 0,                 // 184-190
    "set-escape",                        // 191
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // 192-201
    0, 0, 0,                             // 202-204
    "set-finish",                        // 205
    "errorDeath",                        // 206
    0};                                  // 207 (terminator)

569U_NAMESPACE_END
570#endif
571