1// Copyright 2007-2009 Russ Cox. All Rights Reserved.
2// Copyright 2014 Paul Sokolovsky.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6#ifndef _RE1_5_REGEXP__H
7#define _RE1_5_REGEXP__H
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <stdarg.h>
13#include <assert.h>
14
15#define nil ((void*)0)
16#define nelem(x) (sizeof(x)/sizeof((x)[0]))
17
18typedef struct Regexp Regexp;
19typedef struct Prog Prog;
20typedef struct ByteProg ByteProg;
21typedef struct Inst Inst;
22typedef struct Subject Subject;
23
24struct Regexp
25{
26 int type;
27 int n;
28 int ch;
29 Regexp *left;
30 Regexp *right;
31};
32
33enum /* Regexp.type */
34{
35 Alt = 1,
36 Cat,
37 Lit,
38 Dot,
39 Paren,
40 Quest,
41 Star,
42 Plus,
43};
44
45Regexp *parse(char*);
46Regexp *reg(int type, Regexp *left, Regexp *right);
47void printre(Regexp*);
48#ifndef re1_5_fatal
49void re1_5_fatal(char*);
50#endif
51#ifndef re1_5_stack_chk
52#define re1_5_stack_chk()
53#endif
54void *mal(int);
55
56struct Prog
57{
58 Inst *start;
59 int len;
60};
61
62struct ByteProg
63{
64 int bytelen;
65 int len;
66 int sub;
67 char insts[0];
68};
69
70struct Inst
71{
72 int opcode;
73 int c;
74 int n;
75 Inst *x;
76 Inst *y;
77 int gen; // global state, oooh!
78};
79
80enum /* Inst.opcode */
81{
82 // Instructions which consume input bytes (and thus fail if none left)
83 CONSUMERS = 1,
84 Char = CONSUMERS,
85 Any,
86 Class,
87 ClassNot,
88 NamedClass,
89
90 ASSERTS = 0x50,
91 Bol = ASSERTS,
92 Eol,
93
94 // Instructions which take relative offset as arg
95 JUMPS = 0x60,
96 Jmp = JUMPS,
97 Split,
98 RSplit,
99
100 // Other (special) instructions
101 Save = 0x7e,
102 Match = 0x7f,
103};
104
105#define inst_is_consumer(inst) ((inst) < ASSERTS)
106#define inst_is_jump(inst) ((inst) & 0x70 == JUMPS)
107
108Prog *compile(Regexp*);
109void printprog(Prog*);
110
111extern int gen;
112
113enum {
114 MAXSUB = 20
115};
116
117typedef struct Sub Sub;
118
119struct Sub
120{
121 int ref;
122 int nsub;
123 const char *sub[MAXSUB];
124};
125
126Sub *newsub(int n);
127Sub *incref(Sub*);
128Sub *copy(Sub*);
129Sub *update(Sub*, int, const char*);
130void decref(Sub*);
131
132struct Subject {
133 const char *begin;
134 const char *end;
135};
136
137
138#define NON_ANCHORED_PREFIX 5
139#define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode))
140
141int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int);
142int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int);
143int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
144int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
145int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
146
147int re1_5_sizecode(const char *re);
148int re1_5_compilecode(ByteProg *prog, const char *re);
149void re1_5_dumpcode(ByteProg *prog);
150void cleanmarks(ByteProg *prog);
151int _re1_5_classmatch(const char *pc, const char *sp);
152int _re1_5_namedclassmatch(const char *pc, const char *sp);
153
154#endif /*_RE1_5_REGEXP__H*/
155