1 | // Copyright 2006 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Test character class manipulations. |
6 | |
7 | #include <stdio.h> |
8 | |
9 | #include "util/test.h" |
10 | #include "util/utf.h" |
11 | #include "re2/regexp.h" |
12 | |
13 | namespace re2 { |
14 | |
15 | struct CCTest { |
16 | struct { |
17 | Rune lo; |
18 | Rune hi; |
19 | } add[10]; |
20 | int remove; |
21 | struct { |
22 | Rune lo; |
23 | Rune hi; |
24 | } final[10]; |
25 | }; |
26 | |
27 | static CCTest tests[] = { |
28 | { { { 10, 20 }, {-1} }, -1, |
29 | { { 10, 20 }, {-1} } }, |
30 | |
31 | { { { 10, 20 }, { 20, 30 }, {-1} }, -1, |
32 | { { 10, 30 }, {-1} } }, |
33 | |
34 | { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1, |
35 | { { 10, 40 }, {-1} } }, |
36 | |
37 | { { { 0, 50 }, { 20, 30 }, {-1} }, -1, |
38 | { { 0, 50 }, {-1} } }, |
39 | |
40 | { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1, |
41 | { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
42 | |
43 | { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, |
44 | { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
45 | |
46 | { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, |
47 | { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, |
48 | |
49 | { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1, |
50 | { { 5, 25 }, {-1} } }, |
51 | |
52 | { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1, |
53 | { { 10, 23 }, {-1} } }, |
54 | |
55 | // These check boundary cases during negation. |
56 | { { { 0, Runemax }, {-1} }, -1, |
57 | { { 0, Runemax }, {-1} } }, |
58 | |
59 | { { { 0, 50 }, {-1} }, -1, |
60 | { { 0, 50 }, {-1} } }, |
61 | |
62 | { { { 50, Runemax }, {-1} }, -1, |
63 | { { 50, Runemax }, {-1} } }, |
64 | |
65 | // Check RemoveAbove. |
66 | { { { 50, Runemax }, {-1} }, 255, |
67 | { { 50, 255 }, {-1} } }, |
68 | |
69 | { { { 50, Runemax }, {-1} }, 65535, |
70 | { { 50, 65535 }, {-1} } }, |
71 | |
72 | { { { 50, Runemax }, {-1} }, Runemax, |
73 | { { 50, Runemax }, {-1} } }, |
74 | |
75 | { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255, |
76 | { { 50, 60 }, { 250, 255 }, {-1} } }, |
77 | |
78 | { { { 50, 60 }, {-1} }, 255, |
79 | { { 50, 60 }, {-1} } }, |
80 | |
81 | { { { 350, 360 }, {-1} }, 255, |
82 | { {-1} } }, |
83 | |
84 | { { {-1} }, 255, |
85 | { {-1} } }, |
86 | }; |
87 | |
88 | template<class CharClass> |
89 | static void Broke(const char *desc, const CCTest* t, CharClass* cc) { |
90 | if (t == NULL) { |
91 | printf("\t%s:" , desc); |
92 | } else { |
93 | printf("\n" ); |
94 | printf("CharClass added: [%s]" , desc); |
95 | for (int k = 0; t->add[k].lo >= 0; k++) |
96 | printf(" %d-%d" , t->add[k].lo, t->add[k].hi); |
97 | printf("\n" ); |
98 | if (t->remove >= 0) |
99 | printf("Removed > %d\n" , t->remove); |
100 | printf("\twant:" ); |
101 | for (int k = 0; t->final[k].lo >= 0; k++) |
102 | printf(" %d-%d" , t->final[k].lo, t->final[k].hi); |
103 | printf("\n" ); |
104 | printf("\thave:" ); |
105 | } |
106 | |
107 | for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it) |
108 | printf(" %d-%d" , it->lo, it->hi); |
109 | printf("\n" ); |
110 | } |
111 | |
112 | bool ShouldContain(CCTest *t, int x) { |
113 | for (int j = 0; t->final[j].lo >= 0; j++) |
114 | if (t->final[j].lo <= x && x <= t->final[j].hi) |
115 | return true; |
116 | return false; |
117 | } |
118 | |
119 | // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder. |
120 | |
121 | CharClass* Negate(CharClass *cc) { |
122 | return cc->Negate(); |
123 | } |
124 | |
125 | void Delete(CharClass* cc) { |
126 | cc->Delete(); |
127 | } |
128 | |
129 | CharClassBuilder* Negate(CharClassBuilder* cc) { |
130 | CharClassBuilder* ncc = cc->Copy(); |
131 | ncc->Negate(); |
132 | return ncc; |
133 | } |
134 | |
135 | void Delete(CharClassBuilder* cc) { |
136 | delete cc; |
137 | } |
138 | |
139 | template<class CharClass> |
140 | bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) { |
141 | typename CharClass::iterator it = cc->begin(); |
142 | int size = 0; |
143 | for (int j = 0; t->final[j].lo >= 0; j++, ++it) { |
144 | if (it == cc->end() || |
145 | it->lo != t->final[j].lo || |
146 | it->hi != t->final[j].hi) { |
147 | Broke(desc, t, cc); |
148 | return false; |
149 | } |
150 | size += it->hi - it->lo + 1; |
151 | } |
152 | if (it != cc->end()) { |
153 | Broke(desc, t, cc); |
154 | return false; |
155 | } |
156 | if (cc->size() != size) { |
157 | Broke(desc, t, cc); |
158 | printf("wrong size: want %d have %d\n" , size, cc->size()); |
159 | return false; |
160 | } |
161 | |
162 | for (int j = 0; j < 101; j++) { |
163 | if (j == 100) |
164 | j = Runemax; |
165 | if (ShouldContain(t, j) != cc->Contains(j)) { |
166 | Broke(desc, t, cc); |
167 | printf("want contains(%d)=%d, got %d\n" , |
168 | j, ShouldContain(t, j), cc->Contains(j)); |
169 | return false; |
170 | } |
171 | } |
172 | |
173 | CharClass* ncc = Negate(cc); |
174 | for (int j = 0; j < 101; j++) { |
175 | if (j == 100) |
176 | j = Runemax; |
177 | if (ShouldContain(t, j) == ncc->Contains(j)) { |
178 | Broke(desc, t, cc); |
179 | Broke("ncc" , NULL, ncc); |
180 | printf("want ncc contains(%d)!=%d, got %d\n" , |
181 | j, ShouldContain(t, j), ncc->Contains(j)); |
182 | Delete(ncc); |
183 | return false; |
184 | } |
185 | if (ncc->size() != Runemax+1 - cc->size()) { |
186 | Broke(desc, t, cc); |
187 | Broke("ncc" , NULL, ncc); |
188 | printf("ncc size should be %d is %d\n" , |
189 | Runemax+1 - cc->size(), ncc->size()); |
190 | Delete(ncc); |
191 | return false; |
192 | } |
193 | } |
194 | Delete(ncc); |
195 | return true; |
196 | } |
197 | |
198 | TEST(TestCharClassBuilder, Adds) { |
199 | int nfail = 0; |
200 | for (size_t i = 0; i < arraysize(tests); i++) { |
201 | CharClassBuilder ccb; |
202 | CCTest* t = &tests[i]; |
203 | for (int j = 0; t->add[j].lo >= 0; j++) |
204 | ccb.AddRange(t->add[j].lo, t->add[j].hi); |
205 | if (t->remove >= 0) |
206 | ccb.RemoveAbove(t->remove); |
207 | if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)" )) |
208 | nfail++; |
209 | CharClass* cc = ccb.GetCharClass(); |
210 | if (!CorrectCC(cc, t, "before copy (CharClass)" )) |
211 | nfail++; |
212 | cc->Delete(); |
213 | |
214 | CharClassBuilder *ccb1 = ccb.Copy(); |
215 | if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)" )) |
216 | nfail++; |
217 | cc = ccb.GetCharClass(); |
218 | if (!CorrectCC(cc, t, "after copy (CharClass)" )) |
219 | nfail++; |
220 | cc->Delete(); |
221 | delete ccb1; |
222 | } |
223 | EXPECT_EQ(nfail, 0); |
224 | } |
225 | |
226 | } // namespace re2 |
227 | |