| 1 | /* | 
|---|
| 2 | ** $Id: lpcode.c,v 1.24 2016/09/15 17:46:13 roberto Exp $ | 
|---|
| 3 | ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | 
|---|
| 4 | */ | 
|---|
| 5 |  | 
|---|
| 6 | #include <limits.h> | 
|---|
| 7 |  | 
|---|
| 8 |  | 
|---|
| 9 | #include "lua.h" | 
|---|
| 10 | #include "lauxlib.h" | 
|---|
| 11 |  | 
|---|
| 12 | #include "lptypes.h" | 
|---|
| 13 | #include "lpcode.h" | 
|---|
| 14 |  | 
|---|
| 15 |  | 
|---|
| 16 | /* signals a "no-instruction */ | 
|---|
| 17 | #define NOINST		-1 | 
|---|
| 18 |  | 
|---|
| 19 |  | 
|---|
| 20 |  | 
|---|
| 21 | static const Charset fullset_ = | 
|---|
| 22 | {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | 
|---|
| 23 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | 
|---|
| 24 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | 
|---|
| 25 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}}; | 
|---|
| 26 |  | 
|---|
| 27 | static const Charset *fullset = &fullset_; | 
|---|
| 28 |  | 
|---|
| 29 | /* | 
|---|
| 30 | ** {====================================================== | 
|---|
| 31 | ** Analysis and some optimizations | 
|---|
| 32 | ** ======================================================= | 
|---|
| 33 | */ | 
|---|
| 34 |  | 
|---|
| 35 | /* | 
|---|
| 36 | ** Check whether a charset is empty (returns IFail), singleton (IChar), | 
|---|
| 37 | ** full (IAny), or none of those (ISet). When singleton, '*c' returns | 
|---|
| 38 | ** which character it is. (When generic set, the set was the input, | 
|---|
| 39 | ** so there is no need to return it.) | 
|---|
| 40 | */ | 
|---|
| 41 | static Opcode charsettype (const byte *cs, int *c) { | 
|---|
| 42 | int count = 0;  /* number of characters in the set */ | 
|---|
| 43 | int i; | 
|---|
| 44 | int candidate = -1;  /* candidate position for the singleton char */ | 
|---|
| 45 | for (i = 0; i < CHARSETSIZE; i++) {  /* for each byte */ | 
|---|
| 46 | int b = cs[i]; | 
|---|
| 47 | if (b == 0) {  /* is byte empty? */ | 
|---|
| 48 | if (count > 1)  /* was set neither empty nor singleton? */ | 
|---|
| 49 | return ISet;  /* neither full nor empty nor singleton */ | 
|---|
| 50 | /* else set is still empty or singleton */ | 
|---|
| 51 | } | 
|---|
| 52 | else if (b == 0xFF) {  /* is byte full? */ | 
|---|
| 53 | if (count < (i * BITSPERCHAR))  /* was set not full? */ | 
|---|
| 54 | return ISet;  /* neither full nor empty nor singleton */ | 
|---|
| 55 | else count += BITSPERCHAR;  /* set is still full */ | 
|---|
| 56 | } | 
|---|
| 57 | else if ((b & (b - 1)) == 0) {  /* has byte only one bit? */ | 
|---|
| 58 | if (count > 0)  /* was set not empty? */ | 
|---|
| 59 | return ISet;  /* neither full nor empty nor singleton */ | 
|---|
| 60 | else {  /* set has only one char till now; track it */ | 
|---|
| 61 | count++; | 
|---|
| 62 | candidate = i; | 
|---|
| 63 | } | 
|---|
| 64 | } | 
|---|
| 65 | else return ISet;  /* byte is neither empty, full, nor singleton */ | 
|---|
| 66 | } | 
|---|
| 67 | switch (count) { | 
|---|
| 68 | case 0: return IFail;  /* empty set */ | 
|---|
| 69 | case 1: {  /* singleton; find character bit inside byte */ | 
|---|
| 70 | int b = cs[candidate]; | 
|---|
| 71 | *c = candidate * BITSPERCHAR; | 
|---|
| 72 | if ((b & 0xF0) != 0) { *c += 4; b >>= 4; } | 
|---|
| 73 | if ((b & 0x0C) != 0) { *c += 2; b >>= 2; } | 
|---|
| 74 | if ((b & 0x02) != 0) { *c += 1; } | 
|---|
| 75 | return IChar; | 
|---|
| 76 | } | 
|---|
| 77 | default: { | 
|---|
| 78 | assert(count == CHARSETSIZE * BITSPERCHAR);  /* full set */ | 
|---|
| 79 | return IAny; | 
|---|
| 80 | } | 
|---|
| 81 | } | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 |  | 
|---|
| 85 | /* | 
|---|
| 86 | ** A few basic operations on Charsets | 
|---|
| 87 | */ | 
|---|
| 88 | static void cs_complement (Charset *cs) { | 
|---|
| 89 | loopset(i, cs->cs[i] = ~cs->cs[i]); | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | static int cs_equal (const byte *cs1, const byte *cs2) { | 
|---|
| 93 | loopset(i, if (cs1[i] != cs2[i]) return 0); | 
|---|
| 94 | return 1; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | static int cs_disjoint (const Charset *cs1, const Charset *cs2) { | 
|---|
| 98 | loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;) | 
|---|
| 99 | return 1; | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 |  | 
|---|
| 103 | /* | 
|---|
| 104 | ** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a | 
|---|
| 105 | ** charset and return 1; else return 0. | 
|---|
| 106 | */ | 
|---|
| 107 | int tocharset (TTree *tree, Charset *cs) { | 
|---|
| 108 | switch (tree->tag) { | 
|---|
| 109 | case TSet: {  /* copy set */ | 
|---|
| 110 | loopset(i, cs->cs[i] = treebuffer(tree)[i]); | 
|---|
| 111 | return 1; | 
|---|
| 112 | } | 
|---|
| 113 | case TChar: {  /* only one char */ | 
|---|
| 114 | assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX); | 
|---|
| 115 | loopset(i, cs->cs[i] = 0);  /* erase all chars */ | 
|---|
| 116 | setchar(cs->cs, tree->u.n);  /* add that one */ | 
|---|
| 117 | return 1; | 
|---|
| 118 | } | 
|---|
| 119 | case TAny: { | 
|---|
| 120 | loopset(i, cs->cs[i] = 0xFF);  /* add all characters to the set */ | 
|---|
| 121 | return 1; | 
|---|
| 122 | } | 
|---|
| 123 | default: return 0; | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 |  | 
|---|
| 128 | /* | 
|---|
| 129 | ** Visit a TCall node taking care to stop recursion. If node not yet | 
|---|
| 130 | ** visited, return 'f(sib2(tree))', otherwise return 'def' (default | 
|---|
| 131 | ** value) | 
|---|
| 132 | */ | 
|---|
| 133 | static int callrecursive (TTree *tree, int f (TTree *t), int def) { | 
|---|
| 134 | int key = tree->key; | 
|---|
| 135 | assert(tree->tag == TCall); | 
|---|
| 136 | assert(sib2(tree)->tag == TRule); | 
|---|
| 137 | if (key == 0)  /* node already visited? */ | 
|---|
| 138 | return def;  /* return default value */ | 
|---|
| 139 | else {  /* first visit */ | 
|---|
| 140 | int result; | 
|---|
| 141 | tree->key = 0;  /* mark call as already visited */ | 
|---|
| 142 | result = f(sib2(tree));  /* go to called rule */ | 
|---|
| 143 | tree->key = key;  /* restore tree */ | 
|---|
| 144 | return result; | 
|---|
| 145 | } | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 |  | 
|---|
| 149 | /* | 
|---|
| 150 | ** Check whether a pattern tree has captures | 
|---|
| 151 | */ | 
|---|
| 152 | int hascaptures (TTree *tree) { | 
|---|
| 153 | tailcall: | 
|---|
| 154 | switch (tree->tag) { | 
|---|
| 155 | case TCapture: case TRunTime: | 
|---|
| 156 | return 1; | 
|---|
| 157 | case TCall: | 
|---|
| 158 | return callrecursive(tree, hascaptures, 0); | 
|---|
| 159 | case TRule:  /* do not follow siblings */ | 
|---|
| 160 | tree = sib1(tree); goto tailcall; | 
|---|
| 161 | case TOpenCall: assert(0); | 
|---|
| 162 | default: { | 
|---|
| 163 | switch (numsiblings[tree->tag]) { | 
|---|
| 164 | case 1:  /* return hascaptures(sib1(tree)); */ | 
|---|
| 165 | tree = sib1(tree); goto tailcall; | 
|---|
| 166 | case 2: | 
|---|
| 167 | if (hascaptures(sib1(tree))) | 
|---|
| 168 | return 1; | 
|---|
| 169 | /* else return hascaptures(sib2(tree)); */ | 
|---|
| 170 | tree = sib2(tree); goto tailcall; | 
|---|
| 171 | default: assert(numsiblings[tree->tag] == 0); return 0; | 
|---|
| 172 | } | 
|---|
| 173 | } | 
|---|
| 174 | } | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 |  | 
|---|
| 178 | /* | 
|---|
| 179 | ** Checks how a pattern behaves regarding the empty string, | 
|---|
| 180 | ** in one of two different ways: | 
|---|
| 181 | ** A pattern is *nullable* if it can match without consuming any character; | 
|---|
| 182 | ** A pattern is *nofail* if it never fails for any string | 
|---|
| 183 | ** (including the empty string). | 
|---|
| 184 | ** The difference is only for predicates and run-time captures; | 
|---|
| 185 | ** for other patterns, the two properties are equivalent. | 
|---|
| 186 | ** (With predicates, &'a' is nullable but not nofail. Of course, | 
|---|
| 187 | ** nofail => nullable.) | 
|---|
| 188 | ** These functions are all convervative in the following way: | 
|---|
| 189 | **    p is nullable => nullable(p) | 
|---|
| 190 | **    nofail(p) => p cannot fail | 
|---|
| 191 | ** The function assumes that TOpenCall is not nullable; | 
|---|
| 192 | ** this will be checked again when the grammar is fixed. | 
|---|
| 193 | ** Run-time captures can do whatever they want, so the result | 
|---|
| 194 | ** is conservative. | 
|---|
| 195 | */ | 
|---|
| 196 | int checkaux (TTree *tree, int pred) { | 
|---|
| 197 | tailcall: | 
|---|
| 198 | switch (tree->tag) { | 
|---|
| 199 | case TChar: case TSet: case TAny: | 
|---|
| 200 | case TFalse: case TOpenCall: | 
|---|
| 201 | return 0;  /* not nullable */ | 
|---|
| 202 | case TRep: case TTrue: | 
|---|
| 203 | return 1;  /* no fail */ | 
|---|
| 204 | case TNot: case TBehind:  /* can match empty, but can fail */ | 
|---|
| 205 | if (pred == PEnofail) return 0; | 
|---|
| 206 | else return 1;  /* PEnullable */ | 
|---|
| 207 | case TAnd:  /* can match empty; fail iff body does */ | 
|---|
| 208 | if (pred == PEnullable) return 1; | 
|---|
| 209 | /* else return checkaux(sib1(tree), pred); */ | 
|---|
| 210 | tree = sib1(tree); goto tailcall; | 
|---|
| 211 | case TRunTime:  /* can fail; match empty iff body does */ | 
|---|
| 212 | if (pred == PEnofail) return 0; | 
|---|
| 213 | /* else return checkaux(sib1(tree), pred); */ | 
|---|
| 214 | tree = sib1(tree); goto tailcall; | 
|---|
| 215 | case TSeq: | 
|---|
| 216 | if (!checkaux(sib1(tree), pred)) return 0; | 
|---|
| 217 | /* else return checkaux(sib2(tree), pred); */ | 
|---|
| 218 | tree = sib2(tree); goto tailcall; | 
|---|
| 219 | case TChoice: | 
|---|
| 220 | if (checkaux(sib2(tree), pred)) return 1; | 
|---|
| 221 | /* else return checkaux(sib1(tree), pred); */ | 
|---|
| 222 | tree = sib1(tree); goto tailcall; | 
|---|
| 223 | case TCapture: case TGrammar: case TRule: | 
|---|
| 224 | /* return checkaux(sib1(tree), pred); */ | 
|---|
| 225 | tree = sib1(tree); goto tailcall; | 
|---|
| 226 | case TCall:  /* return checkaux(sib2(tree), pred); */ | 
|---|
| 227 | tree = sib2(tree); goto tailcall; | 
|---|
| 228 | default: assert(0); return 0; | 
|---|
| 229 | } | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 |  | 
|---|
| 233 | /* | 
|---|
| 234 | ** number of characters to match a pattern (or -1 if variable) | 
|---|
| 235 | */ | 
|---|
| 236 | int fixedlen (TTree *tree) { | 
|---|
| 237 | int len = 0;  /* to accumulate in tail calls */ | 
|---|
| 238 | tailcall: | 
|---|
| 239 | switch (tree->tag) { | 
|---|
| 240 | case TChar: case TSet: case TAny: | 
|---|
| 241 | return len + 1; | 
|---|
| 242 | case TFalse: case TTrue: case TNot: case TAnd: case TBehind: | 
|---|
| 243 | return len; | 
|---|
| 244 | case TRep: case TRunTime: case TOpenCall: | 
|---|
| 245 | return -1; | 
|---|
| 246 | case TCapture: case TRule: case TGrammar: | 
|---|
| 247 | /* return fixedlen(sib1(tree)); */ | 
|---|
| 248 | tree = sib1(tree); goto tailcall; | 
|---|
| 249 | case TCall: { | 
|---|
| 250 | int n1 = callrecursive(tree, fixedlen, -1); | 
|---|
| 251 | if (n1 < 0) | 
|---|
| 252 | return -1; | 
|---|
| 253 | else | 
|---|
| 254 | return len + n1; | 
|---|
| 255 | } | 
|---|
| 256 | case TSeq: { | 
|---|
| 257 | int n1 = fixedlen(sib1(tree)); | 
|---|
| 258 | if (n1 < 0) | 
|---|
| 259 | return -1; | 
|---|
| 260 | /* else return fixedlen(sib2(tree)) + len; */ | 
|---|
| 261 | len += n1; tree = sib2(tree); goto tailcall; | 
|---|
| 262 | } | 
|---|
| 263 | case TChoice: { | 
|---|
| 264 | int n1 = fixedlen(sib1(tree)); | 
|---|
| 265 | int n2 = fixedlen(sib2(tree)); | 
|---|
| 266 | if (n1 != n2 || n1 < 0) | 
|---|
| 267 | return -1; | 
|---|
| 268 | else | 
|---|
| 269 | return len + n1; | 
|---|
| 270 | } | 
|---|
| 271 | default: assert(0); return 0; | 
|---|
| 272 | }; | 
|---|
| 273 | } | 
|---|
| 274 |  | 
|---|
| 275 |  | 
|---|
| 276 | /* | 
|---|
| 277 | ** Computes the 'first set' of a pattern. | 
|---|
| 278 | ** The result is a conservative aproximation: | 
|---|
| 279 | **   match p ax -> x (for some x) ==> a belongs to first(p) | 
|---|
| 280 | ** or | 
|---|
| 281 | **   a not in first(p) ==> match p ax -> fail (for all x) | 
|---|
| 282 | ** | 
|---|
| 283 | ** The set 'follow' is the first set of what follows the | 
|---|
| 284 | ** pattern (full set if nothing follows it). | 
|---|
| 285 | ** | 
|---|
| 286 | ** The function returns 0 when this resulting set can be used for | 
|---|
| 287 | ** test instructions that avoid the pattern altogether. | 
|---|
| 288 | ** A non-zero return can happen for two reasons: | 
|---|
| 289 | ** 1) match p '' -> ''            ==> return has bit 1 set | 
|---|
| 290 | ** (tests cannot be used because they would always fail for an empty input); | 
|---|
| 291 | ** 2) there is a match-time capture ==> return has bit 2 set | 
|---|
| 292 | ** (optimizations should not bypass match-time captures). | 
|---|
| 293 | */ | 
|---|
| 294 | static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) { | 
|---|
| 295 | tailcall: | 
|---|
| 296 | switch (tree->tag) { | 
|---|
| 297 | case TChar: case TSet: case TAny: { | 
|---|
| 298 | tocharset(tree, firstset); | 
|---|
| 299 | return 0; | 
|---|
| 300 | } | 
|---|
| 301 | case TTrue: { | 
|---|
| 302 | loopset(i, firstset->cs[i] = follow->cs[i]); | 
|---|
| 303 | return 1;  /* accepts the empty string */ | 
|---|
| 304 | } | 
|---|
| 305 | case TFalse: { | 
|---|
| 306 | loopset(i, firstset->cs[i] = 0); | 
|---|
| 307 | return 0; | 
|---|
| 308 | } | 
|---|
| 309 | case TChoice: { | 
|---|
| 310 | Charset csaux; | 
|---|
| 311 | int e1 = getfirst(sib1(tree), follow, firstset); | 
|---|
| 312 | int e2 = getfirst(sib2(tree), follow, &csaux); | 
|---|
| 313 | loopset(i, firstset->cs[i] |= csaux.cs[i]); | 
|---|
| 314 | return e1 | e2; | 
|---|
| 315 | } | 
|---|
| 316 | case TSeq: { | 
|---|
| 317 | if (!nullable(sib1(tree))) { | 
|---|
| 318 | /* when p1 is not nullable, p2 has nothing to contribute; | 
|---|
| 319 | return getfirst(sib1(tree), fullset, firstset); */ | 
|---|
| 320 | tree = sib1(tree); follow = fullset; goto tailcall; | 
|---|
| 321 | } | 
|---|
| 322 | else {  /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */ | 
|---|
| 323 | Charset csaux; | 
|---|
| 324 | int e2 = getfirst(sib2(tree), follow, &csaux); | 
|---|
| 325 | int e1 = getfirst(sib1(tree), &csaux, firstset); | 
|---|
| 326 | if (e1 == 0) return 0;  /* 'e1' ensures that first can be used */ | 
|---|
| 327 | else if ((e1 | e2) & 2)  /* one of the children has a matchtime? */ | 
|---|
| 328 | return 2;  /* pattern has a matchtime capture */ | 
|---|
| 329 | else return e2;  /* else depends on 'e2' */ | 
|---|
| 330 | } | 
|---|
| 331 | } | 
|---|
| 332 | case TRep: { | 
|---|
| 333 | getfirst(sib1(tree), follow, firstset); | 
|---|
| 334 | loopset(i, firstset->cs[i] |= follow->cs[i]); | 
|---|
| 335 | return 1;  /* accept the empty string */ | 
|---|
| 336 | } | 
|---|
| 337 | case TCapture: case TGrammar: case TRule: { | 
|---|
| 338 | /* return getfirst(sib1(tree), follow, firstset); */ | 
|---|
| 339 | tree = sib1(tree); goto tailcall; | 
|---|
| 340 | } | 
|---|
| 341 | case TRunTime: {  /* function invalidates any follow info. */ | 
|---|
| 342 | int e = getfirst(sib1(tree), fullset, firstset); | 
|---|
| 343 | if (e) return 2;  /* function is not "protected"? */ | 
|---|
| 344 | else return 0;  /* pattern inside capture ensures first can be used */ | 
|---|
| 345 | } | 
|---|
| 346 | case TCall: { | 
|---|
| 347 | /* return getfirst(sib2(tree), follow, firstset); */ | 
|---|
| 348 | tree = sib2(tree); goto tailcall; | 
|---|
| 349 | } | 
|---|
| 350 | case TAnd: { | 
|---|
| 351 | int e = getfirst(sib1(tree), follow, firstset); | 
|---|
| 352 | loopset(i, firstset->cs[i] &= follow->cs[i]); | 
|---|
| 353 | return e; | 
|---|
| 354 | } | 
|---|
| 355 | case TNot: { | 
|---|
| 356 | if (tocharset(sib1(tree), firstset)) { | 
|---|
| 357 | cs_complement(firstset); | 
|---|
| 358 | return 1; | 
|---|
| 359 | } | 
|---|
| 360 | /* else go through */ | 
|---|
| 361 | } | 
|---|
| 362 | case TBehind: {  /* instruction gives no new information */ | 
|---|
| 363 | /* call 'getfirst' only to check for math-time captures */ | 
|---|
| 364 | int e = getfirst(sib1(tree), follow, firstset); | 
|---|
| 365 | loopset(i, firstset->cs[i] = follow->cs[i]);  /* uses follow */ | 
|---|
| 366 | return e | 1;  /* always can accept the empty string */ | 
|---|
| 367 | } | 
|---|
| 368 | default: assert(0); return 0; | 
|---|
| 369 | } | 
|---|
| 370 | } | 
|---|
| 371 |  | 
|---|
| 372 |  | 
|---|
| 373 | /* | 
|---|
| 374 | ** If 'headfail(tree)' true, then 'tree' can fail only depending on the | 
|---|
| 375 | ** next character of the subject. | 
|---|
| 376 | */ | 
|---|
| 377 | static int headfail (TTree *tree) { | 
|---|
| 378 | tailcall: | 
|---|
| 379 | switch (tree->tag) { | 
|---|
| 380 | case TChar: case TSet: case TAny: case TFalse: | 
|---|
| 381 | return 1; | 
|---|
| 382 | case TTrue: case TRep: case TRunTime: case TNot: | 
|---|
| 383 | case TBehind: | 
|---|
| 384 | return 0; | 
|---|
| 385 | case TCapture: case TGrammar: case TRule: case TAnd: | 
|---|
| 386 | tree = sib1(tree); goto tailcall;  /* return headfail(sib1(tree)); */ | 
|---|
| 387 | case TCall: | 
|---|
| 388 | tree = sib2(tree); goto tailcall;  /* return headfail(sib2(tree)); */ | 
|---|
| 389 | case TSeq: | 
|---|
| 390 | if (!nofail(sib2(tree))) return 0; | 
|---|
| 391 | /* else return headfail(sib1(tree)); */ | 
|---|
| 392 | tree = sib1(tree); goto tailcall; | 
|---|
| 393 | case TChoice: | 
|---|
| 394 | if (!headfail(sib1(tree))) return 0; | 
|---|
| 395 | /* else return headfail(sib2(tree)); */ | 
|---|
| 396 | tree = sib2(tree); goto tailcall; | 
|---|
| 397 | default: assert(0); return 0; | 
|---|
| 398 | } | 
|---|
| 399 | } | 
|---|
| 400 |  | 
|---|
| 401 |  | 
|---|
| 402 | /* | 
|---|
| 403 | ** Check whether the code generation for the given tree can benefit | 
|---|
| 404 | ** from a follow set (to avoid computing the follow set when it is | 
|---|
| 405 | ** not needed) | 
|---|
| 406 | */ | 
|---|
| 407 | static int needfollow (TTree *tree) { | 
|---|
| 408 | tailcall: | 
|---|
| 409 | switch (tree->tag) { | 
|---|
| 410 | case TChar: case TSet: case TAny: | 
|---|
| 411 | case TFalse: case TTrue: case TAnd: case TNot: | 
|---|
| 412 | case TRunTime: case TGrammar: case TCall: case TBehind: | 
|---|
| 413 | return 0; | 
|---|
| 414 | case TChoice: case TRep: | 
|---|
| 415 | return 1; | 
|---|
| 416 | case TCapture: | 
|---|
| 417 | tree = sib1(tree); goto tailcall; | 
|---|
| 418 | case TSeq: | 
|---|
| 419 | tree = sib2(tree); goto tailcall; | 
|---|
| 420 | default: assert(0); return 0; | 
|---|
| 421 | } | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | /* }====================================================== */ | 
|---|
| 425 |  | 
|---|
| 426 |  | 
|---|
| 427 |  | 
|---|
| 428 | /* | 
|---|
| 429 | ** {====================================================== | 
|---|
| 430 | ** Code generation | 
|---|
| 431 | ** ======================================================= | 
|---|
| 432 | */ | 
|---|
| 433 |  | 
|---|
| 434 |  | 
|---|
| 435 | /* | 
|---|
| 436 | ** size of an instruction | 
|---|
| 437 | */ | 
|---|
| 438 | int sizei (const Instruction *i) { | 
|---|
| 439 | switch((Opcode)i->i.code) { | 
|---|
| 440 | case ISet: case ISpan: return CHARSETINSTSIZE; | 
|---|
| 441 | case ITestSet: return CHARSETINSTSIZE + 1; | 
|---|
| 442 | case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall: | 
|---|
| 443 | case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit: | 
|---|
| 444 | return 2; | 
|---|
| 445 | default: return 1; | 
|---|
| 446 | } | 
|---|
| 447 | } | 
|---|
| 448 |  | 
|---|
| 449 |  | 
|---|
| 450 | /* | 
|---|
| 451 | ** state for the compiler | 
|---|
| 452 | */ | 
|---|
| 453 | typedef struct CompileState { | 
|---|
| 454 | Pattern *p;  /* pattern being compiled */ | 
|---|
| 455 | int ncode;  /* next position in p->code to be filled */ | 
|---|
| 456 | lua_State *L; | 
|---|
| 457 | } CompileState; | 
|---|
| 458 |  | 
|---|
| 459 |  | 
|---|
| 460 | /* | 
|---|
| 461 | ** code generation is recursive; 'opt' indicates that the code is being | 
|---|
| 462 | ** generated as the last thing inside an optional pattern (so, if that | 
|---|
| 463 | ** code is optional too, it can reuse the 'IChoice' already in place for | 
|---|
| 464 | ** the outer pattern). 'tt' points to a previous test protecting this | 
|---|
| 465 | ** code (or NOINST). 'fl' is the follow set of the pattern. | 
|---|
| 466 | */ | 
|---|
| 467 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | 
|---|
| 468 | const Charset *fl); | 
|---|
| 469 |  | 
|---|
| 470 |  | 
|---|
| 471 | void realloccode (lua_State *L, Pattern *p, int nsize) { | 
|---|
| 472 | void *ud; | 
|---|
| 473 | lua_Alloc f = lua_getallocf(L, &ud); | 
|---|
| 474 | void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction), | 
|---|
| 475 | nsize * sizeof(Instruction)); | 
|---|
| 476 | if (newblock == NULL && nsize > 0) | 
|---|
| 477 | luaL_error(L, "not enough memory"); | 
|---|
| 478 | p->code = (Instruction *)newblock; | 
|---|
| 479 | p->codesize = nsize; | 
|---|
| 480 | } | 
|---|
| 481 |  | 
|---|
| 482 |  | 
|---|
| 483 | static int nextinstruction (CompileState *compst) { | 
|---|
| 484 | int size = compst->p->codesize; | 
|---|
| 485 | if (compst->ncode >= size) | 
|---|
| 486 | realloccode(compst->L, compst->p, size * 2); | 
|---|
| 487 | return compst->ncode++; | 
|---|
| 488 | } | 
|---|
| 489 |  | 
|---|
| 490 |  | 
|---|
| 491 | #define getinstr(cs,i)		((cs)->p->code[i]) | 
|---|
| 492 |  | 
|---|
| 493 |  | 
|---|
| 494 | static int addinstruction (CompileState *compst, Opcode op, int aux) { | 
|---|
| 495 | int i = nextinstruction(compst); | 
|---|
| 496 | getinstr(compst, i).i.code = op; | 
|---|
| 497 | getinstr(compst, i).i.aux = aux; | 
|---|
| 498 | return i; | 
|---|
| 499 | } | 
|---|
| 500 |  | 
|---|
| 501 |  | 
|---|
| 502 | /* | 
|---|
| 503 | ** Add an instruction followed by space for an offset (to be set later) | 
|---|
| 504 | */ | 
|---|
| 505 | static int addoffsetinst (CompileState *compst, Opcode op) { | 
|---|
| 506 | int i = addinstruction(compst, op, 0);  /* instruction */ | 
|---|
| 507 | addinstruction(compst, (Opcode)0, 0);  /* open space for offset */ | 
|---|
| 508 | assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2); | 
|---|
| 509 | return i; | 
|---|
| 510 | } | 
|---|
| 511 |  | 
|---|
| 512 |  | 
|---|
| 513 | /* | 
|---|
| 514 | ** Set the offset of an instruction | 
|---|
| 515 | */ | 
|---|
| 516 | static void setoffset (CompileState *compst, int instruction, int offset) { | 
|---|
| 517 | getinstr(compst, instruction + 1).offset = offset; | 
|---|
| 518 | } | 
|---|
| 519 |  | 
|---|
| 520 |  | 
|---|
| 521 | /* | 
|---|
| 522 | ** Add a capture instruction: | 
|---|
| 523 | ** 'op' is the capture instruction; 'cap' the capture kind; | 
|---|
| 524 | ** 'key' the key into ktable; 'aux' is the optional capture offset | 
|---|
| 525 | ** | 
|---|
| 526 | */ | 
|---|
| 527 | static int addinstcap (CompileState *compst, Opcode op, int cap, int key, | 
|---|
| 528 | int aux) { | 
|---|
| 529 | int i = addinstruction(compst, op, joinkindoff(cap, aux)); | 
|---|
| 530 | getinstr(compst, i).i.key = key; | 
|---|
| 531 | return i; | 
|---|
| 532 | } | 
|---|
| 533 |  | 
|---|
| 534 |  | 
|---|
| 535 | #define gethere(compst) 	((compst)->ncode) | 
|---|
| 536 |  | 
|---|
| 537 | #define target(code,i)		((i) + code[i + 1].offset) | 
|---|
| 538 |  | 
|---|
| 539 |  | 
|---|
| 540 | /* | 
|---|
| 541 | ** Patch 'instruction' to jump to 'target' | 
|---|
| 542 | */ | 
|---|
| 543 | static void jumptothere (CompileState *compst, int instruction, int target) { | 
|---|
| 544 | if (instruction >= 0) | 
|---|
| 545 | setoffset(compst, instruction, target - instruction); | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 |  | 
|---|
| 549 | /* | 
|---|
| 550 | ** Patch 'instruction' to jump to current position | 
|---|
| 551 | */ | 
|---|
| 552 | static void jumptohere (CompileState *compst, int instruction) { | 
|---|
| 553 | jumptothere(compst, instruction, gethere(compst)); | 
|---|
| 554 | } | 
|---|
| 555 |  | 
|---|
| 556 |  | 
|---|
| 557 | /* | 
|---|
| 558 | ** Code an IChar instruction, or IAny if there is an equivalent | 
|---|
| 559 | ** test dominating it | 
|---|
| 560 | */ | 
|---|
| 561 | static void codechar (CompileState *compst, int c, int tt) { | 
|---|
| 562 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar && | 
|---|
| 563 | getinstr(compst, tt).i.aux == c) | 
|---|
| 564 | addinstruction(compst, IAny, 0); | 
|---|
| 565 | else | 
|---|
| 566 | addinstruction(compst, IChar, c); | 
|---|
| 567 | } | 
|---|
| 568 |  | 
|---|
| 569 |  | 
|---|
| 570 | /* | 
|---|
| 571 | ** Add a charset posfix to an instruction | 
|---|
| 572 | */ | 
|---|
| 573 | static void addcharset (CompileState *compst, const byte *cs) { | 
|---|
| 574 | int p = gethere(compst); | 
|---|
| 575 | int i; | 
|---|
| 576 | for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++) | 
|---|
| 577 | nextinstruction(compst);  /* space for buffer */ | 
|---|
| 578 | /* fill buffer with charset */ | 
|---|
| 579 | loopset(j, getinstr(compst, p).buff[j] = cs[j]); | 
|---|
| 580 | } | 
|---|
| 581 |  | 
|---|
| 582 |  | 
|---|
| 583 | /* | 
|---|
| 584 | ** code a char set, optimizing unit sets for IChar, "complete" | 
|---|
| 585 | ** sets for IAny, and empty sets for IFail; also use an IAny | 
|---|
| 586 | ** when instruction is dominated by an equivalent test. | 
|---|
| 587 | */ | 
|---|
| 588 | static void codecharset (CompileState *compst, const byte *cs, int tt) { | 
|---|
| 589 | int c = 0;  /* (=) to avoid warnings */ | 
|---|
| 590 | Opcode op = charsettype(cs, &c); | 
|---|
| 591 | switch (op) { | 
|---|
| 592 | case IChar: codechar(compst, c, tt); break; | 
|---|
| 593 | case ISet: {  /* non-trivial set? */ | 
|---|
| 594 | if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet && | 
|---|
| 595 | cs_equal(cs, getinstr(compst, tt + 2).buff)) | 
|---|
| 596 | addinstruction(compst, IAny, 0); | 
|---|
| 597 | else { | 
|---|
| 598 | addinstruction(compst, ISet, 0); | 
|---|
| 599 | addcharset(compst, cs); | 
|---|
| 600 | } | 
|---|
| 601 | break; | 
|---|
| 602 | } | 
|---|
| 603 | default: addinstruction(compst, op, c); break; | 
|---|
| 604 | } | 
|---|
| 605 | } | 
|---|
| 606 |  | 
|---|
| 607 |  | 
|---|
| 608 | /* | 
|---|
| 609 | ** code a test set, optimizing unit sets for ITestChar, "complete" | 
|---|
| 610 | ** sets for ITestAny, and empty sets for IJmp (always fails). | 
|---|
| 611 | ** 'e' is true iff test should accept the empty string. (Test | 
|---|
| 612 | ** instructions in the current VM never accept the empty string.) | 
|---|
| 613 | */ | 
|---|
| 614 | static int codetestset (CompileState *compst, Charset *cs, int e) { | 
|---|
| 615 | if (e) return NOINST;  /* no test */ | 
|---|
| 616 | else { | 
|---|
| 617 | int c = 0; | 
|---|
| 618 | Opcode op = charsettype(cs->cs, &c); | 
|---|
| 619 | switch (op) { | 
|---|
| 620 | case IFail: return addoffsetinst(compst, IJmp);  /* always jump */ | 
|---|
| 621 | case IAny: return addoffsetinst(compst, ITestAny); | 
|---|
| 622 | case IChar: { | 
|---|
| 623 | int i = addoffsetinst(compst, ITestChar); | 
|---|
| 624 | getinstr(compst, i).i.aux = c; | 
|---|
| 625 | return i; | 
|---|
| 626 | } | 
|---|
| 627 | case ISet: { | 
|---|
| 628 | int i = addoffsetinst(compst, ITestSet); | 
|---|
| 629 | addcharset(compst, cs->cs); | 
|---|
| 630 | return i; | 
|---|
| 631 | } | 
|---|
| 632 | default: assert(0); return 0; | 
|---|
| 633 | } | 
|---|
| 634 | } | 
|---|
| 635 | } | 
|---|
| 636 |  | 
|---|
| 637 |  | 
|---|
| 638 | /* | 
|---|
| 639 | ** Find the final destination of a sequence of jumps | 
|---|
| 640 | */ | 
|---|
| 641 | static int finaltarget (Instruction *code, int i) { | 
|---|
| 642 | while (code[i].i.code == IJmp) | 
|---|
| 643 | i = target(code, i); | 
|---|
| 644 | return i; | 
|---|
| 645 | } | 
|---|
| 646 |  | 
|---|
| 647 |  | 
|---|
| 648 | /* | 
|---|
| 649 | ** final label (after traversing any jumps) | 
|---|
| 650 | */ | 
|---|
| 651 | static int finallabel (Instruction *code, int i) { | 
|---|
| 652 | return finaltarget(code, target(code, i)); | 
|---|
| 653 | } | 
|---|
| 654 |  | 
|---|
| 655 |  | 
|---|
| 656 | /* | 
|---|
| 657 | ** <behind(p)> == behind n; <p>   (where n = fixedlen(p)) | 
|---|
| 658 | */ | 
|---|
| 659 | static void codebehind (CompileState *compst, TTree *tree) { | 
|---|
| 660 | if (tree->u.n > 0) | 
|---|
| 661 | addinstruction(compst, IBehind, tree->u.n); | 
|---|
| 662 | codegen(compst, sib1(tree), 0, NOINST, fullset); | 
|---|
| 663 | } | 
|---|
| 664 |  | 
|---|
| 665 |  | 
|---|
| 666 | /* | 
|---|
| 667 | ** Choice; optimizations: | 
|---|
| 668 | ** - when p1 is headfail or | 
|---|
| 669 | ** when first(p1) and first(p2) are disjoint, than | 
|---|
| 670 | ** a character not in first(p1) cannot go to p1, and a character | 
|---|
| 671 | ** in first(p1) cannot go to p2 (at it is not in first(p2)). | 
|---|
| 672 | ** (The optimization is not valid if p1 accepts the empty string, | 
|---|
| 673 | ** as then there is no character at all...) | 
|---|
| 674 | ** - when p2 is empty and opt is true; a IPartialCommit can reuse | 
|---|
| 675 | ** the Choice already active in the stack. | 
|---|
| 676 | */ | 
|---|
| 677 | static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, | 
|---|
| 678 | const Charset *fl) { | 
|---|
| 679 | int emptyp2 = (p2->tag == TTrue); | 
|---|
| 680 | Charset cs1, cs2; | 
|---|
| 681 | int e1 = getfirst(p1, fullset, &cs1); | 
|---|
| 682 | if (headfail(p1) || | 
|---|
| 683 | (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) { | 
|---|
| 684 | /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */ | 
|---|
| 685 | int test = codetestset(compst, &cs1, 0); | 
|---|
| 686 | int jmp = NOINST; | 
|---|
| 687 | codegen(compst, p1, 0, test, fl); | 
|---|
| 688 | if (!emptyp2) | 
|---|
| 689 | jmp = addoffsetinst(compst, IJmp); | 
|---|
| 690 | jumptohere(compst, test); | 
|---|
| 691 | codegen(compst, p2, opt, NOINST, fl); | 
|---|
| 692 | jumptohere(compst, jmp); | 
|---|
| 693 | } | 
|---|
| 694 | else if (opt && emptyp2) { | 
|---|
| 695 | /* p1? == IPartialCommit; p1 */ | 
|---|
| 696 | jumptohere(compst, addoffsetinst(compst, IPartialCommit)); | 
|---|
| 697 | codegen(compst, p1, 1, NOINST, fullset); | 
|---|
| 698 | } | 
|---|
| 699 | else { | 
|---|
| 700 | /* <p1 / p2> == | 
|---|
| 701 | test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */ | 
|---|
| 702 | int pcommit; | 
|---|
| 703 | int test = codetestset(compst, &cs1, e1); | 
|---|
| 704 | int pchoice = addoffsetinst(compst, IChoice); | 
|---|
| 705 | codegen(compst, p1, emptyp2, test, fullset); | 
|---|
| 706 | pcommit = addoffsetinst(compst, ICommit); | 
|---|
| 707 | jumptohere(compst, pchoice); | 
|---|
| 708 | jumptohere(compst, test); | 
|---|
| 709 | codegen(compst, p2, opt, NOINST, fl); | 
|---|
| 710 | jumptohere(compst, pcommit); | 
|---|
| 711 | } | 
|---|
| 712 | } | 
|---|
| 713 |  | 
|---|
| 714 |  | 
|---|
| 715 | /* | 
|---|
| 716 | ** And predicate | 
|---|
| 717 | ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n | 
|---|
| 718 | ** (valid only when 'p' has no captures) | 
|---|
| 719 | */ | 
|---|
| 720 | static void codeand (CompileState *compst, TTree *tree, int tt) { | 
|---|
| 721 | int n = fixedlen(tree); | 
|---|
| 722 | if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) { | 
|---|
| 723 | codegen(compst, tree, 0, tt, fullset); | 
|---|
| 724 | if (n > 0) | 
|---|
| 725 | addinstruction(compst, IBehind, n); | 
|---|
| 726 | } | 
|---|
| 727 | else {  /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ | 
|---|
| 728 | int pcommit; | 
|---|
| 729 | int pchoice = addoffsetinst(compst, IChoice); | 
|---|
| 730 | codegen(compst, tree, 0, tt, fullset); | 
|---|
| 731 | pcommit = addoffsetinst(compst, IBackCommit); | 
|---|
| 732 | jumptohere(compst, pchoice); | 
|---|
| 733 | addinstruction(compst, IFail, 0); | 
|---|
| 734 | jumptohere(compst, pcommit); | 
|---|
| 735 | } | 
|---|
| 736 | } | 
|---|
| 737 |  | 
|---|
| 738 |  | 
|---|
| 739 | /* | 
|---|
| 740 | ** Captures: if pattern has fixed (and not too big) length, and it | 
|---|
| 741 | ** has no nested captures, use a single IFullCapture instruction | 
|---|
| 742 | ** after the match; otherwise, enclose the pattern with OpenCapture - | 
|---|
| 743 | ** CloseCapture. | 
|---|
| 744 | */ | 
|---|
| 745 | static void codecapture (CompileState *compst, TTree *tree, int tt, | 
|---|
| 746 | const Charset *fl) { | 
|---|
| 747 | int len = fixedlen(sib1(tree)); | 
|---|
| 748 | if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) { | 
|---|
| 749 | codegen(compst, sib1(tree), 0, tt, fl); | 
|---|
| 750 | addinstcap(compst, IFullCapture, tree->cap, tree->key, len); | 
|---|
| 751 | } | 
|---|
| 752 | else { | 
|---|
| 753 | addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0); | 
|---|
| 754 | codegen(compst, sib1(tree), 0, tt, fl); | 
|---|
| 755 | addinstcap(compst, ICloseCapture, Cclose, 0, 0); | 
|---|
| 756 | } | 
|---|
| 757 | } | 
|---|
| 758 |  | 
|---|
| 759 |  | 
|---|
| 760 | static void coderuntime (CompileState *compst, TTree *tree, int tt) { | 
|---|
| 761 | addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0); | 
|---|
| 762 | codegen(compst, sib1(tree), 0, tt, fullset); | 
|---|
| 763 | addinstcap(compst, ICloseRunTime, Cclose, 0, 0); | 
|---|
| 764 | } | 
|---|
| 765 |  | 
|---|
| 766 |  | 
|---|
| 767 | /* | 
|---|
| 768 | ** Repetion; optimizations: | 
|---|
| 769 | ** When pattern is a charset, can use special instruction ISpan. | 
|---|
| 770 | ** When pattern is head fail, or if it starts with characters that | 
|---|
| 771 | ** are disjoint from what follows the repetions, a simple test | 
|---|
| 772 | ** is enough (a fail inside the repetition would backtrack to fail | 
|---|
| 773 | ** again in the following pattern, so there is no need for a choice). | 
|---|
| 774 | ** When 'opt' is true, the repetion can reuse the Choice already | 
|---|
| 775 | ** active in the stack. | 
|---|
| 776 | */ | 
|---|
| 777 | static void coderep (CompileState *compst, TTree *tree, int opt, | 
|---|
| 778 | const Charset *fl) { | 
|---|
| 779 | Charset st; | 
|---|
| 780 | if (tocharset(tree, &st)) { | 
|---|
| 781 | addinstruction(compst, ISpan, 0); | 
|---|
| 782 | addcharset(compst, st.cs); | 
|---|
| 783 | } | 
|---|
| 784 | else { | 
|---|
| 785 | int e1 = getfirst(tree, fullset, &st); | 
|---|
| 786 | if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { | 
|---|
| 787 | /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */ | 
|---|
| 788 | int jmp; | 
|---|
| 789 | int test = codetestset(compst, &st, 0); | 
|---|
| 790 | codegen(compst, tree, 0, test, fullset); | 
|---|
| 791 | jmp = addoffsetinst(compst, IJmp); | 
|---|
| 792 | jumptohere(compst, test); | 
|---|
| 793 | jumptothere(compst, jmp, test); | 
|---|
| 794 | } | 
|---|
| 795 | else { | 
|---|
| 796 | /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */ | 
|---|
| 797 | /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */ | 
|---|
| 798 | int commit, l2; | 
|---|
| 799 | int test = codetestset(compst, &st, e1); | 
|---|
| 800 | int pchoice = NOINST; | 
|---|
| 801 | if (opt) | 
|---|
| 802 | jumptohere(compst, addoffsetinst(compst, IPartialCommit)); | 
|---|
| 803 | else | 
|---|
| 804 | pchoice = addoffsetinst(compst, IChoice); | 
|---|
| 805 | l2 = gethere(compst); | 
|---|
| 806 | codegen(compst, tree, 0, NOINST, fullset); | 
|---|
| 807 | commit = addoffsetinst(compst, IPartialCommit); | 
|---|
| 808 | jumptothere(compst, commit, l2); | 
|---|
| 809 | jumptohere(compst, pchoice); | 
|---|
| 810 | jumptohere(compst, test); | 
|---|
| 811 | } | 
|---|
| 812 | } | 
|---|
| 813 | } | 
|---|
| 814 |  | 
|---|
| 815 |  | 
|---|
| 816 | /* | 
|---|
| 817 | ** Not predicate; optimizations: | 
|---|
| 818 | ** In any case, if first test fails, 'not' succeeds, so it can jump to | 
|---|
| 819 | ** the end. If pattern is headfail, that is all (it cannot fail | 
|---|
| 820 | ** in other parts); this case includes 'not' of simple sets. Otherwise, | 
|---|
| 821 | ** use the default code (a choice plus a failtwice). | 
|---|
| 822 | */ | 
|---|
| 823 | static void codenot (CompileState *compst, TTree *tree) { | 
|---|
| 824 | Charset st; | 
|---|
| 825 | int e = getfirst(tree, fullset, &st); | 
|---|
| 826 | int test = codetestset(compst, &st, e); | 
|---|
| 827 | if (headfail(tree))  /* test (fail(p1)) -> L1; fail; L1:  */ | 
|---|
| 828 | addinstruction(compst, IFail, 0); | 
|---|
| 829 | else { | 
|---|
| 830 | /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1:  */ | 
|---|
| 831 | int pchoice = addoffsetinst(compst, IChoice); | 
|---|
| 832 | codegen(compst, tree, 0, NOINST, fullset); | 
|---|
| 833 | addinstruction(compst, IFailTwice, 0); | 
|---|
| 834 | jumptohere(compst, pchoice); | 
|---|
| 835 | } | 
|---|
| 836 | jumptohere(compst, test); | 
|---|
| 837 | } | 
|---|
| 838 |  | 
|---|
| 839 |  | 
|---|
| 840 | /* | 
|---|
| 841 | ** change open calls to calls, using list 'positions' to find | 
|---|
| 842 | ** correct offsets; also optimize tail calls | 
|---|
| 843 | */ | 
|---|
| 844 | static void correctcalls (CompileState *compst, int *positions, | 
|---|
| 845 | int from, int to) { | 
|---|
| 846 | int i; | 
|---|
| 847 | Instruction *code = compst->p->code; | 
|---|
| 848 | for (i = from; i < to; i += sizei(&code[i])) { | 
|---|
| 849 | if (code[i].i.code == IOpenCall) { | 
|---|
| 850 | int n = code[i].i.key;  /* rule number */ | 
|---|
| 851 | int rule = positions[n];  /* rule position */ | 
|---|
| 852 | assert(rule == from || code[rule - 1].i.code == IRet); | 
|---|
| 853 | if (code[finaltarget(code, i + 2)].i.code == IRet)  /* call; ret ? */ | 
|---|
| 854 | code[i].i.code = IJmp;  /* tail call */ | 
|---|
| 855 | else | 
|---|
| 856 | code[i].i.code = ICall; | 
|---|
| 857 | jumptothere(compst, i, rule);  /* call jumps to respective rule */ | 
|---|
| 858 | } | 
|---|
| 859 | } | 
|---|
| 860 | assert(i == to); | 
|---|
| 861 | } | 
|---|
| 862 |  | 
|---|
| 863 |  | 
|---|
| 864 | /* | 
|---|
| 865 | ** Code for a grammar: | 
|---|
| 866 | ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: | 
|---|
| 867 | */ | 
|---|
| 868 | static void codegrammar (CompileState *compst, TTree *grammar) { | 
|---|
| 869 | int positions[MAXRULES]; | 
|---|
| 870 | int rulenumber = 0; | 
|---|
| 871 | TTree *rule; | 
|---|
| 872 | int firstcall = addoffsetinst(compst, ICall);  /* call initial rule */ | 
|---|
| 873 | int jumptoend = addoffsetinst(compst, IJmp);  /* jump to the end */ | 
|---|
| 874 | int start = gethere(compst);  /* here starts the initial rule */ | 
|---|
| 875 | jumptohere(compst, firstcall); | 
|---|
| 876 | for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { | 
|---|
| 877 | positions[rulenumber++] = gethere(compst);  /* save rule position */ | 
|---|
| 878 | codegen(compst, sib1(rule), 0, NOINST, fullset);  /* code rule */ | 
|---|
| 879 | addinstruction(compst, IRet, 0); | 
|---|
| 880 | } | 
|---|
| 881 | assert(rule->tag == TTrue); | 
|---|
| 882 | jumptohere(compst, jumptoend); | 
|---|
| 883 | correctcalls(compst, positions, start, gethere(compst)); | 
|---|
| 884 | } | 
|---|
| 885 |  | 
|---|
| 886 |  | 
|---|
| 887 | static void codecall (CompileState *compst, TTree *call) { | 
|---|
| 888 | int c = addoffsetinst(compst, IOpenCall);  /* to be corrected later */ | 
|---|
| 889 | getinstr(compst, c).i.key = sib2(call)->cap;  /* rule number */ | 
|---|
| 890 | assert(sib2(call)->tag == TRule); | 
|---|
| 891 | } | 
|---|
| 892 |  | 
|---|
| 893 |  | 
|---|
| 894 | /* | 
|---|
| 895 | ** Code first child of a sequence | 
|---|
| 896 | ** (second child is called in-place to allow tail call) | 
|---|
| 897 | ** Return 'tt' for second child | 
|---|
| 898 | */ | 
|---|
| 899 | static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2, | 
|---|
| 900 | int tt, const Charset *fl) { | 
|---|
| 901 | if (needfollow(p1)) { | 
|---|
| 902 | Charset fl1; | 
|---|
| 903 | getfirst(p2, fl, &fl1);  /* p1 follow is p2 first */ | 
|---|
| 904 | codegen(compst, p1, 0, tt, &fl1); | 
|---|
| 905 | } | 
|---|
| 906 | else  /* use 'fullset' as follow */ | 
|---|
| 907 | codegen(compst, p1, 0, tt, fullset); | 
|---|
| 908 | if (fixedlen(p1) != 0)  /* can 'p1' consume anything? */ | 
|---|
| 909 | return  NOINST;  /* invalidate test */ | 
|---|
| 910 | else return tt;  /* else 'tt' still protects sib2 */ | 
|---|
| 911 | } | 
|---|
| 912 |  | 
|---|
| 913 |  | 
|---|
| 914 | /* | 
|---|
| 915 | ** Main code-generation function: dispatch to auxiliar functions | 
|---|
| 916 | ** according to kind of tree. ('needfollow' should return true | 
|---|
| 917 | ** only for consructions that use 'fl'.) | 
|---|
| 918 | */ | 
|---|
| 919 | static void codegen (CompileState *compst, TTree *tree, int opt, int tt, | 
|---|
| 920 | const Charset *fl) { | 
|---|
| 921 | tailcall: | 
|---|
| 922 | switch (tree->tag) { | 
|---|
| 923 | case TChar: codechar(compst, tree->u.n, tt); break; | 
|---|
| 924 | case TAny: addinstruction(compst, IAny, 0); break; | 
|---|
| 925 | case TSet: codecharset(compst, treebuffer(tree), tt); break; | 
|---|
| 926 | case TTrue: break; | 
|---|
| 927 | case TFalse: addinstruction(compst, IFail, 0); break; | 
|---|
| 928 | case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break; | 
|---|
| 929 | case TRep: coderep(compst, sib1(tree), opt, fl); break; | 
|---|
| 930 | case TBehind: codebehind(compst, tree); break; | 
|---|
| 931 | case TNot: codenot(compst, sib1(tree)); break; | 
|---|
| 932 | case TAnd: codeand(compst, sib1(tree), tt); break; | 
|---|
| 933 | case TCapture: codecapture(compst, tree, tt, fl); break; | 
|---|
| 934 | case TRunTime: coderuntime(compst, tree, tt); break; | 
|---|
| 935 | case TGrammar: codegrammar(compst, tree); break; | 
|---|
| 936 | case TCall: codecall(compst, tree); break; | 
|---|
| 937 | case TSeq: { | 
|---|
| 938 | tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl);  /* code 'p1' */ | 
|---|
| 939 | /* codegen(compst, p2, opt, tt, fl); */ | 
|---|
| 940 | tree = sib2(tree); goto tailcall; | 
|---|
| 941 | } | 
|---|
| 942 | default: assert(0); | 
|---|
| 943 | } | 
|---|
| 944 | } | 
|---|
| 945 |  | 
|---|
| 946 |  | 
|---|
| 947 | /* | 
|---|
| 948 | ** Optimize jumps and other jump-like instructions. | 
|---|
| 949 | ** * Update labels of instructions with labels to their final | 
|---|
| 950 | ** destinations (e.g., choice L1; ... L1: jmp L2: becomes | 
|---|
| 951 | ** choice L2) | 
|---|
| 952 | ** * Jumps to other instructions that do jumps become those | 
|---|
| 953 | ** instructions (e.g., jump to return becomes a return; jump | 
|---|
| 954 | ** to commit becomes a commit) | 
|---|
| 955 | */ | 
|---|
| 956 | static void peephole (CompileState *compst) { | 
|---|
| 957 | Instruction *code = compst->p->code; | 
|---|
| 958 | int i; | 
|---|
| 959 | for (i = 0; i < compst->ncode; i += sizei(&code[i])) { | 
|---|
| 960 | redo: | 
|---|
| 961 | switch (code[i].i.code) { | 
|---|
| 962 | case IChoice: case ICall: case ICommit: case IPartialCommit: | 
|---|
| 963 | case IBackCommit: case ITestChar: case ITestSet: | 
|---|
| 964 | case ITestAny: {  /* instructions with labels */ | 
|---|
| 965 | jumptothere(compst, i, finallabel(code, i));  /* optimize label */ | 
|---|
| 966 | break; | 
|---|
| 967 | } | 
|---|
| 968 | case IJmp: { | 
|---|
| 969 | int ft = finaltarget(code, i); | 
|---|
| 970 | switch (code[ft].i.code) {  /* jumping to what? */ | 
|---|
| 971 | case IRet: case IFail: case IFailTwice: | 
|---|
| 972 | case IEnd: {  /* instructions with unconditional implicit jumps */ | 
|---|
| 973 | code[i] = code[ft];  /* jump becomes that instruction */ | 
|---|
| 974 | code[i + 1].i.code = IAny;  /* 'no-op' for target position */ | 
|---|
| 975 | break; | 
|---|
| 976 | } | 
|---|
| 977 | case ICommit: case IPartialCommit: | 
|---|
| 978 | case IBackCommit: {  /* inst. with unconditional explicit jumps */ | 
|---|
| 979 | int fft = finallabel(code, ft); | 
|---|
| 980 | code[i] = code[ft];  /* jump becomes that instruction... */ | 
|---|
| 981 | jumptothere(compst, i, fft);  /* but must correct its offset */ | 
|---|
| 982 | goto redo;  /* reoptimize its label */ | 
|---|
| 983 | } | 
|---|
| 984 | default: { | 
|---|
| 985 | jumptothere(compst, i, ft);  /* optimize label */ | 
|---|
| 986 | break; | 
|---|
| 987 | } | 
|---|
| 988 | } | 
|---|
| 989 | break; | 
|---|
| 990 | } | 
|---|
| 991 | default: break; | 
|---|
| 992 | } | 
|---|
| 993 | } | 
|---|
| 994 | assert(code[i - 1].i.code == IEnd); | 
|---|
| 995 | } | 
|---|
| 996 |  | 
|---|
| 997 |  | 
|---|
| 998 | /* | 
|---|
| 999 | ** Compile a pattern | 
|---|
| 1000 | */ | 
|---|
| 1001 | Instruction *compile (lua_State *L, Pattern *p) { | 
|---|
| 1002 | CompileState compst; | 
|---|
| 1003 | compst.p = p;  compst.ncode = 0;  compst.L = L; | 
|---|
| 1004 | realloccode(L, p, 2);  /* minimum initial size */ | 
|---|
| 1005 | codegen(&compst, p->tree, 0, NOINST, fullset); | 
|---|
| 1006 | addinstruction(&compst, IEnd, 0); | 
|---|
| 1007 | realloccode(L, p, compst.ncode);  /* set final size */ | 
|---|
| 1008 | peephole(&compst); | 
|---|
| 1009 | return p->code; | 
|---|
| 1010 | } | 
|---|
| 1011 |  | 
|---|
| 1012 |  | 
|---|
| 1013 | /* }====================================================== */ | 
|---|
| 1014 |  | 
|---|
| 1015 |  | 
|---|