| 1 | /* |
| 2 | ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ |
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ |
| 5 | |
| 6 | #include "lua.h" |
| 7 | #include "lauxlib.h" |
| 8 | |
| 9 | #include "lpcap.h" |
| 10 | #include "lptypes.h" |
| 11 | |
| 12 | |
| 13 | #define captype(cap) ((cap)->kind) |
| 14 | |
| 15 | #define isclosecap(cap) (captype(cap) == Cclose) |
| 16 | |
| 17 | #define closeaddr(c) ((c)->s + (c)->siz - 1) |
| 18 | |
| 19 | #define isfullcap(cap) ((cap)->siz != 0) |
| 20 | |
| 21 | #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) |
| 22 | |
| 23 | #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) |
| 24 | |
| 25 | |
| 26 | |
| 27 | /* |
| 28 | ** Put at the cache for Lua values the value indexed by 'v' in ktable |
| 29 | ** of the running pattern (if it is not there yet); returns its index. |
| 30 | */ |
| 31 | static int updatecache (CapState *cs, int v) { |
| 32 | int idx = cs->ptop + 1; /* stack index of cache for Lua values */ |
| 33 | if (v != cs->valuecached) { /* not there? */ |
| 34 | getfromktable(cs, v); /* get value from 'ktable' */ |
| 35 | lua_replace(cs->L, idx); /* put it at reserved stack position */ |
| 36 | cs->valuecached = v; /* keep track of what is there */ |
| 37 | } |
| 38 | return idx; |
| 39 | } |
| 40 | |
| 41 | |
| 42 | static int pushcapture (CapState *cs); |
| 43 | |
| 44 | |
| 45 | /* |
| 46 | ** Goes back in a list of captures looking for an open capture |
| 47 | ** corresponding to a close |
| 48 | */ |
| 49 | static Capture *findopen (Capture *cap) { |
| 50 | int n = 0; /* number of closes waiting an open */ |
| 51 | for (;;) { |
| 52 | cap--; |
| 53 | if (isclosecap(cap)) n++; /* one more open to skip */ |
| 54 | else if (!isfullcap(cap)) |
| 55 | if (n-- == 0) return cap; |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | |
| 60 | /* |
| 61 | ** Go to the next capture |
| 62 | */ |
| 63 | static void nextcap (CapState *cs) { |
| 64 | Capture *cap = cs->cap; |
| 65 | if (!isfullcap(cap)) { /* not a single capture? */ |
| 66 | int n = 0; /* number of opens waiting a close */ |
| 67 | for (;;) { /* look for corresponding close */ |
| 68 | cap++; |
| 69 | if (isclosecap(cap)) { |
| 70 | if (n-- == 0) break; |
| 71 | } |
| 72 | else if (!isfullcap(cap)) n++; |
| 73 | } |
| 74 | } |
| 75 | cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ |
| 76 | } |
| 77 | |
| 78 | |
| 79 | /* |
| 80 | ** Push on the Lua stack all values generated by nested captures inside |
| 81 | ** the current capture. Returns number of values pushed. 'addextra' |
| 82 | ** makes it push the entire match after all captured values. The |
| 83 | ** entire match is pushed also if there are no other nested values, |
| 84 | ** so the function never returns zero. |
| 85 | */ |
| 86 | static int pushnestedvalues (CapState *cs, int ) { |
| 87 | Capture *co = cs->cap; |
| 88 | if (isfullcap(cs->cap++)) { /* no nested captures? */ |
| 89 | lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ |
| 90 | return 1; /* that is it */ |
| 91 | } |
| 92 | else { |
| 93 | int n = 0; |
| 94 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ |
| 95 | n += pushcapture(cs); |
| 96 | if (addextra || n == 0) { /* need extra? */ |
| 97 | lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ |
| 98 | n++; |
| 99 | } |
| 100 | cs->cap++; /* skip close entry */ |
| 101 | return n; |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | |
| 106 | /* |
| 107 | ** Push only the first value generated by nested captures |
| 108 | */ |
| 109 | static void pushonenestedvalue (CapState *cs) { |
| 110 | int n = pushnestedvalues(cs, 0); |
| 111 | if (n > 1) |
| 112 | lua_pop(cs->L, n - 1); /* pop extra values */ |
| 113 | } |
| 114 | |
| 115 | |
| 116 | /* |
| 117 | ** Try to find a named group capture with the name given at the top of |
| 118 | ** the stack; goes backward from 'cap'. |
| 119 | */ |
| 120 | static Capture *findback (CapState *cs, Capture *cap) { |
| 121 | lua_State *L = cs->L; |
| 122 | while (cap-- > cs->ocap) { /* repeat until end of list */ |
| 123 | if (isclosecap(cap)) |
| 124 | cap = findopen(cap); /* skip nested captures */ |
| 125 | else if (!isfullcap(cap)) |
| 126 | continue; /* opening an enclosing capture: skip and get previous */ |
| 127 | if (captype(cap) == Cgroup) { |
| 128 | getfromktable(cs, cap->idx); /* get group name */ |
| 129 | if (lp_equal(L, -2, -1)) { /* right group? */ |
| 130 | lua_pop(L, 2); /* remove reference name and group name */ |
| 131 | return cap; |
| 132 | } |
| 133 | else lua_pop(L, 1); /* remove group name */ |
| 134 | } |
| 135 | } |
| 136 | luaL_error(L, "back reference '%s' not found" , lua_tostring(L, -1)); |
| 137 | return NULL; /* to avoid warnings */ |
| 138 | } |
| 139 | |
| 140 | |
| 141 | /* |
| 142 | ** Back-reference capture. Return number of values pushed. |
| 143 | */ |
| 144 | static int backrefcap (CapState *cs) { |
| 145 | int n; |
| 146 | Capture *curr = cs->cap; |
| 147 | pushluaval(cs); /* reference name */ |
| 148 | cs->cap = findback(cs, curr); /* find corresponding group */ |
| 149 | n = pushnestedvalues(cs, 0); /* push group's values */ |
| 150 | cs->cap = curr + 1; |
| 151 | return n; |
| 152 | } |
| 153 | |
| 154 | |
| 155 | /* |
| 156 | ** Table capture: creates a new table and populates it with nested |
| 157 | ** captures. |
| 158 | */ |
| 159 | static int tablecap (CapState *cs) { |
| 160 | lua_State *L = cs->L; |
| 161 | int n = 0; |
| 162 | lua_newtable(L); |
| 163 | if (isfullcap(cs->cap++)) |
| 164 | return 1; /* table is empty */ |
| 165 | while (!isclosecap(cs->cap)) { |
| 166 | if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ |
| 167 | pushluaval(cs); /* push group name */ |
| 168 | pushonenestedvalue(cs); |
| 169 | lua_settable(L, -3); |
| 170 | } |
| 171 | else { /* not a named group */ |
| 172 | int i; |
| 173 | int k = pushcapture(cs); |
| 174 | for (i = k; i > 0; i--) /* store all values into table */ |
| 175 | lua_rawseti(L, -(i + 1), n + i); |
| 176 | n += k; |
| 177 | } |
| 178 | } |
| 179 | cs->cap++; /* skip close entry */ |
| 180 | return 1; /* number of values pushed (only the table) */ |
| 181 | } |
| 182 | |
| 183 | |
| 184 | /* |
| 185 | ** Table-query capture |
| 186 | */ |
| 187 | static int querycap (CapState *cs) { |
| 188 | int idx = cs->cap->idx; |
| 189 | pushonenestedvalue(cs); /* get nested capture */ |
| 190 | lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ |
| 191 | if (!lua_isnil(cs->L, -1)) |
| 192 | return 1; |
| 193 | else { /* no value */ |
| 194 | lua_pop(cs->L, 1); /* remove nil */ |
| 195 | return 0; |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | |
| 200 | /* |
| 201 | ** Fold capture |
| 202 | */ |
| 203 | static int foldcap (CapState *cs) { |
| 204 | int n; |
| 205 | lua_State *L = cs->L; |
| 206 | int idx = cs->cap->idx; |
| 207 | if (isfullcap(cs->cap++) || /* no nested captures? */ |
| 208 | isclosecap(cs->cap) || /* no nested captures (large subject)? */ |
| 209 | (n = pushcapture(cs)) == 0) /* nested captures with no values? */ |
| 210 | return luaL_error(L, "no initial value for fold capture" ); |
| 211 | if (n > 1) |
| 212 | lua_pop(L, n - 1); /* leave only one result for accumulator */ |
| 213 | while (!isclosecap(cs->cap)) { |
| 214 | lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ |
| 215 | lua_insert(L, -2); /* put it before accumulator */ |
| 216 | n = pushcapture(cs); /* get next capture's values */ |
| 217 | lua_call(L, n + 1, 1); /* call folding function */ |
| 218 | } |
| 219 | cs->cap++; /* skip close entry */ |
| 220 | return 1; /* only accumulator left on the stack */ |
| 221 | } |
| 222 | |
| 223 | |
| 224 | /* |
| 225 | ** Function capture |
| 226 | */ |
| 227 | static int functioncap (CapState *cs) { |
| 228 | int n; |
| 229 | int top = lua_gettop(cs->L); |
| 230 | pushluaval(cs); /* push function */ |
| 231 | n = pushnestedvalues(cs, 0); /* push nested captures */ |
| 232 | lua_call(cs->L, n, LUA_MULTRET); /* call function */ |
| 233 | return lua_gettop(cs->L) - top; /* return function's results */ |
| 234 | } |
| 235 | |
| 236 | |
| 237 | /* |
| 238 | ** Select capture |
| 239 | */ |
| 240 | static int numcap (CapState *cs) { |
| 241 | int idx = cs->cap->idx; /* value to select */ |
| 242 | if (idx == 0) { /* no values? */ |
| 243 | nextcap(cs); /* skip entire capture */ |
| 244 | return 0; /* no value produced */ |
| 245 | } |
| 246 | else { |
| 247 | int n = pushnestedvalues(cs, 0); |
| 248 | if (n < idx) /* invalid index? */ |
| 249 | return luaL_error(cs->L, "no capture '%d'" , idx); |
| 250 | else { |
| 251 | lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ |
| 252 | lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ |
| 253 | lua_pop(cs->L, n - 1); /* remove other captures */ |
| 254 | return 1; |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | |
| 260 | /* |
| 261 | ** Return the stack index of the first runtime capture in the given |
| 262 | ** list of captures (or zero if no runtime captures) |
| 263 | */ |
| 264 | int finddyncap (Capture *cap, Capture *last) { |
| 265 | for (; cap < last; cap++) { |
| 266 | if (cap->kind == Cruntime) |
| 267 | return cap->idx; /* stack position of first capture */ |
| 268 | } |
| 269 | return 0; /* no dynamic captures in this segment */ |
| 270 | } |
| 271 | |
| 272 | |
| 273 | /* |
| 274 | ** Calls a runtime capture. Returns number of captures removed by |
| 275 | ** the call, including the initial Cgroup. (Captures to be added are |
| 276 | ** on the Lua stack.) |
| 277 | */ |
| 278 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { |
| 279 | int n, id; |
| 280 | lua_State *L = cs->L; |
| 281 | int otop = lua_gettop(L); |
| 282 | Capture *open = findopen(close); |
| 283 | assert(captype(open) == Cgroup); |
| 284 | id = finddyncap(open, close); /* get first dynamic capture argument */ |
| 285 | close->kind = Cclose; /* closes the group */ |
| 286 | close->s = s; |
| 287 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ |
| 288 | luaL_checkstack(L, 4, "too many runtime captures" ); |
| 289 | pushluaval(cs); /* push function to be called */ |
| 290 | lua_pushvalue(L, SUBJIDX); /* push original subject */ |
| 291 | lua_pushinteger(L, s - cs->s + 1); /* push current position */ |
| 292 | n = pushnestedvalues(cs, 0); /* push nested captures */ |
| 293 | lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ |
| 294 | if (id > 0) { /* are there old dynamic captures to be removed? */ |
| 295 | int i; |
| 296 | for (i = id; i <= otop; i++) |
| 297 | lua_remove(L, id); /* remove old dynamic captures */ |
| 298 | *rem = otop - id + 1; /* total number of dynamic captures removed */ |
| 299 | } |
| 300 | else |
| 301 | *rem = 0; /* no dynamic captures removed */ |
| 302 | return close - open; /* number of captures of all kinds removed */ |
| 303 | } |
| 304 | |
| 305 | |
| 306 | /* |
| 307 | ** Auxiliary structure for substitution and string captures: keep |
| 308 | ** information about nested captures for future use, avoiding to push |
| 309 | ** string results into Lua |
| 310 | */ |
| 311 | typedef struct StrAux { |
| 312 | int isstring; /* whether capture is a string */ |
| 313 | union { |
| 314 | Capture *cp; /* if not a string, respective capture */ |
| 315 | struct { /* if it is a string... */ |
| 316 | const char *s; /* ... starts here */ |
| 317 | const char *e; /* ... ends here */ |
| 318 | } s; |
| 319 | } u; |
| 320 | } StrAux; |
| 321 | |
| 322 | #define MAXSTRCAPS 10 |
| 323 | |
| 324 | /* |
| 325 | ** Collect values from current capture into array 'cps'. Current |
| 326 | ** capture must be Cstring (first call) or Csimple (recursive calls). |
| 327 | ** (In first call, fills %0 with whole match for Cstring.) |
| 328 | ** Returns number of elements in the array that were filled. |
| 329 | */ |
| 330 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
| 331 | int k = n++; |
| 332 | cps[k].isstring = 1; /* get string value */ |
| 333 | cps[k].u.s.s = cs->cap->s; /* starts here */ |
| 334 | if (!isfullcap(cs->cap++)) { /* nested captures? */ |
| 335 | while (!isclosecap(cs->cap)) { /* traverse them */ |
| 336 | if (n >= MAXSTRCAPS) /* too many captures? */ |
| 337 | nextcap(cs); /* skip extra captures (will not need them) */ |
| 338 | else if (captype(cs->cap) == Csimple) /* string? */ |
| 339 | n = getstrcaps(cs, cps, n); /* put info. into array */ |
| 340 | else { |
| 341 | cps[n].isstring = 0; /* not a string */ |
| 342 | cps[n].u.cp = cs->cap; /* keep original capture */ |
| 343 | nextcap(cs); |
| 344 | n++; |
| 345 | } |
| 346 | } |
| 347 | cs->cap++; /* skip close */ |
| 348 | } |
| 349 | cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ |
| 350 | return n; |
| 351 | } |
| 352 | |
| 353 | |
| 354 | /* |
| 355 | ** add next capture value (which should be a string) to buffer 'b' |
| 356 | */ |
| 357 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); |
| 358 | |
| 359 | |
| 360 | /* |
| 361 | ** String capture: add result to buffer 'b' (instead of pushing |
| 362 | ** it into the stack) |
| 363 | */ |
| 364 | static void stringcap (luaL_Buffer *b, CapState *cs) { |
| 365 | StrAux cps[MAXSTRCAPS]; |
| 366 | int n; |
| 367 | size_t len, i; |
| 368 | const char *fmt; /* format string */ |
| 369 | fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); |
| 370 | n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ |
| 371 | for (i = 0; i < len; i++) { /* traverse them */ |
| 372 | if (fmt[i] != '%') /* not an escape? */ |
| 373 | luaL_addchar(b, fmt[i]); /* add it to buffer */ |
| 374 | else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ |
| 375 | luaL_addchar(b, fmt[i]); /* add to buffer */ |
| 376 | else { |
| 377 | int l = fmt[i] - '0'; /* capture index */ |
| 378 | if (l > n) |
| 379 | luaL_error(cs->L, "invalid capture index (%d)" , l); |
| 380 | else if (cps[l].isstring) |
| 381 | luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); |
| 382 | else { |
| 383 | Capture *curr = cs->cap; |
| 384 | cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ |
| 385 | if (!addonestring(b, cs, "capture" )) |
| 386 | luaL_error(cs->L, "no values in capture index %d" , l); |
| 387 | cs->cap = curr; /* continue from where it stopped */ |
| 388 | } |
| 389 | } |
| 390 | } |
| 391 | } |
| 392 | |
| 393 | |
| 394 | /* |
| 395 | ** Substitution capture: add result to buffer 'b' |
| 396 | */ |
| 397 | static void substcap (luaL_Buffer *b, CapState *cs) { |
| 398 | const char *curr = cs->cap->s; |
| 399 | if (isfullcap(cs->cap)) /* no nested captures? */ |
| 400 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ |
| 401 | else { |
| 402 | cs->cap++; /* skip open entry */ |
| 403 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ |
| 404 | const char *next = cs->cap->s; |
| 405 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ |
| 406 | if (addonestring(b, cs, "replacement" )) |
| 407 | curr = closeaddr(cs->cap - 1); /* continue after match */ |
| 408 | else /* no capture value */ |
| 409 | curr = next; /* keep original text in final result */ |
| 410 | } |
| 411 | luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ |
| 412 | } |
| 413 | cs->cap++; /* go to next capture */ |
| 414 | } |
| 415 | |
| 416 | |
| 417 | /* |
| 418 | ** Evaluates a capture and adds its first value to buffer 'b'; returns |
| 419 | ** whether there was a value |
| 420 | */ |
| 421 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { |
| 422 | switch (captype(cs->cap)) { |
| 423 | case Cstring: |
| 424 | stringcap(b, cs); /* add capture directly to buffer */ |
| 425 | return 1; |
| 426 | case Csubst: |
| 427 | substcap(b, cs); /* add capture directly to buffer */ |
| 428 | return 1; |
| 429 | default: { |
| 430 | lua_State *L = cs->L; |
| 431 | int n = pushcapture(cs); |
| 432 | if (n > 0) { |
| 433 | if (n > 1) lua_pop(L, n - 1); /* only one result */ |
| 434 | if (!lua_isstring(L, -1)) |
| 435 | luaL_error(L, "invalid %s value (a %s)" , what, luaL_typename(L, -1)); |
| 436 | luaL_addvalue(b); |
| 437 | } |
| 438 | return n; |
| 439 | } |
| 440 | } |
| 441 | } |
| 442 | |
| 443 | |
| 444 | /* |
| 445 | ** Push all values of the current capture into the stack; returns |
| 446 | ** number of values pushed |
| 447 | */ |
| 448 | static int pushcapture (CapState *cs) { |
| 449 | lua_State *L = cs->L; |
| 450 | luaL_checkstack(L, 4, "too many captures" ); |
| 451 | switch (captype(cs->cap)) { |
| 452 | case Cposition: { |
| 453 | lua_pushinteger(L, cs->cap->s - cs->s + 1); |
| 454 | cs->cap++; |
| 455 | return 1; |
| 456 | } |
| 457 | case Cconst: { |
| 458 | pushluaval(cs); |
| 459 | cs->cap++; |
| 460 | return 1; |
| 461 | } |
| 462 | case Carg: { |
| 463 | int arg = (cs->cap++)->idx; |
| 464 | if (arg + FIXEDARGS > cs->ptop) |
| 465 | return luaL_error(L, "reference to absent extra argument #%d" , arg); |
| 466 | lua_pushvalue(L, arg + FIXEDARGS); |
| 467 | return 1; |
| 468 | } |
| 469 | case Csimple: { |
| 470 | int k = pushnestedvalues(cs, 1); |
| 471 | lua_insert(L, -k); /* make whole match be first result */ |
| 472 | return k; |
| 473 | } |
| 474 | case Cruntime: { |
| 475 | lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ |
| 476 | return 1; |
| 477 | } |
| 478 | case Cstring: { |
| 479 | luaL_Buffer b; |
| 480 | luaL_buffinit(L, &b); |
| 481 | stringcap(&b, cs); |
| 482 | luaL_pushresult(&b); |
| 483 | return 1; |
| 484 | } |
| 485 | case Csubst: { |
| 486 | luaL_Buffer b; |
| 487 | luaL_buffinit(L, &b); |
| 488 | substcap(&b, cs); |
| 489 | luaL_pushresult(&b); |
| 490 | return 1; |
| 491 | } |
| 492 | case Cgroup: { |
| 493 | if (cs->cap->idx == 0) /* anonymous group? */ |
| 494 | return pushnestedvalues(cs, 0); /* add all nested values */ |
| 495 | else { /* named group: add no values */ |
| 496 | nextcap(cs); /* skip capture */ |
| 497 | return 0; |
| 498 | } |
| 499 | } |
| 500 | case Cbackref: return backrefcap(cs); |
| 501 | case Ctable: return tablecap(cs); |
| 502 | case Cfunction: return functioncap(cs); |
| 503 | case Cnum: return numcap(cs); |
| 504 | case Cquery: return querycap(cs); |
| 505 | case Cfold: return foldcap(cs); |
| 506 | default: assert(0); return 0; |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | |
| 511 | /* |
| 512 | ** Prepare a CapState structure and traverse the entire list of |
| 513 | ** captures in the stack pushing its results. 's' is the subject |
| 514 | ** string, 'r' is the final position of the match, and 'ptop' |
| 515 | ** the index in the stack where some useful values were pushed. |
| 516 | ** Returns the number of results pushed. (If the list produces no |
| 517 | ** results, push the final position of the match.) |
| 518 | */ |
| 519 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { |
| 520 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); |
| 521 | int n = 0; |
| 522 | if (!isclosecap(capture)) { /* is there any capture? */ |
| 523 | CapState cs; |
| 524 | cs.ocap = cs.cap = capture; cs.L = L; |
| 525 | cs.s = s; cs.valuecached = 0; cs.ptop = ptop; |
| 526 | do { /* collect their values */ |
| 527 | n += pushcapture(&cs); |
| 528 | } while (!isclosecap(cs.cap)); |
| 529 | } |
| 530 | if (n == 0) { /* no capture values? */ |
| 531 | lua_pushinteger(L, r - s + 1); /* return only end position */ |
| 532 | n = 1; |
| 533 | } |
| 534 | return n; |
| 535 | } |
| 536 | |
| 537 | |
| 538 | |