| 1 | /* | 
|---|
| 2 | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ | 
|---|
| 3 | ** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license) | 
|---|
| 4 | */ | 
|---|
| 5 |  | 
|---|
| 6 | #include <limits.h> | 
|---|
| 7 | #include <string.h> | 
|---|
| 8 |  | 
|---|
| 9 |  | 
|---|
| 10 | #include "lua.h" | 
|---|
| 11 | #include "lauxlib.h" | 
|---|
| 12 |  | 
|---|
| 13 | #include "lpcap.h" | 
|---|
| 14 | #include "lptypes.h" | 
|---|
| 15 | #include "lpvm.h" | 
|---|
| 16 | #include "lpprint.h" | 
|---|
| 17 |  | 
|---|
| 18 |  | 
|---|
| 19 | /* initial size for call/backtrack stack */ | 
|---|
| 20 | #if !defined(INITBACK) | 
|---|
| 21 | #define INITBACK	MAXBACK | 
|---|
| 22 | #endif | 
|---|
| 23 |  | 
|---|
| 24 |  | 
|---|
| 25 | #define getoffset(p)	(((p) + 1)->offset) | 
|---|
| 26 |  | 
|---|
| 27 | static const Instruction giveup = {{IGiveup, 0, 0}}; | 
|---|
| 28 |  | 
|---|
| 29 |  | 
|---|
| 30 | /* | 
|---|
| 31 | ** {====================================================== | 
|---|
| 32 | ** Virtual Machine | 
|---|
| 33 | ** ======================================================= | 
|---|
| 34 | */ | 
|---|
| 35 |  | 
|---|
| 36 |  | 
|---|
| 37 | typedef struct Stack { | 
|---|
| 38 | const char *s;  /* saved position (or NULL for calls) */ | 
|---|
| 39 | const Instruction *p;  /* next instruction */ | 
|---|
| 40 | int caplevel; | 
|---|
| 41 | } Stack; | 
|---|
| 42 |  | 
|---|
| 43 |  | 
|---|
| 44 | #define getstackbase(L, ptop)	((Stack *)lua_touserdata(L, stackidx(ptop))) | 
|---|
| 45 |  | 
|---|
| 46 |  | 
|---|
| 47 | /* | 
|---|
| 48 | ** Make the size of the array of captures 'cap' twice as large as needed | 
|---|
| 49 | ** (which is 'captop'). ('n' is the number of new elements.) | 
|---|
| 50 | */ | 
|---|
| 51 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, | 
|---|
| 52 | int n, int ptop) { | 
|---|
| 53 | Capture *newc; | 
|---|
| 54 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) | 
|---|
| 55 | luaL_error(L, "too many captures"); | 
|---|
| 56 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); | 
|---|
| 57 | memcpy(newc, cap, (captop - n) * sizeof(Capture)); | 
|---|
| 58 | lua_replace(L, caplistidx(ptop)); | 
|---|
| 59 | return newc; | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 |  | 
|---|
| 63 | /* | 
|---|
| 64 | ** Double the size of the stack | 
|---|
| 65 | */ | 
|---|
| 66 | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { | 
|---|
| 67 | Stack *stack = getstackbase(L, ptop); | 
|---|
| 68 | Stack *newstack; | 
|---|
| 69 | int n = *stacklimit - stack;  /* current stack size */ | 
|---|
| 70 | int max, newn; | 
|---|
| 71 | lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); | 
|---|
| 72 | max = lua_tointeger(L, -1);  /* maximum allowed size */ | 
|---|
| 73 | lua_pop(L, 1); | 
|---|
| 74 | if (n >= max)  /* already at maximum size? */ | 
|---|
| 75 | luaL_error(L, "backtrack stack overflow (current limit is %d)", max); | 
|---|
| 76 | newn = 2 * n;  /* new size */ | 
|---|
| 77 | if (newn > max) newn = max; | 
|---|
| 78 | newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); | 
|---|
| 79 | memcpy(newstack, stack, n * sizeof(Stack)); | 
|---|
| 80 | lua_replace(L, stackidx(ptop)); | 
|---|
| 81 | *stacklimit = newstack + newn; | 
|---|
| 82 | return newstack + n;  /* return next position */ | 
|---|
| 83 | } | 
|---|
| 84 |  | 
|---|
| 85 |  | 
|---|
| 86 | /* | 
|---|
| 87 | ** Interpret the result of a dynamic capture: false -> fail; | 
|---|
| 88 | ** true -> keep current position; number -> next position. | 
|---|
| 89 | ** Return new subject position. 'fr' is stack index where | 
|---|
| 90 | ** is the result; 'curr' is current subject position; 'limit' | 
|---|
| 91 | ** is subject's size. | 
|---|
| 92 | */ | 
|---|
| 93 | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { | 
|---|
| 94 | lua_Integer res; | 
|---|
| 95 | if (!lua_toboolean(L, fr)) {  /* false value? */ | 
|---|
| 96 | lua_settop(L, fr - 1);  /* remove results */ | 
|---|
| 97 | return -1;  /* and fail */ | 
|---|
| 98 | } | 
|---|
| 99 | else if (lua_isboolean(L, fr))  /* true? */ | 
|---|
| 100 | res = curr;  /* keep current position */ | 
|---|
| 101 | else { | 
|---|
| 102 | res = lua_tointeger(L, fr) - 1;  /* new position */ | 
|---|
| 103 | if (res < curr || res > limit) | 
|---|
| 104 | luaL_error(L, "invalid position returned by match-time capture"); | 
|---|
| 105 | } | 
|---|
| 106 | lua_remove(L, fr);  /* remove first result (offset) */ | 
|---|
| 107 | return res; | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 |  | 
|---|
| 111 | /* | 
|---|
| 112 | ** Add capture values returned by a dynamic capture to the capture list | 
|---|
| 113 | ** 'base', nested inside a group capture. 'fd' indexes the first capture | 
|---|
| 114 | ** value, 'n' is the number of values (at least 1). | 
|---|
| 115 | */ | 
|---|
| 116 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { | 
|---|
| 117 | int i; | 
|---|
| 118 | base[0].kind = Cgroup;  /* create group capture */ | 
|---|
| 119 | base[0].siz = 0; | 
|---|
| 120 | base[0].idx = 0;  /* make it an anonymous group */ | 
|---|
| 121 | for (i = 1; i <= n; i++) {  /* add runtime captures */ | 
|---|
| 122 | base[i].kind = Cruntime; | 
|---|
| 123 | base[i].siz = 1;  /* mark it as closed */ | 
|---|
| 124 | base[i].idx = fd + i - 1;  /* stack index of capture value */ | 
|---|
| 125 | base[i].s = s; | 
|---|
| 126 | } | 
|---|
| 127 | base[i].kind = Cclose;  /* close group */ | 
|---|
| 128 | base[i].siz = 1; | 
|---|
| 129 | base[i].s = s; | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 |  | 
|---|
| 133 | /* | 
|---|
| 134 | ** Remove dynamic captures from the Lua stack (called in case of failure) | 
|---|
| 135 | */ | 
|---|
| 136 | static int removedyncap (lua_State *L, Capture *capture, | 
|---|
| 137 | int level, int last) { | 
|---|
| 138 | int id = finddyncap(capture + level, capture + last);  /* index of 1st cap. */ | 
|---|
| 139 | int top = lua_gettop(L); | 
|---|
| 140 | if (id == 0) return 0;  /* no dynamic captures? */ | 
|---|
| 141 | lua_settop(L, id - 1);  /* remove captures */ | 
|---|
| 142 | return top - id + 1;  /* number of values removed */ | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 |  | 
|---|
| 146 | /* | 
|---|
| 147 | ** Opcode interpreter | 
|---|
| 148 | */ | 
|---|
| 149 | const char *match (lua_State *L, const char *o, const char *s, const char *e, | 
|---|
| 150 | Instruction *op, Capture *capture, int ptop) { | 
|---|
| 151 | Stack stackbase[INITBACK]; | 
|---|
| 152 | Stack *stacklimit = stackbase + INITBACK; | 
|---|
| 153 | Stack *stack = stackbase;  /* point to first empty slot in stack */ | 
|---|
| 154 | int capsize = INITCAPSIZE; | 
|---|
| 155 | int captop = 0;  /* point to first empty slot in captures */ | 
|---|
| 156 | int ndyncap = 0;  /* number of dynamic captures (in Lua stack) */ | 
|---|
| 157 | const Instruction *p = op;  /* current instruction */ | 
|---|
| 158 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; | 
|---|
| 159 | lua_pushlightuserdata(L, stackbase); | 
|---|
| 160 | for (;;) { | 
|---|
| 161 | #if defined(DEBUG) | 
|---|
| 162 | printf( "-------------------------------------\n"); | 
|---|
| 163 | printcaplist(capture, capture + captop); | 
|---|
| 164 | printf( "s: |%s| stck:%d, dyncaps:%d, caps:%d  ", | 
|---|
| 165 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); | 
|---|
| 166 | printinst(op, p); | 
|---|
| 167 | #endif | 
|---|
| 168 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); | 
|---|
| 169 | switch ((Opcode)p->i.code) { | 
|---|
| 170 | case IEnd: { | 
|---|
| 171 | assert(stack == getstackbase(L, ptop) + 1); | 
|---|
| 172 | capture[captop].kind = Cclose; | 
|---|
| 173 | capture[captop].s = NULL; | 
|---|
| 174 | return s; | 
|---|
| 175 | } | 
|---|
| 176 | case IGiveup: { | 
|---|
| 177 | assert(stack == getstackbase(L, ptop)); | 
|---|
| 178 | return NULL; | 
|---|
| 179 | } | 
|---|
| 180 | case IRet: { | 
|---|
| 181 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); | 
|---|
| 182 | p = (--stack)->p; | 
|---|
| 183 | continue; | 
|---|
| 184 | } | 
|---|
| 185 | case IAny: { | 
|---|
| 186 | if (s < e) { p++; s++; } | 
|---|
| 187 | else goto fail; | 
|---|
| 188 | continue; | 
|---|
| 189 | } | 
|---|
| 190 | case ITestAny: { | 
|---|
| 191 | if (s < e) p += 2; | 
|---|
| 192 | else p += getoffset(p); | 
|---|
| 193 | continue; | 
|---|
| 194 | } | 
|---|
| 195 | case IChar: { | 
|---|
| 196 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } | 
|---|
| 197 | else goto fail; | 
|---|
| 198 | continue; | 
|---|
| 199 | } | 
|---|
| 200 | case ITestChar: { | 
|---|
| 201 | if ((byte)*s == p->i.aux && s < e) p += 2; | 
|---|
| 202 | else p += getoffset(p); | 
|---|
| 203 | continue; | 
|---|
| 204 | } | 
|---|
| 205 | case ISet: { | 
|---|
| 206 | int c = (byte)*s; | 
|---|
| 207 | if (testchar((p+1)->buff, c) && s < e) | 
|---|
| 208 | { p += CHARSETINSTSIZE; s++; } | 
|---|
| 209 | else goto fail; | 
|---|
| 210 | continue; | 
|---|
| 211 | } | 
|---|
| 212 | case ITestSet: { | 
|---|
| 213 | int c = (byte)*s; | 
|---|
| 214 | if (testchar((p + 2)->buff, c) && s < e) | 
|---|
| 215 | p += 1 + CHARSETINSTSIZE; | 
|---|
| 216 | else p += getoffset(p); | 
|---|
| 217 | continue; | 
|---|
| 218 | } | 
|---|
| 219 | case IBehind: { | 
|---|
| 220 | int n = p->i.aux; | 
|---|
| 221 | if (n > s - o) goto fail; | 
|---|
| 222 | s -= n; p++; | 
|---|
| 223 | continue; | 
|---|
| 224 | } | 
|---|
| 225 | case ISpan: { | 
|---|
| 226 | for (; s < e; s++) { | 
|---|
| 227 | int c = (byte)*s; | 
|---|
| 228 | if (!testchar((p+1)->buff, c)) break; | 
|---|
| 229 | } | 
|---|
| 230 | p += CHARSETINSTSIZE; | 
|---|
| 231 | continue; | 
|---|
| 232 | } | 
|---|
| 233 | case IJmp: { | 
|---|
| 234 | p += getoffset(p); | 
|---|
| 235 | continue; | 
|---|
| 236 | } | 
|---|
| 237 | case IChoice: { | 
|---|
| 238 | if (stack == stacklimit) | 
|---|
| 239 | stack = doublestack(L, &stacklimit, ptop); | 
|---|
| 240 | stack->p = p + getoffset(p); | 
|---|
| 241 | stack->s = s; | 
|---|
| 242 | stack->caplevel = captop; | 
|---|
| 243 | stack++; | 
|---|
| 244 | p += 2; | 
|---|
| 245 | continue; | 
|---|
| 246 | } | 
|---|
| 247 | case ICall: { | 
|---|
| 248 | if (stack == stacklimit) | 
|---|
| 249 | stack = doublestack(L, &stacklimit, ptop); | 
|---|
| 250 | stack->s = NULL; | 
|---|
| 251 | stack->p = p + 2;  /* save return address */ | 
|---|
| 252 | stack++; | 
|---|
| 253 | p += getoffset(p); | 
|---|
| 254 | continue; | 
|---|
| 255 | } | 
|---|
| 256 | case ICommit: { | 
|---|
| 257 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | 
|---|
| 258 | stack--; | 
|---|
| 259 | p += getoffset(p); | 
|---|
| 260 | continue; | 
|---|
| 261 | } | 
|---|
| 262 | case IPartialCommit: { | 
|---|
| 263 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | 
|---|
| 264 | (stack - 1)->s = s; | 
|---|
| 265 | (stack - 1)->caplevel = captop; | 
|---|
| 266 | p += getoffset(p); | 
|---|
| 267 | continue; | 
|---|
| 268 | } | 
|---|
| 269 | case IBackCommit: { | 
|---|
| 270 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); | 
|---|
| 271 | s = (--stack)->s; | 
|---|
| 272 | captop = stack->caplevel; | 
|---|
| 273 | p += getoffset(p); | 
|---|
| 274 | continue; | 
|---|
| 275 | } | 
|---|
| 276 | case IFailTwice: | 
|---|
| 277 | assert(stack > getstackbase(L, ptop)); | 
|---|
| 278 | stack--; | 
|---|
| 279 | /* go through */ | 
|---|
| 280 | case IFail: | 
|---|
| 281 | fail: { /* pattern failed: try to backtrack */ | 
|---|
| 282 | do {  /* remove pending calls */ | 
|---|
| 283 | assert(stack > getstackbase(L, ptop)); | 
|---|
| 284 | s = (--stack)->s; | 
|---|
| 285 | } while (s == NULL); | 
|---|
| 286 | if (ndyncap > 0)  /* is there matchtime captures? */ | 
|---|
| 287 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); | 
|---|
| 288 | captop = stack->caplevel; | 
|---|
| 289 | p = stack->p; | 
|---|
| 290 | #if defined(DEBUG) | 
|---|
| 291 | printf( "**FAIL**\n"); | 
|---|
| 292 | #endif | 
|---|
| 293 | continue; | 
|---|
| 294 | } | 
|---|
| 295 | case ICloseRunTime: { | 
|---|
| 296 | CapState cs; | 
|---|
| 297 | int rem, res, n; | 
|---|
| 298 | int fr = lua_gettop(L) + 1;  /* stack index of first result */ | 
|---|
| 299 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; | 
|---|
| 300 | n = runtimecap(&cs, capture + captop, s, &rem);  /* call function */ | 
|---|
| 301 | captop -= n;  /* remove nested captures */ | 
|---|
| 302 | ndyncap -= rem;  /* update number of dynamic captures */ | 
|---|
| 303 | fr -= rem;  /* 'rem' items were popped from Lua stack */ | 
|---|
| 304 | res = resdyncaptures(L, fr, s - o, e - o);  /* get result */ | 
|---|
| 305 | if (res == -1)  /* fail? */ | 
|---|
| 306 | goto fail; | 
|---|
| 307 | s = o + res;  /* else update current position */ | 
|---|
| 308 | n = lua_gettop(L) - fr + 1;  /* number of new captures */ | 
|---|
| 309 | ndyncap += n;  /* update number of dynamic captures */ | 
|---|
| 310 | if (n > 0) {  /* any new capture? */ | 
|---|
| 311 | if (fr + n >= SHRT_MAX) | 
|---|
| 312 | luaL_error(L, "too many results in match-time capture"); | 
|---|
| 313 | if ((captop += n + 2) >= capsize) { | 
|---|
| 314 | capture = doublecap(L, capture, captop, n + 2, ptop); | 
|---|
| 315 | capsize = 2 * captop; | 
|---|
| 316 | } | 
|---|
| 317 | /* add new captures to 'capture' list */ | 
|---|
| 318 | adddyncaptures(s, capture + captop - n - 2, n, fr); | 
|---|
| 319 | } | 
|---|
| 320 | p++; | 
|---|
| 321 | continue; | 
|---|
| 322 | } | 
|---|
| 323 | case ICloseCapture: { | 
|---|
| 324 | const char *s1 = s; | 
|---|
| 325 | assert(captop > 0); | 
|---|
| 326 | /* if possible, turn capture into a full capture */ | 
|---|
| 327 | if (capture[captop - 1].siz == 0 && | 
|---|
| 328 | s1 - capture[captop - 1].s < UCHAR_MAX) { | 
|---|
| 329 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; | 
|---|
| 330 | p++; | 
|---|
| 331 | continue; | 
|---|
| 332 | } | 
|---|
| 333 | else { | 
|---|
| 334 | capture[captop].siz = 1;  /* mark entry as closed */ | 
|---|
| 335 | capture[captop].s = s; | 
|---|
| 336 | goto pushcapture; | 
|---|
| 337 | } | 
|---|
| 338 | } | 
|---|
| 339 | case IOpenCapture: | 
|---|
| 340 | capture[captop].siz = 0;  /* mark entry as open */ | 
|---|
| 341 | capture[captop].s = s; | 
|---|
| 342 | goto pushcapture; | 
|---|
| 343 | case IFullCapture: | 
|---|
| 344 | capture[captop].siz = getoff(p) + 1;  /* save capture size */ | 
|---|
| 345 | capture[captop].s = s - getoff(p); | 
|---|
| 346 | /* goto pushcapture; */ | 
|---|
| 347 | pushcapture: { | 
|---|
| 348 | capture[captop].idx = p->i.key; | 
|---|
| 349 | capture[captop].kind = getkind(p); | 
|---|
| 350 | if (++captop >= capsize) { | 
|---|
| 351 | capture = doublecap(L, capture, captop, 0, ptop); | 
|---|
| 352 | capsize = 2 * captop; | 
|---|
| 353 | } | 
|---|
| 354 | p++; | 
|---|
| 355 | continue; | 
|---|
| 356 | } | 
|---|
| 357 | default: assert(0); return NULL; | 
|---|
| 358 | } | 
|---|
| 359 | } | 
|---|
| 360 | } | 
|---|
| 361 |  | 
|---|
| 362 | /* }====================================================== */ | 
|---|
| 363 |  | 
|---|
| 364 |  | 
|---|
| 365 |  | 
|---|