| 1 | /* |
| 2 | ** $Id: lpvm.c,v 1.9 2016/06/03 20:11:18 roberto Exp $ |
| 3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
| 4 | */ |
| 5 | |
| 6 | #include <limits.h> |
| 7 | #include <string.h> |
| 8 | |
| 9 | |
| 10 | #include "lua.h" |
| 11 | #include "lauxlib.h" |
| 12 | |
| 13 | #include "lpcap.h" |
| 14 | #include "lptypes.h" |
| 15 | #include "lpvm.h" |
| 16 | #include "lpprint.h" |
| 17 | |
| 18 | |
| 19 | /* initial size for call/backtrack stack */ |
| 20 | #if !defined(INITBACK) |
| 21 | #define INITBACK MAXBACK |
| 22 | #endif |
| 23 | |
| 24 | |
| 25 | #define getoffset(p) (((p) + 1)->offset) |
| 26 | |
| 27 | static const Instruction giveup = {{IGiveup, 0, 0}}; |
| 28 | |
| 29 | |
| 30 | /* |
| 31 | ** {====================================================== |
| 32 | ** Virtual Machine |
| 33 | ** ======================================================= |
| 34 | */ |
| 35 | |
| 36 | |
| 37 | typedef struct Stack { |
| 38 | const char *s; /* saved position (or NULL for calls) */ |
| 39 | const Instruction *p; /* next instruction */ |
| 40 | int caplevel; |
| 41 | } Stack; |
| 42 | |
| 43 | |
| 44 | #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop))) |
| 45 | |
| 46 | |
| 47 | /* |
| 48 | ** Make the size of the array of captures 'cap' twice as large as needed |
| 49 | ** (which is 'captop'). ('n' is the number of new elements.) |
| 50 | */ |
| 51 | static Capture *doublecap (lua_State *L, Capture *cap, int captop, |
| 52 | int n, int ptop) { |
| 53 | Capture *newc; |
| 54 | if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) |
| 55 | luaL_error(L, "too many captures" ); |
| 56 | newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); |
| 57 | memcpy(newc, cap, (captop - n) * sizeof(Capture)); |
| 58 | lua_replace(L, caplistidx(ptop)); |
| 59 | return newc; |
| 60 | } |
| 61 | |
| 62 | |
| 63 | /* |
| 64 | ** Double the size of the stack |
| 65 | */ |
| 66 | static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) { |
| 67 | Stack *stack = getstackbase(L, ptop); |
| 68 | Stack *newstack; |
| 69 | int n = *stacklimit - stack; /* current stack size */ |
| 70 | int max, newn; |
| 71 | lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX); |
| 72 | max = lua_tointeger(L, -1); /* maximum allowed size */ |
| 73 | lua_pop(L, 1); |
| 74 | if (n >= max) /* already at maximum size? */ |
| 75 | luaL_error(L, "backtrack stack overflow (current limit is %d)" , max); |
| 76 | newn = 2 * n; /* new size */ |
| 77 | if (newn > max) newn = max; |
| 78 | newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack)); |
| 79 | memcpy(newstack, stack, n * sizeof(Stack)); |
| 80 | lua_replace(L, stackidx(ptop)); |
| 81 | *stacklimit = newstack + newn; |
| 82 | return newstack + n; /* return next position */ |
| 83 | } |
| 84 | |
| 85 | |
| 86 | /* |
| 87 | ** Interpret the result of a dynamic capture: false -> fail; |
| 88 | ** true -> keep current position; number -> next position. |
| 89 | ** Return new subject position. 'fr' is stack index where |
| 90 | ** is the result; 'curr' is current subject position; 'limit' |
| 91 | ** is subject's size. |
| 92 | */ |
| 93 | static int resdyncaptures (lua_State *L, int fr, int curr, int limit) { |
| 94 | lua_Integer res; |
| 95 | if (!lua_toboolean(L, fr)) { /* false value? */ |
| 96 | lua_settop(L, fr - 1); /* remove results */ |
| 97 | return -1; /* and fail */ |
| 98 | } |
| 99 | else if (lua_isboolean(L, fr)) /* true? */ |
| 100 | res = curr; /* keep current position */ |
| 101 | else { |
| 102 | res = lua_tointeger(L, fr) - 1; /* new position */ |
| 103 | if (res < curr || res > limit) |
| 104 | luaL_error(L, "invalid position returned by match-time capture" ); |
| 105 | } |
| 106 | lua_remove(L, fr); /* remove first result (offset) */ |
| 107 | return res; |
| 108 | } |
| 109 | |
| 110 | |
| 111 | /* |
| 112 | ** Add capture values returned by a dynamic capture to the capture list |
| 113 | ** 'base', nested inside a group capture. 'fd' indexes the first capture |
| 114 | ** value, 'n' is the number of values (at least 1). |
| 115 | */ |
| 116 | static void adddyncaptures (const char *s, Capture *base, int n, int fd) { |
| 117 | int i; |
| 118 | base[0].kind = Cgroup; /* create group capture */ |
| 119 | base[0].siz = 0; |
| 120 | base[0].idx = 0; /* make it an anonymous group */ |
| 121 | for (i = 1; i <= n; i++) { /* add runtime captures */ |
| 122 | base[i].kind = Cruntime; |
| 123 | base[i].siz = 1; /* mark it as closed */ |
| 124 | base[i].idx = fd + i - 1; /* stack index of capture value */ |
| 125 | base[i].s = s; |
| 126 | } |
| 127 | base[i].kind = Cclose; /* close group */ |
| 128 | base[i].siz = 1; |
| 129 | base[i].s = s; |
| 130 | } |
| 131 | |
| 132 | |
| 133 | /* |
| 134 | ** Remove dynamic captures from the Lua stack (called in case of failure) |
| 135 | */ |
| 136 | static int removedyncap (lua_State *L, Capture *capture, |
| 137 | int level, int last) { |
| 138 | int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */ |
| 139 | int top = lua_gettop(L); |
| 140 | if (id == 0) return 0; /* no dynamic captures? */ |
| 141 | lua_settop(L, id - 1); /* remove captures */ |
| 142 | return top - id + 1; /* number of values removed */ |
| 143 | } |
| 144 | |
| 145 | |
| 146 | /* |
| 147 | ** Opcode interpreter |
| 148 | */ |
| 149 | const char *match (lua_State *L, const char *o, const char *s, const char *e, |
| 150 | Instruction *op, Capture *capture, int ptop) { |
| 151 | Stack stackbase[INITBACK]; |
| 152 | Stack *stacklimit = stackbase + INITBACK; |
| 153 | Stack *stack = stackbase; /* point to first empty slot in stack */ |
| 154 | int capsize = INITCAPSIZE; |
| 155 | int captop = 0; /* point to first empty slot in captures */ |
| 156 | int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ |
| 157 | const Instruction *p = op; /* current instruction */ |
| 158 | stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; |
| 159 | lua_pushlightuserdata(L, stackbase); |
| 160 | for (;;) { |
| 161 | #if defined(DEBUG) |
| 162 | printf("-------------------------------------\n" ); |
| 163 | printcaplist(capture, capture + captop); |
| 164 | printf("s: |%s| stck:%d, dyncaps:%d, caps:%d " , |
| 165 | s, (int)(stack - getstackbase(L, ptop)), ndyncap, captop); |
| 166 | printinst(op, p); |
| 167 | #endif |
| 168 | assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); |
| 169 | switch ((Opcode)p->i.code) { |
| 170 | case IEnd: { |
| 171 | assert(stack == getstackbase(L, ptop) + 1); |
| 172 | capture[captop].kind = Cclose; |
| 173 | capture[captop].s = NULL; |
| 174 | return s; |
| 175 | } |
| 176 | case IGiveup: { |
| 177 | assert(stack == getstackbase(L, ptop)); |
| 178 | return NULL; |
| 179 | } |
| 180 | case IRet: { |
| 181 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); |
| 182 | p = (--stack)->p; |
| 183 | continue; |
| 184 | } |
| 185 | case IAny: { |
| 186 | if (s < e) { p++; s++; } |
| 187 | else goto fail; |
| 188 | continue; |
| 189 | } |
| 190 | case ITestAny: { |
| 191 | if (s < e) p += 2; |
| 192 | else p += getoffset(p); |
| 193 | continue; |
| 194 | } |
| 195 | case IChar: { |
| 196 | if ((byte)*s == p->i.aux && s < e) { p++; s++; } |
| 197 | else goto fail; |
| 198 | continue; |
| 199 | } |
| 200 | case ITestChar: { |
| 201 | if ((byte)*s == p->i.aux && s < e) p += 2; |
| 202 | else p += getoffset(p); |
| 203 | continue; |
| 204 | } |
| 205 | case ISet: { |
| 206 | int c = (byte)*s; |
| 207 | if (testchar((p+1)->buff, c) && s < e) |
| 208 | { p += CHARSETINSTSIZE; s++; } |
| 209 | else goto fail; |
| 210 | continue; |
| 211 | } |
| 212 | case ITestSet: { |
| 213 | int c = (byte)*s; |
| 214 | if (testchar((p + 2)->buff, c) && s < e) |
| 215 | p += 1 + CHARSETINSTSIZE; |
| 216 | else p += getoffset(p); |
| 217 | continue; |
| 218 | } |
| 219 | case IBehind: { |
| 220 | int n = p->i.aux; |
| 221 | if (n > s - o) goto fail; |
| 222 | s -= n; p++; |
| 223 | continue; |
| 224 | } |
| 225 | case ISpan: { |
| 226 | for (; s < e; s++) { |
| 227 | int c = (byte)*s; |
| 228 | if (!testchar((p+1)->buff, c)) break; |
| 229 | } |
| 230 | p += CHARSETINSTSIZE; |
| 231 | continue; |
| 232 | } |
| 233 | case IJmp: { |
| 234 | p += getoffset(p); |
| 235 | continue; |
| 236 | } |
| 237 | case IChoice: { |
| 238 | if (stack == stacklimit) |
| 239 | stack = doublestack(L, &stacklimit, ptop); |
| 240 | stack->p = p + getoffset(p); |
| 241 | stack->s = s; |
| 242 | stack->caplevel = captop; |
| 243 | stack++; |
| 244 | p += 2; |
| 245 | continue; |
| 246 | } |
| 247 | case ICall: { |
| 248 | if (stack == stacklimit) |
| 249 | stack = doublestack(L, &stacklimit, ptop); |
| 250 | stack->s = NULL; |
| 251 | stack->p = p + 2; /* save return address */ |
| 252 | stack++; |
| 253 | p += getoffset(p); |
| 254 | continue; |
| 255 | } |
| 256 | case ICommit: { |
| 257 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); |
| 258 | stack--; |
| 259 | p += getoffset(p); |
| 260 | continue; |
| 261 | } |
| 262 | case IPartialCommit: { |
| 263 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); |
| 264 | (stack - 1)->s = s; |
| 265 | (stack - 1)->caplevel = captop; |
| 266 | p += getoffset(p); |
| 267 | continue; |
| 268 | } |
| 269 | case IBackCommit: { |
| 270 | assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL); |
| 271 | s = (--stack)->s; |
| 272 | captop = stack->caplevel; |
| 273 | p += getoffset(p); |
| 274 | continue; |
| 275 | } |
| 276 | case IFailTwice: |
| 277 | assert(stack > getstackbase(L, ptop)); |
| 278 | stack--; |
| 279 | /* go through */ |
| 280 | case IFail: |
| 281 | fail: { /* pattern failed: try to backtrack */ |
| 282 | do { /* remove pending calls */ |
| 283 | assert(stack > getstackbase(L, ptop)); |
| 284 | s = (--stack)->s; |
| 285 | } while (s == NULL); |
| 286 | if (ndyncap > 0) /* is there matchtime captures? */ |
| 287 | ndyncap -= removedyncap(L, capture, stack->caplevel, captop); |
| 288 | captop = stack->caplevel; |
| 289 | p = stack->p; |
| 290 | #if defined(DEBUG) |
| 291 | printf("**FAIL**\n" ); |
| 292 | #endif |
| 293 | continue; |
| 294 | } |
| 295 | case ICloseRunTime: { |
| 296 | CapState cs; |
| 297 | int rem, res, n; |
| 298 | int fr = lua_gettop(L) + 1; /* stack index of first result */ |
| 299 | cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop; |
| 300 | n = runtimecap(&cs, capture + captop, s, &rem); /* call function */ |
| 301 | captop -= n; /* remove nested captures */ |
| 302 | ndyncap -= rem; /* update number of dynamic captures */ |
| 303 | fr -= rem; /* 'rem' items were popped from Lua stack */ |
| 304 | res = resdyncaptures(L, fr, s - o, e - o); /* get result */ |
| 305 | if (res == -1) /* fail? */ |
| 306 | goto fail; |
| 307 | s = o + res; /* else update current position */ |
| 308 | n = lua_gettop(L) - fr + 1; /* number of new captures */ |
| 309 | ndyncap += n; /* update number of dynamic captures */ |
| 310 | if (n > 0) { /* any new capture? */ |
| 311 | if (fr + n >= SHRT_MAX) |
| 312 | luaL_error(L, "too many results in match-time capture" ); |
| 313 | if ((captop += n + 2) >= capsize) { |
| 314 | capture = doublecap(L, capture, captop, n + 2, ptop); |
| 315 | capsize = 2 * captop; |
| 316 | } |
| 317 | /* add new captures to 'capture' list */ |
| 318 | adddyncaptures(s, capture + captop - n - 2, n, fr); |
| 319 | } |
| 320 | p++; |
| 321 | continue; |
| 322 | } |
| 323 | case ICloseCapture: { |
| 324 | const char *s1 = s; |
| 325 | assert(captop > 0); |
| 326 | /* if possible, turn capture into a full capture */ |
| 327 | if (capture[captop - 1].siz == 0 && |
| 328 | s1 - capture[captop - 1].s < UCHAR_MAX) { |
| 329 | capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; |
| 330 | p++; |
| 331 | continue; |
| 332 | } |
| 333 | else { |
| 334 | capture[captop].siz = 1; /* mark entry as closed */ |
| 335 | capture[captop].s = s; |
| 336 | goto pushcapture; |
| 337 | } |
| 338 | } |
| 339 | case IOpenCapture: |
| 340 | capture[captop].siz = 0; /* mark entry as open */ |
| 341 | capture[captop].s = s; |
| 342 | goto pushcapture; |
| 343 | case IFullCapture: |
| 344 | capture[captop].siz = getoff(p) + 1; /* save capture size */ |
| 345 | capture[captop].s = s - getoff(p); |
| 346 | /* goto pushcapture; */ |
| 347 | pushcapture: { |
| 348 | capture[captop].idx = p->i.key; |
| 349 | capture[captop].kind = getkind(p); |
| 350 | if (++captop >= capsize) { |
| 351 | capture = doublecap(L, capture, captop, 0, ptop); |
| 352 | capsize = 2 * captop; |
| 353 | } |
| 354 | p++; |
| 355 | continue; |
| 356 | } |
| 357 | default: assert(0); return NULL; |
| 358 | } |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | /* }====================================================== */ |
| 363 | |
| 364 | |
| 365 | |