1 | /* |
2 | ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $ |
3 | ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license) |
4 | */ |
5 | |
6 | #include "lua.h" |
7 | #include "lauxlib.h" |
8 | |
9 | #include "lpcap.h" |
10 | #include "lptypes.h" |
11 | |
12 | |
13 | #define captype(cap) ((cap)->kind) |
14 | |
15 | #define isclosecap(cap) (captype(cap) == Cclose) |
16 | |
17 | #define closeaddr(c) ((c)->s + (c)->siz - 1) |
18 | |
19 | #define isfullcap(cap) ((cap)->siz != 0) |
20 | |
21 | #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v) |
22 | |
23 | #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx) |
24 | |
25 | |
26 | |
27 | /* |
28 | ** Put at the cache for Lua values the value indexed by 'v' in ktable |
29 | ** of the running pattern (if it is not there yet); returns its index. |
30 | */ |
31 | static int updatecache (CapState *cs, int v) { |
32 | int idx = cs->ptop + 1; /* stack index of cache for Lua values */ |
33 | if (v != cs->valuecached) { /* not there? */ |
34 | getfromktable(cs, v); /* get value from 'ktable' */ |
35 | lua_replace(cs->L, idx); /* put it at reserved stack position */ |
36 | cs->valuecached = v; /* keep track of what is there */ |
37 | } |
38 | return idx; |
39 | } |
40 | |
41 | |
42 | static int pushcapture (CapState *cs); |
43 | |
44 | |
45 | /* |
46 | ** Goes back in a list of captures looking for an open capture |
47 | ** corresponding to a close |
48 | */ |
49 | static Capture *findopen (Capture *cap) { |
50 | int n = 0; /* number of closes waiting an open */ |
51 | for (;;) { |
52 | cap--; |
53 | if (isclosecap(cap)) n++; /* one more open to skip */ |
54 | else if (!isfullcap(cap)) |
55 | if (n-- == 0) return cap; |
56 | } |
57 | } |
58 | |
59 | |
60 | /* |
61 | ** Go to the next capture |
62 | */ |
63 | static void nextcap (CapState *cs) { |
64 | Capture *cap = cs->cap; |
65 | if (!isfullcap(cap)) { /* not a single capture? */ |
66 | int n = 0; /* number of opens waiting a close */ |
67 | for (;;) { /* look for corresponding close */ |
68 | cap++; |
69 | if (isclosecap(cap)) { |
70 | if (n-- == 0) break; |
71 | } |
72 | else if (!isfullcap(cap)) n++; |
73 | } |
74 | } |
75 | cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */ |
76 | } |
77 | |
78 | |
79 | /* |
80 | ** Push on the Lua stack all values generated by nested captures inside |
81 | ** the current capture. Returns number of values pushed. 'addextra' |
82 | ** makes it push the entire match after all captured values. The |
83 | ** entire match is pushed also if there are no other nested values, |
84 | ** so the function never returns zero. |
85 | */ |
86 | static int pushnestedvalues (CapState *cs, int ) { |
87 | Capture *co = cs->cap; |
88 | if (isfullcap(cs->cap++)) { /* no nested captures? */ |
89 | lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */ |
90 | return 1; /* that is it */ |
91 | } |
92 | else { |
93 | int n = 0; |
94 | while (!isclosecap(cs->cap)) /* repeat for all nested patterns */ |
95 | n += pushcapture(cs); |
96 | if (addextra || n == 0) { /* need extra? */ |
97 | lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ |
98 | n++; |
99 | } |
100 | cs->cap++; /* skip close entry */ |
101 | return n; |
102 | } |
103 | } |
104 | |
105 | |
106 | /* |
107 | ** Push only the first value generated by nested captures |
108 | */ |
109 | static void pushonenestedvalue (CapState *cs) { |
110 | int n = pushnestedvalues(cs, 0); |
111 | if (n > 1) |
112 | lua_pop(cs->L, n - 1); /* pop extra values */ |
113 | } |
114 | |
115 | |
116 | /* |
117 | ** Try to find a named group capture with the name given at the top of |
118 | ** the stack; goes backward from 'cap'. |
119 | */ |
120 | static Capture *findback (CapState *cs, Capture *cap) { |
121 | lua_State *L = cs->L; |
122 | while (cap-- > cs->ocap) { /* repeat until end of list */ |
123 | if (isclosecap(cap)) |
124 | cap = findopen(cap); /* skip nested captures */ |
125 | else if (!isfullcap(cap)) |
126 | continue; /* opening an enclosing capture: skip and get previous */ |
127 | if (captype(cap) == Cgroup) { |
128 | getfromktable(cs, cap->idx); /* get group name */ |
129 | if (lp_equal(L, -2, -1)) { /* right group? */ |
130 | lua_pop(L, 2); /* remove reference name and group name */ |
131 | return cap; |
132 | } |
133 | else lua_pop(L, 1); /* remove group name */ |
134 | } |
135 | } |
136 | luaL_error(L, "back reference '%s' not found" , lua_tostring(L, -1)); |
137 | return NULL; /* to avoid warnings */ |
138 | } |
139 | |
140 | |
141 | /* |
142 | ** Back-reference capture. Return number of values pushed. |
143 | */ |
144 | static int backrefcap (CapState *cs) { |
145 | int n; |
146 | Capture *curr = cs->cap; |
147 | pushluaval(cs); /* reference name */ |
148 | cs->cap = findback(cs, curr); /* find corresponding group */ |
149 | n = pushnestedvalues(cs, 0); /* push group's values */ |
150 | cs->cap = curr + 1; |
151 | return n; |
152 | } |
153 | |
154 | |
155 | /* |
156 | ** Table capture: creates a new table and populates it with nested |
157 | ** captures. |
158 | */ |
159 | static int tablecap (CapState *cs) { |
160 | lua_State *L = cs->L; |
161 | int n = 0; |
162 | lua_newtable(L); |
163 | if (isfullcap(cs->cap++)) |
164 | return 1; /* table is empty */ |
165 | while (!isclosecap(cs->cap)) { |
166 | if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */ |
167 | pushluaval(cs); /* push group name */ |
168 | pushonenestedvalue(cs); |
169 | lua_settable(L, -3); |
170 | } |
171 | else { /* not a named group */ |
172 | int i; |
173 | int k = pushcapture(cs); |
174 | for (i = k; i > 0; i--) /* store all values into table */ |
175 | lua_rawseti(L, -(i + 1), n + i); |
176 | n += k; |
177 | } |
178 | } |
179 | cs->cap++; /* skip close entry */ |
180 | return 1; /* number of values pushed (only the table) */ |
181 | } |
182 | |
183 | |
184 | /* |
185 | ** Table-query capture |
186 | */ |
187 | static int querycap (CapState *cs) { |
188 | int idx = cs->cap->idx; |
189 | pushonenestedvalue(cs); /* get nested capture */ |
190 | lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */ |
191 | if (!lua_isnil(cs->L, -1)) |
192 | return 1; |
193 | else { /* no value */ |
194 | lua_pop(cs->L, 1); /* remove nil */ |
195 | return 0; |
196 | } |
197 | } |
198 | |
199 | |
200 | /* |
201 | ** Fold capture |
202 | */ |
203 | static int foldcap (CapState *cs) { |
204 | int n; |
205 | lua_State *L = cs->L; |
206 | int idx = cs->cap->idx; |
207 | if (isfullcap(cs->cap++) || /* no nested captures? */ |
208 | isclosecap(cs->cap) || /* no nested captures (large subject)? */ |
209 | (n = pushcapture(cs)) == 0) /* nested captures with no values? */ |
210 | return luaL_error(L, "no initial value for fold capture" ); |
211 | if (n > 1) |
212 | lua_pop(L, n - 1); /* leave only one result for accumulator */ |
213 | while (!isclosecap(cs->cap)) { |
214 | lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */ |
215 | lua_insert(L, -2); /* put it before accumulator */ |
216 | n = pushcapture(cs); /* get next capture's values */ |
217 | lua_call(L, n + 1, 1); /* call folding function */ |
218 | } |
219 | cs->cap++; /* skip close entry */ |
220 | return 1; /* only accumulator left on the stack */ |
221 | } |
222 | |
223 | |
224 | /* |
225 | ** Function capture |
226 | */ |
227 | static int functioncap (CapState *cs) { |
228 | int n; |
229 | int top = lua_gettop(cs->L); |
230 | pushluaval(cs); /* push function */ |
231 | n = pushnestedvalues(cs, 0); /* push nested captures */ |
232 | lua_call(cs->L, n, LUA_MULTRET); /* call function */ |
233 | return lua_gettop(cs->L) - top; /* return function's results */ |
234 | } |
235 | |
236 | |
237 | /* |
238 | ** Select capture |
239 | */ |
240 | static int numcap (CapState *cs) { |
241 | int idx = cs->cap->idx; /* value to select */ |
242 | if (idx == 0) { /* no values? */ |
243 | nextcap(cs); /* skip entire capture */ |
244 | return 0; /* no value produced */ |
245 | } |
246 | else { |
247 | int n = pushnestedvalues(cs, 0); |
248 | if (n < idx) /* invalid index? */ |
249 | return luaL_error(cs->L, "no capture '%d'" , idx); |
250 | else { |
251 | lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */ |
252 | lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */ |
253 | lua_pop(cs->L, n - 1); /* remove other captures */ |
254 | return 1; |
255 | } |
256 | } |
257 | } |
258 | |
259 | |
260 | /* |
261 | ** Return the stack index of the first runtime capture in the given |
262 | ** list of captures (or zero if no runtime captures) |
263 | */ |
264 | int finddyncap (Capture *cap, Capture *last) { |
265 | for (; cap < last; cap++) { |
266 | if (cap->kind == Cruntime) |
267 | return cap->idx; /* stack position of first capture */ |
268 | } |
269 | return 0; /* no dynamic captures in this segment */ |
270 | } |
271 | |
272 | |
273 | /* |
274 | ** Calls a runtime capture. Returns number of captures removed by |
275 | ** the call, including the initial Cgroup. (Captures to be added are |
276 | ** on the Lua stack.) |
277 | */ |
278 | int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) { |
279 | int n, id; |
280 | lua_State *L = cs->L; |
281 | int otop = lua_gettop(L); |
282 | Capture *open = findopen(close); |
283 | assert(captype(open) == Cgroup); |
284 | id = finddyncap(open, close); /* get first dynamic capture argument */ |
285 | close->kind = Cclose; /* closes the group */ |
286 | close->s = s; |
287 | cs->cap = open; cs->valuecached = 0; /* prepare capture state */ |
288 | luaL_checkstack(L, 4, "too many runtime captures" ); |
289 | pushluaval(cs); /* push function to be called */ |
290 | lua_pushvalue(L, SUBJIDX); /* push original subject */ |
291 | lua_pushinteger(L, s - cs->s + 1); /* push current position */ |
292 | n = pushnestedvalues(cs, 0); /* push nested captures */ |
293 | lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */ |
294 | if (id > 0) { /* are there old dynamic captures to be removed? */ |
295 | int i; |
296 | for (i = id; i <= otop; i++) |
297 | lua_remove(L, id); /* remove old dynamic captures */ |
298 | *rem = otop - id + 1; /* total number of dynamic captures removed */ |
299 | } |
300 | else |
301 | *rem = 0; /* no dynamic captures removed */ |
302 | return close - open; /* number of captures of all kinds removed */ |
303 | } |
304 | |
305 | |
306 | /* |
307 | ** Auxiliary structure for substitution and string captures: keep |
308 | ** information about nested captures for future use, avoiding to push |
309 | ** string results into Lua |
310 | */ |
311 | typedef struct StrAux { |
312 | int isstring; /* whether capture is a string */ |
313 | union { |
314 | Capture *cp; /* if not a string, respective capture */ |
315 | struct { /* if it is a string... */ |
316 | const char *s; /* ... starts here */ |
317 | const char *e; /* ... ends here */ |
318 | } s; |
319 | } u; |
320 | } StrAux; |
321 | |
322 | #define MAXSTRCAPS 10 |
323 | |
324 | /* |
325 | ** Collect values from current capture into array 'cps'. Current |
326 | ** capture must be Cstring (first call) or Csimple (recursive calls). |
327 | ** (In first call, fills %0 with whole match for Cstring.) |
328 | ** Returns number of elements in the array that were filled. |
329 | */ |
330 | static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
331 | int k = n++; |
332 | cps[k].isstring = 1; /* get string value */ |
333 | cps[k].u.s.s = cs->cap->s; /* starts here */ |
334 | if (!isfullcap(cs->cap++)) { /* nested captures? */ |
335 | while (!isclosecap(cs->cap)) { /* traverse them */ |
336 | if (n >= MAXSTRCAPS) /* too many captures? */ |
337 | nextcap(cs); /* skip extra captures (will not need them) */ |
338 | else if (captype(cs->cap) == Csimple) /* string? */ |
339 | n = getstrcaps(cs, cps, n); /* put info. into array */ |
340 | else { |
341 | cps[n].isstring = 0; /* not a string */ |
342 | cps[n].u.cp = cs->cap; /* keep original capture */ |
343 | nextcap(cs); |
344 | n++; |
345 | } |
346 | } |
347 | cs->cap++; /* skip close */ |
348 | } |
349 | cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */ |
350 | return n; |
351 | } |
352 | |
353 | |
354 | /* |
355 | ** add next capture value (which should be a string) to buffer 'b' |
356 | */ |
357 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what); |
358 | |
359 | |
360 | /* |
361 | ** String capture: add result to buffer 'b' (instead of pushing |
362 | ** it into the stack) |
363 | */ |
364 | static void stringcap (luaL_Buffer *b, CapState *cs) { |
365 | StrAux cps[MAXSTRCAPS]; |
366 | int n; |
367 | size_t len, i; |
368 | const char *fmt; /* format string */ |
369 | fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len); |
370 | n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */ |
371 | for (i = 0; i < len; i++) { /* traverse them */ |
372 | if (fmt[i] != '%') /* not an escape? */ |
373 | luaL_addchar(b, fmt[i]); /* add it to buffer */ |
374 | else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */ |
375 | luaL_addchar(b, fmt[i]); /* add to buffer */ |
376 | else { |
377 | int l = fmt[i] - '0'; /* capture index */ |
378 | if (l > n) |
379 | luaL_error(cs->L, "invalid capture index (%d)" , l); |
380 | else if (cps[l].isstring) |
381 | luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s); |
382 | else { |
383 | Capture *curr = cs->cap; |
384 | cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */ |
385 | if (!addonestring(b, cs, "capture" )) |
386 | luaL_error(cs->L, "no values in capture index %d" , l); |
387 | cs->cap = curr; /* continue from where it stopped */ |
388 | } |
389 | } |
390 | } |
391 | } |
392 | |
393 | |
394 | /* |
395 | ** Substitution capture: add result to buffer 'b' |
396 | */ |
397 | static void substcap (luaL_Buffer *b, CapState *cs) { |
398 | const char *curr = cs->cap->s; |
399 | if (isfullcap(cs->cap)) /* no nested captures? */ |
400 | luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */ |
401 | else { |
402 | cs->cap++; /* skip open entry */ |
403 | while (!isclosecap(cs->cap)) { /* traverse nested captures */ |
404 | const char *next = cs->cap->s; |
405 | luaL_addlstring(b, curr, next - curr); /* add text up to capture */ |
406 | if (addonestring(b, cs, "replacement" )) |
407 | curr = closeaddr(cs->cap - 1); /* continue after match */ |
408 | else /* no capture value */ |
409 | curr = next; /* keep original text in final result */ |
410 | } |
411 | luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */ |
412 | } |
413 | cs->cap++; /* go to next capture */ |
414 | } |
415 | |
416 | |
417 | /* |
418 | ** Evaluates a capture and adds its first value to buffer 'b'; returns |
419 | ** whether there was a value |
420 | */ |
421 | static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) { |
422 | switch (captype(cs->cap)) { |
423 | case Cstring: |
424 | stringcap(b, cs); /* add capture directly to buffer */ |
425 | return 1; |
426 | case Csubst: |
427 | substcap(b, cs); /* add capture directly to buffer */ |
428 | return 1; |
429 | default: { |
430 | lua_State *L = cs->L; |
431 | int n = pushcapture(cs); |
432 | if (n > 0) { |
433 | if (n > 1) lua_pop(L, n - 1); /* only one result */ |
434 | if (!lua_isstring(L, -1)) |
435 | luaL_error(L, "invalid %s value (a %s)" , what, luaL_typename(L, -1)); |
436 | luaL_addvalue(b); |
437 | } |
438 | return n; |
439 | } |
440 | } |
441 | } |
442 | |
443 | |
444 | /* |
445 | ** Push all values of the current capture into the stack; returns |
446 | ** number of values pushed |
447 | */ |
448 | static int pushcapture (CapState *cs) { |
449 | lua_State *L = cs->L; |
450 | luaL_checkstack(L, 4, "too many captures" ); |
451 | switch (captype(cs->cap)) { |
452 | case Cposition: { |
453 | lua_pushinteger(L, cs->cap->s - cs->s + 1); |
454 | cs->cap++; |
455 | return 1; |
456 | } |
457 | case Cconst: { |
458 | pushluaval(cs); |
459 | cs->cap++; |
460 | return 1; |
461 | } |
462 | case Carg: { |
463 | int arg = (cs->cap++)->idx; |
464 | if (arg + FIXEDARGS > cs->ptop) |
465 | return luaL_error(L, "reference to absent extra argument #%d" , arg); |
466 | lua_pushvalue(L, arg + FIXEDARGS); |
467 | return 1; |
468 | } |
469 | case Csimple: { |
470 | int k = pushnestedvalues(cs, 1); |
471 | lua_insert(L, -k); /* make whole match be first result */ |
472 | return k; |
473 | } |
474 | case Cruntime: { |
475 | lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */ |
476 | return 1; |
477 | } |
478 | case Cstring: { |
479 | luaL_Buffer b; |
480 | luaL_buffinit(L, &b); |
481 | stringcap(&b, cs); |
482 | luaL_pushresult(&b); |
483 | return 1; |
484 | } |
485 | case Csubst: { |
486 | luaL_Buffer b; |
487 | luaL_buffinit(L, &b); |
488 | substcap(&b, cs); |
489 | luaL_pushresult(&b); |
490 | return 1; |
491 | } |
492 | case Cgroup: { |
493 | if (cs->cap->idx == 0) /* anonymous group? */ |
494 | return pushnestedvalues(cs, 0); /* add all nested values */ |
495 | else { /* named group: add no values */ |
496 | nextcap(cs); /* skip capture */ |
497 | return 0; |
498 | } |
499 | } |
500 | case Cbackref: return backrefcap(cs); |
501 | case Ctable: return tablecap(cs); |
502 | case Cfunction: return functioncap(cs); |
503 | case Cnum: return numcap(cs); |
504 | case Cquery: return querycap(cs); |
505 | case Cfold: return foldcap(cs); |
506 | default: assert(0); return 0; |
507 | } |
508 | } |
509 | |
510 | |
511 | /* |
512 | ** Prepare a CapState structure and traverse the entire list of |
513 | ** captures in the stack pushing its results. 's' is the subject |
514 | ** string, 'r' is the final position of the match, and 'ptop' |
515 | ** the index in the stack where some useful values were pushed. |
516 | ** Returns the number of results pushed. (If the list produces no |
517 | ** results, push the final position of the match.) |
518 | */ |
519 | int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { |
520 | Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); |
521 | int n = 0; |
522 | if (!isclosecap(capture)) { /* is there any capture? */ |
523 | CapState cs; |
524 | cs.ocap = cs.cap = capture; cs.L = L; |
525 | cs.s = s; cs.valuecached = 0; cs.ptop = ptop; |
526 | do { /* collect their values */ |
527 | n += pushcapture(&cs); |
528 | } while (!isclosecap(cs.cap)); |
529 | } |
530 | if (n == 0) { /* no capture values? */ |
531 | lua_pushinteger(L, r - s + 1); /* return only end position */ |
532 | n = 1; |
533 | } |
534 | return n; |
535 | } |
536 | |
537 | |
538 | |