1/*
2** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
3** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
4*/
5
6#include "lua.h"
7#include "lauxlib.h"
8
9#include "lpcap.h"
10#include "lptypes.h"
11
12
13#define captype(cap) ((cap)->kind)
14
15#define isclosecap(cap) (captype(cap) == Cclose)
16
17#define closeaddr(c) ((c)->s + (c)->siz - 1)
18
19#define isfullcap(cap) ((cap)->siz != 0)
20
21#define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
22
23#define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
24
25
26
27/*
28** Put at the cache for Lua values the value indexed by 'v' in ktable
29** of the running pattern (if it is not there yet); returns its index.
30*/
31static int updatecache (CapState *cs, int v) {
32 int idx = cs->ptop + 1; /* stack index of cache for Lua values */
33 if (v != cs->valuecached) { /* not there? */
34 getfromktable(cs, v); /* get value from 'ktable' */
35 lua_replace(cs->L, idx); /* put it at reserved stack position */
36 cs->valuecached = v; /* keep track of what is there */
37 }
38 return idx;
39}
40
41
42static int pushcapture (CapState *cs);
43
44
45/*
46** Goes back in a list of captures looking for an open capture
47** corresponding to a close
48*/
49static Capture *findopen (Capture *cap) {
50 int n = 0; /* number of closes waiting an open */
51 for (;;) {
52 cap--;
53 if (isclosecap(cap)) n++; /* one more open to skip */
54 else if (!isfullcap(cap))
55 if (n-- == 0) return cap;
56 }
57}
58
59
60/*
61** Go to the next capture
62*/
63static void nextcap (CapState *cs) {
64 Capture *cap = cs->cap;
65 if (!isfullcap(cap)) { /* not a single capture? */
66 int n = 0; /* number of opens waiting a close */
67 for (;;) { /* look for corresponding close */
68 cap++;
69 if (isclosecap(cap)) {
70 if (n-- == 0) break;
71 }
72 else if (!isfullcap(cap)) n++;
73 }
74 }
75 cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
76}
77
78
79/*
80** Push on the Lua stack all values generated by nested captures inside
81** the current capture. Returns number of values pushed. 'addextra'
82** makes it push the entire match after all captured values. The
83** entire match is pushed also if there are no other nested values,
84** so the function never returns zero.
85*/
86static int pushnestedvalues (CapState *cs, int addextra) {
87 Capture *co = cs->cap;
88 if (isfullcap(cs->cap++)) { /* no nested captures? */
89 lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
90 return 1; /* that is it */
91 }
92 else {
93 int n = 0;
94 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
95 n += pushcapture(cs);
96 if (addextra || n == 0) { /* need extra? */
97 lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
98 n++;
99 }
100 cs->cap++; /* skip close entry */
101 return n;
102 }
103}
104
105
106/*
107** Push only the first value generated by nested captures
108*/
109static void pushonenestedvalue (CapState *cs) {
110 int n = pushnestedvalues(cs, 0);
111 if (n > 1)
112 lua_pop(cs->L, n - 1); /* pop extra values */
113}
114
115
116/*
117** Try to find a named group capture with the name given at the top of
118** the stack; goes backward from 'cap'.
119*/
120static Capture *findback (CapState *cs, Capture *cap) {
121 lua_State *L = cs->L;
122 while (cap-- > cs->ocap) { /* repeat until end of list */
123 if (isclosecap(cap))
124 cap = findopen(cap); /* skip nested captures */
125 else if (!isfullcap(cap))
126 continue; /* opening an enclosing capture: skip and get previous */
127 if (captype(cap) == Cgroup) {
128 getfromktable(cs, cap->idx); /* get group name */
129 if (lp_equal(L, -2, -1)) { /* right group? */
130 lua_pop(L, 2); /* remove reference name and group name */
131 return cap;
132 }
133 else lua_pop(L, 1); /* remove group name */
134 }
135 }
136 luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
137 return NULL; /* to avoid warnings */
138}
139
140
141/*
142** Back-reference capture. Return number of values pushed.
143*/
144static int backrefcap (CapState *cs) {
145 int n;
146 Capture *curr = cs->cap;
147 pushluaval(cs); /* reference name */
148 cs->cap = findback(cs, curr); /* find corresponding group */
149 n = pushnestedvalues(cs, 0); /* push group's values */
150 cs->cap = curr + 1;
151 return n;
152}
153
154
155/*
156** Table capture: creates a new table and populates it with nested
157** captures.
158*/
159static int tablecap (CapState *cs) {
160 lua_State *L = cs->L;
161 int n = 0;
162 lua_newtable(L);
163 if (isfullcap(cs->cap++))
164 return 1; /* table is empty */
165 while (!isclosecap(cs->cap)) {
166 if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
167 pushluaval(cs); /* push group name */
168 pushonenestedvalue(cs);
169 lua_settable(L, -3);
170 }
171 else { /* not a named group */
172 int i;
173 int k = pushcapture(cs);
174 for (i = k; i > 0; i--) /* store all values into table */
175 lua_rawseti(L, -(i + 1), n + i);
176 n += k;
177 }
178 }
179 cs->cap++; /* skip close entry */
180 return 1; /* number of values pushed (only the table) */
181}
182
183
184/*
185** Table-query capture
186*/
187static int querycap (CapState *cs) {
188 int idx = cs->cap->idx;
189 pushonenestedvalue(cs); /* get nested capture */
190 lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
191 if (!lua_isnil(cs->L, -1))
192 return 1;
193 else { /* no value */
194 lua_pop(cs->L, 1); /* remove nil */
195 return 0;
196 }
197}
198
199
200/*
201** Fold capture
202*/
203static int foldcap (CapState *cs) {
204 int n;
205 lua_State *L = cs->L;
206 int idx = cs->cap->idx;
207 if (isfullcap(cs->cap++) || /* no nested captures? */
208 isclosecap(cs->cap) || /* no nested captures (large subject)? */
209 (n = pushcapture(cs)) == 0) /* nested captures with no values? */
210 return luaL_error(L, "no initial value for fold capture");
211 if (n > 1)
212 lua_pop(L, n - 1); /* leave only one result for accumulator */
213 while (!isclosecap(cs->cap)) {
214 lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
215 lua_insert(L, -2); /* put it before accumulator */
216 n = pushcapture(cs); /* get next capture's values */
217 lua_call(L, n + 1, 1); /* call folding function */
218 }
219 cs->cap++; /* skip close entry */
220 return 1; /* only accumulator left on the stack */
221}
222
223
224/*
225** Function capture
226*/
227static int functioncap (CapState *cs) {
228 int n;
229 int top = lua_gettop(cs->L);
230 pushluaval(cs); /* push function */
231 n = pushnestedvalues(cs, 0); /* push nested captures */
232 lua_call(cs->L, n, LUA_MULTRET); /* call function */
233 return lua_gettop(cs->L) - top; /* return function's results */
234}
235
236
237/*
238** Select capture
239*/
240static int numcap (CapState *cs) {
241 int idx = cs->cap->idx; /* value to select */
242 if (idx == 0) { /* no values? */
243 nextcap(cs); /* skip entire capture */
244 return 0; /* no value produced */
245 }
246 else {
247 int n = pushnestedvalues(cs, 0);
248 if (n < idx) /* invalid index? */
249 return luaL_error(cs->L, "no capture '%d'", idx);
250 else {
251 lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
252 lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
253 lua_pop(cs->L, n - 1); /* remove other captures */
254 return 1;
255 }
256 }
257}
258
259
260/*
261** Return the stack index of the first runtime capture in the given
262** list of captures (or zero if no runtime captures)
263*/
264int finddyncap (Capture *cap, Capture *last) {
265 for (; cap < last; cap++) {
266 if (cap->kind == Cruntime)
267 return cap->idx; /* stack position of first capture */
268 }
269 return 0; /* no dynamic captures in this segment */
270}
271
272
273/*
274** Calls a runtime capture. Returns number of captures removed by
275** the call, including the initial Cgroup. (Captures to be added are
276** on the Lua stack.)
277*/
278int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
279 int n, id;
280 lua_State *L = cs->L;
281 int otop = lua_gettop(L);
282 Capture *open = findopen(close);
283 assert(captype(open) == Cgroup);
284 id = finddyncap(open, close); /* get first dynamic capture argument */
285 close->kind = Cclose; /* closes the group */
286 close->s = s;
287 cs->cap = open; cs->valuecached = 0; /* prepare capture state */
288 luaL_checkstack(L, 4, "too many runtime captures");
289 pushluaval(cs); /* push function to be called */
290 lua_pushvalue(L, SUBJIDX); /* push original subject */
291 lua_pushinteger(L, s - cs->s + 1); /* push current position */
292 n = pushnestedvalues(cs, 0); /* push nested captures */
293 lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
294 if (id > 0) { /* are there old dynamic captures to be removed? */
295 int i;
296 for (i = id; i <= otop; i++)
297 lua_remove(L, id); /* remove old dynamic captures */
298 *rem = otop - id + 1; /* total number of dynamic captures removed */
299 }
300 else
301 *rem = 0; /* no dynamic captures removed */
302 return close - open; /* number of captures of all kinds removed */
303}
304
305
306/*
307** Auxiliary structure for substitution and string captures: keep
308** information about nested captures for future use, avoiding to push
309** string results into Lua
310*/
311typedef struct StrAux {
312 int isstring; /* whether capture is a string */
313 union {
314 Capture *cp; /* if not a string, respective capture */
315 struct { /* if it is a string... */
316 const char *s; /* ... starts here */
317 const char *e; /* ... ends here */
318 } s;
319 } u;
320} StrAux;
321
322#define MAXSTRCAPS 10
323
324/*
325** Collect values from current capture into array 'cps'. Current
326** capture must be Cstring (first call) or Csimple (recursive calls).
327** (In first call, fills %0 with whole match for Cstring.)
328** Returns number of elements in the array that were filled.
329*/
330static int getstrcaps (CapState *cs, StrAux *cps, int n) {
331 int k = n++;
332 cps[k].isstring = 1; /* get string value */
333 cps[k].u.s.s = cs->cap->s; /* starts here */
334 if (!isfullcap(cs->cap++)) { /* nested captures? */
335 while (!isclosecap(cs->cap)) { /* traverse them */
336 if (n >= MAXSTRCAPS) /* too many captures? */
337 nextcap(cs); /* skip extra captures (will not need them) */
338 else if (captype(cs->cap) == Csimple) /* string? */
339 n = getstrcaps(cs, cps, n); /* put info. into array */
340 else {
341 cps[n].isstring = 0; /* not a string */
342 cps[n].u.cp = cs->cap; /* keep original capture */
343 nextcap(cs);
344 n++;
345 }
346 }
347 cs->cap++; /* skip close */
348 }
349 cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
350 return n;
351}
352
353
354/*
355** add next capture value (which should be a string) to buffer 'b'
356*/
357static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
358
359
360/*
361** String capture: add result to buffer 'b' (instead of pushing
362** it into the stack)
363*/
364static void stringcap (luaL_Buffer *b, CapState *cs) {
365 StrAux cps[MAXSTRCAPS];
366 int n;
367 size_t len, i;
368 const char *fmt; /* format string */
369 fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
370 n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
371 for (i = 0; i < len; i++) { /* traverse them */
372 if (fmt[i] != '%') /* not an escape? */
373 luaL_addchar(b, fmt[i]); /* add it to buffer */
374 else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
375 luaL_addchar(b, fmt[i]); /* add to buffer */
376 else {
377 int l = fmt[i] - '0'; /* capture index */
378 if (l > n)
379 luaL_error(cs->L, "invalid capture index (%d)", l);
380 else if (cps[l].isstring)
381 luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
382 else {
383 Capture *curr = cs->cap;
384 cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
385 if (!addonestring(b, cs, "capture"))
386 luaL_error(cs->L, "no values in capture index %d", l);
387 cs->cap = curr; /* continue from where it stopped */
388 }
389 }
390 }
391}
392
393
394/*
395** Substitution capture: add result to buffer 'b'
396*/
397static void substcap (luaL_Buffer *b, CapState *cs) {
398 const char *curr = cs->cap->s;
399 if (isfullcap(cs->cap)) /* no nested captures? */
400 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
401 else {
402 cs->cap++; /* skip open entry */
403 while (!isclosecap(cs->cap)) { /* traverse nested captures */
404 const char *next = cs->cap->s;
405 luaL_addlstring(b, curr, next - curr); /* add text up to capture */
406 if (addonestring(b, cs, "replacement"))
407 curr = closeaddr(cs->cap - 1); /* continue after match */
408 else /* no capture value */
409 curr = next; /* keep original text in final result */
410 }
411 luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
412 }
413 cs->cap++; /* go to next capture */
414}
415
416
417/*
418** Evaluates a capture and adds its first value to buffer 'b'; returns
419** whether there was a value
420*/
421static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
422 switch (captype(cs->cap)) {
423 case Cstring:
424 stringcap(b, cs); /* add capture directly to buffer */
425 return 1;
426 case Csubst:
427 substcap(b, cs); /* add capture directly to buffer */
428 return 1;
429 default: {
430 lua_State *L = cs->L;
431 int n = pushcapture(cs);
432 if (n > 0) {
433 if (n > 1) lua_pop(L, n - 1); /* only one result */
434 if (!lua_isstring(L, -1))
435 luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
436 luaL_addvalue(b);
437 }
438 return n;
439 }
440 }
441}
442
443
444/*
445** Push all values of the current capture into the stack; returns
446** number of values pushed
447*/
448static int pushcapture (CapState *cs) {
449 lua_State *L = cs->L;
450 luaL_checkstack(L, 4, "too many captures");
451 switch (captype(cs->cap)) {
452 case Cposition: {
453 lua_pushinteger(L, cs->cap->s - cs->s + 1);
454 cs->cap++;
455 return 1;
456 }
457 case Cconst: {
458 pushluaval(cs);
459 cs->cap++;
460 return 1;
461 }
462 case Carg: {
463 int arg = (cs->cap++)->idx;
464 if (arg + FIXEDARGS > cs->ptop)
465 return luaL_error(L, "reference to absent extra argument #%d", arg);
466 lua_pushvalue(L, arg + FIXEDARGS);
467 return 1;
468 }
469 case Csimple: {
470 int k = pushnestedvalues(cs, 1);
471 lua_insert(L, -k); /* make whole match be first result */
472 return k;
473 }
474 case Cruntime: {
475 lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
476 return 1;
477 }
478 case Cstring: {
479 luaL_Buffer b;
480 luaL_buffinit(L, &b);
481 stringcap(&b, cs);
482 luaL_pushresult(&b);
483 return 1;
484 }
485 case Csubst: {
486 luaL_Buffer b;
487 luaL_buffinit(L, &b);
488 substcap(&b, cs);
489 luaL_pushresult(&b);
490 return 1;
491 }
492 case Cgroup: {
493 if (cs->cap->idx == 0) /* anonymous group? */
494 return pushnestedvalues(cs, 0); /* add all nested values */
495 else { /* named group: add no values */
496 nextcap(cs); /* skip capture */
497 return 0;
498 }
499 }
500 case Cbackref: return backrefcap(cs);
501 case Ctable: return tablecap(cs);
502 case Cfunction: return functioncap(cs);
503 case Cnum: return numcap(cs);
504 case Cquery: return querycap(cs);
505 case Cfold: return foldcap(cs);
506 default: assert(0); return 0;
507 }
508}
509
510
511/*
512** Prepare a CapState structure and traverse the entire list of
513** captures in the stack pushing its results. 's' is the subject
514** string, 'r' is the final position of the match, and 'ptop'
515** the index in the stack where some useful values were pushed.
516** Returns the number of results pushed. (If the list produces no
517** results, push the final position of the match.)
518*/
519int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
520 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
521 int n = 0;
522 if (!isclosecap(capture)) { /* is there any capture? */
523 CapState cs;
524 cs.ocap = cs.cap = capture; cs.L = L;
525 cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
526 do { /* collect their values */
527 n += pushcapture(&cs);
528 } while (!isclosecap(cs.cap));
529 }
530 if (n == 0) { /* no capture values? */
531 lua_pushinteger(L, r - s + 1); /* return only end position */
532 n = 1;
533 }
534 return n;
535}
536
537
538