1#include "jsi.h"
2#include "jsvalue.h"
3#include "jsbuiltin.h"
4#include "utf.h"
5#include "regexp.h"
6
7static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
8{
9 int result = js_regexec(prog, string, sub, eflags);
10 if (result < 0)
11 js_error(J, "regexec failed");
12 return result;
13}
14
15static const char *checkstring(js_State *J, int idx)
16{
17 if (!js_iscoercible(J, idx))
18 js_typeerror(J, "string function called on null or undefined");
19 return js_tostring(J, idx);
20}
21
22int js_runeat(js_State *J, const char *s, int i)
23{
24 Rune rune = 0;
25 while (i-- >= 0) {
26 rune = *(unsigned char*)s;
27 if (rune < Runeself) {
28 if (rune == 0)
29 return 0;
30 ++s;
31 } else
32 s += chartorune(&rune, s);
33 }
34 return rune;
35}
36
37const char *js_utfidxtoptr(const char *s, int i)
38{
39 Rune rune;
40 while (i-- > 0) {
41 rune = *(unsigned char*)s;
42 if (rune < Runeself) {
43 if (rune == 0)
44 return NULL;
45 ++s;
46 } else
47 s += chartorune(&rune, s);
48 }
49 return s;
50}
51
52int js_utfptrtoidx(const char *s, const char *p)
53{
54 Rune rune;
55 int i = 0;
56 while (s < p) {
57 if (*(unsigned char *)s < Runeself)
58 ++s;
59 else
60 s += chartorune(&rune, s);
61 ++i;
62 }
63 return i;
64}
65
66static void jsB_new_String(js_State *J)
67{
68 js_newstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
69}
70
71static void jsB_String(js_State *J)
72{
73 js_pushstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
74}
75
76static void Sp_toString(js_State *J)
77{
78 js_Object *self = js_toobject(J, 0);
79 if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
80 js_pushliteral(J, self->u.s.string);
81}
82
83static void Sp_valueOf(js_State *J)
84{
85 js_Object *self = js_toobject(J, 0);
86 if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
87 js_pushliteral(J, self->u.s.string);
88}
89
90static void Sp_charAt(js_State *J)
91{
92 char buf[UTFmax + 1];
93 const char *s = checkstring(J, 0);
94 int pos = js_tointeger(J, 1);
95 Rune rune = js_runeat(J, s, pos);
96 if (rune > 0) {
97 buf[runetochar(buf, &rune)] = 0;
98 js_pushstring(J, buf);
99 } else {
100 js_pushliteral(J, "");
101 }
102}
103
104static void Sp_charCodeAt(js_State *J)
105{
106 const char *s = checkstring(J, 0);
107 int pos = js_tointeger(J, 1);
108 Rune rune = js_runeat(J, s, pos);
109 if (rune > 0)
110 js_pushnumber(J, rune);
111 else
112 js_pushnumber(J, NAN);
113}
114
115static void Sp_concat(js_State *J)
116{
117 int i, top = js_gettop(J);
118 int n;
119 char * volatile out;
120 const char *s;
121
122 if (top == 1)
123 return;
124
125 s = checkstring(J, 0);
126 n = strlen(s);
127 out = js_malloc(J, n + 1);
128 strcpy(out, s);
129
130 if (js_try(J)) {
131 js_free(J, out);
132 js_throw(J);
133 }
134
135 for (i = 1; i < top; ++i) {
136 s = js_tostring(J, i);
137 n += strlen(s);
138 out = js_realloc(J, out, n + 1);
139 strcat(out, s);
140 }
141
142 js_pushstring(J, out);
143 js_endtry(J);
144 js_free(J, out);
145}
146
147static void Sp_indexOf(js_State *J)
148{
149 const char *haystack = checkstring(J, 0);
150 const char *needle = js_tostring(J, 1);
151 int pos = js_tointeger(J, 2);
152 int len = strlen(needle);
153 int k = 0;
154 Rune rune;
155 while (*haystack) {
156 if (k >= pos && !strncmp(haystack, needle, len)) {
157 js_pushnumber(J, k);
158 return;
159 }
160 haystack += chartorune(&rune, haystack);
161 ++k;
162 }
163 js_pushnumber(J, -1);
164}
165
166static void Sp_lastIndexOf(js_State *J)
167{
168 const char *haystack = checkstring(J, 0);
169 const char *needle = js_tostring(J, 1);
170 int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
171 int len = strlen(needle);
172 int k = 0, last = -1;
173 Rune rune;
174 while (*haystack && k <= pos) {
175 if (!strncmp(haystack, needle, len))
176 last = k;
177 haystack += chartorune(&rune, haystack);
178 ++k;
179 }
180 js_pushnumber(J, last);
181}
182
183static void Sp_localeCompare(js_State *J)
184{
185 const char *a = checkstring(J, 0);
186 const char *b = js_tostring(J, 1);
187 js_pushnumber(J, strcmp(a, b));
188}
189
190static void Sp_slice(js_State *J)
191{
192 const char *str = checkstring(J, 0);
193 const char *ss, *ee;
194 int len = utflen(str);
195 int s = js_tointeger(J, 1);
196 int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
197
198 s = s < 0 ? s + len : s;
199 e = e < 0 ? e + len : e;
200
201 s = s < 0 ? 0 : s > len ? len : s;
202 e = e < 0 ? 0 : e > len ? len : e;
203
204 if (s < e) {
205 ss = js_utfidxtoptr(str, s);
206 ee = js_utfidxtoptr(ss, e - s);
207 } else {
208 ss = js_utfidxtoptr(str, e);
209 ee = js_utfidxtoptr(ss, s - e);
210 }
211
212 js_pushlstring(J, ss, ee - ss);
213}
214
215static void Sp_substring(js_State *J)
216{
217 const char *str = checkstring(J, 0);
218 const char *ss, *ee;
219 int len = utflen(str);
220 int s = js_tointeger(J, 1);
221 int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
222
223 s = s < 0 ? 0 : s > len ? len : s;
224 e = e < 0 ? 0 : e > len ? len : e;
225
226 if (s < e) {
227 ss = js_utfidxtoptr(str, s);
228 ee = js_utfidxtoptr(ss, e - s);
229 } else {
230 ss = js_utfidxtoptr(str, e);
231 ee = js_utfidxtoptr(ss, s - e);
232 }
233
234 js_pushlstring(J, ss, ee - ss);
235}
236
237static void Sp_toLowerCase(js_State *J)
238{
239 const char *src = checkstring(J, 0);
240 char *dst = js_malloc(J, UTFmax * strlen(src) + 1);
241 const char *s = src;
242 char *d = dst;
243 Rune rune;
244 while (*s) {
245 s += chartorune(&rune, s);
246 rune = tolowerrune(rune);
247 d += runetochar(d, &rune);
248 }
249 *d = 0;
250 if (js_try(J)) {
251 js_free(J, dst);
252 js_throw(J);
253 }
254 js_pushstring(J, dst);
255 js_endtry(J);
256 js_free(J, dst);
257}
258
259static void Sp_toUpperCase(js_State *J)
260{
261 const char *src = checkstring(J, 0);
262 char *dst = js_malloc(J, UTFmax * strlen(src) + 1);
263 const char *s = src;
264 char *d = dst;
265 Rune rune;
266 while (*s) {
267 s += chartorune(&rune, s);
268 rune = toupperrune(rune);
269 d += runetochar(d, &rune);
270 }
271 *d = 0;
272 if (js_try(J)) {
273 js_free(J, dst);
274 js_throw(J);
275 }
276 js_pushstring(J, dst);
277 js_endtry(J);
278 js_free(J, dst);
279}
280
/*
 * Return 1 if code point `c` is one of the characters removed by trim
 * (tab, LF, VT, FF, CR, space, U+00A0, U+FEFF, U+2028, U+2029),
 * otherwise 0.
 */
static int istrim(int c)
{
	switch (c) {
	case 0x9:
	case 0xA:
	case 0xB:
	case 0xC:
	case 0xD:
	case 0x20:
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
286
287static void Sp_trim(js_State *J)
288{
289 const char *s, *e;
290 s = checkstring(J, 0);
291 while (istrim(*s))
292 ++s;
293 e = s + strlen(s);
294 while (e > s && istrim(e[-1]))
295 --e;
296 js_pushlstring(J, s, e - s);
297}
298
299static void S_fromCharCode(js_State *J)
300{
301 int i, top = js_gettop(J);
302 Rune c;
303 char *s, *p;
304
305 s = p = js_malloc(J, (top-1) * UTFmax + 1);
306
307 if (js_try(J)) {
308 js_free(J, s);
309 js_throw(J);
310 }
311
312 for (i = 1; i < top; ++i) {
313 c = js_touint16(J, i);
314 p += runetochar(p, &c);
315 }
316 *p = 0;
317 js_pushstring(J, s);
318
319 js_endtry(J);
320 js_free(J, s);
321}
322
323static void Sp_match(js_State *J)
324{
325 js_Regexp *re;
326 const char *text;
327 int len;
328 const char *a, *b, *c, *e;
329 Resub m;
330
331 text = checkstring(J, 0);
332
333 if (js_isregexp(J, 1))
334 js_copy(J, 1);
335 else if (js_isundefined(J, 1))
336 js_newregexp(J, "", 0);
337 else
338 js_newregexp(J, js_tostring(J, 1), 0);
339
340 re = js_toregexp(J, -1);
341 if (!(re->flags & JS_REGEXP_G)) {
342 js_RegExp_prototype_exec(J, re, text);
343 return;
344 }
345
346 re->last = 0;
347
348 js_newarray(J);
349
350 len = 0;
351 a = text;
352 e = text + strlen(text);
353 while (a <= e) {
354 if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
355 break;
356
357 b = m.sub[0].sp;
358 c = m.sub[0].ep;
359
360 js_pushlstring(J, b, c - b);
361 js_setindex(J, -2, len++);
362
363 a = c;
364 if (c - b == 0)
365 ++a;
366 }
367
368 if (len == 0) {
369 js_pop(J, 1);
370 js_pushnull(J);
371 }
372}
373
374static void Sp_search(js_State *J)
375{
376 js_Regexp *re;
377 const char *text;
378 Resub m;
379
380 text = checkstring(J, 0);
381
382 if (js_isregexp(J, 1))
383 js_copy(J, 1);
384 else if (js_isundefined(J, 1))
385 js_newregexp(J, "", 0);
386 else
387 js_newregexp(J, js_tostring(J, 1), 0);
388
389 re = js_toregexp(J, -1);
390
391 if (!js_doregexec(J, re->prog, text, &m, 0))
392 js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
393 else
394 js_pushnumber(J, -1);
395}
396
397static void Sp_replace_regexp(js_State *J)
398{
399 js_Regexp *re;
400 const char *source, *s, *r;
401 js_Buffer *sb = NULL;
402 int n, x;
403 Resub m;
404
405 source = checkstring(J, 0);
406 re = js_toregexp(J, 1);
407
408 if (js_doregexec(J, re->prog, source, &m, 0)) {
409 js_copy(J, 0);
410 return;
411 }
412
413 re->last = 0;
414
415loop:
416 s = m.sub[0].sp;
417 n = m.sub[0].ep - m.sub[0].sp;
418
419 if (js_iscallable(J, 2)) {
420 js_copy(J, 2);
421 js_pushundefined(J);
422 for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
423 js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
424 js_pushnumber(J, s - source); /* arg x+2: offset within search string */
425 js_copy(J, 0); /* arg x+3: search string */
426 js_call(J, 2 + x);
427 r = js_tostring(J, -1);
428 js_putm(J, &sb, source, s);
429 js_puts(J, &sb, r);
430 js_pop(J, 1);
431 } else {
432 r = js_tostring(J, 2);
433 js_putm(J, &sb, source, s);
434 while (*r) {
435 if (*r == '$') {
436 switch (*(++r)) {
437 case 0: --r; /* end of string; back up */
438 /* fallthrough */
439 case '$': js_putc(J, &sb, '$'); break;
440 case '`': js_putm(J, &sb, source, s); break;
441 case '\'': js_puts(J, &sb, s + n); break;
442 case '&':
443 js_putm(J, &sb, s, s + n);
444 break;
445 case '0': case '1': case '2': case '3': case '4':
446 case '5': case '6': case '7': case '8': case '9':
447 x = *r - '0';
448 if (r[1] >= '0' && r[1] <= '9')
449 x = x * 10 + *(++r) - '0';
450 if (x > 0 && x < m.nsub) {
451 js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
452 } else {
453 js_putc(J, &sb, '$');
454 if (x > 10) {
455 js_putc(J, &sb, '0' + x / 10);
456 js_putc(J, &sb, '0' + x % 10);
457 } else {
458 js_putc(J, &sb, '0' + x);
459 }
460 }
461 break;
462 default:
463 js_putc(J, &sb, '$');
464 js_putc(J, &sb, *r);
465 break;
466 }
467 ++r;
468 } else {
469 js_putc(J, &sb, *r++);
470 }
471 }
472 }
473
474 if (re->flags & JS_REGEXP_G) {
475 source = m.sub[0].ep;
476 if (n == 0) {
477 if (*source)
478 js_putc(J, &sb, *source++);
479 else
480 goto end;
481 }
482 if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
483 goto loop;
484 }
485
486end:
487 js_puts(J, &sb, s + n);
488 js_putc(J, &sb, 0);
489
490 if (js_try(J)) {
491 js_free(J, sb);
492 js_throw(J);
493 }
494 js_pushstring(J, sb ? sb->s : "");
495 js_endtry(J);
496 js_free(J, sb);
497}
498
499static void Sp_replace_string(js_State *J)
500{
501 const char *source, *needle, *s, *r;
502 js_Buffer *sb = NULL;
503 int n;
504
505 source = checkstring(J, 0);
506 needle = js_tostring(J, 1);
507
508 s = strstr(source, needle);
509 if (!s) {
510 js_copy(J, 0);
511 return;
512 }
513 n = strlen(needle);
514
515 if (js_iscallable(J, 2)) {
516 js_copy(J, 2);
517 js_pushundefined(J);
518 js_pushlstring(J, s, n); /* arg 1: substring that matched */
519 js_pushnumber(J, s - source); /* arg 2: offset within search string */
520 js_copy(J, 0); /* arg 3: search string */
521 js_call(J, 3);
522 r = js_tostring(J, -1);
523 js_putm(J, &sb, source, s);
524 js_puts(J, &sb, r);
525 js_puts(J, &sb, s + n);
526 js_putc(J, &sb, 0);
527 js_pop(J, 1);
528 } else {
529 r = js_tostring(J, 2);
530 js_putm(J, &sb, source, s);
531 while (*r) {
532 if (*r == '$') {
533 switch (*(++r)) {
534 case 0: --r; /* end of string; back up */
535 /* fallthrough */
536 case '$': js_putc(J, &sb, '$'); break;
537 case '&': js_putm(J, &sb, s, s + n); break;
538 case '`': js_putm(J, &sb, source, s); break;
539 case '\'': js_puts(J, &sb, s + n); break;
540 default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break;
541 }
542 ++r;
543 } else {
544 js_putc(J, &sb, *r++);
545 }
546 }
547 js_puts(J, &sb, s + n);
548 js_putc(J, &sb, 0);
549 }
550
551 if (js_try(J)) {
552 js_free(J, sb);
553 js_throw(J);
554 }
555 js_pushstring(J, sb ? sb->s : "");
556 js_endtry(J);
557 js_free(J, sb);
558}
559
560static void Sp_replace(js_State *J)
561{
562 if (js_isregexp(J, 1))
563 Sp_replace_regexp(J);
564 else
565 Sp_replace_string(J);
566}
567
568static void Sp_split_regexp(js_State *J)
569{
570 js_Regexp *re;
571 const char *text;
572 int limit, len, k;
573 const char *p, *a, *b, *c, *e;
574 Resub m;
575
576 text = checkstring(J, 0);
577 re = js_toregexp(J, 1);
578 limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
579
580 js_newarray(J);
581 len = 0;
582
583 e = text + strlen(text);
584
585 /* splitting the empty string */
586 if (e == text) {
587 if (js_doregexec(J, re->prog, text, &m, 0)) {
588 if (len == limit) return;
589 js_pushliteral(J, "");
590 js_setindex(J, -2, 0);
591 }
592 return;
593 }
594
595 p = a = text;
596 while (a < e) {
597 if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
598 break; /* no match */
599
600 b = m.sub[0].sp;
601 c = m.sub[0].ep;
602
603 /* empty string at end of last match */
604 if (b == p) {
605 ++a;
606 continue;
607 }
608
609 if (len == limit) return;
610 js_pushlstring(J, p, b - p);
611 js_setindex(J, -2, len++);
612
613 for (k = 1; k < m.nsub; ++k) {
614 if (len == limit) return;
615 js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
616 js_setindex(J, -2, len++);
617 }
618
619 a = p = c;
620 }
621
622 if (len == limit) return;
623 js_pushstring(J, p);
624 js_setindex(J, -2, len);
625}
626
627static void Sp_split_string(js_State *J)
628{
629 const char *str = checkstring(J, 0);
630 const char *sep = js_tostring(J, 1);
631 int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
632 int i, n;
633
634 js_newarray(J);
635
636 n = strlen(sep);
637
638 /* empty string */
639 if (n == 0) {
640 Rune rune;
641 for (i = 0; *str && i < limit; ++i) {
642 n = chartorune(&rune, str);
643 js_pushlstring(J, str, n);
644 js_setindex(J, -2, i);
645 str += n;
646 }
647 return;
648 }
649
650 for (i = 0; str && i < limit; ++i) {
651 const char *s = strstr(str, sep);
652 if (s) {
653 js_pushlstring(J, str, s-str);
654 js_setindex(J, -2, i);
655 str = s + n;
656 } else {
657 js_pushstring(J, str);
658 js_setindex(J, -2, i);
659 str = NULL;
660 }
661 }
662}
663
664static void Sp_split(js_State *J)
665{
666 if (js_isundefined(J, 1)) {
667 js_newarray(J);
668 js_copy(J, 0);
669 js_setindex(J, -2, 0);
670 } else if (js_isregexp(J, 1)) {
671 Sp_split_regexp(J);
672 } else {
673 Sp_split_string(J);
674 }
675}
676
/*
 * Set up the String built-in: give the String prototype object an empty
 * string value, register the prototype functions on it, create the
 * String constructor with its static function, and define the global
 * "String".
 *
 * NOTE(review): the trailing integer passed to jsB_propf and
 * js_newcconstructor is presumably the function's reported length, with
 * the inline 1 comments marking where that differs from the value passed
 * here -- confirm against the jsB_propf definition.
 */
void jsB_initstring(js_State *J)
{
	/* the prototype itself holds the empty string */
	J->String_prototype->u.s.string = "";
	J->String_prototype->u.s.length = 0;

	js_pushobject(J, J->String_prototype);
	{
		jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
		jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
		jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
		jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
		jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
		jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
		jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
		jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
		jsB_propf(J, "String.prototype.match", Sp_match, 1);
		jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
		jsB_propf(J, "String.prototype.search", Sp_search, 1);
		jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
		jsB_propf(J, "String.prototype.split", Sp_split, 2);
		jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
		jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
		/* the locale variants share the plain case-conversion functions */
		jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
		jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
		jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);

		/* ES5 */
		jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
	}
	/* jsB_String handles a plain call, jsB_new_String handles `new String(...)` */
	js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
	{
		jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
	}
	js_defglobal(J, "String", JS_DONTENUM);
}
712