1/*
2** String formatting.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_state.h"
15#include "lj_char.h"
16#include "lj_strfmt.h"
17
18/* -- Format parser ------------------------------------------------------- */
19
20static const uint8_t strfmt_map[('x'-'A')+1] = {
21 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
22 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
23 0,0,0,0,0,0,
24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
26};
27
28SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
29{
30 const uint8_t *p = fs->p, *e = fs->e;
31 fs->str = (const char *)p;
32 for (; p < e; p++) {
33 if (*p == '%') { /* Escape char? */
34 if (p[1] == '%') { /* '%%'? */
35 fs->p = ++p+1;
36 goto retlit;
37 } else {
38 SFormat sf = 0;
39 uint32_t c;
40 if (p != (const uint8_t *)fs->str)
41 break;
42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
43 /* Parse flags. */
44 if (*p == '-') sf |= STRFMT_F_LEFT;
45 else if (*p == '+') sf |= STRFMT_F_PLUS;
46 else if (*p == '0') sf |= STRFMT_F_ZERO;
47 else if (*p == ' ') sf |= STRFMT_F_SPACE;
48 else if (*p == '#') sf |= STRFMT_F_ALT;
49 else break;
50 }
51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
52 uint32_t width = (uint32_t)*p++ - '0';
53 if ((uint32_t)*p - '0' < 10)
54 width = (uint32_t)*p++ - '0' + width*10;
55 sf |= (width << STRFMT_SH_WIDTH);
56 }
57 if (*p == '.') { /* Parse precision. */
58 uint32_t prec = 0;
59 p++;
60 if ((uint32_t)*p - '0' < 10) {
61 prec = (uint32_t)*p++ - '0';
62 if ((uint32_t)*p - '0' < 10)
63 prec = (uint32_t)*p++ - '0' + prec*10;
64 }
65 sf |= ((prec+1) << STRFMT_SH_PREC);
66 }
67 /* Parse conversion. */
68 c = (uint32_t)*p - 'A';
69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
70 uint32_t sx = strfmt_map[c];
71 if (sx) {
72 fs->p = p+1;
73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
74 }
75 }
76 /* Return error location. */
77 if (*p >= 32) p++;
78 fs->len = (MSize)(p - (const uint8_t *)fs->str);
79 fs->p = fs->e;
80 return STRFMT_ERR;
81 }
82 }
83 }
84 fs->p = p;
85retlit:
86 fs->len = (MSize)(p - (const uint8_t *)fs->str);
87 return fs->len ? STRFMT_LIT : STRFMT_EOF;
88}
89
90/* -- Raw conversions ----------------------------------------------------- */
91
/*
** Emit the leading decimal digit of x and keep the remainder in x.
** The quotient x/sc is computed without a division: x is multiplied by the
** rounded-up reciprocal ((1<<sh)+sc-1)/sc and shifted right by sh bits.
** Only used with x < 10*sc, so the quotient is a single digit.
*/
#define WINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Write integer to buffer.
**
** Writes the decimal representation of k (with a leading '-' for negative
** values) to p and returns a pointer past the last character written. No
** terminating NUL is added. The caller has to provide enough space for the
** longest output, which is 11 characters ("-2147483648").
*/
char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
{
  uint32_t u = (uint32_t)k;
  /* Negate in unsigned arithmetic: -k would overflow for INT32_MIN. */
  if (k < 0) { u = ~u+1u; *p++ = '-'; }
  /* The number is split into groups of up to four digits. The gotos skip
  ** the leading digits that are not needed. */
  if (u < 10000) {
    if (u < 10) goto dig1;
    if (u < 100) goto dig2;
    if (u < 1000) goto dig3;
  } else {
    uint32_t v = u / 10000; u -= v * 10000;  /* u: low four digits. */
    if (v < 10000) {
      if (v < 10) goto dig5;
      if (v < 100) goto dig6;
      if (v < 1000) goto dig7;
    } else {
      uint32_t w = v / 10000; v -= w * 10000;  /* w: top one or two digits. */
      if (w >= 10) WINT_R(w, 10, 10)
      *p++ = (char)('0'+w);
    }
    WINT_R(v, 23, 1000)
    dig7: WINT_R(v, 12, 100)
    dig6: WINT_R(v, 10, 10)
    dig5: *p++ = (char)('0'+v);
  }
  WINT_R(u, 23, 1000)
  dig3: WINT_R(u, 12, 100)
  dig2: WINT_R(u, 10, 10)
  dig1: *p++ = (char)('0'+u);
  return p;
}
#undef WINT_R
127
128/* Write pointer to buffer. */
129char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
130{
131 ptrdiff_t x = (ptrdiff_t)v;
132 MSize i, n = STRFMT_MAXBUF_PTR;
133 if (x == 0) {
134 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
135 return p;
136 }
137#if LJ_64
138 /* Shorten output for 64 bit pointers. */
139 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
140#endif
141 p[0] = '0';
142 p[1] = 'x';
143 for (i = n-1; i >= 2; i--, x >>= 4)
144 p[i] = "0123456789abcdef"[(x & 15)];
145 return p+n;
146}
147
/*
** Write ULEB128 to buffer.
**
** Emits v in groups of seven bits, least significant group first. Every
** byte except the last has its top bit set. Returns a pointer past the last
** byte written.
*/
char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
{
  while (v >= 0x80) {
    *p++ = (char)((v & 0x7f) | 0x80);  /* More groups follow. */
    v >>= 7;
  }
  *p++ = (char)v;  /* Final group, top bit clear. */
  return p;
}
156
157/* Return string or write number to tmp buffer and return pointer to start. */
158const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
159{
160 SBuf *sb;
161 if (tvisstr(o)) {
162 *lenp = strV(o)->len;
163 return strVdata(o);
164 } else if (tvisint(o)) {
165 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
166 } else if (tvisnum(o)) {
167 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
168 } else {
169 return NULL;
170 }
171 *lenp = sbuflen(sb);
172 return sbufB(sb);
173}
174
175/* -- Unformatted conversions to buffer ----------------------------------- */
176
177/* Add integer to buffer. */
178SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
179{
180 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
181 return sb;
182}
183
#if LJ_HASJIT
/* Add number to buffer, using the STRFMT_G14 format. Returns sb. */
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  lua_Number n = o->n;
  return lj_strfmt_putfnum(sb, STRFMT_G14, n);
}
#endif
191
192SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
193{
194 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
195 return sb;
196}
197
198/* Add quoted string to buffer. */
199SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
200{
201 const char *s = strdata(str);
202 MSize len = str->len;
203 lj_buf_putb(sb, '"');
204 while (len--) {
205 uint32_t c = (uint32_t)(uint8_t)*s++;
206 char *p = lj_buf_more(sb, 4);
207 if (c == '"' || c == '\\' || c == '\n') {
208 *p++ = '\\';
209 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
210 uint32_t d;
211 *p++ = '\\';
212 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
213 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
214 goto tens;
215 } else if (c >= 10) {
216 tens:
217 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
218 }
219 c += '0';
220 }
221 *p++ = (char)c;
222 setsbufP(sb, p);
223 }
224 lj_buf_putb(sb, '"');
225 return sb;
226}
227
228/* -- Formatted conversions to buffer ------------------------------------- */
229
230/* Add formatted char to buffer. */
231SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
232{
233 MSize width = STRFMT_WIDTH(sf);
234 char *p = lj_buf_more(sb, width > 1 ? width : 1);
235 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
236 while (width-- > 1) *p++ = ' ';
237 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
238 setsbufP(sb, p);
239 return sb;
240}
241
242/* Add formatted string to buffer. */
243SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
244{
245 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
246 MSize width = STRFMT_WIDTH(sf);
247 char *p = lj_buf_more(sb, width > len ? width : len);
248 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
249 while (width-- > len) *p++ = ' ';
250 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
251 setsbufP(sb, p);
252 return sb;
253}
254
255/* Add formatted signed/unsigned integer to buffer. */
256SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
257{
258 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
259#ifdef LUA_USE_ASSERT
260 char *ps;
261#endif
262 MSize prefix = 0, len, prec, pprec, width, need;
263
264 /* Figure out signed prefixes. */
265 if (STRFMT_TYPE(sf) == STRFMT_INT) {
266 if ((int64_t)k < 0) {
267 k = (uint64_t)-(int64_t)k;
268 prefix = 256 + '-';
269 } else if ((sf & STRFMT_F_PLUS)) {
270 prefix = 256 + '+';
271 } else if ((sf & STRFMT_F_SPACE)) {
272 prefix = 256 + ' ';
273 }
274 }
275
276 /* Convert number and store to fixed-size buffer in reverse order. */
277 prec = STRFMT_PREC(sf);
278 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
279 if (k == 0) { /* Special-case zero argument. */
280 if (prec != 0 ||
281 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
282 *--q = '0';
283 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
284 uint32_t k2;
285 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
286 k2 = (uint32_t)k;
287 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
288 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
289 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
290 "0123456789abcdef";
291 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
292 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
293 } else { /* Octal. */
294 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
295 if ((sf & STRFMT_F_ALT)) *--q = '0';
296 }
297
298 /* Calculate sizes. */
299 len = (MSize)(buf + sizeof(buf) - q);
300 if ((int32_t)len >= (int32_t)prec) prec = len;
301 width = STRFMT_WIDTH(sf);
302 pprec = prec + (prefix >> 8);
303 need = width > pprec ? width : pprec;
304 p = lj_buf_more(sb, need);
305#ifdef LUA_USE_ASSERT
306 ps = p;
307#endif
308
309 /* Format number with leading/trailing whitespace and zeros. */
310 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
311 while (width-- > pprec) *p++ = ' ';
312 if (prefix) {
313 if ((char)prefix >= 'X') *p++ = '0';
314 *p++ = (char)prefix;
315 }
316 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
317 while (width-- > pprec) *p++ = '0';
318 while (prec-- > len) *p++ = '0';
319 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
320 if ((sf & STRFMT_F_LEFT))
321 while (width-- > pprec) *p++ = ' ';
322
323 lj_assertX(need == (MSize)(p - ps), "miscalculated format size");
324 setsbufP(sb, p);
325 return sb;
326}
327
328/* Add number formatted as signed integer to buffer. */
329SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
330{
331 int64_t k = (int64_t)n;
332 if (checki32(k) && sf == STRFMT_INT)
333 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
334 else
335 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
336}
337
338/* Add number formatted as unsigned integer to buffer. */
339SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
340{
341 int64_t k;
342 if (n >= 9223372036854775808.0)
343 k = (int64_t)(n - 18446744073709551616.0);
344 else
345 k = (int64_t)n;
346 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
347}
348
349/* -- Conversions to strings ---------------------------------------------- */
350
351/* Convert integer to string. */
352GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
353{
354 char buf[STRFMT_MAXBUF_INT];
355 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
356 return lj_str_new(L, buf, len);
357}
358
359/* Convert integer or number to string. */
360GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
361{
362 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
363}
364
#if LJ_HASJIT
/* Convert char value to a string of length one. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = c;
  return lj_str_new(L, &ch, 1);
}
#endif
374
375/* Raw conversion of object to string. */
376GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
377{
378 if (tvisstr(o)) {
379 return strV(o);
380 } else if (tvisnumber(o)) {
381 return lj_strfmt_number(L, o);
382 } else if (tvisnil(o)) {
383 return lj_str_newlit(L, "nil");
384 } else if (tvisfalse(o)) {
385 return lj_str_newlit(L, "false");
386 } else if (tvistrue(o)) {
387 return lj_str_newlit(L, "true");
388 } else {
389 char buf[8+2+2+16], *p = buf;
390 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
391 *p++ = ':'; *p++ = ' ';
392 if (tvisfunc(o) && isffunc(funcV(o))) {
393 p = lj_buf_wmem(p, "builtin#", 8);
394 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
395 } else {
396 p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
397 }
398 return lj_str_new(L, buf, (size_t)(p - buf));
399 }
400}
401
402/* -- Internal string formatting ------------------------------------------ */
403
404/*
405** These functions are only used for lua_pushfstring(), lua_pushvfstring()
406** and for internal string formatting (e.g. error messages). Caveat: unlike
407** string.format(), only a limited subset of formats and flags are supported!
408**
409** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
410** - %d %u %o %x with full formatting, 32 bit integers only.
411** - %f and other FP formats are really %.14g.
412** - %s %c %p without formatting.
413*/
414
415/* Push formatted message as a string object to Lua stack. va_list variant. */
416const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
417{
418 SBuf *sb = lj_buf_tmp_(L);
419 FormatState fs;
420 SFormat sf;
421 GCstr *str;
422 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
423 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
424 switch (STRFMT_TYPE(sf)) {
425 case STRFMT_LIT:
426 lj_buf_putmem(sb, fs.str, fs.len);
427 break;
428 case STRFMT_INT:
429 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
430 break;
431 case STRFMT_UINT:
432 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
433 break;
434 case STRFMT_NUM:
435 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
436 break;
437 case STRFMT_STR: {
438 const char *s = va_arg(argp, char *);
439 if (s == NULL) s = "(null)";
440 lj_buf_putmem(sb, s, (MSize)strlen(s));
441 break;
442 }
443 case STRFMT_CHAR:
444 lj_buf_putb(sb, va_arg(argp, int));
445 break;
446 case STRFMT_PTR:
447 lj_strfmt_putptr(sb, va_arg(argp, void *));
448 break;
449 case STRFMT_ERR:
450 default:
451 lj_buf_putb(sb, '?');
452 lj_assertL(0, "bad string format near offset %d", fs.len);
453 break;
454 }
455 }
456 str = lj_buf_str(L, sb);
457 setstrV(L, L->top, str);
458 incr_top(L);
459 return strdata(str);
460}
461
462/* Push formatted message as a string object to Lua stack. Vararg variant. */
463const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
464{
465 const char *msg;
466 va_list argp;
467 va_start(argp, fmt);
468 msg = lj_strfmt_pushvf(L, fmt, argp);
469 va_end(argp);
470 return msg;
471}
472
473