1 | /* |
2 | ** String formatting. |
3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #include <stdio.h> |
7 | |
8 | #define lj_strfmt_c |
9 | #define LUA_CORE |
10 | |
11 | #include "lj_obj.h" |
12 | #include "lj_buf.h" |
13 | #include "lj_str.h" |
14 | #include "lj_state.h" |
15 | #include "lj_char.h" |
16 | #include "lj_strfmt.h" |
17 | |
18 | /* -- Format parser ------------------------------------------------------- */ |
19 | |
20 | static const uint8_t strfmt_map[('x'-'A')+1] = { |
21 | STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, |
22 | 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, |
23 | 0,0,0,0,0,0, |
24 | STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, |
25 | 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X |
26 | }; |
27 | |
28 | SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) |
29 | { |
30 | const uint8_t *p = fs->p, *e = fs->e; |
31 | fs->str = (const char *)p; |
32 | for (; p < e; p++) { |
33 | if (*p == '%') { /* Escape char? */ |
34 | if (p[1] == '%') { /* '%%'? */ |
35 | fs->p = ++p+1; |
36 | goto retlit; |
37 | } else { |
38 | SFormat sf = 0; |
39 | uint32_t c; |
40 | if (p != (const uint8_t *)fs->str) |
41 | break; |
42 | for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { |
43 | /* Parse flags. */ |
44 | if (*p == '-') sf |= STRFMT_F_LEFT; |
45 | else if (*p == '+') sf |= STRFMT_F_PLUS; |
46 | else if (*p == '0') sf |= STRFMT_F_ZERO; |
47 | else if (*p == ' ') sf |= STRFMT_F_SPACE; |
48 | else if (*p == '#') sf |= STRFMT_F_ALT; |
49 | else break; |
50 | } |
51 | if ((uint32_t)*p - '0' < 10) { /* Parse width. */ |
52 | uint32_t width = (uint32_t)*p++ - '0'; |
53 | if ((uint32_t)*p - '0' < 10) |
54 | width = (uint32_t)*p++ - '0' + width*10; |
55 | sf |= (width << STRFMT_SH_WIDTH); |
56 | } |
57 | if (*p == '.') { /* Parse precision. */ |
58 | uint32_t prec = 0; |
59 | p++; |
60 | if ((uint32_t)*p - '0' < 10) { |
61 | prec = (uint32_t)*p++ - '0'; |
62 | if ((uint32_t)*p - '0' < 10) |
63 | prec = (uint32_t)*p++ - '0' + prec*10; |
64 | } |
65 | sf |= ((prec+1) << STRFMT_SH_PREC); |
66 | } |
67 | /* Parse conversion. */ |
68 | c = (uint32_t)*p - 'A'; |
69 | if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { |
70 | uint32_t sx = strfmt_map[c]; |
71 | if (sx) { |
72 | fs->p = p+1; |
73 | return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER)); |
74 | } |
75 | } |
76 | /* Return error location. */ |
77 | if (*p >= 32) p++; |
78 | fs->len = (MSize)(p - (const uint8_t *)fs->str); |
79 | fs->p = fs->e; |
80 | return STRFMT_ERR; |
81 | } |
82 | } |
83 | } |
84 | fs->p = p; |
85 | retlit: |
86 | fs->len = (MSize)(p - (const uint8_t *)fs->str); |
87 | return fs->len ? STRFMT_LIT : STRFMT_EOF; |
88 | } |
89 | |
90 | /* -- Raw conversions ----------------------------------------------------- */ |
91 | |
/*
** Emit the leading decimal digit of x for the scale sc and leave the
** remainder in x. The division by sc is replaced by a multiplication with
** the rounded-up reciprocal ((1<<sh)+sc-1)/sc, followed by a shift by sh.
** Writes through the local output pointer p of the enclosing function.
*/
#define WINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Write integer to buffer.
**
** Writes the decimal representation of k (with a leading '-' for negative
** values) to p. No terminating NUL is written. Returns a pointer past the
** last char written. The caller must provide enough space; the callers in
** this file reserve STRFMT_MAXBUF_INT chars.
**
** The value is split into groups of four decimal digits (u, v, w). The
** gotos jump into the middle of the digit emitters to skip leading zeros.
*/
char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
{
  uint32_t u = (uint32_t)k;
  /*
  ** Negate in unsigned arithmetic. A signed -k overflows for INT32_MIN,
  ** which is undefined behavior. ~u+1u is well-defined for every input.
  */
  if (k < 0) { u = ~u+1u; *p++ = '-'; }
  if (u < 10000) {
    if (u < 10) goto dig1;
    if (u < 100) goto dig2;
    if (u < 1000) goto dig3;
  } else {
    uint32_t v = u / 10000; u -= v * 10000;  /* u = lowest four digits. */
    if (v < 10000) {
      if (v < 10) goto dig5;
      if (v < 100) goto dig6;
      if (v < 1000) goto dig7;
    } else {
      uint32_t w = v / 10000; v -= w * 10000;  /* w = topmost 1-2 digits. */
      if (w >= 10) WINT_R(w, 10, 10)
      *p++ = (char)('0'+w);
    }
    WINT_R(v, 23, 1000)
    dig7: WINT_R(v, 12, 100)
    dig6: WINT_R(v, 10, 10)
    dig5: *p++ = (char)('0'+v);
  }
  WINT_R(u, 23, 1000)
  dig3: WINT_R(u, 12, 100)
  dig2: WINT_R(u, 10, 10)
  dig1: *p++ = (char)('0'+u);
  return p;
}
#undef WINT_R
127 | |
128 | /* Write pointer to buffer. */ |
129 | char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) |
130 | { |
131 | ptrdiff_t x = (ptrdiff_t)v; |
132 | MSize i, n = STRFMT_MAXBUF_PTR; |
133 | if (x == 0) { |
134 | *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; |
135 | return p; |
136 | } |
137 | #if LJ_64 |
138 | /* Shorten output for 64 bit pointers. */ |
139 | n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); |
140 | #endif |
141 | p[0] = '0'; |
142 | p[1] = 'x'; |
143 | for (i = n-1; i >= 2; i--, x >>= 4) |
144 | p[i] = "0123456789abcdef" [(x & 15)]; |
145 | return p+n; |
146 | } |
147 | |
/*
** Write ULEB128 to buffer.
**
** Emits v in groups of 7 bits, lowest group first. Every byte except the
** last one has bit 7 set. Returns a pointer past the last byte written.
*/
char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
{
  while (v >= 0x80) {
    /* Low 7 bits plus the continuation bit. */
    *p++ = (char)((v & 0x7f) | 0x80);
    v >>= 7;
  }
  *p++ = (char)v;  /* Final group, continuation bit clear. */
  return p;
}
156 | |
157 | /* Return string or write number to tmp buffer and return pointer to start. */ |
158 | const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) |
159 | { |
160 | SBuf *sb; |
161 | if (tvisstr(o)) { |
162 | *lenp = strV(o)->len; |
163 | return strVdata(o); |
164 | } else if (tvisint(o)) { |
165 | sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); |
166 | } else if (tvisnum(o)) { |
167 | sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); |
168 | } else { |
169 | return NULL; |
170 | } |
171 | *lenp = sbuflen(sb); |
172 | return sbufB(sb); |
173 | } |
174 | |
175 | /* -- Unformatted conversions to buffer ----------------------------------- */ |
176 | |
177 | /* Add integer to buffer. */ |
178 | SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) |
179 | { |
180 | setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k)); |
181 | return sb; |
182 | } |
183 | |
#if LJ_HASJIT
/*
** Add number to buffer.
** The number held in o is appended with the STRFMT_G14 format.
*/
SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
{
  lua_Number n = o->n;
  return lj_strfmt_putfnum(sb, STRFMT_G14, n);
}
#endif
191 | |
192 | SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) |
193 | { |
194 | setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v)); |
195 | return sb; |
196 | } |
197 | |
198 | /* Add quoted string to buffer. */ |
199 | SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) |
200 | { |
201 | const char *s = strdata(str); |
202 | MSize len = str->len; |
203 | lj_buf_putb(sb, '"'); |
204 | while (len--) { |
205 | uint32_t c = (uint32_t)(uint8_t)*s++; |
206 | char *p = lj_buf_more(sb, 4); |
207 | if (c == '"' || c == '\\' || c == '\n') { |
208 | *p++ = '\\'; |
209 | } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ |
210 | uint32_t d; |
211 | *p++ = '\\'; |
212 | if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { |
213 | *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; |
214 | goto tens; |
215 | } else if (c >= 10) { |
216 | tens: |
217 | d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); |
218 | } |
219 | c += '0'; |
220 | } |
221 | *p++ = (char)c; |
222 | setsbufP(sb, p); |
223 | } |
224 | lj_buf_putb(sb, '"'); |
225 | return sb; |
226 | } |
227 | |
228 | /* -- Formatted conversions to buffer ------------------------------------- */ |
229 | |
230 | /* Add formatted char to buffer. */ |
231 | SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) |
232 | { |
233 | MSize width = STRFMT_WIDTH(sf); |
234 | char *p = lj_buf_more(sb, width > 1 ? width : 1); |
235 | if ((sf & STRFMT_F_LEFT)) *p++ = (char)c; |
236 | while (width-- > 1) *p++ = ' '; |
237 | if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c; |
238 | setsbufP(sb, p); |
239 | return sb; |
240 | } |
241 | |
242 | /* Add formatted string to buffer. */ |
243 | SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) |
244 | { |
245 | MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf); |
246 | MSize width = STRFMT_WIDTH(sf); |
247 | char *p = lj_buf_more(sb, width > len ? width : len); |
248 | if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); |
249 | while (width-- > len) *p++ = ' '; |
250 | if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len); |
251 | setsbufP(sb, p); |
252 | return sb; |
253 | } |
254 | |
255 | /* Add formatted signed/unsigned integer to buffer. */ |
256 | SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) |
257 | { |
258 | char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p; |
259 | #ifdef LUA_USE_ASSERT |
260 | char *ps; |
261 | #endif |
262 | MSize prefix = 0, len, prec, pprec, width, need; |
263 | |
264 | /* Figure out signed prefixes. */ |
265 | if (STRFMT_TYPE(sf) == STRFMT_INT) { |
266 | if ((int64_t)k < 0) { |
267 | k = (uint64_t)-(int64_t)k; |
268 | prefix = 256 + '-'; |
269 | } else if ((sf & STRFMT_F_PLUS)) { |
270 | prefix = 256 + '+'; |
271 | } else if ((sf & STRFMT_F_SPACE)) { |
272 | prefix = 256 + ' '; |
273 | } |
274 | } |
275 | |
276 | /* Convert number and store to fixed-size buffer in reverse order. */ |
277 | prec = STRFMT_PREC(sf); |
278 | if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; |
279 | if (k == 0) { /* Special-case zero argument. */ |
280 | if (prec != 0 || |
281 | (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) |
282 | *--q = '0'; |
283 | } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ |
284 | uint32_t k2; |
285 | while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } |
286 | k2 = (uint32_t)k; |
287 | do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); |
288 | } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ |
289 | const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" : |
290 | "0123456789abcdef" ; |
291 | do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); |
292 | if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); |
293 | } else { /* Octal. */ |
294 | do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); |
295 | if ((sf & STRFMT_F_ALT)) *--q = '0'; |
296 | } |
297 | |
298 | /* Calculate sizes. */ |
299 | len = (MSize)(buf + sizeof(buf) - q); |
300 | if ((int32_t)len >= (int32_t)prec) prec = len; |
301 | width = STRFMT_WIDTH(sf); |
302 | pprec = prec + (prefix >> 8); |
303 | need = width > pprec ? width : pprec; |
304 | p = lj_buf_more(sb, need); |
305 | #ifdef LUA_USE_ASSERT |
306 | ps = p; |
307 | #endif |
308 | |
309 | /* Format number with leading/trailing whitespace and zeros. */ |
310 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) |
311 | while (width-- > pprec) *p++ = ' '; |
312 | if (prefix) { |
313 | if ((char)prefix >= 'X') *p++ = '0'; |
314 | *p++ = (char)prefix; |
315 | } |
316 | if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) |
317 | while (width-- > pprec) *p++ = '0'; |
318 | while (prec-- > len) *p++ = '0'; |
319 | while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */ |
320 | if ((sf & STRFMT_F_LEFT)) |
321 | while (width-- > pprec) *p++ = ' '; |
322 | |
323 | lj_assertX(need == (MSize)(p - ps), "miscalculated format size" ); |
324 | setsbufP(sb, p); |
325 | return sb; |
326 | } |
327 | |
328 | /* Add number formatted as signed integer to buffer. */ |
329 | SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) |
330 | { |
331 | int64_t k = (int64_t)n; |
332 | if (checki32(k) && sf == STRFMT_INT) |
333 | return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ |
334 | else |
335 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); |
336 | } |
337 | |
338 | /* Add number formatted as unsigned integer to buffer. */ |
339 | SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) |
340 | { |
341 | int64_t k; |
342 | if (n >= 9223372036854775808.0) |
343 | k = (int64_t)(n - 18446744073709551616.0); |
344 | else |
345 | k = (int64_t)n; |
346 | return lj_strfmt_putfxint(sb, sf, (uint64_t)k); |
347 | } |
348 | |
349 | /* -- Conversions to strings ---------------------------------------------- */ |
350 | |
351 | /* Convert integer to string. */ |
352 | GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) |
353 | { |
354 | char buf[STRFMT_MAXBUF_INT]; |
355 | MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); |
356 | return lj_str_new(L, buf, len); |
357 | } |
358 | |
359 | /* Convert integer or number to string. */ |
360 | GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) |
361 | { |
362 | return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); |
363 | } |
364 | |
#if LJ_HASJIT
/*
** Convert char value to string.
** Returns a string object of length one, holding c converted to a char.
*/
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = (char)c;
  return lj_str_new(L, &ch, 1);
}
#endif
374 | |
375 | /* Raw conversion of object to string. */ |
376 | GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) |
377 | { |
378 | if (tvisstr(o)) { |
379 | return strV(o); |
380 | } else if (tvisnumber(o)) { |
381 | return lj_strfmt_number(L, o); |
382 | } else if (tvisnil(o)) { |
383 | return lj_str_newlit(L, "nil" ); |
384 | } else if (tvisfalse(o)) { |
385 | return lj_str_newlit(L, "false" ); |
386 | } else if (tvistrue(o)) { |
387 | return lj_str_newlit(L, "true" ); |
388 | } else { |
389 | char buf[8+2+2+16], *p = buf; |
390 | p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o))); |
391 | *p++ = ':'; *p++ = ' '; |
392 | if (tvisfunc(o) && isffunc(funcV(o))) { |
393 | p = lj_buf_wmem(p, "builtin#" , 8); |
394 | p = lj_strfmt_wint(p, funcV(o)->c.ffid); |
395 | } else { |
396 | p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o)); |
397 | } |
398 | return lj_str_new(L, buf, (size_t)(p - buf)); |
399 | } |
400 | } |
401 | |
402 | /* -- Internal string formatting ------------------------------------------ */ |
403 | |
404 | /* |
405 | ** These functions are only used for lua_pushfstring(), lua_pushvfstring() |
406 | ** and for internal string formatting (e.g. error messages). Caveat: unlike |
407 | ** string.format(), only a limited subset of formats and flags are supported! |
408 | ** |
409 | ** LuaJIT has support for a couple more formats than Lua 5.1/5.2: |
410 | ** - %d %u %o %x with full formatting, 32 bit integers only. |
411 | ** - %f and other FP formats are really %.14g. |
412 | ** - %s %c %p without formatting. |
413 | */ |
414 | |
415 | /* Push formatted message as a string object to Lua stack. va_list variant. */ |
416 | const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) |
417 | { |
418 | SBuf *sb = lj_buf_tmp_(L); |
419 | FormatState fs; |
420 | SFormat sf; |
421 | GCstr *str; |
422 | lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt)); |
423 | while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { |
424 | switch (STRFMT_TYPE(sf)) { |
425 | case STRFMT_LIT: |
426 | lj_buf_putmem(sb, fs.str, fs.len); |
427 | break; |
428 | case STRFMT_INT: |
429 | lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t)); |
430 | break; |
431 | case STRFMT_UINT: |
432 | lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); |
433 | break; |
434 | case STRFMT_NUM: |
435 | lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); |
436 | break; |
437 | case STRFMT_STR: { |
438 | const char *s = va_arg(argp, char *); |
439 | if (s == NULL) s = "(null)" ; |
440 | lj_buf_putmem(sb, s, (MSize)strlen(s)); |
441 | break; |
442 | } |
443 | case STRFMT_CHAR: |
444 | lj_buf_putb(sb, va_arg(argp, int)); |
445 | break; |
446 | case STRFMT_PTR: |
447 | lj_strfmt_putptr(sb, va_arg(argp, void *)); |
448 | break; |
449 | case STRFMT_ERR: |
450 | default: |
451 | lj_buf_putb(sb, '?'); |
452 | lj_assertL(0, "bad string format near offset %d" , fs.len); |
453 | break; |
454 | } |
455 | } |
456 | str = lj_buf_str(L, sb); |
457 | setstrV(L, L->top, str); |
458 | incr_top(L); |
459 | return strdata(str); |
460 | } |
461 | |
462 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ |
463 | const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) |
464 | { |
465 | const char *msg; |
466 | va_list argp; |
467 | va_start(argp, fmt); |
468 | msg = lj_strfmt_pushvf(L, fmt, argp); |
469 | va_end(argp); |
470 | return msg; |
471 | } |
472 | |
473 | |