1 | /* |
2 | ** String handling. |
3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
4 | ** |
5 | ** Portions taken verbatim or adapted from the Lua interpreter. |
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h |
7 | */ |
8 | |
9 | #include <stdio.h> |
10 | |
11 | #define lj_str_c |
12 | #define LUA_CORE |
13 | |
14 | #include "lj_obj.h" |
15 | #include "lj_gc.h" |
16 | #include "lj_err.h" |
17 | #include "lj_str.h" |
18 | #include "lj_state.h" |
19 | #include "lj_char.h" |
20 | |
21 | /* -- String interning ---------------------------------------------------- */ |
22 | |
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
25 | { |
26 | MSize i, n = a->len > b->len ? b->len : a->len; |
27 | for (i = 0; i < n; i += 4) { |
28 | /* Note: innocuous access up to end of string + 3. */ |
29 | uint32_t va = *(const uint32_t *)(strdata(a)+i); |
30 | uint32_t vb = *(const uint32_t *)(strdata(b)+i); |
31 | if (va != vb) { |
32 | #if LJ_LE |
33 | va = lj_bswap(va); vb = lj_bswap(vb); |
34 | #endif |
35 | i -= n; |
36 | if ((int32_t)i >= -3) { |
37 | va >>= 32+(i<<3); vb >>= 32+(i<<3); |
38 | if (va == vb) break; |
39 | } |
40 | return va < vb ? -1 : 1; |
41 | } |
42 | } |
43 | return (int32_t)(a->len - b->len); |
44 | } |
45 | |
46 | /* Fast string data comparison. Caveat: unaligned access to 1st string! */ |
47 | static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) |
48 | { |
49 | MSize i = 0; |
50 | lua_assert(len > 0); |
51 | lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); |
52 | do { /* Note: innocuous access up to end of string + 3. */ |
53 | uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); |
54 | if (v) { |
55 | i -= len; |
56 | #if LJ_LE |
57 | return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; |
58 | #else |
59 | return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; |
60 | #endif |
61 | } |
62 | i += 4; |
63 | } while (i < len); |
64 | return 0; |
65 | } |
66 | |
67 | /* Resize the string hash table (grow and shrink). */ |
68 | void lj_str_resize(lua_State *L, MSize newmask) |
69 | { |
70 | global_State *g = G(L); |
71 | GCRef *newhash; |
72 | MSize i; |
73 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) |
74 | return; /* No resizing during GC traversal or if already too big. */ |
75 | newhash = lj_mem_newvec(L, newmask+1, GCRef); |
76 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); |
77 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ |
78 | GCobj *p = gcref(g->strhash[i]); |
79 | while (p) { /* Follow each hash chain and reinsert all strings. */ |
80 | MSize h = gco2str(p)->hash & newmask; |
81 | GCobj *next = gcnext(p); |
82 | /* NOBARRIER: The string table is a GC root. */ |
83 | setgcrefr(p->gch.nextgc, newhash[h]); |
84 | setgcref(newhash[h], p); |
85 | p = next; |
86 | } |
87 | } |
88 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); |
89 | g->strmask = newmask; |
90 | g->strhash = newhash; |
91 | } |
92 | |
93 | /* Intern a string and return string object. */ |
94 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) |
95 | { |
96 | global_State *g; |
97 | GCstr *s; |
98 | GCobj *o; |
99 | MSize len = (MSize)lenx; |
100 | MSize a, b, h = len; |
101 | if (lenx >= LJ_MAX_STR) |
102 | lj_err_msg(L, LJ_ERR_STROV); |
103 | g = G(L); |
104 | /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ |
105 | if (len >= 4) { /* Caveat: unaligned access! */ |
106 | a = lj_getu32(str); |
107 | h ^= lj_getu32(str+len-4); |
108 | b = lj_getu32(str+(len>>1)-2); |
109 | h ^= b; h -= lj_rol(b, 14); |
110 | b += lj_getu32(str+(len>>2)-1); |
111 | } else if (len > 0) { |
112 | a = *(const uint8_t *)str; |
113 | h ^= *(const uint8_t *)(str+len-1); |
114 | b = *(const uint8_t *)(str+(len>>1)); |
115 | h ^= b; h -= lj_rol(b, 14); |
116 | } else { |
117 | return &g->strempty; |
118 | } |
119 | a ^= h; a -= lj_rol(h, 11); |
120 | b ^= a; b -= lj_rol(a, 25); |
121 | h ^= b; h -= lj_rol(b, 16); |
122 | /* Check if the string has already been interned. */ |
123 | o = gcref(g->strhash[h & g->strmask]); |
124 | if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { |
125 | while (o != NULL) { |
126 | GCstr *sx = gco2str(o); |
127 | if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { |
128 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ |
129 | if (isdead(g, o)) flipwhite(o); |
130 | return sx; /* Return existing string. */ |
131 | } |
132 | o = gcnext(o); |
133 | } |
134 | } else { /* Slow path: end of string is too close to a page boundary. */ |
135 | while (o != NULL) { |
136 | GCstr *sx = gco2str(o); |
137 | if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { |
138 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ |
139 | if (isdead(g, o)) flipwhite(o); |
140 | return sx; /* Return existing string. */ |
141 | } |
142 | o = gcnext(o); |
143 | } |
144 | } |
145 | /* Nope, create a new string. */ |
146 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); |
147 | newwhite(g, s); |
148 | s->gct = ~LJ_TSTR; |
149 | s->len = len; |
150 | s->hash = h; |
151 | s->reserved = 0; |
152 | memcpy(strdatawr(s), str, len); |
153 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ |
154 | /* Add it to string hash table. */ |
155 | h &= g->strmask; |
156 | s->nextgc = g->strhash[h]; |
157 | /* NOBARRIER: The string table is a GC root. */ |
158 | setgcref(g->strhash[h], obj2gco(s)); |
159 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ |
160 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ |
161 | return s; /* Return newly interned string. */ |
162 | } |
163 | |
164 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) |
165 | { |
166 | g->strnum--; |
167 | lj_mem_free(g, s, sizestring(s)); |
168 | } |
169 | |
170 | /* -- Type conversions ---------------------------------------------------- */ |
171 | |
172 | /* Print number to buffer. Canonicalizes non-finite values. */ |
173 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) |
174 | { |
175 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ |
176 | lua_Number n = o->n; |
177 | #if __BIONIC__ |
178 | if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } |
179 | #endif |
180 | return (size_t)lua_number2str(s, n); |
181 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { |
182 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; |
183 | } else if ((o->u32.hi & 0x80000000) == 0) { |
184 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; |
185 | } else { |
186 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; |
187 | } |
188 | } |
189 | |
190 | /* Print integer to buffer. Returns pointer to start. */ |
191 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) |
192 | { |
193 | uint32_t u = (uint32_t)(k < 0 ? -k : k); |
194 | p += 1+10; |
195 | do { *--p = (char)('0' + u % 10); } while (u /= 10); |
196 | if (k < 0) *--p = '-'; |
197 | return p; |
198 | } |
199 | |
200 | /* Convert number to string. */ |
201 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) |
202 | { |
203 | char buf[LJ_STR_NUMBUF]; |
204 | size_t len = lj_str_bufnum(buf, (TValue *)np); |
205 | return lj_str_new(L, buf, len); |
206 | } |
207 | |
208 | /* Convert integer to string. */ |
209 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) |
210 | { |
211 | char s[1+10]; |
212 | char *p = lj_str_bufint(s, k); |
213 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); |
214 | } |
215 | |
216 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) |
217 | { |
218 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); |
219 | } |
220 | |
221 | /* -- String formatting --------------------------------------------------- */ |
222 | |
223 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) |
224 | { |
225 | char *p; |
226 | MSize i; |
227 | if (sb->n + len > sb->sz) { |
228 | MSize sz = sb->sz * 2; |
229 | while (sb->n + len > sz) sz = sz * 2; |
230 | lj_str_resizebuf(L, sb, sz); |
231 | } |
232 | p = sb->buf + sb->n; |
233 | sb->n += len; |
234 | for (i = 0; i < len; i++) p[i] = str[i]; |
235 | } |
236 | |
237 | static void addchar(lua_State *L, SBuf *sb, int c) |
238 | { |
239 | if (sb->n + 1 > sb->sz) { |
240 | MSize sz = sb->sz * 2; |
241 | lj_str_resizebuf(L, sb, sz); |
242 | } |
243 | sb->buf[sb->n++] = (char)c; |
244 | } |
245 | |
246 | /* Push formatted message as a string object to Lua stack. va_list variant. */ |
247 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) |
248 | { |
249 | SBuf *sb = &G(L)->tmpbuf; |
250 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); |
251 | lj_str_resetbuf(sb); |
252 | for (;;) { |
253 | const char *e = strchr(fmt, '%'); |
254 | if (e == NULL) break; |
255 | addstr(L, sb, fmt, (MSize)(e-fmt)); |
256 | /* This function only handles %s, %c, %d, %f and %p formats. */ |
257 | switch (e[1]) { |
258 | case 's': { |
259 | const char *s = va_arg(argp, char *); |
260 | if (s == NULL) s = "(null)" ; |
261 | addstr(L, sb, s, (MSize)strlen(s)); |
262 | break; |
263 | } |
264 | case 'c': |
265 | addchar(L, sb, va_arg(argp, int)); |
266 | break; |
267 | case 'd': { |
268 | char buf[LJ_STR_INTBUF]; |
269 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); |
270 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); |
271 | break; |
272 | } |
273 | case 'f': { |
274 | char buf[LJ_STR_NUMBUF]; |
275 | TValue tv; |
276 | MSize len; |
277 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); |
278 | len = (MSize)lj_str_bufnum(buf, &tv); |
279 | addstr(L, sb, buf, len); |
280 | break; |
281 | } |
282 | case 'p': { |
283 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) |
284 | char buf[2+FMTP_CHARS]; |
285 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); |
286 | ptrdiff_t i, lasti = 2+FMTP_CHARS; |
287 | if (p == 0) { |
288 | addstr(L, sb, "NULL" , 4); |
289 | break; |
290 | } |
291 | #if LJ_64 |
292 | /* Shorten output for 64 bit pointers. */ |
293 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); |
294 | #endif |
295 | buf[0] = '0'; |
296 | buf[1] = 'x'; |
297 | for (i = lasti-1; i >= 2; i--, p >>= 4) |
298 | buf[i] = "0123456789abcdef" [(p & 15)]; |
299 | addstr(L, sb, buf, (MSize)lasti); |
300 | break; |
301 | } |
302 | case '%': |
303 | addchar(L, sb, '%'); |
304 | break; |
305 | default: |
306 | addchar(L, sb, '%'); |
307 | addchar(L, sb, e[1]); |
308 | break; |
309 | } |
310 | fmt = e+2; |
311 | } |
312 | addstr(L, sb, fmt, (MSize)strlen(fmt)); |
313 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); |
314 | incr_top(L); |
315 | return strVdata(L->top - 1); |
316 | } |
317 | |
318 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ |
319 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) |
320 | { |
321 | const char *msg; |
322 | va_list argp; |
323 | va_start(argp, fmt); |
324 | msg = lj_str_pushvf(L, fmt, argp); |
325 | va_end(argp); |
326 | return msg; |
327 | } |
328 | |
329 | /* -- Buffer handling ----------------------------------------------------- */ |
330 | |
331 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) |
332 | { |
333 | if (sz > sb->sz) { |
334 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; |
335 | lj_str_resizebuf(L, sb, sz); |
336 | } |
337 | return sb->buf; |
338 | } |
339 | |
340 | |