| 1 | /* |
| 2 | ** String handling. |
| 3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
| 4 | ** |
| 5 | ** Portions taken verbatim or adapted from the Lua interpreter. |
| 6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h |
| 7 | */ |
| 8 | |
| 9 | #include <stdio.h> |
| 10 | |
| 11 | #define lj_str_c |
| 12 | #define LUA_CORE |
| 13 | |
| 14 | #include "lj_obj.h" |
| 15 | #include "lj_gc.h" |
| 16 | #include "lj_err.h" |
| 17 | #include "lj_str.h" |
| 18 | #include "lj_state.h" |
| 19 | #include "lj_char.h" |
| 20 | |
| 21 | /* -- String interning ---------------------------------------------------- */ |
| 22 | |
| 23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
| 24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
| 25 | { |
| 26 | MSize i, n = a->len > b->len ? b->len : a->len; |
| 27 | for (i = 0; i < n; i += 4) { |
| 28 | /* Note: innocuous access up to end of string + 3. */ |
| 29 | uint32_t va = *(const uint32_t *)(strdata(a)+i); |
| 30 | uint32_t vb = *(const uint32_t *)(strdata(b)+i); |
| 31 | if (va != vb) { |
| 32 | #if LJ_LE |
| 33 | va = lj_bswap(va); vb = lj_bswap(vb); |
| 34 | #endif |
| 35 | i -= n; |
| 36 | if ((int32_t)i >= -3) { |
| 37 | va >>= 32+(i<<3); vb >>= 32+(i<<3); |
| 38 | if (va == vb) break; |
| 39 | } |
| 40 | return va < vb ? -1 : 1; |
| 41 | } |
| 42 | } |
| 43 | return (int32_t)(a->len - b->len); |
| 44 | } |
| 45 | |
| 46 | /* Fast string data comparison. Caveat: unaligned access to 1st string! */ |
| 47 | static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) |
| 48 | { |
| 49 | MSize i = 0; |
| 50 | lua_assert(len > 0); |
| 51 | lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); |
| 52 | do { /* Note: innocuous access up to end of string + 3. */ |
| 53 | uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); |
| 54 | if (v) { |
| 55 | i -= len; |
| 56 | #if LJ_LE |
| 57 | return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; |
| 58 | #else |
| 59 | return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; |
| 60 | #endif |
| 61 | } |
| 62 | i += 4; |
| 63 | } while (i < len); |
| 64 | return 0; |
| 65 | } |
| 66 | |
| 67 | /* Resize the string hash table (grow and shrink). */ |
| 68 | void lj_str_resize(lua_State *L, MSize newmask) |
| 69 | { |
| 70 | global_State *g = G(L); |
| 71 | GCRef *newhash; |
| 72 | MSize i; |
| 73 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) |
| 74 | return; /* No resizing during GC traversal or if already too big. */ |
| 75 | newhash = lj_mem_newvec(L, newmask+1, GCRef); |
| 76 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); |
| 77 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ |
| 78 | GCobj *p = gcref(g->strhash[i]); |
| 79 | while (p) { /* Follow each hash chain and reinsert all strings. */ |
| 80 | MSize h = gco2str(p)->hash & newmask; |
| 81 | GCobj *next = gcnext(p); |
| 82 | /* NOBARRIER: The string table is a GC root. */ |
| 83 | setgcrefr(p->gch.nextgc, newhash[h]); |
| 84 | setgcref(newhash[h], p); |
| 85 | p = next; |
| 86 | } |
| 87 | } |
| 88 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); |
| 89 | g->strmask = newmask; |
| 90 | g->strhash = newhash; |
| 91 | } |
| 92 | |
| 93 | /* Intern a string and return string object. */ |
| 94 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) |
| 95 | { |
| 96 | global_State *g; |
| 97 | GCstr *s; |
| 98 | GCobj *o; |
| 99 | MSize len = (MSize)lenx; |
| 100 | MSize a, b, h = len; |
| 101 | if (lenx >= LJ_MAX_STR) |
| 102 | lj_err_msg(L, LJ_ERR_STROV); |
| 103 | g = G(L); |
| 104 | /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ |
| 105 | if (len >= 4) { /* Caveat: unaligned access! */ |
| 106 | a = lj_getu32(str); |
| 107 | h ^= lj_getu32(str+len-4); |
| 108 | b = lj_getu32(str+(len>>1)-2); |
| 109 | h ^= b; h -= lj_rol(b, 14); |
| 110 | b += lj_getu32(str+(len>>2)-1); |
| 111 | } else if (len > 0) { |
| 112 | a = *(const uint8_t *)str; |
| 113 | h ^= *(const uint8_t *)(str+len-1); |
| 114 | b = *(const uint8_t *)(str+(len>>1)); |
| 115 | h ^= b; h -= lj_rol(b, 14); |
| 116 | } else { |
| 117 | return &g->strempty; |
| 118 | } |
| 119 | a ^= h; a -= lj_rol(h, 11); |
| 120 | b ^= a; b -= lj_rol(a, 25); |
| 121 | h ^= b; h -= lj_rol(b, 16); |
| 122 | /* Check if the string has already been interned. */ |
| 123 | o = gcref(g->strhash[h & g->strmask]); |
| 124 | if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { |
| 125 | while (o != NULL) { |
| 126 | GCstr *sx = gco2str(o); |
| 127 | if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { |
| 128 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ |
| 129 | if (isdead(g, o)) flipwhite(o); |
| 130 | return sx; /* Return existing string. */ |
| 131 | } |
| 132 | o = gcnext(o); |
| 133 | } |
| 134 | } else { /* Slow path: end of string is too close to a page boundary. */ |
| 135 | while (o != NULL) { |
| 136 | GCstr *sx = gco2str(o); |
| 137 | if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { |
| 138 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ |
| 139 | if (isdead(g, o)) flipwhite(o); |
| 140 | return sx; /* Return existing string. */ |
| 141 | } |
| 142 | o = gcnext(o); |
| 143 | } |
| 144 | } |
| 145 | /* Nope, create a new string. */ |
| 146 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); |
| 147 | newwhite(g, s); |
| 148 | s->gct = ~LJ_TSTR; |
| 149 | s->len = len; |
| 150 | s->hash = h; |
| 151 | s->reserved = 0; |
| 152 | memcpy(strdatawr(s), str, len); |
| 153 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ |
| 154 | /* Add it to string hash table. */ |
| 155 | h &= g->strmask; |
| 156 | s->nextgc = g->strhash[h]; |
| 157 | /* NOBARRIER: The string table is a GC root. */ |
| 158 | setgcref(g->strhash[h], obj2gco(s)); |
| 159 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ |
| 160 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ |
| 161 | return s; /* Return newly interned string. */ |
| 162 | } |
| 163 | |
| 164 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) |
| 165 | { |
| 166 | g->strnum--; |
| 167 | lj_mem_free(g, s, sizestring(s)); |
| 168 | } |
| 169 | |
| 170 | /* -- Type conversions ---------------------------------------------------- */ |
| 171 | |
| 172 | /* Print number to buffer. Canonicalizes non-finite values. */ |
| 173 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) |
| 174 | { |
| 175 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ |
| 176 | lua_Number n = o->n; |
| 177 | #if __BIONIC__ |
| 178 | if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } |
| 179 | #endif |
| 180 | return (size_t)lua_number2str(s, n); |
| 181 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { |
| 182 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; |
| 183 | } else if ((o->u32.hi & 0x80000000) == 0) { |
| 184 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; |
| 185 | } else { |
| 186 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | /* Print integer to buffer. Returns pointer to start. */ |
| 191 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) |
| 192 | { |
| 193 | uint32_t u = (uint32_t)(k < 0 ? -k : k); |
| 194 | p += 1+10; |
| 195 | do { *--p = (char)('0' + u % 10); } while (u /= 10); |
| 196 | if (k < 0) *--p = '-'; |
| 197 | return p; |
| 198 | } |
| 199 | |
| 200 | /* Convert number to string. */ |
| 201 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) |
| 202 | { |
| 203 | char buf[LJ_STR_NUMBUF]; |
| 204 | size_t len = lj_str_bufnum(buf, (TValue *)np); |
| 205 | return lj_str_new(L, buf, len); |
| 206 | } |
| 207 | |
| 208 | /* Convert integer to string. */ |
| 209 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) |
| 210 | { |
| 211 | char s[1+10]; |
| 212 | char *p = lj_str_bufint(s, k); |
| 213 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); |
| 214 | } |
| 215 | |
| 216 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) |
| 217 | { |
| 218 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); |
| 219 | } |
| 220 | |
| 221 | /* -- String formatting --------------------------------------------------- */ |
| 222 | |
| 223 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) |
| 224 | { |
| 225 | char *p; |
| 226 | MSize i; |
| 227 | if (sb->n + len > sb->sz) { |
| 228 | MSize sz = sb->sz * 2; |
| 229 | while (sb->n + len > sz) sz = sz * 2; |
| 230 | lj_str_resizebuf(L, sb, sz); |
| 231 | } |
| 232 | p = sb->buf + sb->n; |
| 233 | sb->n += len; |
| 234 | for (i = 0; i < len; i++) p[i] = str[i]; |
| 235 | } |
| 236 | |
| 237 | static void addchar(lua_State *L, SBuf *sb, int c) |
| 238 | { |
| 239 | if (sb->n + 1 > sb->sz) { |
| 240 | MSize sz = sb->sz * 2; |
| 241 | lj_str_resizebuf(L, sb, sz); |
| 242 | } |
| 243 | sb->buf[sb->n++] = (char)c; |
| 244 | } |
| 245 | |
| 246 | /* Push formatted message as a string object to Lua stack. va_list variant. */ |
| 247 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) |
| 248 | { |
| 249 | SBuf *sb = &G(L)->tmpbuf; |
| 250 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); |
| 251 | lj_str_resetbuf(sb); |
| 252 | for (;;) { |
| 253 | const char *e = strchr(fmt, '%'); |
| 254 | if (e == NULL) break; |
| 255 | addstr(L, sb, fmt, (MSize)(e-fmt)); |
| 256 | /* This function only handles %s, %c, %d, %f and %p formats. */ |
| 257 | switch (e[1]) { |
| 258 | case 's': { |
| 259 | const char *s = va_arg(argp, char *); |
| 260 | if (s == NULL) s = "(null)" ; |
| 261 | addstr(L, sb, s, (MSize)strlen(s)); |
| 262 | break; |
| 263 | } |
| 264 | case 'c': |
| 265 | addchar(L, sb, va_arg(argp, int)); |
| 266 | break; |
| 267 | case 'd': { |
| 268 | char buf[LJ_STR_INTBUF]; |
| 269 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); |
| 270 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); |
| 271 | break; |
| 272 | } |
| 273 | case 'f': { |
| 274 | char buf[LJ_STR_NUMBUF]; |
| 275 | TValue tv; |
| 276 | MSize len; |
| 277 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); |
| 278 | len = (MSize)lj_str_bufnum(buf, &tv); |
| 279 | addstr(L, sb, buf, len); |
| 280 | break; |
| 281 | } |
| 282 | case 'p': { |
| 283 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) |
| 284 | char buf[2+FMTP_CHARS]; |
| 285 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); |
| 286 | ptrdiff_t i, lasti = 2+FMTP_CHARS; |
| 287 | if (p == 0) { |
| 288 | addstr(L, sb, "NULL" , 4); |
| 289 | break; |
| 290 | } |
| 291 | #if LJ_64 |
| 292 | /* Shorten output for 64 bit pointers. */ |
| 293 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); |
| 294 | #endif |
| 295 | buf[0] = '0'; |
| 296 | buf[1] = 'x'; |
| 297 | for (i = lasti-1; i >= 2; i--, p >>= 4) |
| 298 | buf[i] = "0123456789abcdef" [(p & 15)]; |
| 299 | addstr(L, sb, buf, (MSize)lasti); |
| 300 | break; |
| 301 | } |
| 302 | case '%': |
| 303 | addchar(L, sb, '%'); |
| 304 | break; |
| 305 | default: |
| 306 | addchar(L, sb, '%'); |
| 307 | addchar(L, sb, e[1]); |
| 308 | break; |
| 309 | } |
| 310 | fmt = e+2; |
| 311 | } |
| 312 | addstr(L, sb, fmt, (MSize)strlen(fmt)); |
| 313 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); |
| 314 | incr_top(L); |
| 315 | return strVdata(L->top - 1); |
| 316 | } |
| 317 | |
| 318 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ |
| 319 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) |
| 320 | { |
| 321 | const char *msg; |
| 322 | va_list argp; |
| 323 | va_start(argp, fmt); |
| 324 | msg = lj_str_pushvf(L, fmt, argp); |
| 325 | va_end(argp); |
| 326 | return msg; |
| 327 | } |
| 328 | |
| 329 | /* -- Buffer handling ----------------------------------------------------- */ |
| 330 | |
| 331 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) |
| 332 | { |
| 333 | if (sz > sb->sz) { |
| 334 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; |
| 335 | lj_str_resizebuf(L, sb, sz); |
| 336 | } |
| 337 | return sb->buf; |
| 338 | } |
| 339 | |
| 340 | |