| 1 | /* |
| 2 | ** Object de/serialization. |
| 3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
| 4 | */ |
| 5 | |
| 6 | #define lj_serialize_c |
| 7 | #define LUA_CORE |
| 8 | |
| 9 | #include "lj_obj.h" |
| 10 | |
| 11 | #if LJ_HASBUFFER |
| 12 | #include "lj_err.h" |
| 13 | #include "lj_buf.h" |
| 14 | #include "lj_str.h" |
| 15 | #include "lj_tab.h" |
| 16 | #include "lj_udata.h" |
| 17 | #if LJ_HASFFI |
| 18 | #include "lj_ctype.h" |
| 19 | #include "lj_cdata.h" |
| 20 | #endif |
| 21 | #include "lj_serialize.h" |
| 22 | |
/*
** Tags for internal serialization format.
**
** The encoder forms the table tags by adding layout flags to SER_TAG_TAB:
**   +1  hash part present
**   +2  array part present, starting at index 0
**   +4  array part present, starting at index 1 (slot 0 is nil and skipped)
** This yields the six unnamed variants 0x08..0x0d. Tags named SER_TAG_0x..
** are not produced by the encoder and are rejected by the decoder.
*/
enum {
  SER_TAG_NIL       = 0x00,
  SER_TAG_FALSE     = 0x01,
  SER_TAG_TRUE      = 0x02,
  SER_TAG_NULL      = 0x03,
  SER_TAG_LIGHTUD32 = 0x04,
  SER_TAG_LIGHTUD64 = 0x05,
  SER_TAG_INT       = 0x06,
  SER_TAG_NUM       = 0x07,
  SER_TAG_TAB       = 0x08,  /* 0x08..0x0d: table variants, see above. */
  SER_TAG_0x0e      = SER_TAG_TAB + 6,  /* First tag after the table range. */
  SER_TAG_0x0f      = 0x0f,
  SER_TAG_INT64     = 0x10,
  SER_TAG_UINT64    = 0x11,
  SER_TAG_COMPLEX   = 0x12,
  SER_TAG_0x13      = 0x13,
  SER_TAG_0x14      = 0x14,
  SER_TAG_0x15      = 0x15,
  SER_TAG_0x16      = 0x16,
  SER_TAG_0x17      = 0x17,
  SER_TAG_0x18      = 0x18,
  SER_TAG_0x19      = 0x19,
  SER_TAG_0x1a      = 0x1a,
  SER_TAG_0x1b      = 0x1b,
  SER_TAG_0x1c      = 0x1c,
  SER_TAG_0x1d      = 0x1d,
  SER_TAG_0x1e      = 0x1e,
  SER_TAG_0x1f      = 0x1f,
  SER_TAG_STR       = 0x20   /* 0x20 + str->len */
};
| 54 | LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); |
| 55 | |
| 56 | /* -- Helper functions ---------------------------------------------------- */ |
| 57 | |
| 58 | static LJ_AINLINE char *serialize_more(char *w, StrBuf *sbuf, MSize sz) |
| 59 | { |
| 60 | if (LJ_UNLIKELY(sz > (MSize)(sbufE(sbuf->sb) - w))) { |
| 61 | setsbufP(sbuf->sb, w); |
| 62 | w = lj_buf_more2(sbuf->sb, sz); |
| 63 | } |
| 64 | return w; |
| 65 | } |
| 66 | |
| 67 | /* Write U124 to buffer. */ |
| 68 | static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) |
| 69 | { |
| 70 | if (v < 0x1fe0) { |
| 71 | v -= 0xe0; |
| 72 | *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; |
| 73 | } else { |
| 74 | *w++ = (char)0xff; |
| 75 | #if LJ_BE |
| 76 | v = lj_bswap(v); |
| 77 | #endif |
| 78 | memcpy(w, &v, 4); w += 4; |
| 79 | } |
| 80 | return w; |
| 81 | } |
| 82 | |
| 83 | static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) |
| 84 | { |
| 85 | if (LJ_LIKELY(v < 0xe0)) { |
| 86 | *w++ = (char)v; |
| 87 | return w; |
| 88 | } else { |
| 89 | return serialize_wu124_(w, v); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | static LJ_NOINLINE char *serialize_ru124_(char *r, char *e, uint32_t *pv) |
| 94 | { |
| 95 | uint32_t v = *pv; |
| 96 | if (v != 0xff) { |
| 97 | if (r >= e) return NULL; |
| 98 | v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; |
| 99 | } else { |
| 100 | if (r + 4 > e) return NULL; |
| 101 | v = lj_getu32(r); r += 4; |
| 102 | #if LJ_BE |
| 103 | v = lj_bswap(v); |
| 104 | #endif |
| 105 | } |
| 106 | *pv = v; |
| 107 | return r; |
| 108 | } |
| 109 | |
| 110 | static LJ_AINLINE char *serialize_ru124(char *r, char *e, uint32_t *pv) |
| 111 | { |
| 112 | if (LJ_LIKELY(r < e)) { |
| 113 | uint32_t v = *(uint8_t *)r; r++; |
| 114 | *pv = v; |
| 115 | if (LJ_UNLIKELY(v >= 0xe0)) { |
| 116 | r = serialize_ru124_(r, e, pv); |
| 117 | } |
| 118 | return r; |
| 119 | } |
| 120 | return NULL; |
| 121 | } |
| 122 | |
| 123 | /* -- Internal serializer ------------------------------------------------- */ |
| 124 | |
| 125 | /* Put serialized object into buffer. */ |
| 126 | static char *serialize_put(char *w, StrBuf *sbuf, cTValue *o) |
| 127 | { |
| 128 | if (LJ_LIKELY(tvisstr(o))) { |
| 129 | const GCstr *str = strV(o); |
| 130 | MSize len = str->len; |
| 131 | w = serialize_more(w, sbuf, 5+len); |
| 132 | w = serialize_wu124(w, SER_TAG_STR + len); |
| 133 | w = lj_buf_wmem(w, strdata(str), len); |
| 134 | } else if (tvisint(o)) { |
| 135 | uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); |
| 136 | w = serialize_more(w, sbuf, 1+4); |
| 137 | *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; |
| 138 | } else if (tvisnum(o)) { |
| 139 | uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64; |
| 140 | w = serialize_more(w, sbuf, 1+sizeof(lua_Number)); |
| 141 | *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; |
| 142 | } else if (tvispri(o)) { |
| 143 | w = serialize_more(w, sbuf, 1); |
| 144 | *w++ = (char)(SER_TAG_NIL + ~itype(o)); |
| 145 | } else if (tvistab(o)) { |
| 146 | const GCtab *t = tabV(o); |
| 147 | uint32_t narray = 0, nhash = 0, one = 2; |
| 148 | if (sbuf->depth <= 0) lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DEPTH); |
| 149 | sbuf->depth--; |
| 150 | if (t->asize > 0) { /* Determine max. length of array part. */ |
| 151 | ptrdiff_t i; |
| 152 | TValue *array = tvref(t->array); |
| 153 | for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) |
| 154 | if (!tvisnil(&array[i])) |
| 155 | break; |
| 156 | narray = (uint32_t)(i+1); |
| 157 | if (narray && tvisnil(&array[0])) one = 4; |
| 158 | } |
| 159 | if (t->hmask > 0) { /* Count number of used hash slots. */ |
| 160 | uint32_t i, hmask = t->hmask; |
| 161 | Node *node = noderef(t->node); |
| 162 | for (i = 0; i <= hmask; i++) |
| 163 | nhash += !tvisnil(&node[i].val); |
| 164 | } |
| 165 | /* Write number of array slots and hash slots. */ |
| 166 | w = serialize_more(w, sbuf, 1+2*5); |
| 167 | *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); |
| 168 | if (narray) w = serialize_wu124(w, narray); |
| 169 | if (nhash) w = serialize_wu124(w, nhash); |
| 170 | if (narray) { /* Write array entries. */ |
| 171 | cTValue *oa = tvref(t->array) + (one >> 2); |
| 172 | cTValue *oe = tvref(t->array) + narray; |
| 173 | while (oa < oe) w = serialize_put(w, sbuf, oa++); |
| 174 | } |
| 175 | if (nhash) { /* Write hash entries. */ |
| 176 | const Node *node = noderef(t->node) + t->hmask; |
| 177 | for (;; node--) |
| 178 | if (!tvisnil(&node->val)) { |
| 179 | w = serialize_put(w, sbuf, &node->key); |
| 180 | w = serialize_put(w, sbuf, &node->val); |
| 181 | if (--nhash == 0) break; |
| 182 | } |
| 183 | } |
| 184 | sbuf->depth++; |
| 185 | #if LJ_HASFFI |
| 186 | } else if (tviscdata(o)) { |
| 187 | CTState *cts = ctype_cts(sbufL(sbuf->sb)); |
| 188 | CType *s = ctype_raw(cts, cdataV(o)->ctypeid); |
| 189 | uint8_t *sp = cdataptr(cdataV(o)); |
| 190 | if (ctype_isinteger(s->info) && s->size == 8) { |
| 191 | w = serialize_more(w, sbuf, 1+8); |
| 192 | *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; |
| 193 | #if LJ_BE |
| 194 | { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } |
| 195 | #else |
| 196 | memcpy(w, sp, 8); |
| 197 | #endif |
| 198 | w += 8; |
| 199 | } else if (ctype_iscomplex(s->info) && s->size == 16) { |
| 200 | w = serialize_more(w, sbuf, 1+16); |
| 201 | *w++ = SER_TAG_COMPLEX; |
| 202 | #if LJ_BE |
| 203 | { /* Only swap the doubles. The re/im order stays the same. */ |
| 204 | uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); |
| 205 | u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); |
| 206 | } |
| 207 | #else |
| 208 | memcpy(w, sp, 16); |
| 209 | #endif |
| 210 | w += 16; |
| 211 | } else { |
| 212 | goto badenc; /* NYI other cdata */ |
| 213 | } |
| 214 | #endif |
| 215 | } else if (tvislightud(o)) { |
| 216 | uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbuf->sb)), o); |
| 217 | w = serialize_more(w, sbuf, 1+sizeof(ud)); |
| 218 | if (ud == 0) { |
| 219 | *w++ = SER_TAG_NULL; |
| 220 | } else if (LJ_32 || checku32(ud)) { |
| 221 | #if LJ_BE && LJ_64 |
| 222 | ud = lj_bswap64(ud); |
| 223 | #elif LJ_BE |
| 224 | ud = lj_bswap(ud); |
| 225 | #endif |
| 226 | *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; |
| 227 | } else { |
| 228 | #if LJ_BE |
| 229 | ud = lj_bswap64(ud); |
| 230 | #endif |
| 231 | *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; |
| 232 | } |
| 233 | } else { |
| 234 | /* NYI userdata */ |
| 235 | #if LJ_HASFFI |
| 236 | badenc: |
| 237 | #endif |
| 238 | lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADENC, lj_typename(o)); |
| 239 | } |
| 240 | return w; |
| 241 | } |
| 242 | |
| 243 | /* Get serialized object from buffer. */ |
| 244 | static char *serialize_get(char *r, StrBuf *sbuf, TValue *o) |
| 245 | { |
| 246 | char *e = sbufE(sbuf->sb); |
| 247 | uint32_t tp; |
| 248 | r = serialize_ru124(r, e, &tp); if (LJ_UNLIKELY(!r)) goto eob; |
| 249 | if (LJ_LIKELY(tp >= SER_TAG_STR)) { |
| 250 | uint32_t len = tp - SER_TAG_STR; |
| 251 | if (LJ_UNLIKELY(len > (uint32_t)(e - r))) goto eob; |
| 252 | setstrV(sbufL(sbuf->sb), o, lj_str_new(sbufL(sbuf->sb), r, len)); |
| 253 | r += len; |
| 254 | } else if (tp == SER_TAG_INT) { |
| 255 | if (LJ_UNLIKELY(r + 4 > e)) goto eob; |
| 256 | setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); |
| 257 | r += 4; |
| 258 | } else if (tp == SER_TAG_NUM) { |
| 259 | if (LJ_UNLIKELY(r + 8 > e)) goto eob; |
| 260 | memcpy(o, r, 8); r += 8; |
| 261 | #if LJ_BE |
| 262 | o->u64 = lj_bswap64(o->u64); |
| 263 | #endif |
| 264 | if (!tvisnum(o)) setnanV(o); |
| 265 | } else if (tp <= SER_TAG_TRUE) { |
| 266 | setpriV(o, ~tp); |
| 267 | } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) { |
| 268 | uint32_t narray = 0, nhash = 0; |
| 269 | GCtab *t; |
| 270 | if (tp >= SER_TAG_TAB+2) { |
| 271 | r = serialize_ru124(r, e, &narray); if (LJ_UNLIKELY(!r)) goto eob; |
| 272 | } |
| 273 | if ((tp & 1)) { |
| 274 | r = serialize_ru124(r, e, &nhash); if (LJ_UNLIKELY(!r)) goto eob; |
| 275 | } |
| 276 | t = lj_tab_new(sbufL(sbuf->sb), narray, hsize2hbits(nhash)); |
| 277 | settabV(sbufL(sbuf->sb), o, t); |
| 278 | if (narray) { |
| 279 | TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); |
| 280 | TValue *oe = tvref(t->array) + narray; |
| 281 | while (oa < oe) r = serialize_get(r, sbuf, oa++); |
| 282 | } |
| 283 | if (nhash) { |
| 284 | do { |
| 285 | TValue k, *v; |
| 286 | r = serialize_get(r, sbuf, &k); |
| 287 | v = lj_tab_set(sbufL(sbuf->sb), t, &k); |
| 288 | if (LJ_UNLIKELY(!tvisnil(v))) |
| 289 | lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_DUPKEY); |
| 290 | r = serialize_get(r, sbuf, v); |
| 291 | } while (--nhash); |
| 292 | } |
| 293 | #if LJ_HASFFI |
| 294 | } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { |
| 295 | uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; |
| 296 | GCcdata *cd; |
| 297 | if (LJ_UNLIKELY(r + sz > e)) goto eob; |
| 298 | cd = lj_cdata_new_(sbufL(sbuf->sb), |
| 299 | tp == SER_TAG_INT64 ? CTID_INT64 : |
| 300 | tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE, |
| 301 | sz); |
| 302 | memcpy(cdataptr(cd), r, sz); r += sz; |
| 303 | #if LJ_BE |
| 304 | *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); |
| 305 | if (sz == 16) |
| 306 | ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); |
| 307 | #endif |
| 308 | setcdataV(sbufL(sbuf->sb), o, cd); |
| 309 | #endif |
| 310 | } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { |
| 311 | uintptr_t ud = 0; |
| 312 | if (tp == SER_TAG_LIGHTUD32) { |
| 313 | if (LJ_UNLIKELY(r + 4 > e)) goto eob; |
| 314 | ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)); |
| 315 | r += 4; |
| 316 | } |
| 317 | #if LJ_64 |
| 318 | else if (tp == SER_TAG_LIGHTUD64) { |
| 319 | if (LJ_UNLIKELY(r + 8 > e)) goto eob; |
| 320 | memcpy(&ud, r, 8); r += 8; |
| 321 | #if LJ_BE |
| 322 | ud = lj_bswap64(ud); |
| 323 | #endif |
| 324 | } |
| 325 | setrawlightudV(o, lj_lightud_intern(sbufL(sbuf->sb), (void *)ud)); |
| 326 | #else |
| 327 | setrawlightudV(o, (void *)ud); |
| 328 | #endif |
| 329 | } else { |
| 330 | lj_err_callerv(sbufL(sbuf->sb), LJ_ERR_BUFFER_BADDEC, tp); |
| 331 | } |
| 332 | return r; |
| 333 | eob: |
| 334 | lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_EOB); |
| 335 | return NULL; |
| 336 | } |
| 337 | |
| 338 | StrBuf * LJ_FASTCALL lj_serialize_put(StrBuf *sbuf, cTValue *o) |
| 339 | { |
| 340 | sbuf->depth = LJ_SERIALIZE_DEPTH; |
| 341 | setsbufP(sbuf->sb, serialize_put(sbufP(sbuf->sb), sbuf, o)); |
| 342 | return sbuf; |
| 343 | } |
| 344 | |
| 345 | StrBuf * LJ_FASTCALL lj_serialize_get(StrBuf *sbuf, TValue *o) |
| 346 | { |
| 347 | char *r = serialize_get(sbuf->r, sbuf, o); |
| 348 | if (r != sbufP(sbuf->sb)) |
| 349 | lj_err_caller(sbufL(sbuf->sb), LJ_ERR_BUFFER_LEFTOV); |
| 350 | sbuf->r = r; |
| 351 | return sbuf; |
| 352 | } |
| 353 | |
| 354 | #endif |
| 355 | |