| 1 | // |
| 2 | // Copyright 2000 - 2003 Google Inc. |
| 3 | // |
| 4 | // |
| 5 | // This holds the encoding/decoding routines that used to live in netutil |
| 6 | |
| 7 | #ifndef UTIL_CODING_CODER_H__ |
| 8 | #define UTIL_CODING_CODER_H__ |
| 9 | |
| 10 | #include <algorithm> |
| 11 | using std::min; |
| 12 | using std::max; |
| 13 | using std::swap; |
| 14 | using std::reverse; |
| 15 | // for min |
| 16 | #include "util/coding/varint.h" |
| 17 | #include "base/logging.h" |
| 18 | #include "base/port.h" |
| 19 | #include "util/endian/endian.h" |
| 20 | |
| 21 | /* Class for encoding data into a memory buffer */ |
| 22 | class Encoder { |
| 23 | public: |
| 24 | // Creates an empty Encoder with no room that is enlarged |
| 25 | // (if necessary) when "Encoder::Ensure(N)" is called. |
| 26 | Encoder(); |
| 27 | ~Encoder(); |
| 28 | |
| 29 | // Initialize encoder to encode into "buf" |
| 30 | explicit Encoder(void* buf, int maxn); |
| 31 | void reset(void* buf, int maxn); |
| 32 | void clear(); |
| 33 | |
| 34 | // Encoding routines. Note that these do not check bounds |
| 35 | void put8(unsigned char v); |
| 36 | void put16(uint16 v); |
| 37 | void put32(uint32 v); |
| 38 | void put64(uint64 v); |
| 39 | void putword(uword_t v); |
| 40 | void putn(const void* mem, int n); |
| 41 | |
| 42 | // put no more than n bytes, stopping when c is put |
| 43 | void putcn(const void* mem, int c, int n); |
| 44 | |
| 45 | void puts(const void* mem); // put a c-string including \0 |
| 46 | void puts_without_null(const char* mem); // put a c-string without \0 |
| 47 | void putfloat(float f); |
| 48 | void putdouble(double d); |
| 49 | |
| 50 | // Support for variable length encoding with 7 bits per byte |
| 51 | // (these are just simple wrappers around the Varint module) |
| 52 | static const int kVarintMax32 = Varint::kMax32; |
| 53 | static const int kVarintMax64 = Varint::kMax64; |
| 54 | |
| 55 | void put_varint32(uint32 v); |
| 56 | void put_varint64(uint64 v); |
| 57 | static int varint32_length(uint32 v); // Length of var encoding of "v" |
| 58 | static int varint64_length(uint64 v); // Length of var encoding of "v" |
| 59 | |
| 60 | // DEPRECATED |
| 61 | // |
| 62 | // For new code use put_varint32(ZigZagEncode(signed_value)); |
| 63 | // ZigZag coding is defined in utils/coding/transforms.h |
| 64 | void put_varsigned32(int32 v); |
| 65 | |
| 66 | // Support for a few special types we don't want to restrict size of |
| 67 | void put_docid(DocId d); |
| 68 | |
| 69 | // Return number of bytes encoded so far |
| 70 | int length() const; |
| 71 | |
| 72 | // Return number of bytes of space remaining in buffer |
| 73 | int avail() const; |
| 74 | |
| 75 | // REQUIRES: Encoder was created with the 0-argument constructor interface. |
| 76 | // |
| 77 | // This interface ensures that at least "N" more bytes are available |
| 78 | // in the underlying buffer by resizing the buffer (if necessary). |
| 79 | // |
| 80 | // Note that no bounds checking is done on any of the put routines, |
| 81 | // so it is the client's responsibility to call Ensure() at |
| 82 | // appropriate intervals to ensure that enough space is available |
| 83 | // for the data being added. |
| 84 | void Ensure(int N); |
| 85 | |
| 86 | // Returns true if Ensure is allowed to be called on "this" |
| 87 | bool ensure_allowed() const { return underlying_buffer_ != NULL; } |
| 88 | |
| 89 | // Return ptr to start of encoded data. This pointer remains valid |
| 90 | // until reset or Ensure is called. |
| 91 | const char* base() const { return (const char*)orig_; } |
| 92 | |
| 93 | // Advances the write pointer by "N" bytes. |
| 94 | void skip(int N) { buf_ += N; } |
| 95 | |
| 96 | // REQUIRES: length() >= N |
| 97 | // Removes the last N bytes out of the encoded buffer |
| 98 | void RemoveLast(int N); |
| 99 | |
| 100 | // REQUIRES: length() >= N |
| 101 | // Removes the last length()-N bytes to make the encoded buffer have length N |
| 102 | void Resize(int N); |
| 103 | |
| 104 | private: |
| 105 | void EnsureSlowPath(int N); |
| 106 | |
| 107 | unsigned char* orig_; |
| 108 | unsigned char* buf_; |
| 109 | unsigned char* limit_; |
| 110 | |
| 111 | // If constructed with the zero-argument constructor, we're allowed |
| 112 | // to use Ensure; otherwise we're not. If Ensure is allowed, |
| 113 | // underlying_buffer_ is non-NULL; otherwise it is set to NULL. |
| 114 | unsigned char* underlying_buffer_; |
| 115 | |
| 116 | static unsigned char kEmptyBuffer; |
| 117 | |
| 118 | DISALLOW_EVIL_CONSTRUCTORS(Encoder); |
| 119 | }; |
| 120 | |
| 121 | /* Class for decoding data from a memory buffer */ |
| 122 | class Decoder { |
| 123 | public: |
| 124 | // Empty constructor to create uninitialized decoder |
| 125 | inline Decoder() { } |
| 126 | |
| 127 | // NOTE: for efficiency reasons, this is not virtual. so don't add |
| 128 | // any members that really need to be destructed, and be careful about |
| 129 | // inheritance. |
| 130 | ~Decoder() { } |
| 131 | |
| 132 | // Initialize decoder to decode from "buf" |
| 133 | explicit Decoder(const void* buf, int maxn); |
| 134 | void reset(const void* buf, int maxn); |
| 135 | |
| 136 | // Decoding routines. Note that these do not check bounds |
| 137 | unsigned char get8(); |
| 138 | uint16 get16(); |
| 139 | uint32 get32(); |
| 140 | uint64 get64(); |
| 141 | uword_t getword(); |
| 142 | float getfloat(); |
| 143 | double getdouble(); |
| 144 | void getn(void* mem, int n); |
| 145 | void getcn(void* mem, int c, int n); // get no more than n bytes, |
| 146 | // stopping after c is got |
| 147 | void gets(void* mem, int n); // get a c-string no more than |
| 148 | // n bytes. always appends '\0' |
| 149 | void skip(int n); |
| 150 | unsigned char const* ptr(); // Return ptr to current position in buffer |
| 151 | |
| 152 | // "get_varint" actually checks bounds |
| 153 | bool get_varint32(uint32* v); |
| 154 | bool get_varint64(uint64* v); |
| 155 | |
| 156 | // DEPRECATED |
| 157 | // |
| 158 | // For new code use |
| 159 | // get_varint32(&unsigned_temp); |
| 160 | // signed_value = ZigZagDecode(unsigned_temp); |
| 161 | // ZigZag coding is defined in utils/coding/transforms.h |
| 162 | bool get_varsigned32(int32* v); |
| 163 | |
| 164 | // Support for a few special types we don't want to restrict size of |
| 165 | DocId get_docid(); |
| 166 | |
| 167 | // This is used for transitioning docids from 32bits to 64bits |
| 168 | DocId32Bit get_docid_32bit(); |
| 169 | |
| 170 | int pos() const; |
| 171 | // Return number of bytes decoded so far |
| 172 | |
| 173 | int avail() const; |
| 174 | // Return number of available bytes to read |
| 175 | |
| 176 | private: |
| 177 | friend class IndexBlockDecoder; |
| 178 | const unsigned char* orig_; |
| 179 | const unsigned char* buf_; |
| 180 | const unsigned char* limit_; |
| 181 | }; |
| 182 | DECLARE_POD(Decoder); // so then we might as well be a POD |
| 183 | |
| 184 | /***** Implementation details. Clients should ignore them. *****/ |
| 185 | |
| 186 | inline Encoder::Encoder(void* b, int maxn) { |
| 187 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); |
| 188 | limit_ = orig_ + maxn; |
| 189 | underlying_buffer_ = NULL; |
| 190 | } |
| 191 | |
| 192 | inline void Encoder::reset(void* b, int maxn) { |
| 193 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); |
| 194 | limit_ = orig_ + maxn; |
| 195 | // Can't use the underlying buffer anymore |
| 196 | if (underlying_buffer_ != &kEmptyBuffer) { |
| 197 | delete[] underlying_buffer_; |
| 198 | } |
| 199 | underlying_buffer_ = NULL; |
| 200 | } |
| 201 | |
| 202 | inline void Encoder::clear() { |
| 203 | buf_ = orig_; |
| 204 | } |
| 205 | |
| 206 | inline void Encoder::Ensure(int N) { |
| 207 | DCHECK(ensure_allowed()); |
| 208 | if (avail() < N) { |
| 209 | EnsureSlowPath(N); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | inline int Encoder::length() const { |
| 214 | return (buf_ - orig_); |
| 215 | } |
| 216 | |
| 217 | inline int Encoder::avail() const { |
| 218 | return (limit_ - buf_); |
| 219 | } |
| 220 | |
| 221 | inline void Encoder::putn(const void* src, int n) { |
| 222 | memcpy(buf_, src, n); |
| 223 | buf_ += n; |
| 224 | } |
| 225 | |
| 226 | inline void Encoder::putcn(const void* src, int c, int n) { |
| 227 | unsigned char *old = buf_; |
| 228 | buf_ = static_cast<unsigned char *>(memccpy(buf_, src, c, n)); |
| 229 | if (buf_ == NULL) |
| 230 | buf_ = old + n; |
| 231 | } |
| 232 | |
| 233 | inline void Encoder::puts(const void* src) { |
| 234 | putcn(src, '\0', limit_ - buf_); |
| 235 | } |
| 236 | |
| 237 | inline void Encoder::puts_without_null(const char* mem) { |
| 238 | while (*mem != '\0' && buf_ < limit_) { |
| 239 | *buf_++ = *mem++; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | inline void Encoder::put_varint32(uint32 v) { |
| 244 | buf_ = reinterpret_cast<unsigned char*> |
| 245 | (Varint::Encode32(reinterpret_cast<char*>(buf_), v)); |
| 246 | } |
| 247 | |
| 248 | inline void Encoder::put_varint64(uint64 v) { |
| 249 | buf_ = reinterpret_cast<unsigned char*> |
| 250 | (Varint::Encode64(reinterpret_cast<char*>(buf_), v)); |
| 251 | } |
| 252 | |
| 253 | // DEPRECATED |
| 254 | // |
| 255 | // For new code use put_varint32(ZigZagEncode(signed_value)); |
| 256 | // ZigZag coding is defined in utils/coding/transforms.h |
| 257 | inline void Encoder::put_varsigned32(int32 n) { |
| 258 | // Encode sign in low-bit |
| 259 | int sign = (n < 0) ? 1 : 0; |
| 260 | uint32 mag = (n < 0) ? -n : n; |
| 261 | put_varint32((mag << 1) | sign); |
| 262 | } |
| 263 | |
| 264 | inline Decoder::Decoder(const void* b, int maxn) { |
| 265 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); |
| 266 | limit_ = orig_ + maxn; |
| 267 | } |
| 268 | |
| 269 | inline void Decoder::reset(const void* b, int maxn) { |
| 270 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); |
| 271 | limit_ = orig_ + maxn; |
| 272 | } |
| 273 | |
| 274 | inline int Decoder::pos() const { |
| 275 | return (buf_ - orig_); |
| 276 | } |
| 277 | |
| 278 | inline int Decoder::avail() const { |
| 279 | return (limit_ - buf_); |
| 280 | } |
| 281 | |
| 282 | inline void Decoder::getn(void* dst, int n) { |
| 283 | memcpy(dst, buf_, n); |
| 284 | buf_ += n; |
| 285 | } |
| 286 | |
| 287 | inline void Decoder::getcn(void* dst, int c, int n) { |
| 288 | void *ptr; |
| 289 | ptr = memccpy(dst, buf_, c, n); |
| 290 | if (ptr == NULL) |
| 291 | buf_ = buf_ + n; |
| 292 | else |
| 293 | buf_ = buf_ + (reinterpret_cast<unsigned char *>(ptr) - |
| 294 | reinterpret_cast<unsigned char *>(dst)); |
| 295 | } |
| 296 | |
| 297 | inline void Decoder::gets(void* dst, int n) { |
| 298 | int len = min<int>((n - 1), (limit_ - buf_)); |
| 299 | (reinterpret_cast<char *>(dst))[len] = '\0'; |
| 300 | getcn(dst, '\0', len); |
| 301 | } |
| 302 | |
| 303 | inline void Decoder::skip(int n) { |
| 304 | buf_ += n; |
| 305 | } |
| 306 | |
| 307 | inline unsigned char const* Decoder::ptr() { |
| 308 | return buf_; |
| 309 | } |
| 310 | |
| 311 | |
| 312 | // DEPRECATED |
| 313 | // |
| 314 | // For new code use |
| 315 | // get_varint32(&unsigned_temp); |
| 316 | // signed_value = ZigZagDecode(unsigned_temp); |
| 317 | // ZigZag coding is defined in utils/coding/transforms.h |
| 318 | inline bool Decoder::get_varsigned32(int32* v) { |
| 319 | uint32 coding; |
| 320 | if (get_varint32(&coding)) { |
| 321 | int sign = coding & 1; |
| 322 | int32 mag = coding >> 1; |
| 323 | if (sign) { |
| 324 | // Special handling for encoding of kint32min |
| 325 | *v = (mag == 0) ? kint32min : -mag; |
| 326 | } else { |
| 327 | *v = mag; |
| 328 | } |
| 329 | return true; |
| 330 | } else { |
| 331 | return false; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | inline void Encoder::put8(unsigned char v) { |
| 336 | DCHECK_GE(avail(), sizeof(v)); |
| 337 | *buf_ = v; |
| 338 | buf_ += sizeof(v); |
| 339 | } |
| 340 | |
| 341 | inline void Encoder::put16(uint16 v) { |
| 342 | DCHECK_GE(avail(), sizeof(v)); |
| 343 | LittleEndian::Store16(buf_, v); |
| 344 | buf_ += sizeof(v); |
| 345 | } |
| 346 | |
| 347 | inline void Encoder::put32(uint32 v) { |
| 348 | DCHECK_GE(avail(), sizeof(v)); |
| 349 | LittleEndian::Store32(buf_, v); |
| 350 | buf_ += sizeof(v); |
| 351 | } |
| 352 | |
| 353 | inline void Encoder::put64(uint64 v) { |
| 354 | DCHECK_GE(avail(), sizeof(v)); |
| 355 | LittleEndian::Store64(buf_, v); |
| 356 | buf_ += sizeof(v); |
| 357 | } |
| 358 | |
| 359 | inline void Encoder::putword(uword_t v) { |
| 360 | #ifdef _LP64 |
| 361 | LittleEndian::Store64(buf_, v); |
| 362 | #else |
| 363 | LittleEndian::Store32(buf_, v); |
| 364 | #endif /* _LP64 */ |
| 365 | buf_ += sizeof(v); |
| 366 | } |
| 367 | |
| 368 | inline void Encoder::put_docid(DocId d) { |
| 369 | put64(DocIdAsNumber(d)); |
| 370 | } |
| 371 | |
| 372 | inline void Encoder::putfloat(float f) { |
| 373 | uint32 v; |
| 374 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; |
| 375 | memcpy(&v, &f, sizeof(f)); |
| 376 | put32(v); |
| 377 | } |
| 378 | |
| 379 | inline void Encoder::putdouble(double d) { |
| 380 | uint64 v; |
| 381 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; |
| 382 | memcpy(&v, &d, sizeof(d)); |
| 383 | put64(v); |
| 384 | } |
| 385 | |
| 386 | inline unsigned char Decoder::get8() { |
| 387 | const unsigned char v = *buf_; |
| 388 | buf_ += sizeof(v); |
| 389 | return v; |
| 390 | } |
| 391 | |
| 392 | inline uint16 Decoder::get16() { |
| 393 | const uint16 v = LittleEndian::Load16(buf_); |
| 394 | buf_ += sizeof(v); |
| 395 | return v; |
| 396 | } |
| 397 | |
| 398 | inline uint32 Decoder::get32() { |
| 399 | const uint32 v = LittleEndian::Load32(buf_); |
| 400 | buf_ += sizeof(v); |
| 401 | return v; |
| 402 | } |
| 403 | |
| 404 | inline uint64 Decoder::get64() { |
| 405 | const uint64 v = LittleEndian::Load64(buf_); |
| 406 | buf_ += sizeof(v); |
| 407 | return v; |
| 408 | } |
| 409 | |
| 410 | inline uword_t Decoder::getword() { |
| 411 | #ifdef _LP64 |
| 412 | const uword_t v = LittleEndian::Load64(buf_); |
| 413 | #else |
| 414 | const uword_t v = LittleEndian::Load32(buf_); |
| 415 | #endif /* _LP64 */ |
| 416 | buf_ += sizeof(v); |
| 417 | return v; |
| 418 | } |
| 419 | |
| 420 | inline DocId Decoder::get_docid() { |
| 421 | return DocId(get64()); |
| 422 | } |
| 423 | |
| 424 | inline DocId32Bit Decoder::get_docid_32bit() { |
| 425 | return DocId32Bit(get32()); |
| 426 | } |
| 427 | |
| 428 | inline float Decoder::getfloat() { |
| 429 | uint32 v = get32(); |
| 430 | float f; |
| 431 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; |
| 432 | memcpy(&f, &v, sizeof(f)); |
| 433 | return f; |
| 434 | } |
| 435 | |
| 436 | inline double Decoder::getdouble() { |
| 437 | uint64 v = get64(); |
| 438 | double d; |
| 439 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; |
| 440 | memcpy(&d, &v, sizeof(d)); |
| 441 | return d; |
| 442 | } |
| 443 | |
| 444 | #endif // UTIL_CODING_CODER_H__ |
| 445 | |