| 1 | // | 
|---|
| 2 | // Copyright 2000 - 2003 Google Inc. | 
|---|
| 3 | // | 
|---|
| 4 | // | 
|---|
| 5 | // This holds the encoding/decoding routines that used to live in netutil | 
|---|
| 6 |  | 
|---|
| 7 | #ifndef UTIL_CODING_CODER_H__ | 
|---|
| 8 | #define UTIL_CODING_CODER_H__ | 
|---|
| 9 |  | 
|---|
| 10 | #include <algorithm> | 
|---|
| 11 | using std::min; | 
|---|
| 12 | using std::max; | 
|---|
| 13 | using std::swap; | 
|---|
| 14 | using std::reverse; | 
|---|
| 15 | // for min | 
|---|
| 16 | #include "util/coding/varint.h" | 
|---|
| 17 | #include "base/logging.h" | 
|---|
| 18 | #include "base/port.h" | 
|---|
| 19 | #include "util/endian/endian.h" | 
|---|
| 20 |  | 
|---|
| 21 | /* Class for encoding data into a memory buffer */ | 
|---|
| 22 | class Encoder { | 
|---|
| 23 | public: | 
|---|
| 24 | // Creates an empty Encoder with no room that is enlarged | 
|---|
| 25 | // (if necessary) when "Encoder::Ensure(N)" is called. | 
|---|
| 26 | Encoder(); | 
|---|
| 27 | ~Encoder(); | 
|---|
| 28 |  | 
|---|
| 29 | // Initialize encoder to encode into "buf" | 
|---|
| 30 | explicit Encoder(void* buf, int maxn); | 
|---|
| 31 | void reset(void* buf, int maxn); | 
|---|
| 32 | void clear(); | 
|---|
| 33 |  | 
|---|
| 34 | // Encoding routines.  Note that these do not check bounds | 
|---|
| 35 | void put8(unsigned char v); | 
|---|
| 36 | void put16(uint16 v); | 
|---|
| 37 | void put32(uint32 v); | 
|---|
| 38 | void put64(uint64 v); | 
|---|
| 39 | void putword(uword_t v); | 
|---|
| 40 | void putn(const void* mem, int n); | 
|---|
| 41 |  | 
|---|
| 42 | // put no more than n bytes, stopping when c is put | 
|---|
| 43 | void putcn(const void* mem, int c, int n); | 
|---|
| 44 |  | 
|---|
| 45 | void puts(const void* mem);                // put a c-string including \0 | 
|---|
| 46 | void puts_without_null(const char* mem);   // put a c-string without \0 | 
|---|
| 47 | void putfloat(float f); | 
|---|
| 48 | void putdouble(double d); | 
|---|
| 49 |  | 
|---|
| 50 | // Support for variable length encoding with 7 bits per byte | 
|---|
| 51 | // (these are just simple wrappers around the Varint module) | 
|---|
| 52 | static const int kVarintMax32 = Varint::kMax32; | 
|---|
| 53 | static const int kVarintMax64 = Varint::kMax64; | 
|---|
| 54 |  | 
|---|
| 55 | void put_varint32(uint32 v); | 
|---|
| 56 | void put_varint64(uint64 v); | 
|---|
| 57 | static int varint32_length(uint32 v);  // Length of var encoding of "v" | 
|---|
| 58 | static int varint64_length(uint64 v);  // Length of var encoding of "v" | 
|---|
| 59 |  | 
|---|
| 60 | // DEPRECATED | 
|---|
| 61 | // | 
|---|
| 62 | // For new code use put_varint32(ZigZagEncode(signed_value)); | 
|---|
| 63 | // ZigZag coding is defined in utils/coding/transforms.h | 
|---|
| 64 | void put_varsigned32(int32 v); | 
|---|
| 65 |  | 
|---|
| 66 | // Support for a few special types we don't want to restrict size of | 
|---|
| 67 | void put_docid(DocId d); | 
|---|
| 68 |  | 
|---|
| 69 | // Return number of bytes encoded so far | 
|---|
| 70 | int length() const; | 
|---|
| 71 |  | 
|---|
| 72 | // Return number of bytes of space remaining in buffer | 
|---|
| 73 | int avail() const; | 
|---|
| 74 |  | 
|---|
| 75 | // REQUIRES: Encoder was created with the 0-argument constructor interface. | 
|---|
| 76 | // | 
|---|
| 77 | // This interface ensures that at least "N" more bytes are available | 
|---|
| 78 | // in the underlying buffer by resizing the buffer (if necessary). | 
|---|
| 79 | // | 
|---|
| 80 | // Note that no bounds checking is done on any of the put routines, | 
|---|
| 81 | // so it is the client's responsibility to call Ensure() at | 
|---|
| 82 | // appropriate intervals to ensure that enough space is available | 
|---|
| 83 | // for the data being added. | 
|---|
| 84 | void Ensure(int N); | 
|---|
| 85 |  | 
|---|
| 86 | // Returns true if Ensure is allowed to be called on "this" | 
|---|
| 87 | bool ensure_allowed() const { return underlying_buffer_ != NULL; } | 
|---|
| 88 |  | 
|---|
| 89 | // Return ptr to start of encoded data.  This pointer remains valid | 
|---|
| 90 | // until reset or Ensure is called. | 
|---|
| 91 | const char* base() const { return (const char*)orig_; } | 
|---|
| 92 |  | 
|---|
| 93 | // Advances the write pointer by "N" bytes. | 
|---|
| 94 | void skip(int N) { buf_ += N; } | 
|---|
| 95 |  | 
|---|
| 96 | // REQUIRES: length() >= N | 
|---|
| 97 | // Removes the last N bytes out of the encoded buffer | 
|---|
| 98 | void RemoveLast(int N); | 
|---|
| 99 |  | 
|---|
| 100 | // REQUIRES: length() >= N | 
|---|
| 101 | // Removes the last length()-N bytes to make the encoded buffer have length N | 
|---|
| 102 | void Resize(int N); | 
|---|
| 103 |  | 
|---|
| 104 | private: | 
|---|
| 105 | void EnsureSlowPath(int N); | 
|---|
| 106 |  | 
|---|
| 107 | unsigned char* orig_; | 
|---|
| 108 | unsigned char* buf_; | 
|---|
| 109 | unsigned char* limit_; | 
|---|
| 110 |  | 
|---|
| 111 | // If constructed with the zero-argument constructor, we're allowed | 
|---|
| 112 | // to use Ensure; otherwise we're not.  If Ensure is allowed, | 
|---|
| 113 | // underlying_buffer_ is non-NULL; otherwise it is set to NULL. | 
|---|
| 114 | unsigned char* underlying_buffer_; | 
|---|
| 115 |  | 
|---|
| 116 | static unsigned char kEmptyBuffer; | 
|---|
| 117 |  | 
|---|
| 118 | DISALLOW_EVIL_CONSTRUCTORS(Encoder); | 
|---|
| 119 | }; | 
|---|
| 120 |  | 
|---|
| 121 | /* Class for decoding data from a memory buffer */ | 
|---|
| 122 | class Decoder { | 
|---|
| 123 | public: | 
|---|
| 124 | // Empty constructor to create uninitialized decoder | 
|---|
| 125 | inline Decoder() { } | 
|---|
| 126 |  | 
|---|
| 127 | // NOTE: for efficiency reasons, this is not virtual.  so don't add | 
|---|
| 128 | // any members that really need to be destructed, and be careful about | 
|---|
| 129 | // inheritance. | 
|---|
| 130 | ~Decoder() { } | 
|---|
| 131 |  | 
|---|
| 132 | // Initialize decoder to decode from "buf" | 
|---|
| 133 | explicit Decoder(const void* buf, int maxn); | 
|---|
| 134 | void reset(const void* buf, int maxn); | 
|---|
| 135 |  | 
|---|
| 136 | // Decoding routines.  Note that these do not check bounds | 
|---|
| 137 | unsigned char  get8(); | 
|---|
| 138 | uint16 get16(); | 
|---|
| 139 | uint32 get32(); | 
|---|
| 140 | uint64 get64(); | 
|---|
| 141 | uword_t getword(); | 
|---|
| 142 | float  getfloat(); | 
|---|
| 143 | double getdouble(); | 
|---|
| 144 | void   getn(void* mem, int n); | 
|---|
| 145 | void   getcn(void* mem, int c, int n);    // get no more than n bytes, | 
|---|
| 146 | // stopping after c is got | 
|---|
| 147 | void   gets(void* mem, int n);            // get a c-string no more than | 
|---|
| 148 | // n bytes. always appends '\0' | 
|---|
| 149 | void   skip(int n); | 
|---|
| 150 | unsigned char const* ptr();       // Return ptr to current position in buffer | 
|---|
| 151 |  | 
|---|
| 152 | // "get_varint" actually checks bounds | 
|---|
| 153 | bool get_varint32(uint32* v); | 
|---|
| 154 | bool get_varint64(uint64* v); | 
|---|
| 155 |  | 
|---|
| 156 | // DEPRECATED | 
|---|
| 157 | // | 
|---|
| 158 | // For new code use | 
|---|
| 159 | //   get_varint32(&unsigned_temp); | 
|---|
| 160 | //   signed_value = ZigZagDecode(unsigned_temp); | 
|---|
| 161 | // ZigZag coding is defined in utils/coding/transforms.h | 
|---|
| 162 | bool get_varsigned32(int32* v); | 
|---|
| 163 |  | 
|---|
| 164 | // Support for a few special types we don't want to restrict size of | 
|---|
| 165 | DocId get_docid(); | 
|---|
| 166 |  | 
|---|
| 167 | // This is used for transitioning docids from 32bits to 64bits | 
|---|
| 168 | DocId32Bit get_docid_32bit(); | 
|---|
| 169 |  | 
|---|
| 170 | int pos() const; | 
|---|
| 171 | // Return number of bytes decoded so far | 
|---|
| 172 |  | 
|---|
| 173 | int avail() const; | 
|---|
| 174 | // Return number of available bytes to read | 
|---|
| 175 |  | 
|---|
| 176 | private: | 
|---|
| 177 | friend class IndexBlockDecoder; | 
|---|
| 178 | const unsigned char* orig_; | 
|---|
| 179 | const unsigned char* buf_; | 
|---|
| 180 | const unsigned char* limit_; | 
|---|
| 181 | }; | 
|---|
| 182 | DECLARE_POD(Decoder);  // so then we might as well be a POD | 
|---|
| 183 |  | 
|---|
| 184 | /***** Implementation details.  Clients should ignore them. *****/ | 
|---|
| 185 |  | 
|---|
| 186 | inline Encoder::Encoder(void* b, int maxn) { | 
|---|
| 187 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); | 
|---|
| 188 | limit_ = orig_ + maxn; | 
|---|
| 189 | underlying_buffer_ = NULL; | 
|---|
| 190 | } | 
|---|
| 191 |  | 
|---|
| 192 | inline void Encoder::reset(void* b, int maxn) { | 
|---|
| 193 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); | 
|---|
| 194 | limit_ = orig_ + maxn; | 
|---|
| 195 | // Can't use the underlying buffer anymore | 
|---|
| 196 | if (underlying_buffer_ != &kEmptyBuffer) { | 
|---|
| 197 | delete[] underlying_buffer_; | 
|---|
| 198 | } | 
|---|
| 199 | underlying_buffer_ = NULL; | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | inline void Encoder::clear() { | 
|---|
| 203 | buf_ = orig_; | 
|---|
| 204 | } | 
|---|
| 205 |  | 
|---|
| 206 | inline void Encoder::Ensure(int N) { | 
|---|
| 207 | DCHECK(ensure_allowed()); | 
|---|
| 208 | if (avail() < N) { | 
|---|
| 209 | EnsureSlowPath(N); | 
|---|
| 210 | } | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | inline int Encoder::length() const { | 
|---|
| 214 | return (buf_ - orig_); | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | inline int Encoder::avail() const { | 
|---|
| 218 | return (limit_ - buf_); | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
| 221 | inline void Encoder::putn(const void* src, int n) { | 
|---|
| 222 | memcpy(buf_, src, n); | 
|---|
| 223 | buf_ += n; | 
|---|
| 224 | } | 
|---|
| 225 |  | 
|---|
| 226 | inline void Encoder::putcn(const void* src, int c, int n) { | 
|---|
| 227 | unsigned char *old = buf_; | 
|---|
| 228 | buf_ = static_cast<unsigned char *>(memccpy(buf_, src, c, n)); | 
|---|
| 229 | if (buf_ == NULL) | 
|---|
| 230 | buf_ = old + n; | 
|---|
| 231 | } | 
|---|
| 232 |  | 
|---|
| 233 | inline void Encoder::puts(const void* src) { | 
|---|
| 234 | putcn(src, '\0', limit_ - buf_); | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | inline void Encoder::puts_without_null(const char* mem) { | 
|---|
| 238 | while (*mem != '\0' && buf_ < limit_) { | 
|---|
| 239 | *buf_++ = *mem++; | 
|---|
| 240 | } | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | inline void Encoder::put_varint32(uint32 v) { | 
|---|
| 244 | buf_ = reinterpret_cast<unsigned char*> | 
|---|
| 245 | (Varint::Encode32(reinterpret_cast<char*>(buf_), v)); | 
|---|
| 246 | } | 
|---|
| 247 |  | 
|---|
| 248 | inline void Encoder::put_varint64(uint64 v) { | 
|---|
| 249 | buf_ = reinterpret_cast<unsigned char*> | 
|---|
| 250 | (Varint::Encode64(reinterpret_cast<char*>(buf_), v)); | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | // DEPRECATED | 
|---|
| 254 | // | 
|---|
| 255 | // For new code use put_varint32(ZigZagEncode(signed_value)); | 
|---|
| 256 | // ZigZag coding is defined in utils/coding/transforms.h | 
|---|
| 257 | inline void Encoder::put_varsigned32(int32 n) { | 
|---|
| 258 | // Encode sign in low-bit | 
|---|
| 259 | int sign = (n < 0) ? 1 : 0; | 
|---|
| 260 | uint32 mag = (n < 0) ? -n : n; | 
|---|
| 261 | put_varint32((mag << 1) | sign); | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | inline Decoder::Decoder(const void* b, int maxn) { | 
|---|
| 265 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); | 
|---|
| 266 | limit_ = orig_ + maxn; | 
|---|
| 267 | } | 
|---|
| 268 |  | 
|---|
| 269 | inline void Decoder::reset(const void* b, int maxn) { | 
|---|
| 270 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); | 
|---|
| 271 | limit_ = orig_ + maxn; | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|
| 274 | inline int Decoder::pos() const { | 
|---|
| 275 | return (buf_ - orig_); | 
|---|
| 276 | } | 
|---|
| 277 |  | 
|---|
| 278 | inline int Decoder::avail() const { | 
|---|
| 279 | return (limit_ - buf_); | 
|---|
| 280 | } | 
|---|
| 281 |  | 
|---|
| 282 | inline void Decoder::getn(void* dst, int n) { | 
|---|
| 283 | memcpy(dst, buf_, n); | 
|---|
| 284 | buf_ += n; | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | inline void Decoder::getcn(void* dst, int c, int n) { | 
|---|
| 288 | void *ptr; | 
|---|
| 289 | ptr = memccpy(dst, buf_, c, n); | 
|---|
| 290 | if (ptr == NULL) | 
|---|
| 291 | buf_ = buf_ + n; | 
|---|
| 292 | else | 
|---|
| 293 | buf_ = buf_ + (reinterpret_cast<unsigned char *>(ptr) - | 
|---|
| 294 | reinterpret_cast<unsigned char *>(dst)); | 
|---|
| 295 | } | 
|---|
| 296 |  | 
|---|
| 297 | inline void Decoder::gets(void* dst, int n) { | 
|---|
| 298 | int len = min<int>((n - 1), (limit_ - buf_)); | 
|---|
| 299 | (reinterpret_cast<char *>(dst))[len] = '\0'; | 
|---|
| 300 | getcn(dst, '\0', len); | 
|---|
| 301 | } | 
|---|
| 302 |  | 
|---|
| 303 | inline void Decoder::skip(int n) { | 
|---|
| 304 | buf_ += n; | 
|---|
| 305 | } | 
|---|
| 306 |  | 
|---|
| 307 | inline unsigned char const* Decoder::ptr() { | 
|---|
| 308 | return buf_; | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 |  | 
|---|
| 312 | // DEPRECATED | 
|---|
| 313 | // | 
|---|
| 314 | // For new code use | 
|---|
| 315 | //   get_varint32(&unsigned_temp); | 
|---|
| 316 | //   signed_value = ZigZagDecode(unsigned_temp); | 
|---|
| 317 | // ZigZag coding is defined in utils/coding/transforms.h | 
|---|
| 318 | inline bool Decoder::get_varsigned32(int32* v) { | 
|---|
| 319 | uint32 coding; | 
|---|
| 320 | if (get_varint32(&coding)) { | 
|---|
| 321 | int sign = coding & 1; | 
|---|
| 322 | int32 mag = coding >> 1; | 
|---|
| 323 | if (sign) { | 
|---|
| 324 | // Special handling for encoding of kint32min | 
|---|
| 325 | *v = (mag == 0) ? kint32min : -mag; | 
|---|
| 326 | } else { | 
|---|
| 327 | *v = mag; | 
|---|
| 328 | } | 
|---|
| 329 | return true; | 
|---|
| 330 | } else { | 
|---|
| 331 | return false; | 
|---|
| 332 | } | 
|---|
| 333 | } | 
|---|
| 334 |  | 
|---|
| 335 | inline void Encoder::put8(unsigned char v) { | 
|---|
| 336 | DCHECK_GE(avail(), sizeof(v)); | 
|---|
| 337 | *buf_ = v; | 
|---|
| 338 | buf_ += sizeof(v); | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | inline void Encoder::put16(uint16 v) { | 
|---|
| 342 | DCHECK_GE(avail(), sizeof(v)); | 
|---|
| 343 | LittleEndian::Store16(buf_, v); | 
|---|
| 344 | buf_ += sizeof(v); | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | inline void Encoder::put32(uint32 v) { | 
|---|
| 348 | DCHECK_GE(avail(), sizeof(v)); | 
|---|
| 349 | LittleEndian::Store32(buf_, v); | 
|---|
| 350 | buf_ += sizeof(v); | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | inline void Encoder::put64(uint64 v) { | 
|---|
| 354 | DCHECK_GE(avail(), sizeof(v)); | 
|---|
| 355 | LittleEndian::Store64(buf_, v); | 
|---|
| 356 | buf_ += sizeof(v); | 
|---|
| 357 | } | 
|---|
| 358 |  | 
|---|
| 359 | inline void Encoder::putword(uword_t v) { | 
|---|
| 360 | #ifdef _LP64 | 
|---|
| 361 | LittleEndian::Store64(buf_, v); | 
|---|
| 362 | #else | 
|---|
| 363 | LittleEndian::Store32(buf_, v); | 
|---|
| 364 | #endif /* _LP64 */ | 
|---|
| 365 | buf_ += sizeof(v); | 
|---|
| 366 | } | 
|---|
| 367 |  | 
|---|
| 368 | inline void Encoder::put_docid(DocId d) { | 
|---|
| 369 | put64(DocIdAsNumber(d)); | 
|---|
| 370 | } | 
|---|
| 371 |  | 
|---|
| 372 | inline void Encoder::putfloat(float f) { | 
|---|
| 373 | uint32 v; | 
|---|
| 374 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; | 
|---|
| 375 | memcpy(&v, &f, sizeof(f)); | 
|---|
| 376 | put32(v); | 
|---|
| 377 | } | 
|---|
| 378 |  | 
|---|
| 379 | inline void Encoder::putdouble(double d) { | 
|---|
| 380 | uint64 v; | 
|---|
| 381 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; | 
|---|
| 382 | memcpy(&v, &d, sizeof(d)); | 
|---|
| 383 | put64(v); | 
|---|
| 384 | } | 
|---|
| 385 |  | 
|---|
| 386 | inline unsigned char Decoder::get8() { | 
|---|
| 387 | const unsigned char v = *buf_; | 
|---|
| 388 | buf_ += sizeof(v); | 
|---|
| 389 | return v; | 
|---|
| 390 | } | 
|---|
| 391 |  | 
|---|
| 392 | inline uint16 Decoder::get16() { | 
|---|
| 393 | const uint16 v = LittleEndian::Load16(buf_); | 
|---|
| 394 | buf_ += sizeof(v); | 
|---|
| 395 | return v; | 
|---|
| 396 | } | 
|---|
| 397 |  | 
|---|
| 398 | inline uint32 Decoder::get32() { | 
|---|
| 399 | const uint32 v = LittleEndian::Load32(buf_); | 
|---|
| 400 | buf_ += sizeof(v); | 
|---|
| 401 | return v; | 
|---|
| 402 | } | 
|---|
| 403 |  | 
|---|
| 404 | inline uint64 Decoder::get64() { | 
|---|
| 405 | const uint64 v = LittleEndian::Load64(buf_); | 
|---|
| 406 | buf_ += sizeof(v); | 
|---|
| 407 | return v; | 
|---|
| 408 | } | 
|---|
| 409 |  | 
|---|
| 410 | inline uword_t Decoder::getword() { | 
|---|
| 411 | #ifdef _LP64 | 
|---|
| 412 | const uword_t v = LittleEndian::Load64(buf_); | 
|---|
| 413 | #else | 
|---|
| 414 | const uword_t v = LittleEndian::Load32(buf_); | 
|---|
| 415 | #endif /* _LP64 */ | 
|---|
| 416 | buf_ += sizeof(v); | 
|---|
| 417 | return v; | 
|---|
| 418 | } | 
|---|
| 419 |  | 
|---|
| 420 | inline DocId Decoder::get_docid() { | 
|---|
| 421 | return DocId(get64()); | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | inline DocId32Bit Decoder::get_docid_32bit() { | 
|---|
| 425 | return DocId32Bit(get32()); | 
|---|
| 426 | } | 
|---|
| 427 |  | 
|---|
| 428 | inline float Decoder::getfloat() { | 
|---|
| 429 | uint32 v = get32(); | 
|---|
| 430 | float f; | 
|---|
| 431 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; | 
|---|
| 432 | memcpy(&f, &v, sizeof(f)); | 
|---|
| 433 | return f; | 
|---|
| 434 | } | 
|---|
| 435 |  | 
|---|
| 436 | inline double Decoder::getdouble() { | 
|---|
| 437 | uint64 v = get64(); | 
|---|
| 438 | double d; | 
|---|
| 439 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; | 
|---|
| 440 | memcpy(&d, &v, sizeof(d)); | 
|---|
| 441 | return d; | 
|---|
| 442 | } | 
|---|
| 443 |  | 
|---|
| 444 | #endif  // UTIL_CODING_CODER_H__ | 
|---|
| 445 |  | 
|---|