1//
2// Copyright 2000 - 2003 Google Inc.
3//
4//
5// This holds the encoding/decoding routines that used to live in netutil
6
7#ifndef UTIL_CODING_CODER_H__
8#define UTIL_CODING_CODER_H__
9
10#include <algorithm>
11using std::min;
12using std::max;
13using std::swap;
14using std::reverse;
15 // for min
16#include "util/coding/varint.h"
17#include "base/logging.h"
18#include "base/port.h"
19#include "util/endian/endian.h"
20
21/* Class for encoding data into a memory buffer */
22class Encoder {
23 public:
24 // Creates an empty Encoder with no room that is enlarged
25 // (if necessary) when "Encoder::Ensure(N)" is called.
26 Encoder();
27 ~Encoder();
28
29 // Initialize encoder to encode into "buf"
30 explicit Encoder(void* buf, int maxn);
31 void reset(void* buf, int maxn);
32 void clear();
33
34 // Encoding routines. Note that these do not check bounds
35 void put8(unsigned char v);
36 void put16(uint16 v);
37 void put32(uint32 v);
38 void put64(uint64 v);
39 void putword(uword_t v);
40 void putn(const void* mem, int n);
41
42 // put no more than n bytes, stopping when c is put
43 void putcn(const void* mem, int c, int n);
44
45 void puts(const void* mem); // put a c-string including \0
46 void puts_without_null(const char* mem); // put a c-string without \0
47 void putfloat(float f);
48 void putdouble(double d);
49
50 // Support for variable length encoding with 7 bits per byte
51 // (these are just simple wrappers around the Varint module)
52 static const int kVarintMax32 = Varint::kMax32;
53 static const int kVarintMax64 = Varint::kMax64;
54
55 void put_varint32(uint32 v);
56 void put_varint64(uint64 v);
57 static int varint32_length(uint32 v); // Length of var encoding of "v"
58 static int varint64_length(uint64 v); // Length of var encoding of "v"
59
60 // DEPRECATED
61 //
62 // For new code use put_varint32(ZigZagEncode(signed_value));
63 // ZigZag coding is defined in utils/coding/transforms.h
64 void put_varsigned32(int32 v);
65
66 // Support for a few special types we don't want to restrict size of
67 void put_docid(DocId d);
68
69 // Return number of bytes encoded so far
70 int length() const;
71
72 // Return number of bytes of space remaining in buffer
73 int avail() const;
74
75 // REQUIRES: Encoder was created with the 0-argument constructor interface.
76 //
77 // This interface ensures that at least "N" more bytes are available
78 // in the underlying buffer by resizing the buffer (if necessary).
79 //
80 // Note that no bounds checking is done on any of the put routines,
81 // so it is the client's responsibility to call Ensure() at
82 // appropriate intervals to ensure that enough space is available
83 // for the data being added.
84 void Ensure(int N);
85
86 // Returns true if Ensure is allowed to be called on "this"
87 bool ensure_allowed() const { return underlying_buffer_ != NULL; }
88
89 // Return ptr to start of encoded data. This pointer remains valid
90 // until reset or Ensure is called.
91 const char* base() const { return (const char*)orig_; }
92
93 // Advances the write pointer by "N" bytes.
94 void skip(int N) { buf_ += N; }
95
96 // REQUIRES: length() >= N
97 // Removes the last N bytes out of the encoded buffer
98 void RemoveLast(int N);
99
100 // REQUIRES: length() >= N
101 // Removes the last length()-N bytes to make the encoded buffer have length N
102 void Resize(int N);
103
104 private:
105 void EnsureSlowPath(int N);
106
107 unsigned char* orig_;
108 unsigned char* buf_;
109 unsigned char* limit_;
110
111 // If constructed with the zero-argument constructor, we're allowed
112 // to use Ensure; otherwise we're not. If Ensure is allowed,
113 // underlying_buffer_ is non-NULL; otherwise it is set to NULL.
114 unsigned char* underlying_buffer_;
115
116 static unsigned char kEmptyBuffer;
117
118 DISALLOW_EVIL_CONSTRUCTORS(Encoder);
119};
120
121/* Class for decoding data from a memory buffer */
122class Decoder {
123 public:
124 // Empty constructor to create uninitialized decoder
125 inline Decoder() { }
126
127 // NOTE: for efficiency reasons, this is not virtual. so don't add
128 // any members that really need to be destructed, and be careful about
129 // inheritance.
130 ~Decoder() { }
131
132 // Initialize decoder to decode from "buf"
133 explicit Decoder(const void* buf, int maxn);
134 void reset(const void* buf, int maxn);
135
136 // Decoding routines. Note that these do not check bounds
137 unsigned char get8();
138 uint16 get16();
139 uint32 get32();
140 uint64 get64();
141 uword_t getword();
142 float getfloat();
143 double getdouble();
144 void getn(void* mem, int n);
145 void getcn(void* mem, int c, int n); // get no more than n bytes,
146 // stopping after c is got
147 void gets(void* mem, int n); // get a c-string no more than
148 // n bytes. always appends '\0'
149 void skip(int n);
150 unsigned char const* ptr(); // Return ptr to current position in buffer
151
152 // "get_varint" actually checks bounds
153 bool get_varint32(uint32* v);
154 bool get_varint64(uint64* v);
155
156 // DEPRECATED
157 //
158 // For new code use
159 // get_varint32(&unsigned_temp);
160 // signed_value = ZigZagDecode(unsigned_temp);
161 // ZigZag coding is defined in utils/coding/transforms.h
162 bool get_varsigned32(int32* v);
163
164 // Support for a few special types we don't want to restrict size of
165 DocId get_docid();
166
167 // This is used for transitioning docids from 32bits to 64bits
168 DocId32Bit get_docid_32bit();
169
170 int pos() const;
171 // Return number of bytes decoded so far
172
173 int avail() const;
174 // Return number of available bytes to read
175
176 private:
177 friend class IndexBlockDecoder;
178 const unsigned char* orig_;
179 const unsigned char* buf_;
180 const unsigned char* limit_;
181};
182DECLARE_POD(Decoder); // so then we might as well be a POD
183
184/***** Implementation details. Clients should ignore them. *****/
185
186inline Encoder::Encoder(void* b, int maxn) {
187 orig_ = buf_ = reinterpret_cast<unsigned char*>(b);
188 limit_ = orig_ + maxn;
189 underlying_buffer_ = NULL;
190}
191
192inline void Encoder::reset(void* b, int maxn) {
193 orig_ = buf_ = reinterpret_cast<unsigned char*>(b);
194 limit_ = orig_ + maxn;
195 // Can't use the underlying buffer anymore
196 if (underlying_buffer_ != &kEmptyBuffer) {
197 delete[] underlying_buffer_;
198 }
199 underlying_buffer_ = NULL;
200}
201
202inline void Encoder::clear() {
203 buf_ = orig_;
204}
205
206inline void Encoder::Ensure(int N) {
207 DCHECK(ensure_allowed());
208 if (avail() < N) {
209 EnsureSlowPath(N);
210 }
211}
212
213inline int Encoder::length() const {
214 return (buf_ - orig_);
215}
216
217inline int Encoder::avail() const {
218 return (limit_ - buf_);
219}
220
221inline void Encoder::putn(const void* src, int n) {
222 memcpy(buf_, src, n);
223 buf_ += n;
224}
225
226inline void Encoder::putcn(const void* src, int c, int n) {
227 unsigned char *old = buf_;
228 buf_ = static_cast<unsigned char *>(memccpy(buf_, src, c, n));
229 if (buf_ == NULL)
230 buf_ = old + n;
231}
232
233inline void Encoder::puts(const void* src) {
234 putcn(src, '\0', limit_ - buf_);
235}
236
237inline void Encoder::puts_without_null(const char* mem) {
238 while (*mem != '\0' && buf_ < limit_) {
239 *buf_++ = *mem++;
240 }
241}
242
243inline void Encoder::put_varint32(uint32 v) {
244 buf_ = reinterpret_cast<unsigned char*>
245 (Varint::Encode32(reinterpret_cast<char*>(buf_), v));
246}
247
248inline void Encoder::put_varint64(uint64 v) {
249 buf_ = reinterpret_cast<unsigned char*>
250 (Varint::Encode64(reinterpret_cast<char*>(buf_), v));
251}
252
253// DEPRECATED
254//
255// For new code use put_varint32(ZigZagEncode(signed_value));
256// ZigZag coding is defined in utils/coding/transforms.h
257inline void Encoder::put_varsigned32(int32 n) {
258 // Encode sign in low-bit
259 int sign = (n < 0) ? 1 : 0;
260 uint32 mag = (n < 0) ? -n : n;
261 put_varint32((mag << 1) | sign);
262}
263
264inline Decoder::Decoder(const void* b, int maxn) {
265 orig_ = buf_ = reinterpret_cast<const unsigned char*>(b);
266 limit_ = orig_ + maxn;
267}
268
269inline void Decoder::reset(const void* b, int maxn) {
270 orig_ = buf_ = reinterpret_cast<const unsigned char*>(b);
271 limit_ = orig_ + maxn;
272}
273
274inline int Decoder::pos() const {
275 return (buf_ - orig_);
276}
277
278inline int Decoder::avail() const {
279 return (limit_ - buf_);
280}
281
282inline void Decoder::getn(void* dst, int n) {
283 memcpy(dst, buf_, n);
284 buf_ += n;
285}
286
287inline void Decoder::getcn(void* dst, int c, int n) {
288 void *ptr;
289 ptr = memccpy(dst, buf_, c, n);
290 if (ptr == NULL)
291 buf_ = buf_ + n;
292 else
293 buf_ = buf_ + (reinterpret_cast<unsigned char *>(ptr) -
294 reinterpret_cast<unsigned char *>(dst));
295}
296
297inline void Decoder::gets(void* dst, int n) {
298 int len = min<int>((n - 1), (limit_ - buf_));
299 (reinterpret_cast<char *>(dst))[len] = '\0';
300 getcn(dst, '\0', len);
301}
302
303inline void Decoder::skip(int n) {
304 buf_ += n;
305}
306
307inline unsigned char const* Decoder::ptr() {
308 return buf_;
309}
310
311
312// DEPRECATED
313//
314// For new code use
315// get_varint32(&unsigned_temp);
316// signed_value = ZigZagDecode(unsigned_temp);
317// ZigZag coding is defined in utils/coding/transforms.h
318inline bool Decoder::get_varsigned32(int32* v) {
319 uint32 coding;
320 if (get_varint32(&coding)) {
321 int sign = coding & 1;
322 int32 mag = coding >> 1;
323 if (sign) {
324 // Special handling for encoding of kint32min
325 *v = (mag == 0) ? kint32min : -mag;
326 } else {
327 *v = mag;
328 }
329 return true;
330 } else {
331 return false;
332 }
333}
334
335inline void Encoder::put8(unsigned char v) {
336 DCHECK_GE(avail(), sizeof(v));
337 *buf_ = v;
338 buf_ += sizeof(v);
339}
340
341inline void Encoder::put16(uint16 v) {
342 DCHECK_GE(avail(), sizeof(v));
343 LittleEndian::Store16(buf_, v);
344 buf_ += sizeof(v);
345}
346
347inline void Encoder::put32(uint32 v) {
348 DCHECK_GE(avail(), sizeof(v));
349 LittleEndian::Store32(buf_, v);
350 buf_ += sizeof(v);
351}
352
353inline void Encoder::put64(uint64 v) {
354 DCHECK_GE(avail(), sizeof(v));
355 LittleEndian::Store64(buf_, v);
356 buf_ += sizeof(v);
357}
358
359inline void Encoder::putword(uword_t v) {
360#ifdef _LP64
361 LittleEndian::Store64(buf_, v);
362#else
363 LittleEndian::Store32(buf_, v);
364#endif /* _LP64 */
365 buf_ += sizeof(v);
366}
367
368inline void Encoder::put_docid(DocId d) {
369 put64(DocIdAsNumber(d));
370}
371
372inline void Encoder::putfloat(float f) {
373 uint32 v;
374 typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1];
375 memcpy(&v, &f, sizeof(f));
376 put32(v);
377}
378
379inline void Encoder::putdouble(double d) {
380 uint64 v;
381 typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1];
382 memcpy(&v, &d, sizeof(d));
383 put64(v);
384}
385
386inline unsigned char Decoder::get8() {
387 const unsigned char v = *buf_;
388 buf_ += sizeof(v);
389 return v;
390}
391
392inline uint16 Decoder::get16() {
393 const uint16 v = LittleEndian::Load16(buf_);
394 buf_ += sizeof(v);
395 return v;
396}
397
398inline uint32 Decoder::get32() {
399 const uint32 v = LittleEndian::Load32(buf_);
400 buf_ += sizeof(v);
401 return v;
402}
403
404inline uint64 Decoder::get64() {
405 const uint64 v = LittleEndian::Load64(buf_);
406 buf_ += sizeof(v);
407 return v;
408}
409
410inline uword_t Decoder::getword() {
411#ifdef _LP64
412 const uword_t v = LittleEndian::Load64(buf_);
413#else
414 const uword_t v = LittleEndian::Load32(buf_);
415#endif /* _LP64 */
416 buf_ += sizeof(v);
417 return v;
418}
419
420inline DocId Decoder::get_docid() {
421 return DocId(get64());
422}
423
424inline DocId32Bit Decoder::get_docid_32bit() {
425 return DocId32Bit(get32());
426}
427
428inline float Decoder::getfloat() {
429 uint32 v = get32();
430 float f;
431 typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1];
432 memcpy(&f, &v, sizeof(f));
433 return f;
434}
435
436inline double Decoder::getdouble() {
437 uint64 v = get64();
438 double d;
439 typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1];
440 memcpy(&d, &v, sizeof(d));
441 return d;
442}
443
444#endif // UTIL_CODING_CODER_H__
445