1 | // |
2 | // Copyright 2000 - 2003 Google Inc. |
3 | // |
4 | // |
5 | // This holds the encoding/decoding routines that used to live in netutil |
6 | |
7 | #ifndef UTIL_CODING_CODER_H__ |
8 | #define UTIL_CODING_CODER_H__ |
9 | |
10 | #include <algorithm> |
11 | using std::min; |
12 | using std::max; |
13 | using std::swap; |
14 | using std::reverse; |
15 | // for min |
16 | #include "util/coding/varint.h" |
17 | #include "base/logging.h" |
18 | #include "base/port.h" |
19 | #include "util/endian/endian.h" |
20 | |
21 | /* Class for encoding data into a memory buffer */ |
22 | class Encoder { |
23 | public: |
24 | // Creates an empty Encoder with no room that is enlarged |
25 | // (if necessary) when "Encoder::Ensure(N)" is called. |
26 | Encoder(); |
27 | ~Encoder(); |
28 | |
29 | // Initialize encoder to encode into "buf" |
30 | explicit Encoder(void* buf, int maxn); |
31 | void reset(void* buf, int maxn); |
32 | void clear(); |
33 | |
34 | // Encoding routines. Note that these do not check bounds |
35 | void put8(unsigned char v); |
36 | void put16(uint16 v); |
37 | void put32(uint32 v); |
38 | void put64(uint64 v); |
39 | void putword(uword_t v); |
40 | void putn(const void* mem, int n); |
41 | |
42 | // put no more than n bytes, stopping when c is put |
43 | void putcn(const void* mem, int c, int n); |
44 | |
45 | void puts(const void* mem); // put a c-string including \0 |
46 | void puts_without_null(const char* mem); // put a c-string without \0 |
47 | void putfloat(float f); |
48 | void putdouble(double d); |
49 | |
50 | // Support for variable length encoding with 7 bits per byte |
51 | // (these are just simple wrappers around the Varint module) |
52 | static const int kVarintMax32 = Varint::kMax32; |
53 | static const int kVarintMax64 = Varint::kMax64; |
54 | |
55 | void put_varint32(uint32 v); |
56 | void put_varint64(uint64 v); |
57 | static int varint32_length(uint32 v); // Length of var encoding of "v" |
58 | static int varint64_length(uint64 v); // Length of var encoding of "v" |
59 | |
60 | // DEPRECATED |
61 | // |
62 | // For new code use put_varint32(ZigZagEncode(signed_value)); |
63 | // ZigZag coding is defined in utils/coding/transforms.h |
64 | void put_varsigned32(int32 v); |
65 | |
66 | // Support for a few special types we don't want to restrict size of |
67 | void put_docid(DocId d); |
68 | |
69 | // Return number of bytes encoded so far |
70 | int length() const; |
71 | |
72 | // Return number of bytes of space remaining in buffer |
73 | int avail() const; |
74 | |
75 | // REQUIRES: Encoder was created with the 0-argument constructor interface. |
76 | // |
77 | // This interface ensures that at least "N" more bytes are available |
78 | // in the underlying buffer by resizing the buffer (if necessary). |
79 | // |
80 | // Note that no bounds checking is done on any of the put routines, |
81 | // so it is the client's responsibility to call Ensure() at |
82 | // appropriate intervals to ensure that enough space is available |
83 | // for the data being added. |
84 | void Ensure(int N); |
85 | |
86 | // Returns true if Ensure is allowed to be called on "this" |
87 | bool ensure_allowed() const { return underlying_buffer_ != NULL; } |
88 | |
89 | // Return ptr to start of encoded data. This pointer remains valid |
90 | // until reset or Ensure is called. |
91 | const char* base() const { return (const char*)orig_; } |
92 | |
93 | // Advances the write pointer by "N" bytes. |
94 | void skip(int N) { buf_ += N; } |
95 | |
96 | // REQUIRES: length() >= N |
97 | // Removes the last N bytes out of the encoded buffer |
98 | void RemoveLast(int N); |
99 | |
100 | // REQUIRES: length() >= N |
101 | // Removes the last length()-N bytes to make the encoded buffer have length N |
102 | void Resize(int N); |
103 | |
104 | private: |
105 | void EnsureSlowPath(int N); |
106 | |
107 | unsigned char* orig_; |
108 | unsigned char* buf_; |
109 | unsigned char* limit_; |
110 | |
111 | // If constructed with the zero-argument constructor, we're allowed |
112 | // to use Ensure; otherwise we're not. If Ensure is allowed, |
113 | // underlying_buffer_ is non-NULL; otherwise it is set to NULL. |
114 | unsigned char* underlying_buffer_; |
115 | |
116 | static unsigned char kEmptyBuffer; |
117 | |
118 | DISALLOW_EVIL_CONSTRUCTORS(Encoder); |
119 | }; |
120 | |
121 | /* Class for decoding data from a memory buffer */ |
122 | class Decoder { |
123 | public: |
124 | // Empty constructor to create uninitialized decoder |
125 | inline Decoder() { } |
126 | |
127 | // NOTE: for efficiency reasons, this is not virtual. so don't add |
128 | // any members that really need to be destructed, and be careful about |
129 | // inheritance. |
130 | ~Decoder() { } |
131 | |
132 | // Initialize decoder to decode from "buf" |
133 | explicit Decoder(const void* buf, int maxn); |
134 | void reset(const void* buf, int maxn); |
135 | |
136 | // Decoding routines. Note that these do not check bounds |
137 | unsigned char get8(); |
138 | uint16 get16(); |
139 | uint32 get32(); |
140 | uint64 get64(); |
141 | uword_t getword(); |
142 | float getfloat(); |
143 | double getdouble(); |
144 | void getn(void* mem, int n); |
145 | void getcn(void* mem, int c, int n); // get no more than n bytes, |
146 | // stopping after c is got |
147 | void gets(void* mem, int n); // get a c-string no more than |
148 | // n bytes. always appends '\0' |
149 | void skip(int n); |
150 | unsigned char const* ptr(); // Return ptr to current position in buffer |
151 | |
152 | // "get_varint" actually checks bounds |
153 | bool get_varint32(uint32* v); |
154 | bool get_varint64(uint64* v); |
155 | |
156 | // DEPRECATED |
157 | // |
158 | // For new code use |
159 | // get_varint32(&unsigned_temp); |
160 | // signed_value = ZigZagDecode(unsigned_temp); |
161 | // ZigZag coding is defined in utils/coding/transforms.h |
162 | bool get_varsigned32(int32* v); |
163 | |
164 | // Support for a few special types we don't want to restrict size of |
165 | DocId get_docid(); |
166 | |
167 | // This is used for transitioning docids from 32bits to 64bits |
168 | DocId32Bit get_docid_32bit(); |
169 | |
170 | int pos() const; |
171 | // Return number of bytes decoded so far |
172 | |
173 | int avail() const; |
174 | // Return number of available bytes to read |
175 | |
176 | private: |
177 | friend class IndexBlockDecoder; |
178 | const unsigned char* orig_; |
179 | const unsigned char* buf_; |
180 | const unsigned char* limit_; |
181 | }; |
182 | DECLARE_POD(Decoder); // so then we might as well be a POD |
183 | |
184 | /***** Implementation details. Clients should ignore them. *****/ |
185 | |
186 | inline Encoder::Encoder(void* b, int maxn) { |
187 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); |
188 | limit_ = orig_ + maxn; |
189 | underlying_buffer_ = NULL; |
190 | } |
191 | |
192 | inline void Encoder::reset(void* b, int maxn) { |
193 | orig_ = buf_ = reinterpret_cast<unsigned char*>(b); |
194 | limit_ = orig_ + maxn; |
195 | // Can't use the underlying buffer anymore |
196 | if (underlying_buffer_ != &kEmptyBuffer) { |
197 | delete[] underlying_buffer_; |
198 | } |
199 | underlying_buffer_ = NULL; |
200 | } |
201 | |
202 | inline void Encoder::clear() { |
203 | buf_ = orig_; |
204 | } |
205 | |
206 | inline void Encoder::Ensure(int N) { |
207 | DCHECK(ensure_allowed()); |
208 | if (avail() < N) { |
209 | EnsureSlowPath(N); |
210 | } |
211 | } |
212 | |
213 | inline int Encoder::length() const { |
214 | return (buf_ - orig_); |
215 | } |
216 | |
217 | inline int Encoder::avail() const { |
218 | return (limit_ - buf_); |
219 | } |
220 | |
221 | inline void Encoder::putn(const void* src, int n) { |
222 | memcpy(buf_, src, n); |
223 | buf_ += n; |
224 | } |
225 | |
226 | inline void Encoder::putcn(const void* src, int c, int n) { |
227 | unsigned char *old = buf_; |
228 | buf_ = static_cast<unsigned char *>(memccpy(buf_, src, c, n)); |
229 | if (buf_ == NULL) |
230 | buf_ = old + n; |
231 | } |
232 | |
233 | inline void Encoder::puts(const void* src) { |
234 | putcn(src, '\0', limit_ - buf_); |
235 | } |
236 | |
237 | inline void Encoder::puts_without_null(const char* mem) { |
238 | while (*mem != '\0' && buf_ < limit_) { |
239 | *buf_++ = *mem++; |
240 | } |
241 | } |
242 | |
243 | inline void Encoder::put_varint32(uint32 v) { |
244 | buf_ = reinterpret_cast<unsigned char*> |
245 | (Varint::Encode32(reinterpret_cast<char*>(buf_), v)); |
246 | } |
247 | |
248 | inline void Encoder::put_varint64(uint64 v) { |
249 | buf_ = reinterpret_cast<unsigned char*> |
250 | (Varint::Encode64(reinterpret_cast<char*>(buf_), v)); |
251 | } |
252 | |
253 | // DEPRECATED |
254 | // |
255 | // For new code use put_varint32(ZigZagEncode(signed_value)); |
256 | // ZigZag coding is defined in utils/coding/transforms.h |
257 | inline void Encoder::put_varsigned32(int32 n) { |
258 | // Encode sign in low-bit |
259 | int sign = (n < 0) ? 1 : 0; |
260 | uint32 mag = (n < 0) ? -n : n; |
261 | put_varint32((mag << 1) | sign); |
262 | } |
263 | |
264 | inline Decoder::Decoder(const void* b, int maxn) { |
265 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); |
266 | limit_ = orig_ + maxn; |
267 | } |
268 | |
269 | inline void Decoder::reset(const void* b, int maxn) { |
270 | orig_ = buf_ = reinterpret_cast<const unsigned char*>(b); |
271 | limit_ = orig_ + maxn; |
272 | } |
273 | |
274 | inline int Decoder::pos() const { |
275 | return (buf_ - orig_); |
276 | } |
277 | |
278 | inline int Decoder::avail() const { |
279 | return (limit_ - buf_); |
280 | } |
281 | |
282 | inline void Decoder::getn(void* dst, int n) { |
283 | memcpy(dst, buf_, n); |
284 | buf_ += n; |
285 | } |
286 | |
287 | inline void Decoder::getcn(void* dst, int c, int n) { |
288 | void *ptr; |
289 | ptr = memccpy(dst, buf_, c, n); |
290 | if (ptr == NULL) |
291 | buf_ = buf_ + n; |
292 | else |
293 | buf_ = buf_ + (reinterpret_cast<unsigned char *>(ptr) - |
294 | reinterpret_cast<unsigned char *>(dst)); |
295 | } |
296 | |
297 | inline void Decoder::gets(void* dst, int n) { |
298 | int len = min<int>((n - 1), (limit_ - buf_)); |
299 | (reinterpret_cast<char *>(dst))[len] = '\0'; |
300 | getcn(dst, '\0', len); |
301 | } |
302 | |
303 | inline void Decoder::skip(int n) { |
304 | buf_ += n; |
305 | } |
306 | |
307 | inline unsigned char const* Decoder::ptr() { |
308 | return buf_; |
309 | } |
310 | |
311 | |
312 | // DEPRECATED |
313 | // |
314 | // For new code use |
315 | // get_varint32(&unsigned_temp); |
316 | // signed_value = ZigZagDecode(unsigned_temp); |
317 | // ZigZag coding is defined in utils/coding/transforms.h |
318 | inline bool Decoder::get_varsigned32(int32* v) { |
319 | uint32 coding; |
320 | if (get_varint32(&coding)) { |
321 | int sign = coding & 1; |
322 | int32 mag = coding >> 1; |
323 | if (sign) { |
324 | // Special handling for encoding of kint32min |
325 | *v = (mag == 0) ? kint32min : -mag; |
326 | } else { |
327 | *v = mag; |
328 | } |
329 | return true; |
330 | } else { |
331 | return false; |
332 | } |
333 | } |
334 | |
335 | inline void Encoder::put8(unsigned char v) { |
336 | DCHECK_GE(avail(), sizeof(v)); |
337 | *buf_ = v; |
338 | buf_ += sizeof(v); |
339 | } |
340 | |
341 | inline void Encoder::put16(uint16 v) { |
342 | DCHECK_GE(avail(), sizeof(v)); |
343 | LittleEndian::Store16(buf_, v); |
344 | buf_ += sizeof(v); |
345 | } |
346 | |
347 | inline void Encoder::put32(uint32 v) { |
348 | DCHECK_GE(avail(), sizeof(v)); |
349 | LittleEndian::Store32(buf_, v); |
350 | buf_ += sizeof(v); |
351 | } |
352 | |
353 | inline void Encoder::put64(uint64 v) { |
354 | DCHECK_GE(avail(), sizeof(v)); |
355 | LittleEndian::Store64(buf_, v); |
356 | buf_ += sizeof(v); |
357 | } |
358 | |
359 | inline void Encoder::putword(uword_t v) { |
360 | #ifdef _LP64 |
361 | LittleEndian::Store64(buf_, v); |
362 | #else |
363 | LittleEndian::Store32(buf_, v); |
364 | #endif /* _LP64 */ |
365 | buf_ += sizeof(v); |
366 | } |
367 | |
368 | inline void Encoder::put_docid(DocId d) { |
369 | put64(DocIdAsNumber(d)); |
370 | } |
371 | |
372 | inline void Encoder::putfloat(float f) { |
373 | uint32 v; |
374 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; |
375 | memcpy(&v, &f, sizeof(f)); |
376 | put32(v); |
377 | } |
378 | |
379 | inline void Encoder::putdouble(double d) { |
380 | uint64 v; |
381 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; |
382 | memcpy(&v, &d, sizeof(d)); |
383 | put64(v); |
384 | } |
385 | |
386 | inline unsigned char Decoder::get8() { |
387 | const unsigned char v = *buf_; |
388 | buf_ += sizeof(v); |
389 | return v; |
390 | } |
391 | |
392 | inline uint16 Decoder::get16() { |
393 | const uint16 v = LittleEndian::Load16(buf_); |
394 | buf_ += sizeof(v); |
395 | return v; |
396 | } |
397 | |
398 | inline uint32 Decoder::get32() { |
399 | const uint32 v = LittleEndian::Load32(buf_); |
400 | buf_ += sizeof(v); |
401 | return v; |
402 | } |
403 | |
404 | inline uint64 Decoder::get64() { |
405 | const uint64 v = LittleEndian::Load64(buf_); |
406 | buf_ += sizeof(v); |
407 | return v; |
408 | } |
409 | |
410 | inline uword_t Decoder::getword() { |
411 | #ifdef _LP64 |
412 | const uword_t v = LittleEndian::Load64(buf_); |
413 | #else |
414 | const uword_t v = LittleEndian::Load32(buf_); |
415 | #endif /* _LP64 */ |
416 | buf_ += sizeof(v); |
417 | return v; |
418 | } |
419 | |
420 | inline DocId Decoder::get_docid() { |
421 | return DocId(get64()); |
422 | } |
423 | |
424 | inline DocId32Bit Decoder::get_docid_32bit() { |
425 | return DocId32Bit(get32()); |
426 | } |
427 | |
428 | inline float Decoder::getfloat() { |
429 | uint32 v = get32(); |
430 | float f; |
431 | typedef char VerifySizesAreEqual[sizeof(f) == sizeof(v) ? 1 : -1]; |
432 | memcpy(&f, &v, sizeof(f)); |
433 | return f; |
434 | } |
435 | |
436 | inline double Decoder::getdouble() { |
437 | uint64 v = get64(); |
438 | double d; |
439 | typedef char VerifySizesAreEqual[sizeof(d) == sizeof(v) ? 1 : -1]; |
440 | memcpy(&d, &v, sizeof(d)); |
441 | return d; |
442 | } |
443 | |
444 | #endif // UTIL_CODING_CODER_H__ |
445 | |