varint.h source code [Aerospike/modules/s2-geometry-library/geometry/util/coding/varint.h]

1	// Copyright 2001 and onwards Google Inc.
2	//
3	// Raw support for varint encoding. Higher level interfaces are
4	// provided by Encoder/Decoder/IOBuffer. Clients should typically use
5	// those interfaces, unless speed is paramount.
6	//
7	// If decoding speed is very important, consider using PrefixVarint instead.
8	// It has the same compression ratio, but generally faster decoding.
9	//
10	// Provided routines:
11	// vi_parse32_unchecked
12	// vi_parse64_unchecked
13	// vi_encode32_unchecked
14	// vi_encode64_unchecked
15
16	#ifndef _VARINT_H
17	#define _VARINT_H
18
19	#include <string>
20	using std::string;
21
22	#include "base/basictypes.h"
23
24	// Just a namespace, not a real class
25	class Varint {
26	public:
27	// Maximum lengths of varint encoding of uint32 and uint64
28	static const int kMax32 = `5`;
29	static const int kMax64 = `10`;
30
31	// REQUIRES "ptr" points to a buffer of length at least kMaxXX
32	// EFFECTS Scan next varint from "ptr" and store in OUTPUT.
33	// Returns pointer just past last read byte. Returns
34	// NULL if a valid varint value was not found.
35	static const char* Parse32(const char* ptr, uint32* OUTPUT);
36	static const char* Parse64(const char* ptr, uint64* OUTPUT);
37
38	// A fully inlined version of Parse32: useful in the most time critical
39	// routines, but its code size is large
40	static const char* Parse32Inline(const char* ptr, uint32* OUTPUT);
41
42	// REQUIRES "ptr" points just past the last byte of a varint-encoded value.
43	// REQUIRES A second varint must be encoded just before the one we parse,
44	// OR "base" must point to the first byte of the one we parse.
45	// REQUIRES Bytes [base, ptr-1] are readable
46	//
47	// EFFECTS Scan backwards from "ptr" and store in OUTPUT. Stop at the last
48	// byte of the previous varint, OR at "base", whichever one comes
49	// first. Returns pointer to the first byte of the decoded varint
50	// NULL if a valid varint value was not found.
51	static const char* Parse32Backward(const char* ptr, const char* base,
52	uint32* OUTPUT);
53	static const char* Parse64Backward(const char* ptr, const char* base,
54	uint64* OUTPUT);
55
56	// Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
57	// Never reads a character at or beyond limit. If a valid/terminated varint32
58	// was found in the range, stores it in OUTPUT and returns a pointer just*
59	// past the last byte of the varint32. Else returns NULL. On success,
60	// "result <= limit".
61	static const char* Parse32WithLimit(const char* ptr, const char* limit,
62	uint32* OUTPUT);
63	static const char* Parse64WithLimit(const char* ptr, const char* limit,
64	uint64* OUTPUT);
65
66	// REQUIRES "ptr" points to the first byte of a varint-encoded value.
67	// EFFECTS Scans until the end of the varint and returns a pointer just
68	// past the last byte. Returns NULL if "ptr" does not point to
69	// a valid varint value.
70	static const char* Skip32(const char* ptr);
71	static const char* Skip64(const char* ptr);
72
73	// REQUIRES "ptr" points just past the last byte of a varint-encoded value.
74	// REQUIRES A second varint must be encoded just before the one we parse,
75	// OR "base" must point to the first byte of the one we parse.
76	// REQUIRES Bytes [base, ptr-1] are readable
77	//
78	// EFFECTS Scan backwards from "ptr" and stop at the last byte of the
79	// previous varint, OR at "base", whichever one comes first.
80	// Returns pointer to the first byte of the skipped varint or
81	// NULL if a valid varint value was not found.
82	static const char* Skip32Backward(const char* ptr, const char* base);
83	static const char* Skip64Backward(const char* ptr, const char* base);
84
85	// REQUIRES "ptr" points to a buffer of length sufficient to hold "v".
86	// EFFECTS Encodes "v" into "ptr" and returns a pointer to the
87	// byte just past the last encoded byte.
88	static char* Encode32(char* ptr, uint32 v);
89	static char* Encode64(char* ptr, uint64 v);
90
91	// A fully inlined version of Encode32: useful in the most time critical
92	// routines, but its code size is large
93	static char* Encode32Inline(char* ptr, uint32 v);
94
95	// EFFECTS Returns the encoding length of the specified value.
96	static int Length32(uint32 v);
97	static int Length64(uint64 v);
98
99	static int Length32NonInline(uint32 v);
100
101	// EFFECTS Appends the varint representation of "value" to "s".*
102	static void Append32(string* s, uint32 value);
103	static void Append64(string* s, uint64 value);
104
105	// EFFECTS Encodes a pair of values to "s". The encoding*
106	// is done by weaving together 4 bit groups of
107	// each number into a single 64 bit value, and then
108	// encoding this value as a Varint64 value. This means
109	// that if both a and b are small, both values can be
110	// encoded in a single byte.
111	static void EncodeTwo32Values(string* s, uint32 a, uint32 b);
112	static const char* DecodeTwo32Values(const char* ptr, uint32* a, uint32* b);
113
114	// Decode and sum up a sequence of deltas until the sum >= goal.
115	// It is significantly faster than calling ParseXXInline in a loop.
116	// NOTE(user): The code does NO error checking, it assumes all the
117	// deltas are valid and the sum of deltas will never exceed kint64max. The
118	// code works for both 32bits and 64bits varint, and on 64 bits machines,
119	// the 64 bits version is almost always faster. Thus we only have a 64 bits
120	// interface here. The interface is slightly different from the other
121	// functions in that it requires signed* integers.*
122	// REQUIRES "ptr" points to the first byte of a varint-encoded delta.
123	// The sum of deltas >= goal (the code does NO boundary check).
124	// goal is positive and fit into a signed int64.
125	// EFFECTS Returns a pointer just past last read byte.
126	// "out" stores the actual sum.
127	static const char* FastDecodeDeltas(const char* ptr, int64 goal, int64* out);
128
129	private:
130	static const char* Parse32FallbackInline(const char* p, uint32* val);
131	static const char* Parse32Fallback(const char* p, uint32* val);
132	static const char* Parse64Fallback(const char* p, uint64* val);
133
134	static char* Encode32Fallback(char* ptr, uint32 v);
135
136	static const char* DecodeTwo32ValuesSlow(const char* p, uint32* a, uint32* b);
137	static const char* Parse32BackwardSlow(const char* ptr, const char* base,
138	uint32* OUTPUT);
139	static const char* Parse64BackwardSlow(const char* ptr, const char* base,
140	uint64* OUTPUT);
141	static const char* Skip32BackwardSlow(const char* ptr, const char* base);
142	static const char* Skip64BackwardSlow(const char* ptr, const char* base);
143
144	static void Append32Slow(string* s, uint32 value);
145	static void Append64Slow(string* s, uint64 value);
146
147	// Mapping from rightmost bit set to the number of bytes required
148	static const char length32_bytes_required[`33`];
149	};
150
151	/** Implementation details; clients should ignore **/
152
153	inline const char* Varint::Parse32FallbackInline(const char* p,
154	uint32* OUTPUT) {
155	// Fast path
156	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
157	uint32 byte, result;
158	byte = *(ptr++); result = byte & `127`;
159	assert(byte >= `128`); // Already checked in inlined prelude
160	byte = (ptr++); result \|= (byte & `127`) << `7`; if (byte < `128`) goto* done;
161	byte = (ptr++); result \|= (byte & `127`) << `14`; if (byte < `128`) goto* done;
162	byte = (ptr++); result \|= (byte & `127`) << `21`; if (byte < `128`) goto* done;
163	byte = (ptr++); result \|= (byte & `127`) << `28`; if (byte < `128`) goto* done;
164	return NULL; // Value is too long to be a varint32
165	done:
166	*OUTPUT = result;
167	return reinterpret_cast<const char*>(ptr);
168	}
169
170	inline const char* Varint::Parse32(const char* p, uint32* OUTPUT) {
171	// Fast path for inlining
172	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
173	uint32 byte = *ptr;
174	if (byte < `128`) {
175	*OUTPUT = byte;
176	return reinterpret_cast<const char*>(ptr) + `1`;
177	} else {
178	return Parse32Fallback(p, OUTPUT);
179	}
180	}
181
182	inline const char* Varint::Parse32Inline(const char* p, uint32* OUTPUT) {
183	// Fast path for inlining
184	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
185	uint32 byte = *ptr;
186	if (byte < `128`) {
187	*OUTPUT = byte;
188	return reinterpret_cast<const char*>(ptr) + `1`;
189	} else {
190	return Parse32FallbackInline(p, OUTPUT);
191	}
192	}
193
194	inline const char* Varint::Skip32(const char* p) {
195	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
196	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
197	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
198	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
199	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
200	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
201	return NULL; // value is too long to be a varint32
202	}
203
204	inline const char* Varint::Parse32Backward(const char* p, const char* base,
205	uint32* OUTPUT) {
206	if (p > base + kMax32) {
207	// Fast path
208	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
209	uint32 byte, result;
210	byte = (--ptr); if (byte > `127`) return* NULL;
211	result = byte;
212	byte = (--ptr); if (byte < `128`) goto* done;
213	result <<= `7`; result \|= (byte & `127`);
214	byte = (--ptr); if (byte < `128`) goto* done;
215	result <<= `7`; result \|= (byte & `127`);
216	byte = (--ptr); if (byte < `128`) goto* done;
217	result <<= `7`; result \|= (byte & `127`);
218	byte = (--ptr); if (byte < `128`) goto* done;
219	result <<= `7`; result \|= (byte & `127`);
220	byte = (--ptr); if (byte < `128`) goto* done;
221	return NULL; // Value is too long to be a varint32
222	done:
223	*OUTPUT = result;
224	return reinterpret_cast<const char*>(ptr+`1`);
225	} else {
226	return Parse32BackwardSlow(p, base, OUTPUT);
227	}
228	}
229
230	inline const char* Varint::Skip32Backward(const char* p, const char* base) {
231	if (p > base + kMax32) {
232	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
233	if ((--ptr) > `127`) return* NULL;
234	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
235	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
236	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
237	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
238	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
239	return NULL; // value is too long to be a varint32
240	} else {
241	return Skip32BackwardSlow(p, base);
242	}
243	}
244
245	inline const char* Varint::Parse32WithLimit(const char* p,
246	const char* l,
247	uint32* OUTPUT) {
248	if (p + kMax32 <= l) {
249	return Parse32(p, OUTPUT);
250	} else {
251	// Slow version with bounds checks
252	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
253	const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
254	uint32 b, result;
255	if (ptr >= limit) return NULL;
256	b = (ptr++); result = b & `127`; if (b < `128`) goto* done;
257	if (ptr >= limit) return NULL;
258	b = (ptr++); result \|= (b & `127`) << `7`; if (b < `128`) goto* done;
259	if (ptr >= limit) return NULL;
260	b = (ptr++); result \|= (b & `127`) << `14`; if (b < `128`) goto* done;
261	if (ptr >= limit) return NULL;
262	b = (ptr++); result \|= (b & `127`) << `21`; if (b < `128`) goto* done;
263	if (ptr >= limit) return NULL;
264	b = (ptr++); result \|= (b & `127`) << `28`; if (b < `16`) goto* done;
265	return NULL; // Value is too long to be a varint32
266	done:
267	*OUTPUT = result;
268	return reinterpret_cast<const char*>(ptr);
269	}
270
271	}
272
273	inline const char* Varint::Parse64(const char* p, uint64* OUTPUT) {
274	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
275	uint32 byte = *ptr;
276	if (byte < `128`) {
277	*OUTPUT = byte;
278	return reinterpret_cast<const char*>(ptr) + `1`;
279	} else {
280	return Parse64Fallback(p, OUTPUT);
281	}
282	}
283
284	inline const char* Varint::Skip64(const char* p) {
285	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
286	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
287	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
288	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
289	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
290	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
291	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
292	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
293	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
294	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
295	if (ptr++ < `128`) return* reinterpret_cast<const char*>(ptr);
296	return NULL; // value is too long to be a varint64
297	}
298
299	inline const char* Varint::Parse64Backward(const char* p, const char* b,
300	uint64* OUTPUT) {
301	if (p > b + kMax64) {
302	// Fast path
303	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
304	uint32 byte;
305	uint64 res;
306
307	byte = (--ptr); if (byte > `127`) return* NULL;
308
309	res = byte;
310	byte = (--ptr); if (byte < `128`) goto* done;
311	res <<= `7`; res \|= (byte & `127`);
312	byte = (--ptr); if (byte < `128`) goto* done;
313	res <<= `7`; res \|= (byte & `127`);
314	byte = (--ptr); if (byte < `128`) goto* done;
315	res <<= `7`; res \|= (byte & `127`);
316	byte = (--ptr); if (byte < `128`) goto* done;
317	res <<= `7`; res \|= (byte & `127`);
318	byte = (--ptr); if (byte < `128`) goto* done;
319	res <<= `7`; res \|= (byte & `127`);
320	byte = (--ptr); if (byte < `128`) goto* done;
321	res <<= `7`; res \|= (byte & `127`);
322	byte = (--ptr); if (byte < `128`) goto* done;
323	res <<= `7`; res \|= (byte & `127`);
324	byte = (--ptr); if (byte < `128`) goto* done;
325	res <<= `7`; res \|= (byte & `127`);
326	byte = (--ptr); if (byte < `128`) goto* done;
327	res <<= `7`; res \|= (byte & `127`);
328	byte = (--ptr); if (byte < `128`) goto* done;
329
330	return NULL; // Value is too long to be a varint64
331
332	done:
333	*OUTPUT = res;
334	return reinterpret_cast<const char*>(ptr + `1`);
335	} else {
336	return Parse64BackwardSlow(p, b, OUTPUT);
337	}
338	}
339
340	inline const char* Varint::Skip64Backward(const char* p, const char* b) {
341	if (p > b + kMax64) {
342	// Fast path
343	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
344	if ((--ptr) > `127`) return* NULL;
345	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
346	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
347	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
348	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
349	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
350	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
351	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
352	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
353	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
354	if ((--ptr) < `128`) return* reinterpret_cast<const char*>(ptr+`1`);
355	return NULL; // value is too long to be a varint64
356	} else {
357	return Skip64BackwardSlow(p, b);
358	}
359	}
360
361	inline const char* Varint::DecodeTwo32Values(const char* p,
362	uint32* a, uint32* b) {
363	const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
364	if (*ptr < `128`) {
365	// Special case for small values
366	a = (ptr & `0xf`);
367	b = ptr >> `4`;
368	return reinterpret_cast<const char*>(ptr) + `1`;
369	} else {
370	return DecodeTwo32ValuesSlow(p, a, b);
371	}
372	}
373
374	#if (defined __i386__ \|\| defined __x86_64__) && defined __GNUC__
375	inline int Varint::Length32(uint32 v) {
376	// Find the rightmost bit set, and index into a small table
377
378	// "ro" for the input spec means the input can come from either a
379	// register ("r") or offsetable memory ("o").
380	//
381	// If "n == 0", the "bsr" instruction sets the "Z" flag, so we
382	// conditionally move "-1" into the result.
383	//
384	// Note: the cmovz was introduced on PIII's, and may not work on
385	// older machines.
386	int bits;
387	const int neg1 = -`1`;
388	__asm__("bsr %1, %0\n\t"
389	"cmovz %2, %0"
390	: "=&r" (bits) // Output spec, early clobber
391	: "ro" (v), "r" (neg1) // Input spec
392	: "cc" // Clobbers condition-codes
393	);
394	return Varint::length32_bytes_required[bits+`1`];
395	}
396
397	#else
398	inline int Varint::Length32(uint32 v) {
399	/*
400	The following version is about 1.5X the code size, but is faster than
401	the loop below.
402
403	if (v < (1u << 14)) {
404	if (v < (1u << 7)) {
405	return 1;
406	} else {
407	return 2;
408	}
409	} else {
410	if (v < (1u << 28)) {
411	if (v < (1u << 21)) {
412	return 3;
413	} else {
414	return 4;
415	}
416	} else {
417	return 5;
418	}
419	}
420	*/
421
422	// Each byte of output stores 7 bits of "v" until "v" becomes zero
423	int nbytes = `0`;
424	do {
425	nbytes++;
426	v >>= `7`;
427	} while (v != `0`);
428	return nbytes;
429	}
430	#endif
431
432	inline void Varint::Append32(string* s, uint32 value) {
433	// Inline the fast-path for single-character output, but fall back to the .cc
434	// file for the full version. The size<capacity check is so the compiler can
435	// optimize out the string resize code.
436	if (value < `128` && s->size() < s->capacity()) {
437	s->push_back((unsigned char)value);
438	} else {
439	Append32Slow(s, value);
440	}
441	}
442
443	inline void Varint::Append64(string* s, uint64 value) {
444	// Inline the fast-path for single-character output, but fall back to the .cc
445	// file for the full version. The size<capacity check is so the compiler can
446	// optimize out the string resize code.
447	if (value < `128` && s->size() < s->capacity()) {
448	s->push_back((unsigned char)value);
449	} else {
450	Append64Slow(s, value);
451	}
452	}
453
454	inline char* Varint::Encode32Inline(char* sptr, uint32 v) {
455	// Operate on characters as unsigneds
456	unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
457	static const int B = `128`;
458	if (v < (`1`<<`7`)) {
459	*(ptr++) = v;
460	} else if (v < (`1`<<`14`)) {
461	*(ptr++) = v \| B;
462	*(ptr++) = v>>`7`;
463	} else if (v < (`1`<<`21`)) {
464	*(ptr++) = v \| B;
465	*(ptr++) = (v>>`7`) \| B;
466	*(ptr++) = v>>`14`;
467	} else if (v < (`1`<<`28`)) {
468	*(ptr++) = v \| B;
469	*(ptr++) = (v>>`7`) \| B;
470	*(ptr++) = (v>>`14`) \| B;
471	*(ptr++) = v>>`21`;
472	} else {
473	*(ptr++) = v \| B;
474	*(ptr++) = (v>>`7`) \| B;
475	*(ptr++) = (v>>`14`) \| B;
476	*(ptr++) = (v>>`21`) \| B;
477	*(ptr++) = v>>`28`;
478	}
479	return reinterpret_cast<char*>(ptr);
480	}
481
482	#if (-1 >> 1) != -1
483	#error FastDecodeDeltas() needs right-shift to sign-extend.
484	#endif
485	inline const char* Varint::FastDecodeDeltas(const char* ptr,
486	int64 goal,
487	int64* out) {
488	int64 value;
489	int64 sum = - goal;
490	int64 shift = `0`;
491	// Make decoding faster by eliminating unpredictable branching.
492	do {
493	value = static_cast<int8>(ptr++); // sign extend one byte of data*
494	sum += (value & `0x7F`) << shift;
495	shift += `7`;
496	// (value >> 7) is either -1(continuation byte) or 0 (stop byte)
497	shift &= value >> `7`;
498	// Loop if we haven't reached goal (sum < 0) or we haven't finished
499	// parsing current delta (value < 0). We write it in the form of
500	// (a \| b) < 0 as opposed to (a < 0 \|\| b < 0) as the former one is
501	// usually as fast as a test for (a < 0).
502	} while ((sum \| value) < `0`);
503
504	*out = goal + sum;
505	return ptr;
506	}
507
508	#endif /* _VARINT_H */
509

Browse the source code of Aerospike/modules/s2-geometry-library/geometry/util/coding/varint.h