Varint.h source code [folly/Varint.h]

1	/*
2	* Copyright 2013-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	#pragma once
18
19	#include <type_traits>
20
21	#include <folly/Conv.h>
22	#include <folly/Expected.h>
23	#include <folly/Likely.h>
24	#include <folly/Portability.h>
25	#include <folly/Range.h>
26
27	namespace folly {
28
29	/**
30	* Variable-length integer encoding, using a little-endian, base-128
31	* representation.
32	*
33	* The MSb is set on all bytes except the last.
34	*
35	* Details:
36	* https://developers.google.com/protocol-buffers/docs/encoding#varints
37	*
38	* If you want to encode multiple values, GroupVarint (in GroupVarint.h)
39	* is faster and likely smaller.
40	*/
41
42	/**
43	* Maximum length (in bytes) of the varint encoding of a 32-bit value.
44	*/
45	constexpr size_t kMaxVarintLength32 = `5`;
46
47	/**
48	* Maximum length (in bytes) of the varint encoding of a 64-bit value.
49	*/
50	constexpr size_t kMaxVarintLength64 = `10`;
51
52	/**
53	* Encode a value in the given buffer, returning the number of bytes used
54	* for encoding.
55	* buf must have enough space to represent the value (at least
56	* kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
57	*/
58	size_t encodeVarint(uint64_t val, uint8_t* buf);
59
60	/**
61	* Determine the number of bytes needed to represent "val".
62	* 32-bit values need at most 5 bytes.
63	* 64-bit values need at most 10 bytes.
64	*/
65	int encodeVarintSize(uint64_t val);
66
67	/**
68	* Decode a value from a given buffer, advances data past the returned value.
69	* Throws on error.
70	*/
71	template <class T>
72	uint64_t decodeVarint(Range<T*>& data);
73
74	enum class DecodeVarintError {
75	TooManyBytes = `0`,
76	TooFewBytes = `1`,
77	};
78
79	/**
80	* A variant of decodeVarint() that does not throw on error. Useful in contexts
81	* where only part of a serialized varint may be attempted to be decoded, e.g.,
82	* when a serialized varint arrives on the boundary of a network packet.
83	*/
84	template <class T>
85	Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data);
86
87	/**
88	* ZigZag encoding that maps signed integers with a small absolute value
89	* to unsigned integers with a small (positive) values. Without this,
90	* encoding negative values using Varint would use up 9 or 10 bytes.
91	*
92	* if x >= 0, encodeZigZag(x) == 2*x
93	* if x < 0, encodeZigZag(x) == -2*x + 1
94	*/
95
96	inline uint64_t encodeZigZag(int64_t val) {
97	// Bit-twiddling magic stolen from the Google protocol buffer document;
98	// val >> 63 is an arithmetic shift because val is signed
99	auto uval = static_cast<uint64_t>(val);
100	return static_cast<uint64_t>((uval << `1`) ^ (val >> `63`));
101	}
102
103	inline int64_t decodeZigZag(uint64_t val) {
104	return static_cast<int64_t>((val >> `1`) ^ -(val & `1`));
105	}
106
107	// Implementation below
108
109	inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
110	uint8_t* p = buf;
111	while (val >= `128`) {
112	*p++ = `0x80` \| (val & `0x7f`);
113	val >>= `7`;
114	}
115	*p++ = uint8_t(val);
116	return size_t(p - buf);
117	}
118
119	inline int encodeVarintSize(uint64_t val) {
120	if (folly::kIsArchAmd64) {
121	// __builtin_clzll is undefined for 0
122	int highBit = `64` - __builtin_clzll(val \| `1`);
123	return (highBit + `6`) / `7`;
124	} else {
125	int s = `1`;
126	while (val >= `128`) {
127	++s;
128	val >>= `7`;
129	}
130	return s;
131	}
132	}
133
134	template <class T>
135	inline uint64_t decodeVarint(Range<T*>& data) {
136	auto expected = tryDecodeVarint(data);
137	if (!expected) {
138	throw std::invalid_argument(
139	expected.error() == DecodeVarintError::TooManyBytes
140	? "Invalid varint value: too many bytes."
141	: "Invalid varint value: too few bytes.");
142	}
143	return *expected;
144	}
145
146	template <class T>
147	inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) {
148	static_assert(
149	std::is_same<typename std::remove_cv<T>::type, char>::value \|\|
150	std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
151	"Only character ranges are supported");
152
153	const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
154	const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
155	const int8_t* p = begin;
156	uint64_t val = `0`;
157
158	// end is always greater than or equal to begin, so this subtraction is safe
159	if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
160	int64_t b;
161	do {
162	b = *p++;
163	val = (b & `0x7f`);
164	if (b >= `0`) {
165	break;
166	}
167	b = *p++;
168	val \|= (b & `0x7f`) << `7`;
169	if (b >= `0`) {
170	break;
171	}
172	b = *p++;
173	val \|= (b & `0x7f`) << `14`;
174	if (b >= `0`) {
175	break;
176	}
177	b = *p++;
178	val \|= (b & `0x7f`) << `21`;
179	if (b >= `0`) {
180	break;
181	}
182	b = *p++;
183	val \|= (b & `0x7f`) << `28`;
184	if (b >= `0`) {
185	break;
186	}
187	b = *p++;
188	val \|= (b & `0x7f`) << `35`;
189	if (b >= `0`) {
190	break;
191	}
192	b = *p++;
193	val \|= (b & `0x7f`) << `42`;
194	if (b >= `0`) {
195	break;
196	}
197	b = *p++;
198	val \|= (b & `0x7f`) << `49`;
199	if (b >= `0`) {
200	break;
201	}
202	b = *p++;
203	val \|= (b & `0x7f`) << `56`;
204	if (b >= `0`) {
205	break;
206	}
207	b = *p++;
208	val \|= (b & `0x01`) << `63`;
209	if (b >= `0`) {
210	break;
211	}
212	return makeUnexpected(DecodeVarintError::TooManyBytes);
213	} while (false);
214	} else {
215	int shift = `0`;
216	while (p != end && *p < `0`) {
217	val \|= static_cast<uint64_t>(*p++ & `0x7f`) << shift;
218	shift += `7`;
219	}
220	if (p == end) {
221	return makeUnexpected(DecodeVarintError::TooFewBytes);
222	}
223	val \|= static_cast<uint64_t>(*p++) << shift;
224	}
225
226	data.uncheckedAdvance(p - begin);
227	return val;
228	}
229
230	} // namespace folly
231

Browse the source code of folly/Varint.h