1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <type_traits>
20
21#include <folly/Conv.h>
22#include <folly/Expected.h>
23#include <folly/Likely.h>
24#include <folly/Portability.h>
25#include <folly/Range.h>
26
27namespace folly {
28
29/**
30 * Variable-length integer encoding, using a little-endian, base-128
31 * representation.
32 *
33 * The MSb is set on all bytes except the last.
34 *
35 * Details:
36 * https://developers.google.com/protocol-buffers/docs/encoding#varints
37 *
38 * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
39 * is faster and likely smaller.
40 */
41
42/**
43 * Maximum length (in bytes) of the varint encoding of a 32-bit value.
44 */
45constexpr size_t kMaxVarintLength32 = 5;
46
47/**
48 * Maximum length (in bytes) of the varint encoding of a 64-bit value.
49 */
50constexpr size_t kMaxVarintLength64 = 10;
51
52/**
53 * Encode a value in the given buffer, returning the number of bytes used
54 * for encoding.
55 * buf must have enough space to represent the value (at least
56 * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
57 */
58size_t encodeVarint(uint64_t val, uint8_t* buf);
59
60/**
61 * Determine the number of bytes needed to represent "val".
62 * 32-bit values need at most 5 bytes.
63 * 64-bit values need at most 10 bytes.
64 */
65int encodeVarintSize(uint64_t val);
66
67/**
68 * Decode a value from a given buffer, advances data past the returned value.
69 * Throws on error.
70 */
71template <class T>
72uint64_t decodeVarint(Range<T*>& data);
73
74enum class DecodeVarintError {
75 TooManyBytes = 0,
76 TooFewBytes = 1,
77};
78
79/**
80 * A variant of decodeVarint() that does not throw on error. Useful in contexts
81 * where only part of a serialized varint may be attempted to be decoded, e.g.,
82 * when a serialized varint arrives on the boundary of a network packet.
83 */
84template <class T>
85Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data);
86
87/**
88 * ZigZag encoding that maps signed integers with a small absolute value
89 * to unsigned integers with a small (positive) values. Without this,
90 * encoding negative values using Varint would use up 9 or 10 bytes.
91 *
92 * if x >= 0, encodeZigZag(x) == 2*x
93 * if x < 0, encodeZigZag(x) == -2*x + 1
94 */
95
96inline uint64_t encodeZigZag(int64_t val) {
97 // Bit-twiddling magic stolen from the Google protocol buffer document;
98 // val >> 63 is an arithmetic shift because val is signed
99 auto uval = static_cast<uint64_t>(val);
100 return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
101}
102
103inline int64_t decodeZigZag(uint64_t val) {
104 return static_cast<int64_t>((val >> 1) ^ -(val & 1));
105}
106
107// Implementation below
108
109inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
110 uint8_t* p = buf;
111 while (val >= 128) {
112 *p++ = 0x80 | (val & 0x7f);
113 val >>= 7;
114 }
115 *p++ = uint8_t(val);
116 return size_t(p - buf);
117}
118
119inline int encodeVarintSize(uint64_t val) {
120 if (folly::kIsArchAmd64) {
121 // __builtin_clzll is undefined for 0
122 int highBit = 64 - __builtin_clzll(val | 1);
123 return (highBit + 6) / 7;
124 } else {
125 int s = 1;
126 while (val >= 128) {
127 ++s;
128 val >>= 7;
129 }
130 return s;
131 }
132}
133
134template <class T>
135inline uint64_t decodeVarint(Range<T*>& data) {
136 auto expected = tryDecodeVarint(data);
137 if (!expected) {
138 throw std::invalid_argument(
139 expected.error() == DecodeVarintError::TooManyBytes
140 ? "Invalid varint value: too many bytes."
141 : "Invalid varint value: too few bytes.");
142 }
143 return *expected;
144}
145
146template <class T>
147inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) {
148 static_assert(
149 std::is_same<typename std::remove_cv<T>::type, char>::value ||
150 std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
151 "Only character ranges are supported");
152
153 const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
154 const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
155 const int8_t* p = begin;
156 uint64_t val = 0;
157
158 // end is always greater than or equal to begin, so this subtraction is safe
159 if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
160 int64_t b;
161 do {
162 b = *p++;
163 val = (b & 0x7f);
164 if (b >= 0) {
165 break;
166 }
167 b = *p++;
168 val |= (b & 0x7f) << 7;
169 if (b >= 0) {
170 break;
171 }
172 b = *p++;
173 val |= (b & 0x7f) << 14;
174 if (b >= 0) {
175 break;
176 }
177 b = *p++;
178 val |= (b & 0x7f) << 21;
179 if (b >= 0) {
180 break;
181 }
182 b = *p++;
183 val |= (b & 0x7f) << 28;
184 if (b >= 0) {
185 break;
186 }
187 b = *p++;
188 val |= (b & 0x7f) << 35;
189 if (b >= 0) {
190 break;
191 }
192 b = *p++;
193 val |= (b & 0x7f) << 42;
194 if (b >= 0) {
195 break;
196 }
197 b = *p++;
198 val |= (b & 0x7f) << 49;
199 if (b >= 0) {
200 break;
201 }
202 b = *p++;
203 val |= (b & 0x7f) << 56;
204 if (b >= 0) {
205 break;
206 }
207 b = *p++;
208 val |= (b & 0x01) << 63;
209 if (b >= 0) {
210 break;
211 }
212 return makeUnexpected(DecodeVarintError::TooManyBytes);
213 } while (false);
214 } else {
215 int shift = 0;
216 while (p != end && *p < 0) {
217 val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
218 shift += 7;
219 }
220 if (p == end) {
221 return makeUnexpected(DecodeVarintError::TooFewBytes);
222 }
223 val |= static_cast<uint64_t>(*p++) << shift;
224 }
225
226 data.uncheckedAdvance(p - begin);
227 return val;
228}
229
230} // namespace folly
231