1 | // Copyright 2001 and onwards Google Inc. |
2 | |
3 | #include <string> |
4 | using std::string; |
5 | |
6 | #include "util/coding/varint.h" |
7 | |
8 | #ifndef COMPILER_MSVC |
9 | const int Varint::kMax32; |
10 | const int Varint::kMax64; |
11 | #endif |
12 | |
13 | char* Varint::Encode32(char* sptr, uint32 v) { |
14 | return Encode32Inline(sptr, v); |
15 | } |
16 | |
17 | char* Varint::Encode64(char* sptr, uint64 v) { |
18 | if (v < (1u << 28)) { |
19 | return Varint::Encode32(sptr, v); |
20 | } else { |
21 | // Operate on characters as unsigneds |
22 | unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr); |
23 | static const int B = 128; |
24 | uint32 v32 = v; |
25 | *(ptr++) = v32 | B; |
26 | *(ptr++) = (v32 >> 7) | B; |
27 | *(ptr++) = (v32 >> 14) | B; |
28 | *(ptr++) = (v32 >> 21) | B; |
29 | if (v < (1ull << 35)) { |
30 | *(ptr++) = (v >> 28); |
31 | return reinterpret_cast<char*>(ptr); |
32 | } else { |
33 | *(ptr++) = (v >> 28) | B; |
34 | return Varint::Encode32(reinterpret_cast<char*>(ptr), v >> 35); |
35 | } |
36 | } |
37 | } |
38 | |
39 | const char* Varint::Parse32Fallback(const char* ptr, uint32* OUTPUT) { |
40 | return Parse32FallbackInline(ptr, OUTPUT); |
41 | } |
42 | |
43 | const char* Varint::Parse64Fallback(const char* p, uint64* OUTPUT) { |
44 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
45 | // Fast path: need to accumulate data in upto three result fragments |
46 | // res1 bits 0..27 |
47 | // res2 bits 28..55 |
48 | // res3 bits 56..63 |
49 | |
50 | uint32 byte, res1, res2=0, res3=0; |
51 | byte = *(ptr++); res1 = byte & 127; |
52 | byte = *(ptr++); res1 |= (byte & 127) << 7; if (byte < 128) goto done1; |
53 | byte = *(ptr++); res1 |= (byte & 127) << 14; if (byte < 128) goto done1; |
54 | byte = *(ptr++); res1 |= (byte & 127) << 21; if (byte < 128) goto done1; |
55 | |
56 | byte = *(ptr++); res2 = byte & 127; if (byte < 128) goto done2; |
57 | byte = *(ptr++); res2 |= (byte & 127) << 7; if (byte < 128) goto done2; |
58 | byte = *(ptr++); res2 |= (byte & 127) << 14; if (byte < 128) goto done2; |
59 | byte = *(ptr++); res2 |= (byte & 127) << 21; if (byte < 128) goto done2; |
60 | |
61 | byte = *(ptr++); res3 = byte & 127; if (byte < 128) goto done3; |
62 | byte = *(ptr++); res3 |= (byte & 127) << 7; if (byte < 128) goto done3; |
63 | |
64 | return NULL; // Value is too long to be a varint64 |
65 | |
66 | done1: |
67 | assert(res2 == 0); |
68 | assert(res3 == 0); |
69 | *OUTPUT = res1; |
70 | return reinterpret_cast<const char*>(ptr); |
71 | |
72 | done2: |
73 | assert(res3 == 0); |
74 | *OUTPUT = res1 | (uint64(res2) << 28); |
75 | return reinterpret_cast<const char*>(ptr); |
76 | |
77 | done3: |
78 | *OUTPUT = res1 | (uint64(res2) << 28) | (uint64(res3) << 56); |
79 | return reinterpret_cast<const char*>(ptr); |
80 | } |
81 | |
82 | const char* Varint::Parse32BackwardSlow(const char* ptr, const char* base, |
83 | uint32* OUTPUT) { |
84 | // Since this method is rarely called, for simplicity, we just skip backward |
85 | // and then parse forward. |
86 | const char* prev = Skip32BackwardSlow(ptr, base); |
87 | if (prev == NULL) |
88 | return NULL; // no value before 'ptr' |
89 | |
90 | Parse32(prev, OUTPUT); |
91 | return prev; |
92 | } |
93 | |
94 | const char* Varint::Parse64BackwardSlow(const char* ptr, const char* base, |
95 | uint64* OUTPUT) { |
96 | // Since this method is rarely called, for simplicity, we just skip backward |
97 | // and then parse forward. |
98 | const char* prev = Skip64BackwardSlow(ptr, base); |
99 | if (prev == NULL) |
100 | return NULL; // no value before 'ptr' |
101 | |
102 | Parse64(prev, OUTPUT); |
103 | return prev; |
104 | } |
105 | |
106 | const char* Varint::Parse64WithLimit(const char* p, |
107 | const char* l, |
108 | uint64* OUTPUT) { |
109 | if (p + kMax64 <= l) { |
110 | return Parse64(p, OUTPUT); |
111 | } else { |
112 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
113 | const unsigned char* limit = reinterpret_cast<const unsigned char*>(l); |
114 | uint64 b, result; |
115 | if (ptr >= limit) return NULL; |
116 | b = *(ptr++); result = b & 127; if (b < 128) goto done; |
117 | if (ptr >= limit) return NULL; |
118 | b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; |
119 | if (ptr >= limit) return NULL; |
120 | b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; |
121 | if (ptr >= limit) return NULL; |
122 | b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; |
123 | if (ptr >= limit) return NULL; |
124 | b = *(ptr++); result |= (b & 127) << 28; if (b < 128) goto done; |
125 | if (ptr >= limit) return NULL; |
126 | b = *(ptr++); result |= (b & 127) << 35; if (b < 128) goto done; |
127 | if (ptr >= limit) return NULL; |
128 | b = *(ptr++); result |= (b & 127) << 42; if (b < 128) goto done; |
129 | if (ptr >= limit) return NULL; |
130 | b = *(ptr++); result |= (b & 127) << 49; if (b < 128) goto done; |
131 | if (ptr >= limit) return NULL; |
132 | b = *(ptr++); result |= (b & 127) << 56; if (b < 128) goto done; |
133 | if (ptr >= limit) return NULL; |
134 | b = *(ptr++); result |= (b & 127) << 63; if (b < 2) goto done; |
135 | return NULL; // Value is too long to be a varint64 |
136 | done: |
137 | *OUTPUT = result; |
138 | return reinterpret_cast<const char*>(ptr); |
139 | } |
140 | } |
141 | |
142 | const char* Varint::Skip32BackwardSlow(const char* p, const char* b) { |
143 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
144 | const unsigned char* base = reinterpret_cast<const unsigned char*>(b); |
145 | assert(ptr >= base); |
146 | |
147 | // If the initial pointer is at the base or if the previous byte is not |
148 | // the last byte of a varint, we return NULL since there is nothing to skip. |
149 | if (ptr == base) return NULL; |
150 | if (*(--ptr) > 127) return NULL; |
151 | |
152 | for (int i = 0; i < 5; i++) { |
153 | if (ptr == base) return reinterpret_cast<const char*>(ptr); |
154 | if (*(--ptr) < 128) return reinterpret_cast<const char*>(ptr + 1); |
155 | } |
156 | |
157 | return NULL; // value is too long to be a varint32 |
158 | } |
159 | |
160 | const char* Varint::Skip64BackwardSlow(const char* p, const char* b) { |
161 | const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); |
162 | const unsigned char* base = reinterpret_cast<const unsigned char*>(b); |
163 | assert(ptr >= base); |
164 | |
165 | // If the initial pointer is at the base or if the previous byte is not |
166 | // the last byte of a varint, we return NULL since there is nothing to skip. |
167 | if (ptr == base) return NULL; |
168 | if (*(--ptr) > 127) return NULL; |
169 | |
170 | for (int i = 0; i < 10; i++) { |
171 | if (ptr == base) return reinterpret_cast<const char*>(ptr); |
172 | if (*(--ptr) < 128) return reinterpret_cast<const char*>(ptr + 1); |
173 | } |
174 | |
175 | return NULL; // value is too long to be a varint64 |
176 | } |
177 | |
178 | void Varint::Append32Slow(string* s, uint32 value) { |
179 | char buf[Varint::kMax32]; |
180 | const char* p = Varint::Encode32(buf, value); |
181 | s->append(buf, p - buf); |
182 | } |
183 | |
184 | void Varint::Append64Slow(string* s, uint64 value) { |
185 | char buf[Varint::kMax64]; |
186 | const char* p = Varint::Encode64(buf, value); |
187 | s->append(buf, p - buf); |
188 | } |
189 | |
190 | void Varint::EncodeTwo32Values(string* s, uint32 a, uint32 b) { |
191 | uint64 v = 0; |
192 | int shift = 0; |
193 | while ((a > 0) || (b > 0)) { |
194 | uint8 one_byte = (a & 0xf) | ((b & 0xf) << 4); |
195 | v |= ((static_cast<uint64>(one_byte)) << shift); |
196 | shift += 8; |
197 | a >>= 4; |
198 | b >>= 4; |
199 | } |
200 | Append64(s, v); |
201 | } |
202 | |
203 | const char* Varint::DecodeTwo32ValuesSlow(const char* ptr, |
204 | uint32* a, uint32* b) { |
205 | uint64 v = 0; |
206 | const char* result = Varint::Parse64(ptr, &v); |
207 | *a = 0; |
208 | *b = 0; |
209 | int shift = 0; |
210 | while (v > 0) { |
211 | *a |= ((v & 0xf) << shift); |
212 | *b |= (((v >> 4) & 0xf) << shift); |
213 | v >>= 8; |
214 | shift += 4; |
215 | } |
216 | return result; |
217 | } |
218 | |
219 | inline int FastLength32(uint32 v) { |
220 | if (v < (1u << 14)) { |
221 | if (v < (1u << 7)) { |
222 | return 1; |
223 | } else { |
224 | return 2; |
225 | } |
226 | } else { |
227 | if (v < (1u << 28)) { |
228 | if (v < (1u << 21)) { |
229 | return 3; |
230 | } else { |
231 | return 4; |
232 | } |
233 | } else { |
234 | return 5; |
235 | } |
236 | } |
237 | } |
238 | |
239 | const char Varint::length32_bytes_required[33] = |
240 | { |
241 | 1, // Entry for "-1", which happens when the value is 0 |
242 | 1,1,1,1,1,1,1, |
243 | 2,2,2,2,2,2,2, |
244 | 3,3,3,3,3,3,3, |
245 | 4,4,4,4,4,4,4, |
246 | 5,5,5,5 |
247 | }; |
248 | |
249 | int Varint::Length32NonInline(uint32 v) { |
250 | return FastLength32(v); |
251 | } |
252 | |
253 | int Varint::Length64(uint64 v) { |
254 | uint32 tmp; |
255 | int nb; // Number of bytes we've determined from our tests |
256 | if (v < (1u << 28)) { |
257 | tmp = v; |
258 | nb = 0; |
259 | } else if (v < (1ull << 35)) { |
260 | return 5; |
261 | } else { |
262 | tmp = v >> 35; |
263 | nb = 5; |
264 | } |
265 | return nb + Varint::Length32(tmp); |
266 | } |
267 | |