1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // from google3/strings/strutil.h |
32 | |
33 | #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
34 | #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
35 | |
36 | #include <google/protobuf/stubs/common.h> |
37 | #include <google/protobuf/stubs/stringpiece.h> |
38 | #include <stdlib.h> |
39 | |
40 | #include <cstring> |
41 | #include <google/protobuf/port_def.inc> |
42 | #include <vector> |
43 | |
44 | namespace google { |
45 | namespace protobuf { |
46 | |
// Portability shims: redirect strtoll()/strtoull() to an equivalent function
// on the two toolchains handled below.
#if defined(_MSC_VER) && _MSC_VER < 1800
// MSVC with _MSC_VER below 1800: use _strtoi64()/_strtoui64().
#define strtoll _strtoi64
#define strtoull _strtoui64
#elif defined(__DECCXX) && defined(__osf__)
// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit.
#define strtoll strtol
#define strtoull strtoul
#endif
55 | |
56 | // ---------------------------------------------------------------------- |
57 | // ascii_isalnum() |
58 | // Check if an ASCII character is alphanumeric. We can't use ctype's |
59 | // isalnum() because it is affected by locale. This function is applied |
60 | // to identifiers in the protocol buffer language, not to natural-language |
61 | // strings, so locale should not be taken into account. |
62 | // ascii_isdigit() |
63 | // Like above, but only accepts digits. |
64 | // ascii_isspace() |
65 | // Check if the character is a space character. |
66 | // ---------------------------------------------------------------------- |
67 | |
// Returns true if `c` is an ASCII letter ('a'-'z', 'A'-'Z') or decimal digit.
// Uses plain range comparisons only, so the result never depends on locale.
inline bool ascii_isalnum(char c) {
  return ('a' <= c && c <= 'z') ||
         ('A' <= c && c <= 'Z') ||
         ('0' <= c && c <= '9');
}
73 | |
// Returns true if `c` is one of the ASCII decimal digits '0'-'9'.
inline bool ascii_isdigit(char c) {
  return ('0' <= c && c <= '9');
}
77 | |
// Returns true if `c` is space, tab, newline, vertical tab, form feed or
// carriage return.
inline bool ascii_isspace(char c) {
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
         c == '\r';
}
82 | |
// Returns true if `c` is an ASCII uppercase letter 'A'-'Z'.
inline bool ascii_isupper(char c) {
  return c >= 'A' && c <= 'Z';
}
86 | |
// Returns true if `c` is an ASCII lowercase letter 'a'-'z'.
inline bool ascii_islower(char c) {
  return c >= 'a' && c <= 'z';
}
90 | |
// Returns the uppercase form of `c` when it is an ASCII lowercase letter;
// every other character is returned unchanged.
inline char ascii_toupper(char c) {
  // The subtraction is evaluated as int; cast back explicitly so the narrowing
  // to char is intentional rather than implicit.
  return (c >= 'a' && c <= 'z') ? static_cast<char>(c - ('a' - 'A')) : c;
}
94 | |
// Returns the lowercase form of `c` when it is an ASCII uppercase letter;
// every other character is returned unchanged.
inline char ascii_tolower(char c) {
  // The addition is evaluated as int; cast back explicitly so the narrowing
  // to char is intentional rather than implicit.
  return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
}
98 | |
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value 0-15.  The input is not validated: any other character still
// yields some value in [0, 15].
inline int hex_digit_to_int(char c) {
  /* Assume ASCII. */
  int x = static_cast<unsigned char>(c);
  if (x > '9') {
    // Letters: the low nibble of 'a'/'A' is 1, so adding 9 maps it to 10.
    x += 9;
  }
  return x & 0xf;
}
107 | |
108 | // ---------------------------------------------------------------------- |
109 | // HasPrefixString() |
110 | // Check if a string begins with a given prefix. |
111 | // StripPrefixString() |
112 | // Given a string and a putative prefix, returns the string minus the |
113 | // prefix string if the prefix matches, otherwise the original |
114 | // string. |
115 | // ---------------------------------------------------------------------- |
116 | inline bool HasPrefixString(StringPiece str, StringPiece prefix) { |
117 | return str.size() >= prefix.size() && |
118 | memcmp(s1: str.data(), s2: prefix.data(), n: prefix.size()) == 0; |
119 | } |
120 | |
// Returns `str` with a leading `prefix` removed when `str` starts with it;
// otherwise returns a copy of `str` unchanged.
inline std::string StripPrefixString(const std::string& str,
                                     const std::string& prefix) {
  // compare() clamps the compared range to the end of `str`, so a `str`
  // shorter than `prefix` simply compares unequal.
  if (str.compare(0, prefix.size(), prefix) == 0) {
    return str.substr(prefix.size());
  }
  return str;
}
129 | |
130 | // ---------------------------------------------------------------------- |
131 | // HasSuffixString() |
132 | // Return true if str ends in suffix. |
133 | // StripSuffixString() |
134 | // Given a string and a putative suffix, returns the string minus the |
135 | // suffix string if the suffix matches, otherwise the original |
136 | // string. |
137 | // ---------------------------------------------------------------------- |
138 | inline bool HasSuffixString(StringPiece str, StringPiece suffix) { |
139 | return str.size() >= suffix.size() && |
140 | memcmp(s1: str.data() + str.size() - suffix.size(), s2: suffix.data(), |
141 | n: suffix.size()) == 0; |
142 | } |
143 | |
// Returns `str` with a trailing `suffix` removed when `str` ends with it;
// otherwise returns a copy of `str` unchanged.
inline std::string StripSuffixString(const std::string& str,
                                     const std::string& suffix) {
  // The size check must come first: it keeps the start offset below from
  // wrapping around when `suffix` is longer than `str`.
  if (str.size() >= suffix.size() &&
      str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0) {
    return str.substr(0, str.size() - suffix.size());
  }
  return str;
}
152 | |
153 | // ---------------------------------------------------------------------- |
154 | // ReplaceCharacters |
155 | // Replaces any occurrence of the character 'remove' (or the characters |
156 | // in 'remove') with the character 'replacewith'. |
157 | // Good for keeping html characters or protocol characters (\t) out |
158 | // of places where they might cause a problem. |
159 | // StripWhitespace |
160 | // Removes whitespaces from both ends of the given string. |
161 | // ---------------------------------------------------------------------- |
162 | PROTOBUF_EXPORT void ReplaceCharacters(std::string* s, const char* remove, |
163 | char replacewith); |
164 | |
165 | PROTOBUF_EXPORT void StripWhitespace(std::string* s); |
166 | |
167 | // ---------------------------------------------------------------------- |
168 | // LowerString() |
169 | // UpperString() |
170 | // ToUpper() |
171 | // Convert the characters in "s" to lowercase or uppercase. ASCII-only: |
172 | // these functions intentionally ignore locale because they are applied to |
173 | // identifiers used in the Protocol Buffer language, not to natural-language |
174 | // strings. |
175 | // ---------------------------------------------------------------------- |
176 | |
// Converts every ASCII uppercase letter in *s to lowercase, in place.  All
// other bytes are left untouched.  `s` must not be null.
inline void LowerString(std::string* s) {
  std::string::iterator end = s->end();
  for (std::string::iterator i = s->begin(); i != end; ++i) {
    // tolower() changes based on locale.  We don't want this!
    if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A';
  }
}
184 | |
// Converts every ASCII lowercase letter in *s to uppercase, in place.  All
// other bytes are left untouched.  `s` must not be null.
inline void UpperString(std::string* s) {
  std::string::iterator end = s->end();
  for (std::string::iterator i = s->begin(); i != end; ++i) {
    // toupper() changes based on locale.  We don't want this!
    if ('a' <= *i && *i <= 'z') *i += 'A' - 'a';
  }
}
192 | |
193 | inline void ToUpper(std::string* s) { UpperString(s); } |
194 | |
195 | inline std::string ToUpper(const std::string& s) { |
196 | std::string out = s; |
197 | UpperString(s: &out); |
198 | return out; |
199 | } |
200 | |
201 | // ---------------------------------------------------------------------- |
202 | // StringReplace() |
203 | // Give me a string and two patterns "old" and "new", and I replace |
204 | // the first instance of "old" in the string with "new", if it |
205 | // exists. RETURN a new string, regardless of whether the replacement |
206 | // happened or not. |
207 | // ---------------------------------------------------------------------- |
208 | |
209 | PROTOBUF_EXPORT std::string StringReplace(const std::string& s, |
210 | const std::string& oldsub, |
211 | const std::string& newsub, |
212 | bool replace_all); |
213 | |
214 | // ---------------------------------------------------------------------- |
215 | // SplitStringUsing() |
216 | // Split a string using a character delimiter. Append the components |
217 | // to 'result'. If there are consecutive delimiters, this function skips |
218 | // over all of them. |
219 | // ---------------------------------------------------------------------- |
220 | PROTOBUF_EXPORT void SplitStringUsing(StringPiece full, const char* delim, |
221 | std::vector<std::string>* res); |
222 | |
223 | // Split a string using one or more byte delimiters, presented |
224 | // as a nul-terminated c string. Append the components to 'result'. |
225 | // If there are consecutive delimiters, this function will return |
226 | // corresponding empty strings. If you want to drop the empty |
227 | // strings, try SplitStringUsing(). |
228 | // |
229 | // If "full" is the empty string, yields an empty string as the only value. |
230 | // ---------------------------------------------------------------------- |
231 | PROTOBUF_EXPORT void SplitStringAllowEmpty(StringPiece full, const char* delim, |
232 | std::vector<std::string>* result); |
233 | |
234 | // ---------------------------------------------------------------------- |
235 | // Split() |
236 | // Split a string using a character delimiter. |
237 | // ---------------------------------------------------------------------- |
238 | inline std::vector<std::string> Split(StringPiece full, const char* delim, |
239 | bool skip_empty = true) { |
240 | std::vector<std::string> result; |
241 | if (skip_empty) { |
242 | SplitStringUsing(full, delim, res: &result); |
243 | } else { |
244 | SplitStringAllowEmpty(full, delim, result: &result); |
245 | } |
246 | return result; |
247 | } |
248 | |
249 | // ---------------------------------------------------------------------- |
250 | // JoinStrings() |
251 | // These methods concatenate a vector of strings into a C++ string, using |
252 | // the C-string "delim" as a separator between components. There are two |
253 | // flavors of the function, one flavor returns the concatenated string, |
254 | // another takes a pointer to the target string. In the latter case the |
255 | // target string is cleared and overwritten. |
256 | // ---------------------------------------------------------------------- |
257 | PROTOBUF_EXPORT void JoinStrings(const std::vector<std::string>& components, |
258 | const char* delim, std::string* result); |
259 | |
260 | inline std::string JoinStrings(const std::vector<std::string>& components, |
261 | const char* delim) { |
262 | std::string result; |
263 | JoinStrings(components, delim, result: &result); |
264 | return result; |
265 | } |
266 | |
267 | // ---------------------------------------------------------------------- |
268 | // UnescapeCEscapeSequences() |
269 | // Copies "source" to "dest", rewriting C-style escape sequences |
270 | // -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII |
271 | // equivalents. "dest" must be sufficiently large to hold all |
272 | // the characters in the rewritten string (i.e. at least as large |
273 | // as strlen(source) + 1 should be safe, since the replacements |
274 | // are always shorter than the original escaped sequences). It's |
275 | // safe for source and dest to be the same. RETURNS the length |
276 | // of dest. |
277 | // |
278 | // It allows hex sequences \xhh, or generally \xhhhhh with an |
279 | // arbitrary number of hex digits, but all of them together must |
280 | // specify a value of a single byte (e.g. \x0045 is equivalent |
281 | // to \x45, and \x1234 is erroneous). |
282 | // |
283 | // It also allows escape sequences of the form \uhhhh (exactly four |
284 | // hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight |
285 | // hex digits, upper or lower case) to specify a Unicode code |
286 | // point. The dest array will contain the UTF8-encoded version of |
287 | // that code-point (e.g., if source contains \u2019, then dest will |
288 | // contain the three bytes 0xE2, 0x80, and 0x99). |
289 | // |
290 | // Errors: In the first form of the call, errors are reported with |
291 | // LOG(ERROR). The same is true for the second form of the call if |
292 | // the pointer to the string std::vector is nullptr; otherwise, error |
293 | // messages are stored in the std::vector. In either case, the effect on |
294 | // the dest array is not defined, but rest of the source will be |
295 | // processed. |
296 | // ---------------------------------------------------------------------- |
297 | |
298 | PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); |
299 | PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, |
300 | std::vector<std::string>* errors); |
301 | |
302 | // ---------------------------------------------------------------------- |
303 | // UnescapeCEscapeString() |
304 | // This does the same thing as UnescapeCEscapeSequences, but creates |
305 | // a new string. The caller does not need to worry about allocating |
306 | // a dest buffer. This should be used for non performance critical |
307 | // tasks such as printing debug messages. It is safe for src and dest |
308 | // to be the same. |
309 | // |
310 | // The second call stores its errors in a supplied string vector. |
311 | // If the string vector pointer is nullptr, it reports the errors with LOG(). |
312 | // |
//    In the first and second calls, the length of dest is returned. In
//    the third call, the new string is returned.
315 | // ---------------------------------------------------------------------- |
316 | |
317 | PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, |
318 | std::string* dest); |
319 | PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, |
320 | std::string* dest, |
321 | std::vector<std::string>* errors); |
322 | PROTOBUF_EXPORT std::string UnescapeCEscapeString(const std::string& src); |
323 | |
324 | // ---------------------------------------------------------------------- |
325 | // CEscape() |
326 | // Escapes 'src' using C-style escape sequences and returns the resulting |
327 | // string. |
328 | // |
329 | // Escaped chars: \n, \r, \t, ", ', \, and !isprint(). |
330 | // ---------------------------------------------------------------------- |
331 | PROTOBUF_EXPORT std::string CEscape(const std::string& src); |
332 | |
333 | // ---------------------------------------------------------------------- |
334 | // CEscapeAndAppend() |
335 | // Escapes 'src' using C-style escape sequences, and appends the escaped |
336 | // string to 'dest'. |
337 | // ---------------------------------------------------------------------- |
338 | PROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, std::string* dest); |
339 | |
340 | namespace strings { |
341 | // Like CEscape() but does not escape bytes with the upper bit set. |
342 | PROTOBUF_EXPORT std::string Utf8SafeCEscape(const std::string& src); |
343 | |
344 | // Like CEscape() but uses hex (\x) escapes instead of octals. |
345 | PROTOBUF_EXPORT std::string CHexEscape(const std::string& src); |
346 | } // namespace strings |
347 | |
348 | // ---------------------------------------------------------------------- |
349 | // strto32() |
350 | // strtou32() |
351 | // strto64() |
352 | // strtou64() |
353 | // Architecture-neutral plug compatible replacements for strtol() and |
354 | // strtoul(). Long's have different lengths on ILP-32 and LP-64 |
355 | // platforms, so using these is safer, from the point of view of |
356 | // overflow behavior, than using the standard libc functions. |
357 | // ---------------------------------------------------------------------- |
358 | PROTOBUF_EXPORT int32_t strto32_adaptor(const char* nptr, char** endptr, |
359 | int base); |
360 | PROTOBUF_EXPORT uint32_t strtou32_adaptor(const char* nptr, char** endptr, |
361 | int base); |
362 | |
363 | inline int32_t strto32(const char *nptr, char **endptr, int base) { |
364 | if (sizeof(int32_t) == sizeof(long)) |
365 | return strtol(nptr: nptr, endptr: endptr, base: base); |
366 | else |
367 | return strto32_adaptor(nptr, endptr, base); |
368 | } |
369 | |
370 | inline uint32_t strtou32(const char *nptr, char **endptr, int base) { |
371 | if (sizeof(uint32_t) == sizeof(unsigned long)) |
372 | return strtoul(nptr: nptr, endptr: endptr, base: base); |
373 | else |
374 | return strtou32_adaptor(nptr, endptr, base); |
375 | } |
376 | |
377 | // For now, long long is 64-bit on all the platforms we care about, so these |
378 | // functions can simply pass the call to strto[u]ll. |
// 64-bit counterpart of strtol(): forwards to strtoll().  The static_assert
// rejects any platform where `long long` is not 64 bits wide.
inline int64_t strto64(const char *nptr, char **endptr, int base) {
  static_assert(sizeof(int64_t) == sizeof(long long),
                "sizeof int64_t is not sizeof long long");
  return static_cast<int64_t>(strtoll(nptr, endptr, base));
}
384 | |
// 64-bit counterpart of strtoul(): forwards to strtoull().  The static_assert
// rejects any platform where `unsigned long long` is not 64 bits wide.
inline uint64_t strtou64(const char *nptr, char **endptr, int base) {
  static_assert(sizeof(uint64_t) == sizeof(unsigned long long),
                "sizeof uint64_t is not sizeof unsigned long long");
  return static_cast<uint64_t>(strtoull(nptr, endptr, base));
}
390 | |
391 | // ---------------------------------------------------------------------- |
392 | // safe_strtob() |
393 | // safe_strto32() |
394 | // safe_strtou32() |
395 | // safe_strto64() |
396 | // safe_strtou64() |
397 | // safe_strtof() |
398 | // safe_strtod() |
399 | // ---------------------------------------------------------------------- |
400 | PROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value); |
401 | |
402 | PROTOBUF_EXPORT bool safe_strto32(const std::string& str, int32_t* value); |
403 | PROTOBUF_EXPORT bool safe_strtou32(const std::string& str, uint32_t* value); |
404 | inline bool safe_strto32(const char* str, int32_t* value) { |
405 | return safe_strto32(str: std::string(str), value); |
406 | } |
407 | inline bool safe_strto32(StringPiece str, int32_t* value) { |
408 | return safe_strto32(str: str.ToString(), value); |
409 | } |
410 | inline bool safe_strtou32(const char* str, uint32_t* value) { |
411 | return safe_strtou32(str: std::string(str), value); |
412 | } |
413 | inline bool safe_strtou32(StringPiece str, uint32_t* value) { |
414 | return safe_strtou32(str: str.ToString(), value); |
415 | } |
416 | |
417 | PROTOBUF_EXPORT bool safe_strto64(const std::string& str, int64_t* value); |
418 | PROTOBUF_EXPORT bool safe_strtou64(const std::string& str, uint64_t* value); |
419 | inline bool safe_strto64(const char* str, int64_t* value) { |
420 | return safe_strto64(str: std::string(str), value); |
421 | } |
422 | inline bool safe_strto64(StringPiece str, int64_t* value) { |
423 | return safe_strto64(str: str.ToString(), value); |
424 | } |
425 | inline bool safe_strtou64(const char* str, uint64_t* value) { |
426 | return safe_strtou64(str: std::string(str), value); |
427 | } |
428 | inline bool safe_strtou64(StringPiece str, uint64_t* value) { |
429 | return safe_strtou64(str: str.ToString(), value); |
430 | } |
431 | |
432 | PROTOBUF_EXPORT bool safe_strtof(const char* str, float* value); |
433 | PROTOBUF_EXPORT bool safe_strtod(const char* str, double* value); |
434 | inline bool safe_strtof(const std::string& str, float* value) { |
435 | return safe_strtof(str: str.c_str(), value); |
436 | } |
437 | inline bool safe_strtod(const std::string& str, double* value) { |
438 | return safe_strtod(str: str.c_str(), value); |
439 | } |
440 | inline bool safe_strtof(StringPiece str, float* value) { |
441 | return safe_strtof(str: str.ToString(), value); |
442 | } |
443 | inline bool safe_strtod(StringPiece str, double* value) { |
444 | return safe_strtod(str: str.ToString(), value); |
445 | } |
446 | |
447 | // ---------------------------------------------------------------------- |
448 | // FastIntToBuffer() |
449 | // FastHexToBuffer() |
450 | // FastHex64ToBuffer() |
451 | // FastHex32ToBuffer() |
452 | // FastTimeToBuffer() |
453 | // These are intended for speed. FastIntToBuffer() assumes the |
454 | // integer is non-negative. FastHexToBuffer() puts output in |
455 | // hex rather than decimal. FastTimeToBuffer() puts the output |
456 | // into RFC822 format. |
457 | // |
458 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
459 | // padded to exactly 16 bytes (plus one byte for '\0') |
460 | // |
461 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
462 | // padded to exactly 8 bytes (plus one byte for '\0') |
463 | // |
464 | // All functions take the output buffer as an arg. |
465 | // They all return a pointer to the beginning of the output, |
466 | // which may not be the beginning of the input buffer. |
467 | // ---------------------------------------------------------------------- |
468 | |
// Suggested buffer size for FastToBuffer functions.  Also works with
// DoubleToBuffer() and FloatToBuffer().
static const int kFastToBufferSize = 32;
472 | |
473 | PROTOBUF_EXPORT char* FastInt32ToBuffer(int32_t i, char* buffer); |
474 | PROTOBUF_EXPORT char* FastInt64ToBuffer(int64_t i, char* buffer); |
475 | char* FastUInt32ToBuffer(uint32_t i, char* buffer); // inline below |
476 | char* FastUInt64ToBuffer(uint64_t i, char* buffer); // inline below |
477 | PROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); |
478 | PROTOBUF_EXPORT char* FastHex64ToBuffer(uint64_t i, char* buffer); |
479 | PROTOBUF_EXPORT char* FastHex32ToBuffer(uint32_t i, char* buffer); |
480 | |
481 | // at least 22 bytes long |
482 | inline char* FastIntToBuffer(int i, char* buffer) { |
483 | return (sizeof(i) == 4 ? |
484 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
485 | } |
486 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { |
487 | return (sizeof(i) == 4 ? |
488 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
489 | } |
490 | inline char* FastLongToBuffer(long i, char* buffer) { |
491 | return (sizeof(i) == 4 ? |
492 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
493 | } |
494 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { |
495 | return (sizeof(i) == 4 ? |
496 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
497 | } |
498 | |
499 | // ---------------------------------------------------------------------- |
500 | // FastInt32ToBufferLeft() |
501 | // FastUInt32ToBufferLeft() |
502 | // FastInt64ToBufferLeft() |
503 | // FastUInt64ToBufferLeft() |
504 | // |
505 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
506 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
507 | // their output to the beginning of the buffer (hence the name, as the |
508 | // output is left-aligned). The caller is responsible for ensuring that |
509 | // the buffer has enough space to hold the output. |
510 | // |
511 | // Returns a pointer to the end of the string (i.e. the null character |
512 | // terminating the string). |
513 | // ---------------------------------------------------------------------- |
514 | |
515 | PROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32_t i, char* buffer); |
516 | PROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32_t i, char* buffer); |
517 | PROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64_t i, char* buffer); |
518 | PROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64_t i, char* buffer); |
519 | |
520 | // Just define these in terms of the above. |
521 | inline char* FastUInt32ToBuffer(uint32_t i, char* buffer) { |
522 | FastUInt32ToBufferLeft(i, buffer); |
523 | return buffer; |
524 | } |
525 | inline char* FastUInt64ToBuffer(uint64_t i, char* buffer) { |
526 | FastUInt64ToBufferLeft(i, buffer); |
527 | return buffer; |
528 | } |
529 | |
// Returns "true" or "false" for the given boolean.
inline std::string SimpleBtoa(bool value) { return value ? "true" : "false"; }
531 | |
532 | // ---------------------------------------------------------------------- |
533 | // SimpleItoa() |
534 | // Description: converts an integer to a string. |
535 | // |
536 | // Return value: string |
537 | // ---------------------------------------------------------------------- |
538 | PROTOBUF_EXPORT std::string SimpleItoa(int i); |
539 | PROTOBUF_EXPORT std::string SimpleItoa(unsigned int i); |
540 | PROTOBUF_EXPORT std::string SimpleItoa(long i); |
541 | PROTOBUF_EXPORT std::string SimpleItoa(unsigned long i); |
542 | PROTOBUF_EXPORT std::string SimpleItoa(long long i); |
543 | PROTOBUF_EXPORT std::string SimpleItoa(unsigned long long i); |
544 | |
545 | // ---------------------------------------------------------------------- |
546 | // SimpleDtoa() |
547 | // SimpleFtoa() |
548 | // DoubleToBuffer() |
549 | // FloatToBuffer() |
550 | // Description: converts a double or float to a string which, if |
551 | // passed to NoLocaleStrtod(), will produce the exact same original double |
552 | // (except in case of NaN; all NaNs are considered the same value). |
553 | // We try to keep the string short but it's not guaranteed to be as |
554 | // short as possible. |
555 | // |
556 | // DoubleToBuffer() and FloatToBuffer() write the text to the given |
557 | // buffer and return it. The buffer must be at least |
558 | // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize |
559 | // bytes for floats. kFastToBufferSize is also guaranteed to be large |
560 | // enough to hold either. |
561 | // |
562 | // Return value: string |
563 | // ---------------------------------------------------------------------- |
564 | PROTOBUF_EXPORT std::string SimpleDtoa(double value); |
565 | PROTOBUF_EXPORT std::string SimpleFtoa(float value); |
566 | |
567 | PROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); |
568 | PROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); |
569 | |
// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
static const int kDoubleToBufferSize = 32;
static const int kFloatToBufferSize = 24;
575 | |
576 | namespace strings { |
577 | |
// Padding request carried by Hex.  NO_PAD is 1 and each ZERO_PAD_n enumerator
// has the numeric value n.
enum PadSpec {
  NO_PAD = 1,
  ZERO_PAD_2,
  ZERO_PAD_3,
  ZERO_PAD_4,
  ZERO_PAD_5,
  ZERO_PAD_6,
  ZERO_PAD_7,
  ZERO_PAD_8,
  ZERO_PAD_9,
  ZERO_PAD_10,
  ZERO_PAD_11,
  ZERO_PAD_12,
  ZERO_PAD_13,
  ZERO_PAD_14,
  ZERO_PAD_15,
  ZERO_PAD_16,
};

// An integer value plus a PadSpec, accepted by the AlphaNum(Hex) constructor.
// `value` holds the argument widened to 64 bits without sign extension.
struct Hex {
  uint64_t value;
  enum PadSpec spec;
  template <class Int>
  explicit Hex(Int v, PadSpec s = NO_PAD)
      // Prevent sign-extension by casting integers to
      // their unsigned counterparts.
      : value(sizeof(v) == 1   ? static_cast<uint8_t>(v)
              : sizeof(v) == 2 ? static_cast<uint16_t>(v)
              : sizeof(v) == 4 ? static_cast<uint32_t>(v)
                               : static_cast<uint64_t>(v)),
        spec(s) {
#ifdef LANG_CXX11
    static_assert(
        sizeof(v) == 1 || sizeof(v) == 2 || sizeof(v) == 4 || sizeof(v) == 8,
        "Unknown integer type");
#endif
  }
};
616 | |
617 | struct PROTOBUF_EXPORT AlphaNum { |
618 | const char *piece_data_; // move these to string_ref eventually |
619 | size_t piece_size_; // move these to string_ref eventually |
620 | |
621 | char digits[kFastToBufferSize]; |
622 | |
623 | // No bool ctor -- bools convert to an integral type. |
624 | // A bool ctor would also convert incoming pointers (bletch). |
625 | |
626 | AlphaNum(int i32) |
627 | : piece_data_(digits), |
628 | piece_size_(FastInt32ToBufferLeft(i: i32, buffer: digits) - &digits[0]) {} |
629 | AlphaNum(unsigned int u32) |
630 | : piece_data_(digits), |
631 | piece_size_(FastUInt32ToBufferLeft(i: u32, buffer: digits) - &digits[0]) {} |
632 | AlphaNum(long long i64) |
633 | : piece_data_(digits), |
634 | piece_size_(FastInt64ToBufferLeft(i: i64, buffer: digits) - &digits[0]) {} |
635 | AlphaNum(unsigned long long u64) |
636 | : piece_data_(digits), |
637 | piece_size_(FastUInt64ToBufferLeft(i: u64, buffer: digits) - &digits[0]) {} |
638 | |
639 | // Note: on some architectures, "long" is only 32 bits, not 64, but the |
640 | // performance hit of using FastInt64ToBufferLeft to handle 32-bit values |
641 | // is quite minor. |
642 | AlphaNum(long i64) |
643 | : piece_data_(digits), |
644 | piece_size_(FastInt64ToBufferLeft(i: i64, buffer: digits) - &digits[0]) {} |
645 | AlphaNum(unsigned long u64) |
646 | : piece_data_(digits), |
647 | piece_size_(FastUInt64ToBufferLeft(i: u64, buffer: digits) - &digits[0]) {} |
648 | |
649 | AlphaNum(float f) |
650 | : piece_data_(digits), piece_size_(strlen(s: FloatToBuffer(i: f, buffer: digits))) {} |
651 | AlphaNum(double f) |
652 | : piece_data_(digits), piece_size_(strlen(s: DoubleToBuffer(i: f, buffer: digits))) {} |
653 | |
654 | AlphaNum(Hex hex); |
655 | |
656 | AlphaNum(const char* c_str) |
657 | : piece_data_(c_str), piece_size_(strlen(s: c_str)) {} |
658 | // TODO: Add a string_ref constructor, eventually |
659 | // AlphaNum(const StringPiece &pc) : piece(pc) {} |
660 | |
661 | AlphaNum(const std::string& str) |
662 | : piece_data_(str.data()), piece_size_(str.size()) {} |
663 | |
664 | AlphaNum(StringPiece str) |
665 | : piece_data_(str.data()), piece_size_(str.size()) {} |
666 | |
667 | size_t size() const { return piece_size_; } |
668 | const char *data() const { return piece_data_; } |
669 | |
670 | private: |
671 | // Use ":" not ':' |
672 | AlphaNum(char c); // NOLINT(runtime/explicit) |
673 | |
674 | // Disallow copy and assign. |
675 | AlphaNum(const AlphaNum&); |
676 | void operator=(const AlphaNum&); |
677 | }; |
678 | |
679 | } // namespace strings |
680 | |
681 | using strings::AlphaNum; |
682 | |
683 | // ---------------------------------------------------------------------- |
684 | // StrCat() |
685 | // This merges the given strings or numbers, with no delimiter. This |
686 | // is designed to be the fastest possible way to construct a string out |
687 | // of a mix of raw C strings, strings, bool values, |
688 | // and numeric values. |
689 | // |
690 | // Don't use this for user-visible strings. The localization process |
691 | // works poorly on strings built up out of fragments. |
692 | // |
693 | // For clarity and performance, don't use StrCat when appending to a |
694 | // string. In particular, avoid using any of these (anti-)patterns: |
//      str.append(StrCat(...))
//      str += StrCat(...)
//      str = StrCat(str, ...)
//    where the last is the worst, with the potential to change a loop
699 | // from a linear time operation with O(1) dynamic allocations into a |
700 | // quadratic time operation with O(n) dynamic allocations. StrAppend |
701 | // is a better choice than any of the above, subject to the restriction |
702 | // of StrAppend(&str, a, b, c, ...) that none of the a, b, c, ... may |
703 | // be a reference into str. |
704 | // ---------------------------------------------------------------------- |
705 | |
706 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b); |
707 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
708 | const AlphaNum& c); |
709 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
710 | const AlphaNum& c, const AlphaNum& d); |
711 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
712 | const AlphaNum& c, const AlphaNum& d, |
713 | const AlphaNum& e); |
714 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
715 | const AlphaNum& c, const AlphaNum& d, |
716 | const AlphaNum& e, const AlphaNum& f); |
717 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
718 | const AlphaNum& c, const AlphaNum& d, |
719 | const AlphaNum& e, const AlphaNum& f, |
720 | const AlphaNum& g); |
721 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
722 | const AlphaNum& c, const AlphaNum& d, |
723 | const AlphaNum& e, const AlphaNum& f, |
724 | const AlphaNum& g, const AlphaNum& h); |
725 | PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, |
726 | const AlphaNum& c, const AlphaNum& d, |
727 | const AlphaNum& e, const AlphaNum& f, |
728 | const AlphaNum& g, const AlphaNum& h, |
729 | const AlphaNum& i); |
730 | |
731 | inline std::string StrCat(const AlphaNum& a) { |
732 | return std::string(a.data(), a.size()); |
733 | } |
734 | |
735 | // ---------------------------------------------------------------------- |
736 | // StrAppend() |
737 | // Same as above, but adds the output to the given string. |
738 | // WARNING: For speed, StrAppend does not try to check each of its input |
739 | // arguments to be sure that they are not a subset of the string being |
740 | // appended to. That is, while this will work: |
741 | // |
742 | // string s = "foo"; |
743 | // s += s; |
744 | // |
745 | // This will not (necessarily) work: |
746 | // |
747 | // string s = "foo"; |
748 | // StrAppend(&s, s); |
749 | // |
750 | // Note: while StrCat supports appending up to 9 arguments, StrAppend |
751 | // is currently limited to 4. That's rarely an issue except when |
752 | // automatically transforming StrCat to StrAppend, and can easily be |
753 | // worked around as consecutive calls to StrAppend are quite efficient. |
754 | // ---------------------------------------------------------------------- |
755 | |
756 | PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a); |
757 | PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, |
758 | const AlphaNum& b); |
759 | PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, |
760 | const AlphaNum& b, const AlphaNum& c); |
761 | PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, |
762 | const AlphaNum& b, const AlphaNum& c, |
763 | const AlphaNum& d); |
764 | |
// ----------------------------------------------------------------------
// Join()
//    These methods concatenate a range of components into a C++ string, using
//    the C-string "delim" as a separator between components.
// ----------------------------------------------------------------------

// Appends every element of [start, end) to *result via StrAppend(), writing
// "delim" before each element except the first.  *result is not cleared
// beforehand, and an empty range leaves it untouched.
template <typename Iterator>
void Join(Iterator start, Iterator end, const char* delim,
          std::string* result) {
  for (Iterator it = start; it != end; ++it) {
    if (it != start) {
      // Separator goes between components only, never before the first one.
      result->append(delim);
    }
    StrAppend(result, *it);
  }
}
780 | |
// Range overload of Join(): joins the elements of "components" (anything
// that provides begin() and end()) with "delim" between them, and returns
// the result as a new string.
template <typename Range>
std::string Join(const Range& components, const char* delim) {
  std::string joined;
  Join(components.begin(), components.end(), delim, &joined);
  return joined;
}
787 | |
788 | // ---------------------------------------------------------------------- |
789 | // ToHex() |
790 | // Return a lower-case hex string representation of the given integer. |
791 | // ---------------------------------------------------------------------- |
792 | PROTOBUF_EXPORT std::string ToHex(uint64_t num); |
793 | |
794 | // ---------------------------------------------------------------------- |
795 | // GlobalReplaceSubstring() |
796 | // Replaces all instances of a substring in a string. Does nothing |
797 | // if 'substring' is empty. Returns the number of replacements. |
798 | // |
799 | // NOTE: The string pieces must not overlap s. |
800 | // ---------------------------------------------------------------------- |
801 | PROTOBUF_EXPORT int GlobalReplaceSubstring(const std::string& substring, |
802 | const std::string& replacement, |
803 | std::string* s); |
804 | |
805 | // ---------------------------------------------------------------------- |
806 | // Base64Unescape() |
807 | // Converts "src" which is encoded in Base64 to its binary equivalent and |
808 | // writes it to "dest". If src contains invalid characters, dest is cleared |
809 | // and the function returns false. Returns true on success. |
810 | // ---------------------------------------------------------------------- |
811 | PROTOBUF_EXPORT bool Base64Unescape(StringPiece src, std::string* dest); |
812 | |
813 | // ---------------------------------------------------------------------- |
814 | // WebSafeBase64Unescape() |
815 | // This is a variation of Base64Unescape which uses '-' instead of '+', and |
816 | // '_' instead of '/'. src is not null terminated, instead specify len. I |
817 | // recommend that slen<szdest, but we honor szdest anyway. |
818 | // RETURNS the length of dest, or -1 if src contains invalid chars. |
819 | |
820 | // The variation that stores into a string clears the string first, and |
821 | // returns false (with dest empty) if src contains invalid chars; for |
822 | // this version src and dest must be different strings. |
823 | // ---------------------------------------------------------------------- |
824 | PROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, char* dest, |
825 | int szdest); |
826 | PROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, std::string* dest); |
827 | |
828 | // Return the length to use for the output buffer given to the base64 escape |
829 | // routines. Make sure to use the same value for do_padding in both. |
830 | // This function may return incorrect results if given input_len values that |
831 | // are extremely high, which should happen rarely. |
832 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, bool do_padding); |
833 | // Use this version when calling Base64Escape without a do_padding arg. |
834 | PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); |
835 | |
836 | // ---------------------------------------------------------------------- |
837 | // Base64Escape() |
838 | // WebSafeBase64Escape() |
839 | // Encode "src" to "dest" using base64 encoding. |
840 | // src is not null terminated, instead specify len. |
841 | // 'dest' should have at least CalculateBase64EscapedLen() length. |
842 | // RETURNS the length of dest. |
843 | // The WebSafe variation use '-' instead of '+' and '_' instead of '/' |
844 | // so that we can place the out in the URL or cookies without having |
845 | // to escape them. It also has an extra parameter "do_padding", |
846 | // which when set to false will prevent padding with "=". |
847 | // ---------------------------------------------------------------------- |
848 | PROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, char* dest, |
849 | int szdest); |
850 | PROTOBUF_EXPORT int WebSafeBase64Escape(const unsigned char* src, int slen, |
851 | char* dest, int szdest, |
852 | bool do_padding); |
853 | // Encode src into dest with padding. |
854 | PROTOBUF_EXPORT void Base64Escape(StringPiece src, std::string* dest); |
855 | // Encode src into dest web-safely without padding. |
856 | PROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, std::string* dest); |
857 | // Encode src into dest web-safely with padding. |
858 | PROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, |
859 | std::string* dest); |
860 | |
861 | PROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, |
862 | std::string* dest, bool do_padding); |
863 | PROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, |
864 | std::string* dest, bool do_padding); |
865 | |
// Reports whether code_point falls in one of the two accepted ranges,
// [0, 0xD800) or [0xE000, 0x10FFFF].  Values in [0xD800, 0xE000) -- the
// UTF-16 surrogate range -- and values above 0x10FFFF are rejected.
inline bool IsValidCodePoint(uint32_t code_point) {
  if (code_point < 0xD800) {
    return true;
  }
  if (code_point > 0x10FFFF) {
    return false;
  }
  return code_point >= 0xE000;
}
870 | |
// NOTE(review): presumably the maximum number of bytes in one UTF-8 encoded
// code point (it matches the 4-byte buffer EncodeAsUTF8Char() asks for) --
// confirm against its users.
static const int UTFmax = 4;
872 | // ---------------------------------------------------------------------- |
873 | // EncodeAsUTF8Char() |
874 | // Helper to append a Unicode code point to a string as UTF8, without bringing |
875 | // in any external dependencies. The output buffer must be as least 4 bytes |
876 | // large. |
877 | // ---------------------------------------------------------------------- |
878 | PROTOBUF_EXPORT int EncodeAsUTF8Char(uint32_t code_point, char* output); |
879 | |
880 | // ---------------------------------------------------------------------- |
881 | // UTF8FirstLetterNumBytes() |
882 | // Length of the first UTF-8 character. |
883 | // ---------------------------------------------------------------------- |
884 | PROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); |
885 | |
886 | // From google3/third_party/absl/strings/escaping.h |
887 | |
888 | // ---------------------------------------------------------------------- |
889 | // CleanStringLineEndings() |
890 | // Clean up a multi-line string to conform to Unix line endings. |
891 | // Reads from src and appends to dst, so usually dst should be empty. |
892 | // |
893 | // If there is no line ending at the end of a non-empty string, it can |
894 | // be added automatically. |
895 | // |
896 | // Four different types of input are correctly handled: |
897 | // |
898 | // - Unix/Linux files: line ending is LF: pass through unchanged |
899 | // |
900 | // - DOS/Windows files: line ending is CRLF: convert to LF |
901 | // |
902 | // - Legacy Mac files: line ending is CR: convert to LF |
903 | // |
904 | // - Garbled files: random line endings: convert gracefully |
905 | // lonely CR, lonely LF, CRLF: convert to LF |
906 | // |
907 | // @param src The multi-line string to convert |
908 | // @param dst The converted string is appended to this string |
909 | // @param auto_end_last_line Automatically terminate the last line |
910 | // |
911 | // Limitations: |
912 | // |
913 | // This does not do the right thing for CRCRLF files created by |
914 | // broken programs that do another Unix->DOS conversion on files |
915 | // that are already in CRLF format. For this, a two-pass approach |
916 | // brute-force would be needed that |
917 | // |
918 | // (1) determines the presence of LF (first one is ok) |
919 | // (2) if yes, removes any CR, else convert every CR to LF |
920 | PROTOBUF_EXPORT void CleanStringLineEndings(const std::string& src, |
921 | std::string* dst, |
922 | bool auto_end_last_line); |
923 | |
924 | // Same as above, but transforms the argument in place. |
925 | PROTOBUF_EXPORT void CleanStringLineEndings(std::string* str, |
926 | bool auto_end_last_line); |
927 | |
928 | namespace strings { |
929 | inline bool EndsWith(StringPiece text, StringPiece suffix) { |
930 | return suffix.empty() || |
931 | (text.size() >= suffix.size() && |
932 | memcmp(s1: text.data() + (text.size() - suffix.size()), s2: suffix.data(), |
933 | n: suffix.size()) == 0); |
934 | } |
935 | } // namespace strings |
936 | |
namespace internal {

// Locale-independent version of the standard strtod(): the decimal
// separator is always a dot, whatever the current locale is.
// NOTE(review): str and endptr are presumably used exactly as in strtod()
// -- confirm against the implementation.
double NoLocaleStrtod(const char* str,
                      char** endptr);

}  // namespace internal
944 | |
945 | } // namespace protobuf |
946 | } // namespace google |
947 | |
948 | #include <google/protobuf/port_undef.inc> |
949 | |
950 | #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ |
951 | |