| 1 | // |
| 2 | // Copyright 1999-2006 and onwards Google, Inc. |
| 3 | // |
| 4 | // Useful string functions and so forth. This is a grab-bag file. |
| 5 | // |
| 6 | // You might also want to look at memutil.h, which holds mem*() |
| 7 | // equivalents of a lot of the str*() functions in string.h, |
| 8 | // eg memstr, mempbrk, etc. |
| 9 | // |
| 10 | // If you need to process UTF8 strings, take a look at files in i18n/utf8. |
| 11 | |
| 12 | #ifndef STRINGS_STRUTIL_H_ |
| 13 | #define STRINGS_STRUTIL_H_ |
| 14 | |
| 15 | #include <functional> |
| 16 | using std::less; |
| 17 | |
| 18 | #include <hash_map> |
| 19 | using __gnu_cxx::hash_map; |
| 20 | |
| 21 | #include <hash_set> |
| 22 | using __gnu_cxx::hash_set; |
| 23 | |
| 24 | #include <set> |
| 25 | using std::set; |
| 26 | using std::multiset; |
| 27 | |
| 28 | #include <string> |
| 29 | using std::string; |
| 30 | |
| 31 | #include <utility> |
| 32 | using std::pair; |
| 33 | using std::make_pair; |
| 34 | |
| 35 | #include <vector> |
| 36 | using std::vector; |
| 37 | |
| 38 | #include <string.h> |
| 39 | #include <stdlib.h> |
| 40 | |
| 41 | // for strcasecmp (check SuSv3 -- this is the only header it's in!) |
| 42 | // MSVC doesn't have <strings.h>. Luckily, it defines equivalent |
| 43 | // functions (see port.h) |
| 44 | #ifndef COMPILER_MSVC |
| 45 | #include <strings.h> |
| 46 | #endif |
| 47 | #include <ctype.h> // not needed, but removing it will break the build |
| 48 | |
| 49 | using namespace std; |
| 50 | using namespace __gnu_cxx; |
| 51 | |
// A buffer size (counted in chars) which is large enough for all the
// FastToBuffer functions, as well as DoubleToBuffer and FloatToBuffer.
// Callers should size their output buffers with this constant instead of
// hardcoding a per-function minimum.  It is defined ahead of the project
// includes in case other string headers depend on it.
static const int kFastToBufferSize = 32;
| 56 | |
| 57 | #include "base/basictypes.h" |
| 58 | #include "base/logging.h" // for CHECK |
| 59 | #include "base/strtoint.h" |
| 60 | #include "base/int128.h" |
| 61 | #include "ascii_ctype.h" |
| 62 | //#include "charset.h" |
| 63 | //#include "escaping.h" |
| 64 | //#include "host_port.h" |
| 65 | #include "stringprintf.h" |
| 66 | #include "base/stl_decl.h" |
| 67 | #include "base/port.h" |
| 68 | #include "endian.h" |
| 69 | |
| 70 | // ---------------------------------------------------------------------- |
| 71 | // FpToString() |
| 72 | // FloatToString() |
| 73 | // IntToString() |
| 74 | // Int64ToString() |
| 75 | // UInt64ToString() |
| 76 | // Convert various types to their string representation, possibly padded |
| 77 | // with spaces, using snprintf format specifiers. |
| 78 | // "Fp" here stands for fingerprint: a 64-bit entity |
| 79 | // represented in 16 hex digits. |
| 80 | // ---------------------------------------------------------------------- |
| 81 | |
// Explicit-format variants.  `format` is an snprintf-style format specifier
// used to render the value.  FpToString() takes no format: a fingerprint is
// a 64-bit entity represented in 16 hex digits.
string FpToString(Fprint fp);
string FloatToString(float f, const char* format);
string IntToString(int i, const char* format);
string Int64ToString(int64 i64, const char* format);
string UInt64ToString(uint64 ui64, const char* format);

// Default-format variants.  The default formats are %7f, %7d, and %7u
// respectively.
// NOTE(review): three formats are listed for four functions; presumably the
// two 64-bit variants use the signed/unsigned forms with a 64-bit length
// modifier -- confirm against the definitions, which are not visible here.
string FloatToString(float f);
string IntToString(int i);
string Int64ToString(int64 i64);
string UInt64ToString(uint64 ui64);
| 93 | |
| 94 | // ---------------------------------------------------------------------- |
| 95 | // FastIntToBuffer() |
| 96 | // FastHexToBuffer() |
| 97 | // FastHex64ToBuffer() |
| 98 | // FastHex32ToBuffer() |
| 99 | // FastTimeToBuffer() |
| 100 | // These are intended for speed. FastIntToBuffer() assumes the |
| 101 | // integer is non-negative. FastHexToBuffer() puts output in |
| 102 | // hex rather than decimal. FastTimeToBuffer() puts the output |
| 103 | // into RFC822 format. |
| 104 | // |
| 105 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
| 106 | // padded to exactly 16 bytes (plus one byte for '\0') |
| 107 | // |
| 108 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
| 109 | // padded to exactly 8 bytes (plus one byte for '\0') |
| 110 | // |
| 111 | // All functions take the output buffer as an arg. FastInt() |
| 112 | // uses at most 22 bytes, FastTime() uses exactly 30 bytes. |
| 113 | // They all return a pointer to the beginning of the output, |
| 114 | // which may not be the beginning of the input buffer. (Though |
| 115 | // for FastTimeToBuffer(), we guarantee that it is.) |
| 116 | // |
| 117 | // NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible |
| 118 | // to pass to FastTimeToBuffer() a time whose year cannot be |
| 119 | // represented in 4 digits. In this case, the output buffer |
| 120 | // will contain the string "Invalid:<value>" |
| 121 | // ---------------------------------------------------------------------- |
| 122 | |
| 123 | // Previously documented minimums -- the buffers provided must be at least this |
| 124 | // long, though these numbers are subject to change: |
| 125 | // Int32, UInt32: 12 bytes |
| 126 | // Int64, UInt64, Hex: 22 bytes |
| 127 | // Time: 30 bytes |
| 128 | // Hex32: 9 bytes |
| 129 | // Hex64: 17 bytes |
| 130 | // Use kFastToBufferSize rather than hardcoding constants. |
| 131 | |
// Conversion routines.  Each takes the value to convert and a caller-supplied
// output buffer, and returns a pointer to the beginning of the output (which,
// per the notes above, may not be the beginning of the buffer).
// FastUInt32ToBuffer() and FastUInt64ToBuffer() are only declared here; their
// inline definitions appear further down, after the ...ToBufferLeft()
// declarations they are written in terms of.
char* FastInt32ToBuffer(int32 i, char* buffer);
char* FastInt64ToBuffer(int64 i, char* buffer);
char* FastUInt32ToBuffer(uint32 i, char* buffer);
char* FastUInt64ToBuffer(uint64 i, char* buffer);
char* FastHexToBuffer(int i, char* buffer);
char* FastTimeToBuffer(time_t t, char* buffer);
char* FastHex64ToBuffer(uint64 i, char* buffer);
char* FastHex32ToBuffer(uint32 i, char* buffer);
| 140 | |
| 141 | // at least 22 bytes long |
| 142 | inline char* FastIntToBuffer(int i, char* buffer) { |
| 143 | return (sizeof(i) == 4 ? |
| 144 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
| 145 | } |
| 146 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { |
| 147 | return (sizeof(i) == 4 ? |
| 148 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
| 149 | } |
| 150 | inline char* FastLongToBuffer(long i, char* buffer) { |
| 151 | return (sizeof(i) == 4 ? |
| 152 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
| 153 | } |
| 154 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { |
| 155 | return (sizeof(i) == 4 ? |
| 156 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
| 157 | } |
| 158 | |
| 159 | // A generic "number type" to buffer template and specializations. |
| 160 | // |
| 161 | // The specialization of FastNumToBuffer<>() should always be made explicit: |
| 162 | // FastNumToBuffer<int32>(mynums); // yes |
| 163 | // FastNumToBuffer(mynums); // no |
| 164 | template<typename T> char* FastNumToBuffer(T, char*); |
| 165 | template<> inline char* FastNumToBuffer<int32>(int32 i, char* buffer) { |
| 166 | return FastInt32ToBuffer(i, buffer); |
| 167 | } |
| 168 | template<> inline char* FastNumToBuffer<int64>(int64 i, char* buffer) { |
| 169 | return FastInt64ToBuffer(i, buffer); |
| 170 | } |
| 171 | template<> inline char* FastNumToBuffer<uint32>(uint32 i, char* buffer) { |
| 172 | return FastUInt32ToBuffer(i, buffer); |
| 173 | } |
| 174 | template<> inline char* FastNumToBuffer<uint64>(uint64 i, char* buffer) { |
| 175 | return FastUInt64ToBuffer(i, buffer); |
| 176 | } |
| 177 | |
| 178 | // ---------------------------------------------------------------------- |
| 179 | // FastInt32ToBufferLeft() |
| 180 | // FastUInt32ToBufferLeft() |
| 181 | // FastInt64ToBufferLeft() |
| 182 | // FastUInt64ToBufferLeft() |
| 183 | // |
| 184 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
| 185 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
| 186 | // their output to the beginning of the buffer (hence the name, as the |
| 187 | // output is left-aligned). The caller is responsible for ensuring that |
| 188 | // the buffer has enough space to hold the output. |
| 189 | // |
| 190 | // Returns a pointer to the end of the string (i.e. the null character |
| 191 | // terminating the string). |
| 192 | // ---------------------------------------------------------------------- |
| 193 | |
// Each routine writes its output starting at buffer[0] and returns a pointer
// to the terminating null character, not to the start of the buffer.  The
// trailing comments give the minimum buffer size the caller must provide.
char* FastInt32ToBufferLeft(int32 i, char* buffer);    // at least 12 bytes
char* FastUInt32ToBufferLeft(uint32 i, char* buffer);  // at least 12 bytes
char* FastInt64ToBufferLeft(int64 i, char* buffer);    // at least 22 bytes
char* FastUInt64ToBufferLeft(uint64 i, char* buffer);  // at least 22 bytes
| 198 | |
| 199 | // Just define these in terms of the above. |
| 200 | inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { |
| 201 | FastUInt32ToBufferLeft(i, buffer); |
| 202 | return buffer; |
| 203 | } |
| 204 | inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { |
| 205 | FastUInt64ToBufferLeft(i, buffer); |
| 206 | return buffer; |
| 207 | } |
| 208 | |
// ----------------------------------------------------------------------
// ConsumeStrayLeadingZeroes
//    Eliminates all leading zeroes (unless the string itself is composed
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
//    The function returns nothing; the result is delivered through *str.
// ----------------------------------------------------------------------
void ConsumeStrayLeadingZeroes(string* str);
| 214 | |
| 215 | // ---------------------------------------------------------------------- |
| 216 | // ParseLeadingInt32Value |
| 217 | // A simple parser for int32 values. Returns the parsed value |
| 218 | // if a valid integer is found; else returns deflt. It does not |
| 219 | // check if str is entirely consumed. |
| 220 | // This cannot handle decimal numbers with leading 0s, since they will be |
| 221 | // treated as octal. If you know it's decimal, use ParseLeadingDec32Value. |
| 222 | // -------------------------------------------------------------------- |
| 223 | int32 ParseLeadingInt32Value(const char* str, int32 deflt); |
| 224 | inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) { |
| 225 | return ParseLeadingInt32Value(str.c_str(), deflt); |
| 226 | } |
| 227 | |
| 228 | // ParseLeadingUInt32Value |
| 229 | // A simple parser for uint32 values. Returns the parsed value |
| 230 | // if a valid integer is found; else returns deflt. It does not |
| 231 | // check if str is entirely consumed. |
| 232 | // This cannot handle decimal numbers with leading 0s, since they will be |
| 233 | // treated as octal. If you know it's decimal, use ParseLeadingUDec32Value. |
| 234 | // -------------------------------------------------------------------- |
| 235 | uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt); |
| 236 | inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) { |
| 237 | return ParseLeadingUInt32Value(str.c_str(), deflt); |
| 238 | } |
| 239 | |
| 240 | // ---------------------------------------------------------------------- |
| 241 | // ParseLeadingDec32Value |
| 242 | // A simple parser for decimal int32 values. Returns the parsed value |
| 243 | // if a valid integer is found; else returns deflt. It does not |
| 244 | // check if str is entirely consumed. |
| 245 | // The string passed in is treated as *10 based*. |
| 246 | // This can handle strings with leading 0s. |
| 247 | // See also: ParseLeadingDec64Value |
| 248 | // -------------------------------------------------------------------- |
| 249 | int32 ParseLeadingDec32Value(const char* str, int32 deflt); |
| 250 | inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) { |
| 251 | return ParseLeadingDec32Value(str.c_str(), deflt); |
| 252 | } |
| 253 | |
| 254 | // ParseLeadingUDec32Value |
| 255 | // A simple parser for decimal uint32 values. Returns the parsed value |
| 256 | // if a valid integer is found; else returns deflt. It does not |
| 257 | // check if str is entirely consumed. |
| 258 | // The string passed in is treated as *10 based*. |
| 259 | // This can handle strings with leading 0s. |
| 260 | // See also: ParseLeadingUDec64Value |
| 261 | // -------------------------------------------------------------------- |
| 262 | uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt); |
| 263 | inline uint32 ParseLeadingUDec32Value(const string& str, uint32 deflt) { |
| 264 | return ParseLeadingUDec32Value(str.c_str(), deflt); |
| 265 | } |
| 266 | |
| 267 | // ---------------------------------------------------------------------- |
| 268 | // ParseLeadingUInt64Value |
| 269 | // ParseLeadingInt64Value |
| 270 | // ParseLeadingHex64Value |
| 271 | // ParseLeadingDec64Value |
| 272 | // ParseLeadingUDec64Value |
| 273 | // A simple parser for long long values. |
| 274 | // Returns the parsed value if a |
| 275 | // valid integer is found; else returns deflt |
| 276 | // -------------------------------------------------------------------- |
| 277 | uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt); |
| 278 | inline uint64 ParseLeadingUInt64Value(const string& str, uint64 deflt) { |
| 279 | return ParseLeadingUInt64Value(str.c_str(), deflt); |
| 280 | } |
| 281 | int64 ParseLeadingInt64Value(const char* str, int64 deflt); |
| 282 | inline int64 ParseLeadingInt64Value(const string& str, int64 deflt) { |
| 283 | return ParseLeadingInt64Value(str.c_str(), deflt); |
| 284 | } |
| 285 | uint64 ParseLeadingHex64Value(const char* str, uint64 deflt); |
| 286 | inline uint64 ParseLeadingHex64Value(const string& str, uint64 deflt) { |
| 287 | return ParseLeadingHex64Value(str.c_str(), deflt); |
| 288 | } |
| 289 | int64 ParseLeadingDec64Value(const char* str, int64 deflt); |
| 290 | inline int64 ParseLeadingDec64Value(const string& str, int64 deflt) { |
| 291 | return ParseLeadingDec64Value(str.c_str(), deflt); |
| 292 | } |
| 293 | uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt); |
| 294 | inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) { |
| 295 | return ParseLeadingUDec64Value(str.c_str(), deflt); |
| 296 | } |
| 297 | |
| 298 | // ------------------------------------------------------------------------- |
| 299 | // DictionaryParse |
| 300 | // This routine parses a common dictionary format (key and value separated |
| 301 | // by ':', entries separated by commas). This format is used for many |
| 302 | // complex commandline flags. It is also used to encode dictionaries for |
| 303 | // exporting them or writing them to a checkpoint. Returns a vector of |
| 304 | // <key, value> pairs. Returns true if there is no error in parsing, false |
| 305 | // otherwise. |
| 306 | // ------------------------------------------------------------------------- |
// encoded_str holds the encoded dictionary text; the parsed <key, value>
// pairs are delivered through *items and the bool result reports success.
// NOTE(review): whether *items is cleared first or appended to is not visible
// from this declaration -- confirm against the definition.
bool DictionaryParse(const string& encoded_str,
                     vector<pair<string, string> >* items);
| 309 | |
| 310 | #endif /* #ifndef STRINGS_STRUTIL_H_ */ |
| 311 | |