1 | // |
2 | // Copyright 1999-2006 and onwards Google, Inc. |
3 | // |
4 | // Useful string functions and so forth. This is a grab-bag file. |
5 | // |
6 | // You might also want to look at memutil.h, which holds mem*() |
7 | // equivalents of a lot of the str*() functions in string.h, |
8 | // eg memstr, mempbrk, etc. |
9 | // |
10 | // If you need to process UTF8 strings, take a look at files in i18n/utf8. |
11 | |
12 | #ifndef STRINGS_STRUTIL_H_ |
13 | #define STRINGS_STRUTIL_H_ |
14 | |
15 | #include <functional> |
16 | using std::less; |
17 | |
18 | #include <hash_map> |
19 | using __gnu_cxx::hash_map; |
20 | |
21 | #include <hash_set> |
22 | using __gnu_cxx::hash_set; |
23 | |
24 | #include <set> |
25 | using std::set; |
26 | using std::multiset; |
27 | |
28 | #include <string> |
29 | using std::string; |
30 | |
31 | #include <utility> |
32 | using std::pair; |
33 | using std::make_pair; |
34 | |
35 | #include <vector> |
36 | using std::vector; |
37 | |
38 | #include <string.h> |
39 | #include <stdlib.h> |
40 | |
41 | // for strcasecmp (check SuSv3 -- this is the only header it's in!) |
42 | // MSVC doesn't have <strings.h>. Luckily, it defines equivalent |
43 | // functions (see port.h) |
44 | #ifndef COMPILER_MSVC |
45 | #include <strings.h> |
46 | #endif |
47 | #include <ctype.h> // not needed, but removing it will break the build |
48 | |
49 | using namespace std; |
50 | using namespace __gnu_cxx; |
51 | |
52 | // A buffer size which is large enough for all the FastToBuffer functions, as |
53 | // well as DoubleToBuffer and FloatToBuffer. We define this here in case other |
54 | // string headers depend on it. |
55 | static const int kFastToBufferSize = 32; |
56 | |
57 | #include "base/basictypes.h" |
58 | #include "base/logging.h" // for CHECK |
59 | #include "base/strtoint.h" |
60 | #include "base/int128.h" |
61 | #include "ascii_ctype.h" |
62 | //#include "charset.h" |
63 | //#include "escaping.h" |
64 | //#include "host_port.h" |
65 | #include "stringprintf.h" |
66 | #include "base/stl_decl.h" |
67 | #include "base/port.h" |
68 | #include "endian.h" |
69 | |
70 | // ---------------------------------------------------------------------- |
71 | // FpToString() |
72 | // FloatToString() |
73 | // IntToString() |
74 | // Int64ToString() |
75 | // UInt64ToString() |
76 | // Convert various types to their string representation, possibly padded |
77 | // with spaces, using snprintf format specifiers. |
78 | // "Fp" here stands for fingerprint: a 64-bit entity |
79 | // represented in 16 hex digits. |
80 | // ---------------------------------------------------------------------- |
81 | |
82 | string FpToString(Fprint fp); |
83 | string FloatToString(float f, const char* format); |
84 | string IntToString(int i, const char* format); |
85 | string Int64ToString(int64 i64, const char* format); |
86 | string UInt64ToString(uint64 ui64, const char* format); |
87 | |
88 | // The default formats are %7f, %7d, and %7u respectively |
89 | string FloatToString(float f); |
90 | string IntToString(int i); |
91 | string Int64ToString(int64 i64); |
92 | string UInt64ToString(uint64 ui64); |
93 | |
94 | // ---------------------------------------------------------------------- |
95 | // FastIntToBuffer() |
96 | // FastHexToBuffer() |
97 | // FastHex64ToBuffer() |
98 | // FastHex32ToBuffer() |
99 | // FastTimeToBuffer() |
100 | // These are intended for speed. FastIntToBuffer() assumes the |
101 | // integer is non-negative. FastHexToBuffer() puts output in |
102 | // hex rather than decimal. FastTimeToBuffer() puts the output |
103 | // into RFC822 format. |
104 | // |
105 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
106 | // padded to exactly 16 bytes (plus one byte for '\0') |
107 | // |
108 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
109 | // padded to exactly 8 bytes (plus one byte for '\0') |
110 | // |
111 | // All functions take the output buffer as an arg. FastInt() |
112 | // uses at most 22 bytes, FastTime() uses exactly 30 bytes. |
113 | // They all return a pointer to the beginning of the output, |
114 | // which may not be the beginning of the input buffer. (Though |
115 | // for FastTimeToBuffer(), we guarantee that it is.) |
116 | // |
117 | // NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible |
118 | // to pass to FastTimeToBuffer() a time whose year cannot be |
119 | // represented in 4 digits. In this case, the output buffer |
120 | // will contain the string "Invalid:<value>" |
121 | // ---------------------------------------------------------------------- |
122 | |
123 | // Previously documented minimums -- the buffers provided must be at least this |
124 | // long, though these numbers are subject to change: |
125 | // Int32, UInt32: 12 bytes |
126 | // Int64, UInt64, Hex: 22 bytes |
127 | // Time: 30 bytes |
128 | // Hex32: 9 bytes |
129 | // Hex64: 17 bytes |
130 | // Use kFastToBufferSize rather than hardcoding constants. |
131 | |
132 | char* FastInt32ToBuffer(int32 i, char* buffer); |
133 | char* FastInt64ToBuffer(int64 i, char* buffer); |
134 | char* FastUInt32ToBuffer(uint32 i, char* buffer); |
135 | char* FastUInt64ToBuffer(uint64 i, char* buffer); |
136 | char* FastHexToBuffer(int i, char* buffer); |
137 | char* FastTimeToBuffer(time_t t, char* buffer); |
138 | char* FastHex64ToBuffer(uint64 i, char* buffer); |
139 | char* FastHex32ToBuffer(uint32 i, char* buffer); |
140 | |
141 | // at least 22 bytes long |
142 | inline char* FastIntToBuffer(int i, char* buffer) { |
143 | return (sizeof(i) == 4 ? |
144 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
145 | } |
146 | inline char* FastUIntToBuffer(unsigned int i, char* buffer) { |
147 | return (sizeof(i) == 4 ? |
148 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
149 | } |
150 | inline char* FastLongToBuffer(long i, char* buffer) { |
151 | return (sizeof(i) == 4 ? |
152 | FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); |
153 | } |
154 | inline char* FastULongToBuffer(unsigned long i, char* buffer) { |
155 | return (sizeof(i) == 4 ? |
156 | FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); |
157 | } |
158 | |
159 | // A generic "number type" to buffer template and specializations. |
160 | // |
161 | // The specialization of FastNumToBuffer<>() should always be made explicit: |
162 | // FastNumToBuffer<int32>(mynums); // yes |
163 | // FastNumToBuffer(mynums); // no |
164 | template<typename T> char* FastNumToBuffer(T, char*); |
165 | template<> inline char* FastNumToBuffer<int32>(int32 i, char* buffer) { |
166 | return FastInt32ToBuffer(i, buffer); |
167 | } |
168 | template<> inline char* FastNumToBuffer<int64>(int64 i, char* buffer) { |
169 | return FastInt64ToBuffer(i, buffer); |
170 | } |
171 | template<> inline char* FastNumToBuffer<uint32>(uint32 i, char* buffer) { |
172 | return FastUInt32ToBuffer(i, buffer); |
173 | } |
174 | template<> inline char* FastNumToBuffer<uint64>(uint64 i, char* buffer) { |
175 | return FastUInt64ToBuffer(i, buffer); |
176 | } |
177 | |
178 | // ---------------------------------------------------------------------- |
179 | // FastInt32ToBufferLeft() |
180 | // FastUInt32ToBufferLeft() |
181 | // FastInt64ToBufferLeft() |
182 | // FastUInt64ToBufferLeft() |
183 | // |
184 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
185 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
186 | // their output to the beginning of the buffer (hence the name, as the |
187 | // output is left-aligned). The caller is responsible for ensuring that |
188 | // the buffer has enough space to hold the output. |
189 | // |
190 | // Returns a pointer to the end of the string (i.e. the null character |
191 | // terminating the string). |
192 | // ---------------------------------------------------------------------- |
193 | |
194 | char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes |
195 | char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes |
196 | char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes |
197 | char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes |
198 | |
199 | // Just define these in terms of the above. |
200 | inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { |
201 | FastUInt32ToBufferLeft(i, buffer); |
202 | return buffer; |
203 | } |
204 | inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { |
205 | FastUInt64ToBufferLeft(i, buffer); |
206 | return buffer; |
207 | } |
208 | |
209 | // ---------------------------------------------------------------------- |
210 | // ConsumeStrayLeadingZeroes |
211 | // Eliminates all leading zeroes (unless the string itself is composed |
212 | // of nothing but zeroes, in which case one is kept: 0...0 becomes 0). |
213 | void ConsumeStrayLeadingZeroes(string* str); |
214 | |
215 | // ---------------------------------------------------------------------- |
216 | // ParseLeadingInt32Value |
217 | // A simple parser for int32 values. Returns the parsed value |
218 | // if a valid integer is found; else returns deflt. It does not |
219 | // check if str is entirely consumed. |
220 | // This cannot handle decimal numbers with leading 0s, since they will be |
221 | // treated as octal. If you know it's decimal, use ParseLeadingDec32Value. |
222 | // -------------------------------------------------------------------- |
223 | int32 ParseLeadingInt32Value(const char* str, int32 deflt); |
224 | inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) { |
225 | return ParseLeadingInt32Value(str.c_str(), deflt); |
226 | } |
227 | |
228 | // ParseLeadingUInt32Value |
229 | // A simple parser for uint32 values. Returns the parsed value |
230 | // if a valid integer is found; else returns deflt. It does not |
231 | // check if str is entirely consumed. |
232 | // This cannot handle decimal numbers with leading 0s, since they will be |
233 | // treated as octal. If you know it's decimal, use ParseLeadingUDec32Value. |
234 | // -------------------------------------------------------------------- |
235 | uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt); |
236 | inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) { |
237 | return ParseLeadingUInt32Value(str.c_str(), deflt); |
238 | } |
239 | |
240 | // ---------------------------------------------------------------------- |
241 | // ParseLeadingDec32Value |
242 | // A simple parser for decimal int32 values. Returns the parsed value |
243 | // if a valid integer is found; else returns deflt. It does not |
244 | // check if str is entirely consumed. |
245 | // The string passed in is treated as *base 10*. |
246 | // This can handle strings with leading 0s. |
247 | // See also: ParseLeadingDec64Value |
248 | // -------------------------------------------------------------------- |
249 | int32 ParseLeadingDec32Value(const char* str, int32 deflt); |
250 | inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) { |
251 | return ParseLeadingDec32Value(str.c_str(), deflt); |
252 | } |
253 | |
254 | // ParseLeadingUDec32Value |
255 | // A simple parser for decimal uint32 values. Returns the parsed value |
256 | // if a valid integer is found; else returns deflt. It does not |
257 | // check if str is entirely consumed. |
258 | // The string passed in is treated as *base 10*. |
259 | // This can handle strings with leading 0s. |
260 | // See also: ParseLeadingUDec64Value |
261 | // -------------------------------------------------------------------- |
262 | uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt); |
263 | inline uint32 ParseLeadingUDec32Value(const string& str, uint32 deflt) { |
264 | return ParseLeadingUDec32Value(str.c_str(), deflt); |
265 | } |
266 | |
267 | // ---------------------------------------------------------------------- |
268 | // ParseLeadingUInt64Value |
269 | // ParseLeadingInt64Value |
270 | // ParseLeadingHex64Value |
271 | // ParseLeadingDec64Value |
272 | // ParseLeadingUDec64Value |
273 | // A simple parser for long long values. |
274 | // Returns the parsed value if a |
275 | // valid integer is found; else returns deflt |
276 | // -------------------------------------------------------------------- |
277 | uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt); |
278 | inline uint64 ParseLeadingUInt64Value(const string& str, uint64 deflt) { |
279 | return ParseLeadingUInt64Value(str.c_str(), deflt); |
280 | } |
281 | int64 ParseLeadingInt64Value(const char* str, int64 deflt); |
282 | inline int64 ParseLeadingInt64Value(const string& str, int64 deflt) { |
283 | return ParseLeadingInt64Value(str.c_str(), deflt); |
284 | } |
285 | uint64 ParseLeadingHex64Value(const char* str, uint64 deflt); |
286 | inline uint64 ParseLeadingHex64Value(const string& str, uint64 deflt) { |
287 | return ParseLeadingHex64Value(str.c_str(), deflt); |
288 | } |
289 | int64 ParseLeadingDec64Value(const char* str, int64 deflt); |
290 | inline int64 ParseLeadingDec64Value(const string& str, int64 deflt) { |
291 | return ParseLeadingDec64Value(str.c_str(), deflt); |
292 | } |
293 | uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt); |
294 | inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) { |
295 | return ParseLeadingUDec64Value(str.c_str(), deflt); |
296 | } |
297 | |
298 | // ------------------------------------------------------------------------- |
299 | // DictionaryParse |
300 | // This routine parses a common dictionary format (key and value separated |
301 | // by ':', entries separated by commas). This format is used for many |
302 | // complex commandline flags. It is also used to encode dictionaries for |
303 | // exporting them or writing them to a checkpoint. Outputs a vector of |
304 | // <key, value> pairs in *items. Returns true if there is no error in |
305 | // parsing, false otherwise. |
306 | // ------------------------------------------------------------------------- |
307 | bool DictionaryParse(const string& encoded_str, |
308 | vector<pair<string, string> >* items); |
309 | |
310 | #endif /* #ifndef STRINGS_STRUTIL_H_ */ |
311 | |