//
// Copyright 1999-2006 and onwards Google, Inc.
//
// Useful string functions and so forth.  This is a grab-bag file.
//
// You might also want to look at memutil.h, which holds mem*()
// equivalents of a lot of the str*() functions in string.h,
// e.g. memstr, mempbrk, etc.
//
// If you need to process UTF8 strings, take a look at files in i18n/utf8.
11
12#ifndef STRINGS_STRUTIL_H_
13#define STRINGS_STRUTIL_H_
14
15#include <functional>
16using std::less;
17
18#include <hash_map>
19using __gnu_cxx::hash_map;
20
21#include <hash_set>
22using __gnu_cxx::hash_set;
23
24#include <set>
25using std::set;
26using std::multiset;
27
28#include <string>
29using std::string;
30
31#include <utility>
32using std::pair;
33using std::make_pair;
34
35#include <vector>
36using std::vector;
37
38#include <string.h>
39#include <stdlib.h>
40
41// for strcasecmp (check SuSv3 -- this is the only header it's in!)
42// MSVC doesn't have <strings.h>. Luckily, it defines equivalent
43// functions (see port.h)
44#ifndef COMPILER_MSVC
45#include <strings.h>
46#endif
47#include <ctype.h> // not needed, but removing it will break the build
48
49using namespace std;
50using namespace __gnu_cxx;
51
// Size, in bytes, of a scratch buffer that is large enough for all the
// FastToBuffer functions, as well as DoubleToBuffer and FloatToBuffer.
// It is defined here, ahead of the project includes that follow, in case
// other string headers depend on it.
static const int kFastToBufferSize = 32;
56
57#include "base/basictypes.h"
58#include "base/logging.h" // for CHECK
59#include "base/strtoint.h"
60#include "base/int128.h"
61#include "ascii_ctype.h"
62//#include "charset.h"
63//#include "escaping.h"
64//#include "host_port.h"
65#include "stringprintf.h"
66#include "base/stl_decl.h"
67#include "base/port.h"
68#include "endian.h"
69
70// ----------------------------------------------------------------------
71// FpToString()
72// FloatToString()
73// IntToString()
74// Int64ToString()
75// UInt64ToString()
76// Convert various types to their string representation, possibly padded
77// with spaces, using snprintf format specifiers.
78// "Fp" here stands for fingerprint: a 64-bit entity
79// represented in 16 hex digits.
80// ----------------------------------------------------------------------
81
82string FpToString(Fprint fp);
83string FloatToString(float f, const char* format);
84string IntToString(int i, const char* format);
85string Int64ToString(int64 i64, const char* format);
86string UInt64ToString(uint64 ui64, const char* format);
87
88// The default formats are %7f, %7d, and %7u respectively
89string FloatToString(float f);
90string IntToString(int i);
91string Int64ToString(int64 i64);
92string UInt64ToString(uint64 ui64);
93
94// ----------------------------------------------------------------------
95// FastIntToBuffer()
96// FastHexToBuffer()
97// FastHex64ToBuffer()
98// FastHex32ToBuffer()
99// FastTimeToBuffer()
100// These are intended for speed. FastIntToBuffer() assumes the
101// integer is non-negative. FastHexToBuffer() puts output in
102// hex rather than decimal. FastTimeToBuffer() puts the output
103// into RFC822 format.
104//
105// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
106// padded to exactly 16 bytes (plus one byte for '\0')
107//
108// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
109// padded to exactly 8 bytes (plus one byte for '\0')
110//
111// All functions take the output buffer as an arg. FastInt()
112// uses at most 22 bytes, FastTime() uses exactly 30 bytes.
113// They all return a pointer to the beginning of the output,
114// which may not be the beginning of the input buffer. (Though
115// for FastTimeToBuffer(), we guarantee that it is.)
116//
117// NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible
118// to pass to FastTimeToBuffer() a time whose year cannot be
119// represented in 4 digits. In this case, the output buffer
120// will contain the string "Invalid:<value>"
121// ----------------------------------------------------------------------
122
123// Previously documented minimums -- the buffers provided must be at least this
124// long, though these numbers are subject to change:
125// Int32, UInt32: 12 bytes
126// Int64, UInt64, Hex: 22 bytes
127// Time: 30 bytes
128// Hex32: 9 bytes
129// Hex64: 17 bytes
130// Use kFastToBufferSize rather than hardcoding constants.
131
132char* FastInt32ToBuffer(int32 i, char* buffer);
133char* FastInt64ToBuffer(int64 i, char* buffer);
134char* FastUInt32ToBuffer(uint32 i, char* buffer);
135char* FastUInt64ToBuffer(uint64 i, char* buffer);
136char* FastHexToBuffer(int i, char* buffer);
137char* FastTimeToBuffer(time_t t, char* buffer);
138char* FastHex64ToBuffer(uint64 i, char* buffer);
139char* FastHex32ToBuffer(uint32 i, char* buffer);
140
141// at least 22 bytes long
142inline char* FastIntToBuffer(int i, char* buffer) {
143 return (sizeof(i) == 4 ?
144 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
145}
146inline char* FastUIntToBuffer(unsigned int i, char* buffer) {
147 return (sizeof(i) == 4 ?
148 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
149}
150inline char* FastLongToBuffer(long i, char* buffer) {
151 return (sizeof(i) == 4 ?
152 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer));
153}
154inline char* FastULongToBuffer(unsigned long i, char* buffer) {
155 return (sizeof(i) == 4 ?
156 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer));
157}
158
159// A generic "number type" to buffer template and specializations.
160//
161// The specialization of FastNumToBuffer<>() should always be made explicit:
162// FastNumToBuffer<int32>(mynums); // yes
163// FastNumToBuffer(mynums); // no
164template<typename T> char* FastNumToBuffer(T, char*);
165template<> inline char* FastNumToBuffer<int32>(int32 i, char* buffer) {
166 return FastInt32ToBuffer(i, buffer);
167}
168template<> inline char* FastNumToBuffer<int64>(int64 i, char* buffer) {
169 return FastInt64ToBuffer(i, buffer);
170}
171template<> inline char* FastNumToBuffer<uint32>(uint32 i, char* buffer) {
172 return FastUInt32ToBuffer(i, buffer);
173}
174template<> inline char* FastNumToBuffer<uint64>(uint64 i, char* buffer) {
175 return FastUInt64ToBuffer(i, buffer);
176}
177
178// ----------------------------------------------------------------------
179// FastInt32ToBufferLeft()
180// FastUInt32ToBufferLeft()
181// FastInt64ToBufferLeft()
182// FastUInt64ToBufferLeft()
183//
184// Like the Fast*ToBuffer() functions above, these are intended for speed.
185// Unlike the Fast*ToBuffer() functions, however, these functions write
186// their output to the beginning of the buffer (hence the name, as the
187// output is left-aligned). The caller is responsible for ensuring that
188// the buffer has enough space to hold the output.
189//
190// Returns a pointer to the end of the string (i.e. the null character
191// terminating the string).
192// ----------------------------------------------------------------------
193
194char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes
195char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes
196char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes
197char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes
198
199// Just define these in terms of the above.
200inline char* FastUInt32ToBuffer(uint32 i, char* buffer) {
201 FastUInt32ToBufferLeft(i, buffer);
202 return buffer;
203}
204inline char* FastUInt64ToBuffer(uint64 i, char* buffer) {
205 FastUInt64ToBufferLeft(i, buffer);
206 return buffer;
207}
208
209// ----------------------------------------------------------------------
210// ConsumeStrayLeadingZeroes
211// Eliminates all leading zeroes (unless the string itself is composed
212// of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
213void ConsumeStrayLeadingZeroes(string* str);
214
215// ----------------------------------------------------------------------
216// ParseLeadingInt32Value
217// A simple parser for int32 values. Returns the parsed value
218// if a valid integer is found; else returns deflt. It does not
219// check if str is entirely consumed.
220// This cannot handle decimal numbers with leading 0s, since they will be
221// treated as octal. If you know it's decimal, use ParseLeadingDec32Value.
222// --------------------------------------------------------------------
223int32 ParseLeadingInt32Value(const char* str, int32 deflt);
224inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) {
225 return ParseLeadingInt32Value(str.c_str(), deflt);
226}
227
228// ParseLeadingUInt32Value
229// A simple parser for uint32 values. Returns the parsed value
230// if a valid integer is found; else returns deflt. It does not
231// check if str is entirely consumed.
232// This cannot handle decimal numbers with leading 0s, since they will be
233// treated as octal. If you know it's decimal, use ParseLeadingUDec32Value.
234// --------------------------------------------------------------------
235uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt);
236inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) {
237 return ParseLeadingUInt32Value(str.c_str(), deflt);
238}
239
240// ----------------------------------------------------------------------
241// ParseLeadingDec32Value
242// A simple parser for decimal int32 values. Returns the parsed value
243// if a valid integer is found; else returns deflt. It does not
244// check if str is entirely consumed.
245// The string passed in is treated as *10 based*.
246// This can handle strings with leading 0s.
247// See also: ParseLeadingDec64Value
248// --------------------------------------------------------------------
249int32 ParseLeadingDec32Value(const char* str, int32 deflt);
250inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) {
251 return ParseLeadingDec32Value(str.c_str(), deflt);
252}
253
254// ParseLeadingUDec32Value
255// A simple parser for decimal uint32 values. Returns the parsed value
256// if a valid integer is found; else returns deflt. It does not
257// check if str is entirely consumed.
258// The string passed in is treated as *10 based*.
259// This can handle strings with leading 0s.
260// See also: ParseLeadingUDec64Value
261// --------------------------------------------------------------------
262uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt);
263inline uint32 ParseLeadingUDec32Value(const string& str, uint32 deflt) {
264 return ParseLeadingUDec32Value(str.c_str(), deflt);
265}
266
267// ----------------------------------------------------------------------
268// ParseLeadingUInt64Value
269// ParseLeadingInt64Value
270// ParseLeadingHex64Value
271// ParseLeadingDec64Value
272// ParseLeadingUDec64Value
273// A simple parser for long long values.
274// Returns the parsed value if a
275// valid integer is found; else returns deflt
276// --------------------------------------------------------------------
277uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt);
278inline uint64 ParseLeadingUInt64Value(const string& str, uint64 deflt) {
279 return ParseLeadingUInt64Value(str.c_str(), deflt);
280}
281int64 ParseLeadingInt64Value(const char* str, int64 deflt);
282inline int64 ParseLeadingInt64Value(const string& str, int64 deflt) {
283 return ParseLeadingInt64Value(str.c_str(), deflt);
284}
285uint64 ParseLeadingHex64Value(const char* str, uint64 deflt);
286inline uint64 ParseLeadingHex64Value(const string& str, uint64 deflt) {
287 return ParseLeadingHex64Value(str.c_str(), deflt);
288}
289int64 ParseLeadingDec64Value(const char* str, int64 deflt);
290inline int64 ParseLeadingDec64Value(const string& str, int64 deflt) {
291 return ParseLeadingDec64Value(str.c_str(), deflt);
292}
293uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt);
294inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) {
295 return ParseLeadingUDec64Value(str.c_str(), deflt);
296}
297
298// -------------------------------------------------------------------------
299// DictionaryParse
300// This routine parses a common dictionary format (key and value separated
301// by ':', entries separated by commas). This format is used for many
302// complex commandline flags. It is also used to encode dictionaries for
303// exporting them or writing them to a checkpoint. Returns a vector of
304// <key, value> pairs. Returns true if there if no error in parsing, false
305// otherwise.
306// -------------------------------------------------------------------------
307bool DictionaryParse(const string& encoded_str,
308 vector<pair<string, string> >* items);
309
310#endif /* #ifndef STRINGS_STRUTIL_H_ */
311