1// Copyright 2008 and onwards Google, Inc.
2//
3// Functions for splitting and parsing strings. Functions may be migrated
4// to this file from strutil.h in the future.
5//
6#ifndef STRINGS_SPLIT_H_
7#define STRINGS_SPLIT_H_
8
9#include <string>
10using std::string;
11
12#include <vector>
13using std::vector;
14
15#include <set>
16using std::set;
17using std::multiset;
18
19#include <utility>
20using std::pair;
21using std::make_pair;
22
23#include <hash_map>
24using __gnu_cxx::hash_map;
25
26#include <hash_set>
27using __gnu_cxx::hash_set;
28
29
30using namespace std;
31using namespace __gnu_cxx;
32
33// ----------------------------------------------------------------------
34// SplitStringAllowEmpty()
35// SplitStringToHashsetAllowEmpty()
36// SplitStringToSetAllowEmpty()
37// SplitStringToHashmapAllowEmpty()
38
39// Split a string using one or more character delimiters, presented
40// as a nul-terminated c string. Append the components to the output
// container (named 'res' or 'result' in the declarations below).
41// If there are consecutive delimiters, this function will return
42// corresponding empty strings.
43//
44// If "full" is the empty string, yields an empty string as the only value.
45// ----------------------------------------------------------------------
// Splits "full" at every occurrence of any character in "delim" (a
// NUL-terminated list of delimiter characters) and appends the pieces to
// *res. Consecutive delimiters produce corresponding empty strings, and an
// empty "full" yields a single empty string.
46void SplitStringAllowEmpty(const string& full, const char* delim,
47 vector<string>* res);
// hash_set flavour of SplitStringAllowEmpty(): same splitting rules, with the
// components stored in *res.
// NOTE(review): repeated components presumably collapse into a single
// hash_set entry -- confirm against the implementation.
48void SplitStringToHashsetAllowEmpty(const string& full, const char* delim,
49 hash_set<string>* res);
// std::set flavour of SplitStringAllowEmpty(): same splitting rules, with the
// components stored in *res.
// NOTE(review): repeated components presumably collapse into a single set
// entry -- confirm against the implementation.
50void SplitStringToSetAllowEmpty(const string& full, const char* delim,
51 set<string>* res);
52// The even-positioned (0-based) components become the keys for the
53// odd-positioned components that follow them. When there is an odd
54// number of components, the value for the last key will be unchanged
55// if the key was already present in the hash table, or will be the
56// empty string if the key is a newly inserted key.
// Parameters: "full" is the text to split, "delim" the NUL-terminated list of
// delimiter characters, and "result" the map that receives the key/value
// pairs.
// NOTE(review): as an AllowEmpty variant this presumably keeps empty
// components, so empty keys or values are possible -- confirm.
57void SplitStringToHashmapAllowEmpty(const string& full, const char* delim,
58 hash_map<string, string>* result);
59
60// ----------------------------------------------------------------------
61// SplitStringUsing()
62// SplitStringToHashsetUsing()
63// SplitStringToSetUsing()
64// SplitStringToHashmapUsing()
65
66// Split a string using one or more character delimiters, presented
67// as a nul-terminated c string. Append the components to the output
// container (named 'res' or 'result' in the declarations below).
68// If there are consecutive delimiters, this function skips over
69// all of them.
70// ----------------------------------------------------------------------
// Splits "full" at any character in "delim" (a NUL-terminated list of
// delimiter characters) and appends the pieces to *res. A run of consecutive
// delimiters is skipped over as a whole.
71void SplitStringUsing(const string& full, const char* delim,
72 vector<string>* res);
// hash_set flavour of SplitStringUsing(): same splitting rules (runs of
// consecutive delimiters are skipped), with the components stored in *res.
73void SplitStringToHashsetUsing(const string& full, const char* delim,
74 hash_set<string>* res);
// std::set flavour of SplitStringUsing(): same splitting rules (runs of
// consecutive delimiters are skipped), with the components stored in *res.
75void SplitStringToSetUsing(const string& full, const char* delim,
76 set<string>* res);
77// The even-positioned (0-based) components become the keys for the
78// odd-positioned components that follow them. When there is an odd
79// number of components, the value for the last key will be unchanged
80// if the key was already present in the hash table, or will be the
81// empty string if the key is a newly inserted key.
// Parameters: "full" is the text to split, "delim" the NUL-terminated list of
// delimiter characters, and "result" the map that receives the key/value
// pairs. Runs of consecutive delimiters are skipped, as in SplitStringUsing().
82void SplitStringToHashmapUsing(const string& full, const char* delim,
83 hash_map<string, string>* result);
84
85// ----------------------------------------------------------------------
86// SplitOneIntToken()
87// SplitOneInt32Token()
88// SplitOneUint32Token()
89// SplitOneInt64Token()
90// SplitOneUint64Token()
91// SplitOneDoubleToken()
92// SplitOneFloatToken()
93// Parse a single "delim" delimited number from "*source" into "*value".
94// Modify *source to point after the delimiter.
95// If no delimiter is present after the number, set *source to NULL.
96//
97// If the start of *source is not a number, return false.
98// If the number is followed by the null character, return true.
99// If the number is not followed by a character from delim, return false.
100// If *source is NULL, return false.
101//
102// They cannot handle decimal numbers with leading 0s, since they will be
103// treated as octal.
104// ----------------------------------------------------------------------
// Parses one delimited int from *source into *value and moves *source past
// the delimiter, or sets *source to NULL when no delimiter follows the number.
// Returns false when *source is NULL, when it does not start with a number,
// or when the number is followed by a character that is neither NUL nor in
// "delim". A decimal number written with a leading 0 is treated as octal.
105bool SplitOneIntToken(const char** source, const char* delim,
106 int* value);
// int32 flavour of SplitOneIntToken(): same parsing and return contract, with
// the result stored in an int32.
// NOTE(review): int32 (like uint32/int64/uint64 below) is not declared in
// this header; presumably a project integer-types header must be included
// first -- confirm.
107bool SplitOneInt32Token(const char** source, const char* delim,
108 int32* value);
// uint32 flavour of SplitOneIntToken(): same parsing and return contract,
// with the result stored in a uint32.
109bool SplitOneUint32Token(const char** source, const char* delim,
110 uint32* value);
// int64 flavour of SplitOneIntToken(): same parsing and return contract, with
// the result stored in an int64.
111bool SplitOneInt64Token(const char** source, const char* delim,
112 int64* value);
// uint64 flavour of SplitOneIntToken(): same parsing and return contract,
// with the result stored in a uint64.
113bool SplitOneUint64Token(const char** source, const char* delim,
114 uint64* value);
// double flavour of SplitOneIntToken(): parses one delimited number from
// *source into *value with the same *source update and return contract.
// NOTE(review): the octal caveat in the section header is worded for
// integers; how leading zeros affect floating-point input is not stated
// here -- check the implementation.
115bool SplitOneDoubleToken(const char** source, const char* delim,
116 double* value);
// float flavour of SplitOneIntToken(): parses one delimited number from
// *source into *value with the same *source update and return contract.
// NOTE(review): the octal caveat in the section header is worded for
// integers; how leading zeros affect floating-point input is not stated
// here -- check the implementation.
117bool SplitOneFloatToken(const char** source, const char* delim,
118 float* value);
119
120// Some aliases, so that the function names are standardized against the names
121// of the reflection setters/getters in proto2. This makes it easier to use
122// certain macros with reflection when creating custom text formats for protos.
123
// Alias for SplitOneUint32Token() spelled with the "UInt32" capitalization, so
// that the name lines up with the proto2 reflection setter/getter naming.
// Forwards "source", "delim" and "value" unchanged and returns the callee's
// result.
124inline bool SplitOneUInt32Token(const char** source, const char* delim,
125 uint32* value) {
126 return SplitOneUint32Token(source, delim, value);
127}
128
// Alias for SplitOneUint64Token() spelled with the "UInt64" capitalization, so
// that the name lines up with the proto2 reflection setter/getter naming.
// Forwards "source", "delim" and "value" unchanged and returns the callee's
// result.
129inline bool SplitOneUInt64Token(const char** source, const char* delim,
130 uint64* value) {
131 return SplitOneUint64Token(source, delim, value);
132}
133
134// ----------------------------------------------------------------------
135// SplitOneDecimalIntToken()
136// SplitOneDecimalInt32Token()
137// SplitOneDecimalUint32Token()
138// SplitOneDecimalInt64Token()
139// SplitOneDecimalUint64Token()
140// Parse a single "delim"-delimited number from "*source" into "*value".
141// Unlike SplitOneIntToken, etc., this function always interprets
142// the numbers as decimal.
// Parses one "delim"-delimited number from *source into *value, always
// interpreting the digits as decimal (unlike SplitOneIntToken()).
// NOTE(review): the *source update and return-value rules are presumably the
// same as for SplitOneIntToken() -- confirm against the implementation.
143bool SplitOneDecimalIntToken(const char** source, const char* delim,
144 int* value);
// int32 flavour of SplitOneDecimalIntToken(): the number is always read as
// decimal and stored in an int32.
145bool SplitOneDecimalInt32Token(const char** source, const char* delim,
146 int32* value);
// uint32 flavour of SplitOneDecimalIntToken(): the number is always read as
// decimal and stored in a uint32.
147bool SplitOneDecimalUint32Token(const char** source, const char* delim,
148 uint32* value);
// int64 flavour of SplitOneDecimalIntToken(): the number is always read as
// decimal and stored in an int64.
149bool SplitOneDecimalInt64Token(const char** source, const char* delim,
150 int64* value);
// uint64 flavour of SplitOneDecimalIntToken(): the number is always read as
// decimal and stored in a uint64.
151bool SplitOneDecimalUint64Token(const char** source, const char* delim,
152 uint64* value);
153
154// ----------------------------------------------------------------------
155// SplitOneHexUint32Token()
156// SplitOneHexUint64Token()
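157// Parse a single "delim"-delimited number from "*source" into "*value",
// as the SplitOne*Token functions above do, but always interpreting the
// number as hexadecimal (unsigned only).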
// Parses one "delim"-delimited hexadecimal number from *source into the
// uint32 *value.
// NOTE(review): whether a leading "0x" is accepted is not stated in this
// header -- check the implementation.
158bool SplitOneHexUint32Token(const char** source, const char* delim,
159 uint32* value);
// Parses one "delim"-delimited hexadecimal number from *source into the
// uint64 *value.
// NOTE(review): whether a leading "0x" is accepted is not stated in this
// header -- check the implementation.
160bool SplitOneHexUint64Token(const char** source, const char* delim,
161 uint64* value);
162
163
164#endif // STRINGS_SPLIT_H_
165