1 | // Copyright 2008 and onwards Google, Inc. |
2 | // |
3 | // Functions for splitting and parsing strings. Functions may be migrated |
4 | // to this file from strutil.h in the future. |
5 | // |
6 | #ifndef STRINGS_SPLIT_H_ |
7 | #define STRINGS_SPLIT_H_ |
8 | |
9 | #include <string> |
10 | using std::string; |
11 | |
12 | #include <vector> |
13 | using std::vector; |
14 | |
15 | #include <set> |
16 | using std::set; |
17 | using std::multiset; |
18 | |
19 | #include <utility> |
20 | using std::pair; |
21 | using std::make_pair; |
22 | |
23 | #include <hash_map> |
24 | using __gnu_cxx::hash_map; |
25 | |
26 | #include <hash_set> |
27 | using __gnu_cxx::hash_set; |
28 | |
29 | |
30 | using namespace std; |
31 | using namespace __gnu_cxx; |
32 | |
33 | // ---------------------------------------------------------------------- |
34 | // SplitStringAllowEmpty() |
35 | // SplitStringToHashsetAllowEmpty() |
36 | // SplitStringToSetAllowEmpty() |
37 | // SplitStringToHashmapAllowEmpty() |
38 | |
39 | // Split a string using one or more character delimiters, presented |
40 | // as a nul-terminated c string. Append the components to 'result'. |
41 | // If there are consecutive delimiters, this function will return |
42 | // corresponding empty strings. |
43 | // |
44 | // If "full" is the empty string, yields an empty string as the only value. |
45 | // ---------------------------------------------------------------------- |
46 | void SplitStringAllowEmpty(const string& full, const char* delim, |
47 | vector<string>* res); |
48 | void SplitStringToHashsetAllowEmpty(const string& full, const char* delim, |
49 | hash_set<string>* res); |
50 | void SplitStringToSetAllowEmpty(const string& full, const char* delim, |
51 | set<string>* res); |
52 | // The even-positioned (0-based) components become the keys for the |
53 | // odd-positioned components that follow them. When there is an odd |
54 | // number of components, the value for the last key will be unchanged |
55 | // if the key was already present in the hash table, or will be the |
56 | // empty string if the key is a newly inserted key. |
57 | void SplitStringToHashmapAllowEmpty(const string& full, const char* delim, |
58 | hash_map<string, string>* result); |
59 | |
60 | // ---------------------------------------------------------------------- |
61 | // SplitStringUsing() |
62 | // SplitStringToHashsetUsing() |
63 | // SplitStringToSetUsing() |
64 | // SplitStringToHashmapUsing() |
65 | |
66 | // Split a string using one or more character delimiters, presented |
67 | // as a nul-terminated c string. Append the components to 'result'. |
68 | // If there are consecutive delimiters, this function skips over |
69 | // all of them. |
70 | // ---------------------------------------------------------------------- |
71 | void SplitStringUsing(const string& full, const char* delim, |
72 | vector<string>* res); |
73 | void SplitStringToHashsetUsing(const string& full, const char* delim, |
74 | hash_set<string>* res); |
75 | void SplitStringToSetUsing(const string& full, const char* delim, |
76 | set<string>* res); |
77 | // The even-positioned (0-based) components become the keys for the |
78 | // odd-positioned components that follow them. When there is an odd |
79 | // number of components, the value for the last key will be unchanged |
80 | // if the key was already present in the hash table, or will be the |
81 | // empty string if the key is a newly inserted key. |
82 | void SplitStringToHashmapUsing(const string& full, const char* delim, |
83 | hash_map<string, string>* result); |
84 | |
85 | // ---------------------------------------------------------------------- |
86 | // SplitOneIntToken() |
87 | // SplitOneInt32Token() |
88 | // SplitOneUint32Token() |
89 | // SplitOneInt64Token() |
90 | // SplitOneUint64Token() |
91 | // SplitOneDoubleToken() |
92 | // SplitOneFloatToken() |
93 | // Parse a single "delim" delimited number from "*source" into "*value". |
94 | // Modify *source to point after the delimiter. |
95 | // If no delimiter is present after the number, set *source to NULL. |
96 | // |
97 | // If the start of *source is not an number, return false. |
98 | // If the int is followed by the null character, return true. |
99 | // If the int is not followed by a character from delim, return false. |
100 | // If *source is NULL, return false. |
101 | // |
102 | // They cannot handle decimal numbers with leading 0s, since they will be |
103 | // treated as octal. |
104 | // ---------------------------------------------------------------------- |
105 | bool SplitOneIntToken(const char** source, const char* delim, |
106 | int* value); |
107 | bool SplitOneInt32Token(const char** source, const char* delim, |
108 | int32* value); |
109 | bool SplitOneUint32Token(const char** source, const char* delim, |
110 | uint32* value); |
111 | bool SplitOneInt64Token(const char** source, const char* delim, |
112 | int64* value); |
113 | bool SplitOneUint64Token(const char** source, const char* delim, |
114 | uint64* value); |
115 | bool SplitOneDoubleToken(const char** source, const char* delim, |
116 | double* value); |
117 | bool SplitOneFloatToken(const char** source, const char* delim, |
118 | float* value); |
119 | |
120 | // Some aliases, so that the function names are standardized against the names |
121 | // of the reflection setters/getters in proto2. This makes it easier to use |
122 | // certain macros with reflection when creating custom text formats for protos. |
123 | |
124 | inline bool SplitOneUInt32Token(const char** source, const char* delim, |
125 | uint32* value) { |
126 | return SplitOneUint32Token(source, delim, value); |
127 | } |
128 | |
129 | inline bool SplitOneUInt64Token(const char** source, const char* delim, |
130 | uint64* value) { |
131 | return SplitOneUint64Token(source, delim, value); |
132 | } |
133 | |
134 | // ---------------------------------------------------------------------- |
135 | // SplitOneDecimalIntToken() |
136 | // SplitOneDecimalInt32Token() |
137 | // SplitOneDecimalUint32Token() |
138 | // SplitOneDecimalInt64Token() |
139 | // SplitOneDecimalUint64Token() |
140 | // Parse a single "delim"-delimited number from "*source" into "*value". |
141 | // Unlike SplitOneIntToken, etc., this function always interprets |
142 | // the numbers as decimal. |
143 | bool SplitOneDecimalIntToken(const char** source, const char* delim, |
144 | int* value); |
145 | bool SplitOneDecimalInt32Token(const char** source, const char* delim, |
146 | int32* value); |
147 | bool SplitOneDecimalUint32Token(const char** source, const char* delim, |
148 | uint32* value); |
149 | bool SplitOneDecimalInt64Token(const char** source, const char* delim, |
150 | int64* value); |
151 | bool SplitOneDecimalUint64Token(const char** source, const char* delim, |
152 | uint64* value); |
153 | |
154 | // ---------------------------------------------------------------------- |
155 | // SplitOneHexUint32Token() |
156 | // SplitOneHexUint64Token() |
157 | // Once more, for hexadecimal numbers (unsigned only). |
158 | bool SplitOneHexUint32Token(const char** source, const char* delim, |
159 | uint32* value); |
160 | bool SplitOneHexUint64Token(const char** source, const char* delim, |
161 | uint64* value); |
162 | |
163 | |
164 | #endif // STRINGS_SPLIT_H_ |
165 | |