1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | // Copyright (C) 2009-2013, International Business Machines |
4 | // Corporation and others. All Rights Reserved. |
5 | // |
6 | // Copyright 2001 and onwards Google Inc. |
7 | // Author: Sanjay Ghemawat |
8 | |
9 | // This code is a contribution of Google code, and the style used here is |
10 | // a compromise between the original Google code and the ICU coding guidelines. |
11 | // For example, data types are ICU-ified (size_t,int->int32_t), |
12 | // and API comments doxygen-ified, but function names and behavior are |
13 | // as in the original, if possible. |
14 | // Assertion-style error handling, not available in ICU, was changed to |
15 | // parameter "pinning" similar to UnicodeString. |
16 | // |
17 | // In addition, this is only a partial port of the original Google code, |
18 | // limited to what was needed so far. The (nearly) complete original code |
19 | // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib |
20 | // (see ICU ticket 6765, r25517). |
21 | |
22 | #ifndef __STRINGPIECE_H__ |
23 | #define __STRINGPIECE_H__ |
24 | |
25 | /** |
26 | * \file |
27 | * \brief C++ API: StringPiece: Read-only byte string wrapper class. |
28 | */ |
29 | |
30 | #include "unicode/utypes.h" |
31 | |
32 | #if U_SHOW_CPLUSPLUS_API |
33 | |
34 | #include <cstddef> |
35 | #include <type_traits> |
36 | |
37 | #include "unicode/uobject.h" |
38 | #include "unicode/std_string.h" |
39 | |
40 | // Arghh! I wish C++ literals were "string". |
41 | |
42 | U_NAMESPACE_BEGIN |
43 | |
44 | /** |
45 | * A string-like object that points to a sized piece of memory. |
46 | * |
47 | * We provide non-explicit singleton constructors so users can pass |
48 | * in a "const char*" or a "string" wherever a "StringPiece" is |
49 | * expected. |
50 | * |
51 | * Functions or methods may use StringPiece parameters to accept either a |
52 | * "const char*" or a "string" value that will be implicitly converted to a |
53 | * StringPiece. |
54 | * |
55 | * Systematic usage of StringPiece is encouraged as it will reduce unnecessary |
56 | * conversions from "const char*" to "string" and back again. |
57 | * |
58 | * @stable ICU 4.2 |
59 | */ |
60 | class U_COMMON_API StringPiece : public UMemory { |
61 | private: |
62 | const char* ptr_; |
63 | int32_t length_; |
64 | |
65 | public: |
66 | /** |
67 | * Default constructor, creates an empty StringPiece. |
68 | * @stable ICU 4.2 |
69 | */ |
70 | StringPiece() : ptr_(nullptr), length_(0) { } |
71 | |
72 | /** |
73 | * Constructs from a NUL-terminated const char * pointer. |
74 | * @param str a NUL-terminated const char * pointer |
75 | * @stable ICU 4.2 |
76 | */ |
77 | StringPiece(const char* str); |
78 | #ifndef U_HIDE_DRAFT_API |
79 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
80 | /** |
81 | * Constructs from a NUL-terminated const char8_t * pointer. |
82 | * @param str a NUL-terminated const char8_t * pointer |
83 | * @draft ICU 67 |
84 | */ |
85 | StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} |
86 | #endif |
87 | /** |
88 | * Constructs an empty StringPiece. |
89 | * Needed for type disambiguation from multiple other overloads. |
90 | * @param p nullptr |
91 | * @draft ICU 67 |
92 | */ |
93 | StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} |
94 | #endif // U_HIDE_DRAFT_API |
95 | |
96 | /** |
97 | * Constructs from a std::string. |
98 | * @stable ICU 4.2 |
99 | */ |
100 | StringPiece(const std::string& str) |
101 | : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } |
102 | #ifndef U_HIDE_DRAFT_API |
103 | #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) |
104 | /** |
105 | * Constructs from a std::u8string. |
106 | * @draft ICU 67 |
107 | */ |
108 | StringPiece(const std::u8string& str) |
109 | : ptr_(reinterpret_cast<const char*>(str.data())), |
110 | length_(static_cast<int32_t>(str.size())) { } |
111 | #endif |
112 | #endif // U_HIDE_DRAFT_API |
113 | |
114 | #ifndef U_HIDE_DRAFT_API |
115 | /** |
116 | * Constructs from some other implementation of a string piece class, from any |
117 | * C++ record type that has these two methods: |
118 | * |
119 | * \code{.cpp} |
120 | * |
121 | * struct OtherStringPieceClass { |
122 | * const char* data(); // or const char8_t* |
123 | * size_t size(); |
124 | * }; |
125 | * |
126 | * \endcode |
127 | * |
128 | * The other string piece class will typically be std::string_view from C++17 |
129 | * or absl::string_view from Abseil. |
130 | * |
131 | * Starting with C++20, data() may also return a const char8_t* pointer, |
132 | * as from std::u8string_view. |
133 | * |
134 | * @param str the other string piece |
135 | * @draft ICU 65 |
136 | */ |
137 | template <typename T, |
138 | typename = typename std::enable_if< |
139 | (std::is_same<decltype(T().data()), const char*>::value |
140 | #if defined(__cpp_char8_t) |
141 | || std::is_same<decltype(T().data()), const char8_t*>::value |
142 | #endif |
143 | ) && |
144 | std::is_same<decltype(T().size()), size_t>::value>::type> |
145 | StringPiece(T str) |
146 | : ptr_(reinterpret_cast<const char*>(str.data())), |
147 | length_(static_cast<int32_t>(str.size())) {} |
148 | #endif // U_HIDE_DRAFT_API |
149 | |
150 | /** |
151 | * Constructs from a const char * pointer and a specified length. |
152 | * @param offset a const char * pointer (need not be terminated) |
153 | * @param len the length of the string; must be non-negative |
154 | * @stable ICU 4.2 |
155 | */ |
156 | StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } |
157 | #ifndef U_HIDE_DRAFT_API |
158 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
159 | /** |
160 | * Constructs from a const char8_t * pointer and a specified length. |
161 | * @param str a const char8_t * pointer (need not be terminated) |
162 | * @param len the length of the string; must be non-negative |
163 | * @draft ICU 67 |
164 | */ |
165 | StringPiece(const char8_t* str, int32_t len) : |
166 | StringPiece(reinterpret_cast<const char*>(str), len) {} |
167 | #endif |
168 | #endif // U_HIDE_DRAFT_API |
169 | |
170 | /** |
171 | * Substring of another StringPiece. |
172 | * @param x the other StringPiece |
173 | * @param pos start position in x; must be non-negative and <= x.length(). |
174 | * @stable ICU 4.2 |
175 | */ |
176 | StringPiece(const StringPiece& x, int32_t pos); |
177 | /** |
178 | * Substring of another StringPiece. |
179 | * @param x the other StringPiece |
180 | * @param pos start position in x; must be non-negative and <= x.length(). |
181 | * @param len length of the substring; |
182 | * must be non-negative and will be pinned to at most x.length() - pos. |
183 | * @stable ICU 4.2 |
184 | */ |
185 | StringPiece(const StringPiece& x, int32_t pos, int32_t len); |
186 | |
187 | /** |
188 | * Returns the string pointer. May be nullptr if it is empty. |
189 | * |
190 | * data() may return a pointer to a buffer with embedded NULs, and the |
191 | * returned buffer may or may not be null terminated. Therefore it is |
192 | * typically a mistake to pass data() to a routine that expects a NUL |
193 | * terminated string. |
194 | * @return the string pointer |
195 | * @stable ICU 4.2 |
196 | */ |
197 | const char* data() const { return ptr_; } |
198 | /** |
199 | * Returns the string length. Same as length(). |
200 | * @return the string length |
201 | * @stable ICU 4.2 |
202 | */ |
203 | int32_t size() const { return length_; } |
204 | /** |
205 | * Returns the string length. Same as size(). |
206 | * @return the string length |
207 | * @stable ICU 4.2 |
208 | */ |
209 | int32_t length() const { return length_; } |
210 | /** |
211 | * Returns whether the string is empty. |
212 | * @return TRUE if the string is empty |
213 | * @stable ICU 4.2 |
214 | */ |
215 | UBool empty() const { return length_ == 0; } |
216 | |
217 | /** |
218 | * Sets to an empty string. |
219 | * @stable ICU 4.2 |
220 | */ |
221 | void clear() { ptr_ = nullptr; length_ = 0; } |
222 | |
223 | /** |
224 | * Reset the stringpiece to refer to new data. |
225 | * @param xdata pointer the new string data. Need not be nul terminated. |
226 | * @param len the length of the new data |
227 | * @stable ICU 4.8 |
228 | */ |
229 | void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } |
230 | |
231 | /** |
232 | * Reset the stringpiece to refer to new data. |
233 | * @param str a pointer to a NUL-terminated string. |
234 | * @stable ICU 4.8 |
235 | */ |
236 | void set(const char* str); |
237 | |
238 | #ifndef U_HIDE_DRAFT_API |
239 | #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) |
240 | /** |
241 | * Resets the stringpiece to refer to new data. |
242 | * @param xdata pointer the new string data. Need not be NUL-terminated. |
243 | * @param len the length of the new data |
244 | * @draft ICU 67 |
245 | */ |
246 | inline void set(const char8_t* xdata, int32_t len) { |
247 | set(reinterpret_cast<const char*>(xdata), len); |
248 | } |
249 | |
250 | /** |
251 | * Resets the stringpiece to refer to new data. |
252 | * @param str a pointer to a NUL-terminated string. |
253 | * @draft ICU 67 |
254 | */ |
255 | inline void set(const char8_t* str) { |
256 | set(reinterpret_cast<const char*>(str)); |
257 | } |
258 | #endif |
259 | #endif // U_HIDE_DRAFT_API |
260 | |
261 | /** |
262 | * Removes the first n string units. |
263 | * @param n prefix length, must be non-negative and <=length() |
264 | * @stable ICU 4.2 |
265 | */ |
266 | void remove_prefix(int32_t n) { |
267 | if (n >= 0) { |
268 | if (n > length_) { |
269 | n = length_; |
270 | } |
271 | ptr_ += n; |
272 | length_ -= n; |
273 | } |
274 | } |
275 | |
276 | /** |
277 | * Removes the last n string units. |
278 | * @param n suffix length, must be non-negative and <=length() |
279 | * @stable ICU 4.2 |
280 | */ |
281 | void remove_suffix(int32_t n) { |
282 | if (n >= 0) { |
283 | if (n <= length_) { |
284 | length_ -= n; |
285 | } else { |
286 | length_ = 0; |
287 | } |
288 | } |
289 | } |
290 | |
291 | #ifndef U_HIDE_DRAFT_API |
292 | /** |
293 | * Searches the StringPiece for the given search string (needle); |
294 | * @param needle The string for which to search. |
295 | * @param offset Where to start searching within this string (haystack). |
296 | * @return The offset of needle in haystack, or -1 if not found. |
297 | * @draft ICU 67 |
298 | */ |
299 | int32_t find(StringPiece needle, int32_t offset); |
300 | |
301 | /** |
302 | * Compares this StringPiece with the other StringPiece, with semantics |
303 | * similar to std::string::compare(). |
304 | * @param other The string to compare to. |
305 | * @return below zero if this < other; above zero if this > other; 0 if this == other. |
306 | * @draft ICU 67 |
307 | */ |
308 | int32_t compare(StringPiece other); |
309 | #endif // U_HIDE_DRAFT_API |
310 | |
311 | /** |
312 | * Maximum integer, used as a default value for substring methods. |
313 | * @stable ICU 4.2 |
314 | */ |
315 | static const int32_t npos; // = 0x7fffffff; |
316 | |
317 | /** |
318 | * Returns a substring of this StringPiece. |
319 | * @param pos start position; must be non-negative and <= length(). |
320 | * @param len length of the substring; |
321 | * must be non-negative and will be pinned to at most length() - pos. |
322 | * @return the substring StringPiece |
323 | * @stable ICU 4.2 |
324 | */ |
325 | StringPiece substr(int32_t pos, int32_t len = npos) const { |
326 | return StringPiece(*this, pos, len); |
327 | } |
328 | }; |
329 | |
/**
 * Global operator == for StringPiece.
 * Only declared in this header; the definition lives elsewhere.
 * @param x The first StringPiece to compare.
 * @param y The second StringPiece to compare.
 * @return TRUE if the string data is equal
 * @stable ICU 4.8
 */
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y);
339 | |
340 | /** |
341 | * Global operator != for StringPiece |
342 | * @param x The first StringPiece to compare. |
343 | * @param y The second StringPiece to compare. |
344 | * @return TRUE if the string data is not equal |
345 | * @stable ICU 4.8 |
346 | */ |
347 | inline UBool operator!=(const StringPiece& x, const StringPiece& y) { |
348 | return !(x == y); |
349 | } |
350 | |
351 | U_NAMESPACE_END |
352 | |
353 | #endif /* U_SHOW_CPLUSPLUS_API */ |
354 | |
355 | #endif // __STRINGPIECE_H__ |
356 | |