1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3// Copyright (C) 2009-2013, International Business Machines
4// Corporation and others. All Rights Reserved.
5//
6// Copyright 2001 and onwards Google Inc.
7// Author: Sanjay Ghemawat
8
9// This code is a contribution of Google code, and the style used here is
10// a compromise between the original Google code and the ICU coding guidelines.
11// For example, data types are ICU-ified (size_t,int->int32_t),
12// and API comments doxygen-ified, but function names and behavior are
13// as in the original, if possible.
14// Assertion-style error handling, not available in ICU, was changed to
15// parameter "pinning" similar to UnicodeString.
16//
17// In addition, this is only a partial port of the original Google code,
18// limited to what was needed so far. The (nearly) complete original code
19// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
20// (see ICU ticket 6765, r25517).
21
22#ifndef __STRINGPIECE_H__
23#define __STRINGPIECE_H__
24
25/**
26 * \file
27 * \brief C++ API: StringPiece: Read-only byte string wrapper class.
28 */
29
30#include "unicode/utypes.h"
31
32#if U_SHOW_CPLUSPLUS_API
33
34#include <cstddef>
35#include <type_traits>
36
37#include "unicode/uobject.h"
38#include "unicode/std_string.h"
39
40// Arghh! I wish C++ literals were "string".
41
42U_NAMESPACE_BEGIN
43
44/**
45 * A string-like object that points to a sized piece of memory.
46 *
47 * We provide non-explicit singleton constructors so users can pass
48 * in a "const char*" or a "string" wherever a "StringPiece" is
49 * expected.
50 *
51 * Functions or methods may use StringPiece parameters to accept either a
52 * "const char*" or a "string" value that will be implicitly converted to a
53 * StringPiece.
54 *
55 * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
56 * conversions from "const char*" to "string" and back again.
57 *
58 * @stable ICU 4.2
59 */
60class U_COMMON_API StringPiece : public UMemory {
61 private:
62 const char* ptr_;
63 int32_t length_;
64
65 public:
66 /**
67 * Default constructor, creates an empty StringPiece.
68 * @stable ICU 4.2
69 */
70 StringPiece() : ptr_(nullptr), length_(0) { }
71
72 /**
73 * Constructs from a NUL-terminated const char * pointer.
74 * @param str a NUL-terminated const char * pointer
75 * @stable ICU 4.2
76 */
77 StringPiece(const char* str);
78#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
79 /**
80 * Constructs from a NUL-terminated const char8_t * pointer.
81 * @param str a NUL-terminated const char8_t * pointer
82 * @stable ICU 67
83 */
84 StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
85#endif
86 /**
87 * Constructs an empty StringPiece.
88 * Needed for type disambiguation from multiple other overloads.
89 * @param p nullptr
90 * @stable ICU 67
91 */
92 StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
93
94 /**
95 * Constructs from a std::string.
96 * @stable ICU 4.2
97 */
98 StringPiece(const std::string& str)
99 : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
100#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
101 /**
102 * Constructs from a std::u8string.
103 * @stable ICU 67
104 */
105 StringPiece(const std::u8string& str)
106 : ptr_(reinterpret_cast<const char*>(str.data())),
107 length_(static_cast<int32_t>(str.size())) { }
108#endif
109
110 /**
111 * Constructs from some other implementation of a string piece class, from any
112 * C++ record type that has these two methods:
113 *
114 * \code{.cpp}
115 *
116 * struct OtherStringPieceClass {
117 * const char* data(); // or const char8_t*
118 * size_t size();
119 * };
120 *
121 * \endcode
122 *
123 * The other string piece class will typically be std::string_view from C++17
124 * or absl::string_view from Abseil.
125 *
126 * Starting with C++20, data() may also return a const char8_t* pointer,
127 * as from std::u8string_view.
128 *
129 * @param str the other string piece
130 * @stable ICU 65
131 */
132 template <typename T,
133 typename = typename std::enable_if<
134 (std::is_same<decltype(T().data()), const char*>::value
135#if defined(__cpp_char8_t)
136 || std::is_same<decltype(T().data()), const char8_t*>::value
137#endif
138 ) &&
139 std::is_same<decltype(T().size()), size_t>::value>::type>
140 StringPiece(T str)
141 : ptr_(reinterpret_cast<const char*>(str.data())),
142 length_(static_cast<int32_t>(str.size())) {}
143
144 /**
145 * Constructs from a const char * pointer and a specified length.
146 * @param offset a const char * pointer (need not be terminated)
147 * @param len the length of the string; must be non-negative
148 * @stable ICU 4.2
149 */
150 StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
151#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
152 /**
153 * Constructs from a const char8_t * pointer and a specified length.
154 * @param str a const char8_t * pointer (need not be terminated)
155 * @param len the length of the string; must be non-negative
156 * @stable ICU 67
157 */
158 StringPiece(const char8_t* str, int32_t len) :
159 StringPiece(reinterpret_cast<const char*>(str), len) {}
160#endif
161
162 /**
163 * Substring of another StringPiece.
164 * @param x the other StringPiece
165 * @param pos start position in x; must be non-negative and <= x.length().
166 * @stable ICU 4.2
167 */
168 StringPiece(const StringPiece& x, int32_t pos);
169 /**
170 * Substring of another StringPiece.
171 * @param x the other StringPiece
172 * @param pos start position in x; must be non-negative and <= x.length().
173 * @param len length of the substring;
174 * must be non-negative and will be pinned to at most x.length() - pos.
175 * @stable ICU 4.2
176 */
177 StringPiece(const StringPiece& x, int32_t pos, int32_t len);
178
179 /**
180 * Returns the string pointer. May be nullptr if it is empty.
181 *
182 * data() may return a pointer to a buffer with embedded NULs, and the
183 * returned buffer may or may not be null terminated. Therefore it is
184 * typically a mistake to pass data() to a routine that expects a NUL
185 * terminated string.
186 * @return the string pointer
187 * @stable ICU 4.2
188 */
189 const char* data() const { return ptr_; }
190 /**
191 * Returns the string length. Same as length().
192 * @return the string length
193 * @stable ICU 4.2
194 */
195 int32_t size() const { return length_; }
196 /**
197 * Returns the string length. Same as size().
198 * @return the string length
199 * @stable ICU 4.2
200 */
201 int32_t length() const { return length_; }
202 /**
203 * Returns whether the string is empty.
204 * @return true if the string is empty
205 * @stable ICU 4.2
206 */
207 UBool empty() const { return length_ == 0; }
208
209 /**
210 * Sets to an empty string.
211 * @stable ICU 4.2
212 */
213 void clear() { ptr_ = nullptr; length_ = 0; }
214
215 /**
216 * Reset the stringpiece to refer to new data.
217 * @param xdata pointer the new string data. Need not be nul terminated.
218 * @param len the length of the new data
219 * @stable ICU 4.8
220 */
221 void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
222
223 /**
224 * Reset the stringpiece to refer to new data.
225 * @param str a pointer to a NUL-terminated string.
226 * @stable ICU 4.8
227 */
228 void set(const char* str);
229
230#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
231 /**
232 * Resets the stringpiece to refer to new data.
233 * @param xdata pointer the new string data. Need not be NUL-terminated.
234 * @param len the length of the new data
235 * @stable ICU 67
236 */
237 inline void set(const char8_t* xdata, int32_t len) {
238 set(reinterpret_cast<const char*>(xdata), len);
239 }
240
241 /**
242 * Resets the stringpiece to refer to new data.
243 * @param str a pointer to a NUL-terminated string.
244 * @stable ICU 67
245 */
246 inline void set(const char8_t* str) {
247 set(reinterpret_cast<const char*>(str));
248 }
249#endif
250
251 /**
252 * Removes the first n string units.
253 * @param n prefix length, must be non-negative and <=length()
254 * @stable ICU 4.2
255 */
256 void remove_prefix(int32_t n) {
257 if (n >= 0) {
258 if (n > length_) {
259 n = length_;
260 }
261 ptr_ += n;
262 length_ -= n;
263 }
264 }
265
266 /**
267 * Removes the last n string units.
268 * @param n suffix length, must be non-negative and <=length()
269 * @stable ICU 4.2
270 */
271 void remove_suffix(int32_t n) {
272 if (n >= 0) {
273 if (n <= length_) {
274 length_ -= n;
275 } else {
276 length_ = 0;
277 }
278 }
279 }
280
281 /**
282 * Searches the StringPiece for the given search string (needle);
283 * @param needle The string for which to search.
284 * @param offset Where to start searching within this string (haystack).
285 * @return The offset of needle in haystack, or -1 if not found.
286 * @stable ICU 67
287 */
288 int32_t find(StringPiece needle, int32_t offset);
289
290 /**
291 * Compares this StringPiece with the other StringPiece, with semantics
292 * similar to std::string::compare().
293 * @param other The string to compare to.
294 * @return below zero if this < other; above zero if this > other; 0 if this == other.
295 * @stable ICU 67
296 */
297 int32_t compare(StringPiece other);
298
299 /**
300 * Maximum integer, used as a default value for substring methods.
301 * @stable ICU 4.2
302 */
303 static const int32_t npos; // = 0x7fffffff;
304
305 /**
306 * Returns a substring of this StringPiece.
307 * @param pos start position; must be non-negative and <= length().
308 * @param len length of the substring;
309 * must be non-negative and will be pinned to at most length() - pos.
310 * @return the substring StringPiece
311 * @stable ICU 4.2
312 */
313 StringPiece substr(int32_t pos, int32_t len = npos) const {
314 return StringPiece(*this, pos, len);
315 }
316};
317
318/**
319 * Global operator == for StringPiece
320 * @param x The first StringPiece to compare.
321 * @param y The second StringPiece to compare.
322 * @return true if the string data is equal
323 * @stable ICU 4.8
324 */
325U_EXPORT UBool U_EXPORT2
326operator==(const StringPiece& x, const StringPiece& y);
327
328/**
329 * Global operator != for StringPiece
330 * @param x The first StringPiece to compare.
331 * @param y The second StringPiece to compare.
332 * @return true if the string data is not equal
333 * @stable ICU 4.8
334 */
335inline bool operator!=(const StringPiece& x, const StringPiece& y) {
336 return !(x == y);
337}
338
339U_NAMESPACE_END
340
341#endif /* U_SHOW_CPLUSPLUS_API */
342
343#endif // __STRINGPIECE_H__
344