1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// A StringPiece points to part or all of a string, Cord, double-quoted string
32// literal, or other string-like object. A StringPiece does *not* own the
33// string to which it points. A StringPiece is not null-terminated.
34//
35// You can use StringPiece as a function or method parameter. A StringPiece
36// parameter can receive a double-quoted string literal argument, a "const
37// char*" argument, a string argument, or a StringPiece argument with no data
38// copying. Systematic use of StringPiece for arguments reduces data
39// copies and strlen() calls.
40//
41// Prefer passing StringPieces by value:
42// void MyFunction(StringPiece arg);
43// If circumstances require, you may also pass by const reference:
44// void MyFunction(const StringPiece& arg); // not preferred
45// Both of these have the same lifetime semantics. Passing by value
46// generates slightly smaller code. For more discussion, see the thread
47// go/stringpiecebyvalue on c-users.
48//
49// StringPiece is also suitable for local variables if you know that
50// the lifetime of the underlying object is longer than the lifetime
51// of your StringPiece variable.
52//
53// Beware of binding a StringPiece to a temporary:
54// StringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem
55//
56// This code is okay:
57// string str = obj.MethodReturningString(); // str owns its contents
58// StringPiece sp(str); // GOOD, because str outlives sp
59//
60// StringPiece is sometimes a poor choice for a return value and usually a poor
61// choice for a data member. If you do use a StringPiece this way, it is your
62// responsibility to ensure that the object pointed to by the StringPiece
63// outlives the StringPiece.
64//
65// A StringPiece may represent just part of a string; thus the name "Piece".
66// For example, when splitting a string, vector<StringPiece> is a natural data
67// type for the output. For another example, a Cord is a non-contiguous,
68// potentially very long string-like object. The Cord class has an interface
69// that iteratively provides StringPiece objects that point to the
70// successive pieces of a Cord object.
71//
72// A StringPiece is not null-terminated. If you write code that scans a
73// StringPiece, you must check its length before reading any characters.
74// Common idioms that work on null-terminated strings do not work on
75// StringPiece objects.
76//
77// There are several ways to create a null StringPiece:
78// StringPiece()
79// StringPiece(nullptr)
80// StringPiece(nullptr, 0)
81// For all of the above, sp.data() == nullptr, sp.length() == 0,
82// and sp.empty() == true. Also, if you create a StringPiece with
83// a non-null pointer then sp.data() != nullptr. Once created,
84// sp.data() will stay either nullptr or not-nullptr, except if you call
85// sp.clear() or sp.set().
86//
87// Thus, you can use StringPiece(nullptr) to signal an out-of-band value
88// that is different from other StringPiece values. This is similar
89// to the way that const char* p1 = nullptr; is different from
90// const char* p2 = "";.
91//
92// There are many ways to create an empty StringPiece:
93// StringPiece()
94// StringPiece(nullptr)
95// StringPiece(nullptr, 0)
96// StringPiece("")
97// StringPiece("", 0)
98// StringPiece("abcdef", 0)
99// StringPiece("abcdef"+6, 0)
100// For all of the above, sp.length() will be 0 and sp.empty() will be true.
101// For some empty StringPiece values, sp.data() will be nullptr.
102// For some empty StringPiece values, sp.data() will not be nullptr.
103//
104// Be careful not to confuse: null StringPiece and empty StringPiece.
105// The set of empty StringPieces properly includes the set of null StringPieces.
106// That is, every null StringPiece is an empty StringPiece,
// but some non-null StringPieces are empty StringPieces too.
108//
109// All empty StringPiece values compare equal to each other.
// Even a null StringPiece compares equal to a non-null empty StringPiece:
111// StringPiece() == StringPiece("", 0)
112// StringPiece(nullptr) == StringPiece("abc", 0)
113// StringPiece(nullptr, 0) == StringPiece("abcdef"+6, 0)
114//
115// Look carefully at this example:
116// StringPiece("") == nullptr
117// True or false? TRUE, because StringPiece::operator== converts
118// the right-hand side from nullptr to StringPiece(nullptr),
119// and then compares two zero-length spans of characters.
120// However, we are working to make this example produce a compile error.
121//
122// Suppose you want to write:
123// bool TestWhat?(StringPiece sp) { return sp == nullptr; } // BAD
124// Do not do that. Write one of these instead:
125// bool TestNull(StringPiece sp) { return sp.data() == nullptr; }
126// bool TestEmpty(StringPiece sp) { return sp.empty(); }
127// The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty?
// Right now, TestWhat? behaves like TestEmpty.
129// We are working to make TestWhat? produce a compile error.
130// TestNull is good to test for an out-of-band signal.
131// TestEmpty is good to test for an empty StringPiece.
132//
133// Caveats (again):
134// (1) The lifetime of the pointed-to string (or piece of a string)
135// must be longer than the lifetime of the StringPiece.
136// (2) There may or may not be a '\0' character after the end of
137// StringPiece data.
138// (3) A null StringPiece is empty.
139// An empty StringPiece may or may not be a null StringPiece.
140
141#ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
142#define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
143
144#include <assert.h>
145#include <stddef.h>
146#include <string.h>
147#include <iosfwd>
148#include <limits>
149#include <string>
150
151#if defined(__cpp_lib_string_view)
152#include <string_view>
153#endif
154
155#include <google/protobuf/stubs/hash.h>
156
157#include <google/protobuf/port_def.inc>
158
159namespace google {
160namespace protobuf {
161namespace stringpiece_internal {
162
163class PROTOBUF_EXPORT StringPiece {
164 public:
165 using traits_type = std::char_traits<char>;
166 using value_type = char;
167 using pointer = char*;
168 using const_pointer = const char*;
169 using reference = char&;
170 using const_reference = const char&;
171 using const_iterator = const char*;
172 using iterator = const_iterator;
173 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
174 using reverse_iterator = const_reverse_iterator;
175 using size_type = size_t;
176 using difference_type = std::ptrdiff_t;
177
178 private:
179 const char* ptr_;
180 size_type length_;
181
182 static constexpr size_type kMaxSize =
183 (std::numeric_limits<difference_type>::max)();
184
185 static size_type CheckSize(size_type size) {
186#if !defined(NDEBUG) || defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0
187 if (PROTOBUF_PREDICT_FALSE(size > kMaxSize)) {
188 // Some people grep for this message in logs
189 // so take care if you ever change it.
190 LogFatalSizeTooBig(size, "string length exceeds max size");
191 }
192#endif
193 return size;
194 }
195
196 // Out-of-line error path.
197 static void LogFatalSizeTooBig(size_type size, const char* details);
198
199 public:
200 // We provide non-explicit singleton constructors so users can pass
201 // in a "const char*" or a "string" wherever a "StringPiece" is
202 // expected.
203 //
204 // Style guide exception granted:
205 // http://goto/style-guide-exception-20978288
206 StringPiece() : ptr_(nullptr), length_(0) {}
207
208 StringPiece(const char* str) // NOLINT(runtime/explicit)
209 : ptr_(str), length_(0) {
210 if (str != nullptr) {
211 length_ = CheckSize(size: strlen(s: str));
212 }
213 }
214
215 template <class Allocator>
216 StringPiece( // NOLINT(runtime/explicit)
217 const std::basic_string<char, std::char_traits<char>, Allocator>& str)
218 : ptr_(str.data()), length_(0) {
219 length_ = CheckSize(size: str.size());
220 }
221
222#if defined(__cpp_lib_string_view)
223 StringPiece( // NOLINT(runtime/explicit)
224 std::string_view str)
225 : ptr_(str.data()), length_(0) {
226 length_ = CheckSize(str.size());
227 }
228#endif
229
230 StringPiece(const char* offset, size_type len)
231 : ptr_(offset), length_(CheckSize(size: len)) {}
232
233 // data() may return a pointer to a buffer with embedded NULs, and the
234 // returned buffer may or may not be null terminated. Therefore it is
235 // typically a mistake to pass data() to a routine that expects a NUL
236 // terminated string.
237 const_pointer data() const { return ptr_; }
238 size_type size() const { return length_; }
239 size_type length() const { return length_; }
240 bool empty() const { return length_ == 0; }
241
242 char operator[](size_type i) const {
243 assert(i < length_);
244 return ptr_[i];
245 }
246
247 void remove_prefix(size_type n) {
248 assert(length_ >= n);
249 ptr_ += n;
250 length_ -= n;
251 }
252
253 void remove_suffix(size_type n) {
254 assert(length_ >= n);
255 length_ -= n;
256 }
257
258 // returns {-1, 0, 1}
259 int compare(StringPiece x) const {
260 size_type min_size = length_ < x.length_ ? length_ : x.length_;
261 int r = memcmp(s1: ptr_, s2: x.ptr_, n: static_cast<size_t>(min_size));
262 if (r < 0) return -1;
263 if (r > 0) return 1;
264 if (length_ < x.length_) return -1;
265 if (length_ > x.length_) return 1;
266 return 0;
267 }
268
269 std::string as_string() const { return ToString(); }
270 // We also define ToString() here, since many other string-like
271 // interfaces name the routine that converts to a C++ string
272 // "ToString", and it's confusing to have the method that does that
273 // for a StringPiece be called "as_string()". We also leave the
274 // "as_string()" method defined here for existing code.
275 std::string ToString() const {
276 if (ptr_ == nullptr) return "";
277 return std::string(data(), static_cast<size_type>(size()));
278 }
279
280 explicit operator std::string() const { return ToString(); }
281
282 void CopyToString(std::string* target) const;
283 void AppendToString(std::string* target) const;
284
285 bool starts_with(StringPiece x) const {
286 return (length_ >= x.length_) &&
287 (memcmp(s1: ptr_, s2: x.ptr_, n: static_cast<size_t>(x.length_)) == 0);
288 }
289
290 bool ends_with(StringPiece x) const {
291 return ((length_ >= x.length_) &&
292 (memcmp(s1: ptr_ + (length_-x.length_), s2: x.ptr_,
293 n: static_cast<size_t>(x.length_)) == 0));
294 }
295
296 // Checks whether StringPiece starts with x and if so advances the beginning
297 // of it to past the match. It's basically a shortcut for starts_with
298 // followed by remove_prefix.
299 bool Consume(StringPiece x);
300 // Like above but for the end of the string.
301 bool ConsumeFromEnd(StringPiece x);
302
303 // standard STL container boilerplate
304 static const size_type npos;
305 const_iterator begin() const { return ptr_; }
306 const_iterator end() const { return ptr_ + length_; }
307 const_reverse_iterator rbegin() const {
308 return const_reverse_iterator(ptr_ + length_);
309 }
310 const_reverse_iterator rend() const {
311 return const_reverse_iterator(ptr_);
312 }
313 size_type max_size() const { return length_; }
314 size_type capacity() const { return length_; }
315
316 // cpplint.py emits a false positive [build/include_what_you_use]
317 size_type copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT
318
319 bool contains(StringPiece s) const;
320
321 size_type find(StringPiece s, size_type pos = 0) const;
322 size_type find(char c, size_type pos = 0) const;
323 size_type rfind(StringPiece s, size_type pos = npos) const;
324 size_type rfind(char c, size_type pos = npos) const;
325
326 size_type find_first_of(StringPiece s, size_type pos = 0) const;
327 size_type find_first_of(char c, size_type pos = 0) const {
328 return find(c, pos);
329 }
330 size_type find_first_not_of(StringPiece s, size_type pos = 0) const;
331 size_type find_first_not_of(char c, size_type pos = 0) const;
332 size_type find_last_of(StringPiece s, size_type pos = npos) const;
333 size_type find_last_of(char c, size_type pos = npos) const {
334 return rfind(c, pos);
335 }
336 size_type find_last_not_of(StringPiece s, size_type pos = npos) const;
337 size_type find_last_not_of(char c, size_type pos = npos) const;
338
339 StringPiece substr(size_type pos, size_type n = npos) const;
340};
341
342// This large function is defined inline so that in a fairly common case where
343// one of the arguments is a literal, the compiler can elide a lot of the
344// following comparisons.
345inline bool operator==(StringPiece x, StringPiece y) {
346 StringPiece::size_type len = x.size();
347 if (len != y.size()) {
348 return false;
349 }
350
351 return x.data() == y.data() || len <= 0 ||
352 memcmp(s1: x.data(), s2: y.data(), n: static_cast<size_t>(len)) == 0;
353}
354
355inline bool operator!=(StringPiece x, StringPiece y) {
356 return !(x == y);
357}
358
359inline bool operator<(StringPiece x, StringPiece y) {
360 const StringPiece::size_type min_size =
361 x.size() < y.size() ? x.size() : y.size();
362 const int r = memcmp(s1: x.data(), s2: y.data(), n: static_cast<size_t>(min_size));
363 return (r < 0) || (r == 0 && x.size() < y.size());
364}
365
366inline bool operator>(StringPiece x, StringPiece y) {
367 return y < x;
368}
369
370inline bool operator<=(StringPiece x, StringPiece y) {
371 return !(x > y);
372}
373
374inline bool operator>=(StringPiece x, StringPiece y) {
375 return !(x < y);
376}
377
378// allow StringPiece to be logged
379extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
380
381} // namespace stringpiece_internal
382
383using ::google::protobuf::stringpiece_internal::StringPiece;
384
385} // namespace protobuf
386} // namespace google
387
388GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START
389template<> struct hash<StringPiece> {
390 size_t operator()(const StringPiece& s) const {
391 size_t result = 0;
392 for (const char *str = s.data(), *end = str + s.size(); str < end; str++) {
393 result = 5 * result + static_cast<size_t>(*str);
394 }
395 return result;
396 }
397};
398GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END
399
400#include <google/protobuf/port_undef.inc>
401
#endif  // GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
403