1//
2// Copyright 2017 The Abseil Authors.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// https://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// -----------------------------------------------------------------------------
17// File: string_view.h
18// -----------------------------------------------------------------------------
19//
20// This file contains the definition of the `absl::string_view` class. A
21// `string_view` points to a contiguous span of characters, often part or all of
22// another `std::string`, double-quoted string literal, character array, or even
23// another `string_view`.
24//
25// This `absl::string_view` abstraction is designed to be a drop-in
26// replacement for the C++17 `std::string_view` abstraction.
27#ifndef ABSL_STRINGS_STRING_VIEW_H_
28#define ABSL_STRINGS_STRING_VIEW_H_
29
30#include <algorithm>
31#include "absl/base/config.h"
32
33#ifdef ABSL_HAVE_STD_STRING_VIEW
34
35#include <string_view> // IWYU pragma: export
36
37namespace absl {
38using std::string_view;
39} // namespace absl
40
41#else // ABSL_HAVE_STD_STRING_VIEW
42
43#include <cassert>
44#include <cstddef>
45#include <cstring>
46#include <iosfwd>
47#include <iterator>
48#include <limits>
49#include <string>
50
51#include "absl/base/internal/throw_delegate.h"
52#include "absl/base/macros.h"
53#include "absl/base/port.h"
54
55namespace absl {
56
57// absl::string_view
58//
59// A `string_view` provides a lightweight view into the string data provided by
60// a `std::string`, double-quoted string literal, character array, or even
61// another `string_view`. A `string_view` does *not* own the string to which it
62// points, and that data cannot be modified through the view.
63//
64// You can use `string_view` as a function or method parameter anywhere a
65// parameter can receive a double-quoted string literal, `const char*`,
66// `std::string`, or another `absl::string_view` argument with no need to copy
67// the string data. Systematic use of `string_view` within function arguments
68// reduces data copies and `strlen()` calls.
69//
70// Because of its small size, prefer passing `string_view` by value:
71//
72// void MyFunction(absl::string_view arg);
73//
74// If circumstances require, you may also pass one by const reference:
75//
76// void MyFunction(const absl::string_view& arg); // not preferred
77//
78// Passing by value generates slightly smaller code for many architectures.
79//
80// In either case, the source data of the `string_view` must outlive the
81// `string_view` itself.
82//
83// A `string_view` is also suitable for local variables if you know that the
84// lifetime of the underlying object is longer than the lifetime of your
85// `string_view` variable. However, beware of binding a `string_view` to a
86// temporary value:
87//
88// // BAD use of string_view: lifetime problem
89// absl::string_view sv = obj.ReturnAString();
90//
91// // GOOD use of string_view: str outlives sv
92// std::string str = obj.ReturnAString();
93// absl::string_view sv = str;
94//
95// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
96// return value and usually a poor choice for a data member. If you do use a
97// `string_view` this way, it is your responsibility to ensure that the object
98// pointed to by the `string_view` outlives the `string_view`.
99//
100// A `string_view` may represent a whole string or just part of a string. For
101// example, when splitting a string, `std::vector<absl::string_view>` is a
102// natural data type for the output.
103//
104// When constructed from a source which is nul-terminated, the `string_view`
105// itself will not include the nul-terminator unless a specific size (including
106// the nul) is passed to the constructor. As a result, common idioms that work
107// on nul-terminated strings do not work on `string_view` objects. If you write
108// code that scans a `string_view`, you must check its length rather than test
109// for nul, for example. Note, however, that nuls may still be embedded within
110// a `string_view` explicitly.
111//
// You may create a null `string_view` in two ways:
//
//   absl::string_view sv;
//   absl::string_view sv(nullptr, 0);
116//
117// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
118// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
119// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
120// signal an undefined value that is different from other `string_view` values
121// in a similar fashion to how `const char* p1 = nullptr;` is different from
122// `const char* p2 = "";`. However, in practice, it is not recommended to rely
123// on this behavior.
124//
125// Be careful not to confuse a null `string_view` with an empty one. A null
126// `string_view` is an empty `string_view`, but some empty `string_view`s are
127// not null. Prefer checking for emptiness over checking for null.
128//
129// There are many ways to create an empty string_view:
130//
131// const char* nullcp = nullptr;
132// // string_view.size() will return 0 in all cases.
133// absl::string_view();
134// absl::string_view(nullcp, 0);
135// absl::string_view("");
136// absl::string_view("", 0);
137// absl::string_view("abcdef", 0);
138// absl::string_view("abcdef" + 6, 0);
139//
// All empty `string_view` objects, whether null or not, are equal:
//
//   absl::string_view() == absl::string_view("", 0)
//   absl::string_view(nullptr, 0) == absl::string_view("abcdef" + 6, 0)
144class string_view {
145 public:
146 using traits_type = std::char_traits<char>;
147 using value_type = char;
148 using pointer = char*;
149 using const_pointer = const char*;
150 using reference = char&;
151 using const_reference = const char&;
152 using const_iterator = const char*;
153 using iterator = const_iterator;
154 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
155 using reverse_iterator = const_reverse_iterator;
156 using size_type = size_t;
157 using difference_type = std::ptrdiff_t;
158
159 static constexpr size_type npos = static_cast<size_type>(-1);
160
161 // Null `string_view` constructor
162 constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
163
164 // Implicit constructors
165
166 template <typename Allocator>
167 string_view( // NOLINT(runtime/explicit)
168 const std::basic_string<char, std::char_traits<char>, Allocator>&
169 str) noexcept
170 : ptr_(str.data()), length_(CheckLengthInternal(str.size())) {}
171
172 // Implicit constructor of a `string_view` from nul-terminated `str`. When
173 // accepting possibly null strings, use `absl::NullSafeStringView(str)`
174 // instead (see below).
175#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \
176 (defined(__GNUC__) && !defined(__clang__))
177 // GCC has __builtin_strlen according to
178 // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but
179 // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above.
180 // __builtin_strlen is constexpr.
181 constexpr string_view(const char* str) // NOLINT(runtime/explicit)
182 : ptr_(str),
183 length_(CheckLengthInternal(str ? __builtin_strlen(str) : 0)) {}
184#else
185 constexpr string_view(const char* str) // NOLINT(runtime/explicit)
186 : ptr_(str), length_(CheckLengthInternal(str ? strlen(str) : 0)) {}
187#endif
188
189 // Implicit constructor of a `string_view` from a `const char*` and length.
190 constexpr string_view(const char* data, size_type len)
191 : ptr_(data), length_(CheckLengthInternal(len)) {}
192
193 // NOTE: Harmlessly omitted to work around gdb bug.
194 // constexpr string_view(const string_view&) noexcept = default;
195 // string_view& operator=(const string_view&) noexcept = default;
196
197 // Iterators
198
199 // string_view::begin()
200 //
201 // Returns an iterator pointing to the first character at the beginning of the
202 // `string_view`, or `end()` if the `string_view` is empty.
203 constexpr const_iterator begin() const noexcept { return ptr_; }
204
205 // string_view::end()
206 //
207 // Returns an iterator pointing just beyond the last character at the end of
208 // the `string_view`. This iterator acts as a placeholder; attempting to
209 // access it results in undefined behavior.
210 constexpr const_iterator end() const noexcept { return ptr_ + length_; }
211
212 // string_view::cbegin()
213 //
214 // Returns a const iterator pointing to the first character at the beginning
215 // of the `string_view`, or `end()` if the `string_view` is empty.
216 constexpr const_iterator cbegin() const noexcept { return begin(); }
217
218 // string_view::cend()
219 //
220 // Returns a const iterator pointing just beyond the last character at the end
221 // of the `string_view`. This pointer acts as a placeholder; attempting to
222 // access its element results in undefined behavior.
223 constexpr const_iterator cend() const noexcept { return end(); }
224
225 // string_view::rbegin()
226 //
227 // Returns a reverse iterator pointing to the last character at the end of the
228 // `string_view`, or `rend()` if the `string_view` is empty.
229 const_reverse_iterator rbegin() const noexcept {
230 return const_reverse_iterator(end());
231 }
232
233 // string_view::rend()
234 //
235 // Returns a reverse iterator pointing just before the first character at the
236 // beginning of the `string_view`. This pointer acts as a placeholder;
237 // attempting to access its element results in undefined behavior.
238 const_reverse_iterator rend() const noexcept {
239 return const_reverse_iterator(begin());
240 }
241
242 // string_view::crbegin()
243 //
244 // Returns a const reverse iterator pointing to the last character at the end
245 // of the `string_view`, or `crend()` if the `string_view` is empty.
246 const_reverse_iterator crbegin() const noexcept { return rbegin(); }
247
248 // string_view::crend()
249 //
250 // Returns a const reverse iterator pointing just before the first character
251 // at the beginning of the `string_view`. This pointer acts as a placeholder;
252 // attempting to access its element results in undefined behavior.
253 const_reverse_iterator crend() const noexcept { return rend(); }
254
255 // Capacity Utilities
256
257 // string_view::size()
258 //
259 // Returns the number of characters in the `string_view`.
260 constexpr size_type size() const noexcept {
261 return length_;
262 }
263
264 // string_view::length()
265 //
266 // Returns the number of characters in the `string_view`. Alias for `size()`.
267 constexpr size_type length() const noexcept { return size(); }
268
269 // string_view::max_size()
270 //
271 // Returns the maximum number of characters the `string_view` can hold.
272 constexpr size_type max_size() const noexcept { return kMaxSize; }
273
274 // string_view::empty()
275 //
276 // Checks if the `string_view` is empty (refers to no characters).
277 constexpr bool empty() const noexcept { return length_ == 0; }
278
279 // string_view::operator[]
280 //
281 // Returns the ith element of an `string_view` using the array operator.
282 // Note that this operator does not perform any bounds checking.
283 constexpr const_reference operator[](size_type i) const { return ptr_[i]; }
284
285 // string_view::front()
286 //
287 // Returns the first element of a `string_view`.
288 constexpr const_reference front() const { return ptr_[0]; }
289
290 // string_view::back()
291 //
292 // Returns the last element of a `string_view`.
293 constexpr const_reference back() const { return ptr_[size() - 1]; }
294
295 // string_view::data()
296 //
297 // Returns a pointer to the underlying character array (which is of course
298 // stored elsewhere). Note that `string_view::data()` may contain embedded nul
299 // characters, but the returned buffer may or may not be nul-terminated;
300 // therefore, do not pass `data()` to a routine that expects a nul-terminated
301 // std::string.
302 constexpr const_pointer data() const noexcept { return ptr_; }
303
304 // Modifiers
305
306 // string_view::remove_prefix()
307 //
308 // Removes the first `n` characters from the `string_view`. Note that the
309 // underlying std::string is not changed, only the view.
310 void remove_prefix(size_type n) {
311 assert(n <= length_);
312 ptr_ += n;
313 length_ -= n;
314 }
315
316 // string_view::remove_suffix()
317 //
318 // Removes the last `n` characters from the `string_view`. Note that the
319 // underlying std::string is not changed, only the view.
320 void remove_suffix(size_type n) {
321 assert(n <= length_);
322 length_ -= n;
323 }
324
325 // string_view::swap()
326 //
327 // Swaps this `string_view` with another `string_view`.
328 void swap(string_view& s) noexcept {
329 auto t = *this;
330 *this = s;
331 s = t;
332 }
333
334 // Explicit conversion operators
335
336 // Converts to `std::basic_string`.
337 template <typename A>
338 explicit operator std::basic_string<char, traits_type, A>() const {
339 if (!data()) return {};
340 return std::basic_string<char, traits_type, A>(data(), size());
341 }
342
343 // string_view::copy()
344 //
345 // Copies the contents of the `string_view` at offset `pos` and length `n`
346 // into `buf`.
347 size_type copy(char* buf, size_type n, size_type pos = 0) const;
348
349 // string_view::substr()
350 //
351 // Returns a "substring" of the `string_view` (at offset `pos` and length
352 // `n`) as another string_view. This function throws `std::out_of_bounds` if
353 // `pos > size`.
354 string_view substr(size_type pos, size_type n = npos) const {
355 if (ABSL_PREDICT_FALSE(pos > length_))
356 base_internal::ThrowStdOutOfRange("absl::string_view::substr");
357 n = (std::min)(n, length_ - pos);
358 return string_view(ptr_ + pos, n);
359 }
360
361 // string_view::compare()
362 //
363 // Performs a lexicographical comparison between the `string_view` and
364 // another `absl::string_view`, returning -1 if `this` is less than, 0 if
365 // `this` is equal to, and 1 if `this` is greater than the passed std::string
366 // view. Note that in the case of data equality, a further comparison is made
367 // on the respective sizes of the two `string_view`s to determine which is
368 // smaller, equal, or greater.
369 int compare(string_view x) const noexcept {
370 auto min_length = (std::min)(length_, x.length_);
371 if (min_length > 0) {
372 int r = memcmp(ptr_, x.ptr_, min_length);
373 if (r < 0) return -1;
374 if (r > 0) return 1;
375 }
376 if (length_ < x.length_) return -1;
377 if (length_ > x.length_) return 1;
378 return 0;
379 }
380
381 // Overload of `string_view::compare()` for comparing a substring of the
382 // 'string_view` and another `absl::string_view`.
383 int compare(size_type pos1, size_type count1, string_view v) const {
384 return substr(pos1, count1).compare(v);
385 }
386
387 // Overload of `string_view::compare()` for comparing a substring of the
388 // `string_view` and a substring of another `absl::string_view`.
389 int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
390 size_type count2) const {
391 return substr(pos1, count1).compare(v.substr(pos2, count2));
392 }
393
394 // Overload of `string_view::compare()` for comparing a `string_view` and a
395 // a different C-style std::string `s`.
396 int compare(const char* s) const { return compare(string_view(s)); }
397
398 // Overload of `string_view::compare()` for comparing a substring of the
399 // `string_view` and a different std::string C-style std::string `s`.
400 int compare(size_type pos1, size_type count1, const char* s) const {
401 return substr(pos1, count1).compare(string_view(s));
402 }
403
404 // Overload of `string_view::compare()` for comparing a substring of the
405 // `string_view` and a substring of a different C-style std::string `s`.
406 int compare(size_type pos1, size_type count1, const char* s,
407 size_type count2) const {
408 return substr(pos1, count1).compare(string_view(s, count2));
409 }
410
411 // Find Utilities
412
413 // string_view::find()
414 //
415 // Finds the first occurrence of the substring `s` within the `string_view`,
416 // returning the position of the first character's match, or `npos` if no
417 // match was found.
418 size_type find(string_view s, size_type pos = 0) const noexcept;
419
420 // Overload of `string_view::find()` for finding the given character `c`
421 // within the `string_view`.
422 size_type find(char c, size_type pos = 0) const noexcept;
423
424 // string_view::rfind()
425 //
426 // Finds the last occurrence of a substring `s` within the `string_view`,
427 // returning the position of the first character's match, or `npos` if no
428 // match was found.
429 size_type rfind(string_view s, size_type pos = npos) const
430 noexcept;
431
432 // Overload of `string_view::rfind()` for finding the last given character `c`
433 // within the `string_view`.
434 size_type rfind(char c, size_type pos = npos) const noexcept;
435
436 // string_view::find_first_of()
437 //
438 // Finds the first occurrence of any of the characters in `s` within the
439 // `string_view`, returning the start position of the match, or `npos` if no
440 // match was found.
441 size_type find_first_of(string_view s, size_type pos = 0) const
442 noexcept;
443
444 // Overload of `string_view::find_first_of()` for finding a character `c`
445 // within the `string_view`.
446 size_type find_first_of(char c, size_type pos = 0) const
447 noexcept {
448 return find(c, pos);
449 }
450
451 // string_view::find_last_of()
452 //
453 // Finds the last occurrence of any of the characters in `s` within the
454 // `string_view`, returning the start position of the match, or `npos` if no
455 // match was found.
456 size_type find_last_of(string_view s, size_type pos = npos) const
457 noexcept;
458
459 // Overload of `string_view::find_last_of()` for finding a character `c`
460 // within the `string_view`.
461 size_type find_last_of(char c, size_type pos = npos) const
462 noexcept {
463 return rfind(c, pos);
464 }
465
466 // string_view::find_first_not_of()
467 //
468 // Finds the first occurrence of any of the characters not in `s` within the
469 // `string_view`, returning the start position of the first non-match, or
470 // `npos` if no non-match was found.
471 size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
472
473 // Overload of `string_view::find_first_not_of()` for finding a character
474 // that is not `c` within the `string_view`.
475 size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
476
477 // string_view::find_last_not_of()
478 //
479 // Finds the last occurrence of any of the characters not in `s` within the
480 // `string_view`, returning the start position of the last non-match, or
481 // `npos` if no non-match was found.
482 size_type find_last_not_of(string_view s,
483 size_type pos = npos) const noexcept;
484
485 // Overload of `string_view::find_last_not_of()` for finding a character
486 // that is not `c` within the `string_view`.
487 size_type find_last_not_of(char c, size_type pos = npos) const
488 noexcept;
489
490 private:
491 static constexpr size_type kMaxSize =
492 (std::numeric_limits<difference_type>::max)();
493
494 static constexpr size_type CheckLengthInternal(size_type len) {
495 return ABSL_ASSERT(len <= kMaxSize), len;
496 }
497
498 const char* ptr_;
499 size_type length_;
500};
501
502// This large function is defined inline so that in a fairly common case where
503// one of the arguments is a literal, the compiler can elide a lot of the
504// following comparisons.
505inline bool operator==(string_view x, string_view y) noexcept {
506 auto len = x.size();
507 if (len != y.size()) {
508 return false;
509 }
510
511 return x.data() == y.data() || len <= 0 ||
512 memcmp(x.data(), y.data(), len) == 0;
513}
514
515inline bool operator!=(string_view x, string_view y) noexcept {
516 return !(x == y);
517}
518
519inline bool operator<(string_view x, string_view y) noexcept {
520 auto min_size = (std::min)(x.size(), y.size());
521 const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
522 return (r < 0) || (r == 0 && x.size() < y.size());
523}
524
525inline bool operator>(string_view x, string_view y) noexcept { return y < x; }
526
527inline bool operator<=(string_view x, string_view y) noexcept {
528 return !(y < x);
529}
530
531inline bool operator>=(string_view x, string_view y) noexcept {
532 return !(x < y);
533}
534
535// IO Insertion Operator
536std::ostream& operator<<(std::ostream& o, string_view piece);
537
538} // namespace absl
539
540#endif // ABSL_HAVE_STD_STRING_VIEW
541
542namespace absl {
543
544// ClippedSubstr()
545//
546// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`.
547// Provided because std::string_view::substr throws if `pos > size()`
548inline string_view ClippedSubstr(string_view s, size_t pos,
549 size_t n = string_view::npos) {
550 pos = (std::min)(pos, static_cast<size_t>(s.size()));
551 return s.substr(pos, n);
552}
553
554// NullSafeStringView()
555//
556// Creates an `absl::string_view` from a pointer `p` even if it's null-valued.
557// This function should be used where an `absl::string_view` can be created from
558// a possibly-null pointer.
559inline string_view NullSafeStringView(const char* p) {
560 return p ? string_view(p) : string_view();
561}
562
563} // namespace absl
564
565#endif // ABSL_STRINGS_STRING_VIEW_H_
566