1 | // |
2 | // Copyright 2017 The Abseil Authors. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // https://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | // |
16 | // ----------------------------------------------------------------------------- |
17 | // File: string_view.h |
18 | // ----------------------------------------------------------------------------- |
19 | // |
20 | // This file contains the definition of the `absl::string_view` class. A |
21 | // `string_view` points to a contiguous span of characters, often part or all of |
22 | // another `std::string`, double-quoted string literal, character array, or even |
23 | // another `string_view`. |
24 | // |
25 | // This `absl::string_view` abstraction is designed to be a drop-in |
26 | // replacement for the C++17 `std::string_view` abstraction. |
27 | #ifndef ABSL_STRINGS_STRING_VIEW_H_ |
28 | #define ABSL_STRINGS_STRING_VIEW_H_ |
29 | |
30 | #include <algorithm> |
31 | #include "absl/base/config.h" |
32 | |
33 | #ifdef ABSL_HAVE_STD_STRING_VIEW |
34 | |
35 | #include <string_view> // IWYU pragma: export |
36 | |
// This branch is compiled only when `ABSL_HAVE_STD_STRING_VIEW` is defined. In
// that configuration `absl::string_view` is not a separate class: the name is
// made visible in namespace `absl` by a using-declaration for
// `std::string_view`.
namespace absl {
using std::string_view;
} // namespace absl
40 | |
41 | #else // ABSL_HAVE_STD_STRING_VIEW |
42 | |
43 | #include <cassert> |
44 | #include <cstddef> |
45 | #include <cstring> |
46 | #include <iosfwd> |
47 | #include <iterator> |
48 | #include <limits> |
49 | #include <string> |
50 | |
51 | #include "absl/base/internal/throw_delegate.h" |
52 | #include "absl/base/macros.h" |
53 | #include "absl/base/port.h" |
54 | |
55 | namespace absl { |
56 | |
57 | // absl::string_view |
58 | // |
59 | // A `string_view` provides a lightweight view into the string data provided by |
60 | // a `std::string`, double-quoted string literal, character array, or even |
61 | // another `string_view`. A `string_view` does *not* own the string to which it |
62 | // points, and that data cannot be modified through the view. |
63 | // |
64 | // You can use `string_view` as a function or method parameter anywhere a |
65 | // parameter can receive a double-quoted string literal, `const char*`, |
66 | // `std::string`, or another `absl::string_view` argument with no need to copy |
67 | // the string data. Systematic use of `string_view` within function arguments |
68 | // reduces data copies and `strlen()` calls. |
69 | // |
70 | // Because of its small size, prefer passing `string_view` by value: |
71 | // |
72 | // void MyFunction(absl::string_view arg); |
73 | // |
74 | // If circumstances require, you may also pass one by const reference: |
75 | // |
76 | // void MyFunction(const absl::string_view& arg); // not preferred |
77 | // |
78 | // Passing by value generates slightly smaller code for many architectures. |
79 | // |
80 | // In either case, the source data of the `string_view` must outlive the |
81 | // `string_view` itself. |
82 | // |
83 | // A `string_view` is also suitable for local variables if you know that the |
84 | // lifetime of the underlying object is longer than the lifetime of your |
85 | // `string_view` variable. However, beware of binding a `string_view` to a |
86 | // temporary value: |
87 | // |
88 | // // BAD use of string_view: lifetime problem |
89 | // absl::string_view sv = obj.ReturnAString(); |
90 | // |
91 | // // GOOD use of string_view: str outlives sv |
92 | // std::string str = obj.ReturnAString(); |
93 | // absl::string_view sv = str; |
94 | // |
95 | // Due to lifetime issues, a `string_view` is sometimes a poor choice for a |
96 | // return value and usually a poor choice for a data member. If you do use a |
97 | // `string_view` this way, it is your responsibility to ensure that the object |
98 | // pointed to by the `string_view` outlives the `string_view`. |
99 | // |
100 | // A `string_view` may represent a whole string or just part of a string. For |
101 | // example, when splitting a string, `std::vector<absl::string_view>` is a |
102 | // natural data type for the output. |
103 | // |
104 | // When constructed from a source which is nul-terminated, the `string_view` |
105 | // itself will not include the nul-terminator unless a specific size (including |
106 | // the nul) is passed to the constructor. As a result, common idioms that work |
107 | // on nul-terminated strings do not work on `string_view` objects. If you write |
108 | // code that scans a `string_view`, you must check its length rather than test |
109 | // for nul, for example. Note, however, that nuls may still be embedded within |
110 | // a `string_view` explicitly. |
111 | // |
112 | // You may create a null `string_view` in two ways: |
113 | // |
//   absl::string_view sv;
115 | // absl::string_view sv(nullptr, 0); |
116 | // |
117 | // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and |
118 | // `sv.empty() == true`. Also, if you create a `string_view` with a non-null |
119 | // pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to |
120 | // signal an undefined value that is different from other `string_view` values |
121 | // in a similar fashion to how `const char* p1 = nullptr;` is different from |
122 | // `const char* p2 = "";`. However, in practice, it is not recommended to rely |
123 | // on this behavior. |
124 | // |
125 | // Be careful not to confuse a null `string_view` with an empty one. A null |
126 | // `string_view` is an empty `string_view`, but some empty `string_view`s are |
127 | // not null. Prefer checking for emptiness over checking for null. |
128 | // |
129 | // There are many ways to create an empty string_view: |
130 | // |
131 | // const char* nullcp = nullptr; |
132 | // // string_view.size() will return 0 in all cases. |
133 | // absl::string_view(); |
134 | // absl::string_view(nullcp, 0); |
135 | // absl::string_view(""); |
136 | // absl::string_view("", 0); |
137 | // absl::string_view("abcdef", 0); |
138 | // absl::string_view("abcdef" + 6, 0); |
139 | // |
// All empty `string_view` objects, whether null or not, are equal:
141 | // |
142 | // absl::string_view() == absl::string_view("", 0) |
143 | // absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0) |
144 | class string_view { |
145 | public: |
146 | using traits_type = std::char_traits<char>; |
147 | using value_type = char; |
148 | using pointer = char*; |
149 | using const_pointer = const char*; |
150 | using reference = char&; |
151 | using const_reference = const char&; |
152 | using const_iterator = const char*; |
153 | using iterator = const_iterator; |
154 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; |
155 | using reverse_iterator = const_reverse_iterator; |
156 | using size_type = size_t; |
157 | using difference_type = std::ptrdiff_t; |
158 | |
159 | static constexpr size_type npos = static_cast<size_type>(-1); |
160 | |
161 | // Null `string_view` constructor |
162 | constexpr string_view() noexcept : ptr_(nullptr), length_(0) {} |
163 | |
164 | // Implicit constructors |
165 | |
166 | template <typename Allocator> |
167 | string_view( // NOLINT(runtime/explicit) |
168 | const std::basic_string<char, std::char_traits<char>, Allocator>& |
169 | str) noexcept |
170 | : ptr_(str.data()), length_(CheckLengthInternal(str.size())) {} |
171 | |
172 | // Implicit constructor of a `string_view` from nul-terminated `str`. When |
173 | // accepting possibly null strings, use `absl::NullSafeStringView(str)` |
174 | // instead (see below). |
175 | #if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ |
176 | (defined(__GNUC__) && !defined(__clang__)) |
177 | // GCC has __builtin_strlen according to |
178 | // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but |
179 | // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above. |
180 | // __builtin_strlen is constexpr. |
181 | constexpr string_view(const char* str) // NOLINT(runtime/explicit) |
182 | : ptr_(str), |
183 | length_(CheckLengthInternal(str ? __builtin_strlen(str) : 0)) {} |
184 | #else |
185 | constexpr string_view(const char* str) // NOLINT(runtime/explicit) |
186 | : ptr_(str), length_(CheckLengthInternal(str ? strlen(str) : 0)) {} |
187 | #endif |
188 | |
189 | // Implicit constructor of a `string_view` from a `const char*` and length. |
190 | constexpr string_view(const char* data, size_type len) |
191 | : ptr_(data), length_(CheckLengthInternal(len)) {} |
192 | |
193 | // NOTE: Harmlessly omitted to work around gdb bug. |
194 | // constexpr string_view(const string_view&) noexcept = default; |
195 | // string_view& operator=(const string_view&) noexcept = default; |
196 | |
197 | // Iterators |
198 | |
199 | // string_view::begin() |
200 | // |
201 | // Returns an iterator pointing to the first character at the beginning of the |
202 | // `string_view`, or `end()` if the `string_view` is empty. |
203 | constexpr const_iterator begin() const noexcept { return ptr_; } |
204 | |
205 | // string_view::end() |
206 | // |
207 | // Returns an iterator pointing just beyond the last character at the end of |
208 | // the `string_view`. This iterator acts as a placeholder; attempting to |
209 | // access it results in undefined behavior. |
210 | constexpr const_iterator end() const noexcept { return ptr_ + length_; } |
211 | |
212 | // string_view::cbegin() |
213 | // |
214 | // Returns a const iterator pointing to the first character at the beginning |
215 | // of the `string_view`, or `end()` if the `string_view` is empty. |
216 | constexpr const_iterator cbegin() const noexcept { return begin(); } |
217 | |
218 | // string_view::cend() |
219 | // |
220 | // Returns a const iterator pointing just beyond the last character at the end |
221 | // of the `string_view`. This pointer acts as a placeholder; attempting to |
222 | // access its element results in undefined behavior. |
223 | constexpr const_iterator cend() const noexcept { return end(); } |
224 | |
225 | // string_view::rbegin() |
226 | // |
227 | // Returns a reverse iterator pointing to the last character at the end of the |
228 | // `string_view`, or `rend()` if the `string_view` is empty. |
229 | const_reverse_iterator rbegin() const noexcept { |
230 | return const_reverse_iterator(end()); |
231 | } |
232 | |
233 | // string_view::rend() |
234 | // |
235 | // Returns a reverse iterator pointing just before the first character at the |
236 | // beginning of the `string_view`. This pointer acts as a placeholder; |
237 | // attempting to access its element results in undefined behavior. |
238 | const_reverse_iterator rend() const noexcept { |
239 | return const_reverse_iterator(begin()); |
240 | } |
241 | |
242 | // string_view::crbegin() |
243 | // |
244 | // Returns a const reverse iterator pointing to the last character at the end |
245 | // of the `string_view`, or `crend()` if the `string_view` is empty. |
246 | const_reverse_iterator crbegin() const noexcept { return rbegin(); } |
247 | |
248 | // string_view::crend() |
249 | // |
250 | // Returns a const reverse iterator pointing just before the first character |
251 | // at the beginning of the `string_view`. This pointer acts as a placeholder; |
252 | // attempting to access its element results in undefined behavior. |
253 | const_reverse_iterator crend() const noexcept { return rend(); } |
254 | |
255 | // Capacity Utilities |
256 | |
257 | // string_view::size() |
258 | // |
259 | // Returns the number of characters in the `string_view`. |
260 | constexpr size_type size() const noexcept { |
261 | return length_; |
262 | } |
263 | |
264 | // string_view::length() |
265 | // |
266 | // Returns the number of characters in the `string_view`. Alias for `size()`. |
267 | constexpr size_type length() const noexcept { return size(); } |
268 | |
269 | // string_view::max_size() |
270 | // |
271 | // Returns the maximum number of characters the `string_view` can hold. |
272 | constexpr size_type max_size() const noexcept { return kMaxSize; } |
273 | |
274 | // string_view::empty() |
275 | // |
276 | // Checks if the `string_view` is empty (refers to no characters). |
277 | constexpr bool empty() const noexcept { return length_ == 0; } |
278 | |
279 | // string_view::operator[] |
280 | // |
281 | // Returns the ith element of an `string_view` using the array operator. |
282 | // Note that this operator does not perform any bounds checking. |
283 | constexpr const_reference operator[](size_type i) const { return ptr_[i]; } |
284 | |
285 | // string_view::front() |
286 | // |
287 | // Returns the first element of a `string_view`. |
288 | constexpr const_reference front() const { return ptr_[0]; } |
289 | |
290 | // string_view::back() |
291 | // |
292 | // Returns the last element of a `string_view`. |
293 | constexpr const_reference back() const { return ptr_[size() - 1]; } |
294 | |
295 | // string_view::data() |
296 | // |
297 | // Returns a pointer to the underlying character array (which is of course |
298 | // stored elsewhere). Note that `string_view::data()` may contain embedded nul |
299 | // characters, but the returned buffer may or may not be nul-terminated; |
300 | // therefore, do not pass `data()` to a routine that expects a nul-terminated |
301 | // std::string. |
302 | constexpr const_pointer data() const noexcept { return ptr_; } |
303 | |
304 | // Modifiers |
305 | |
306 | // string_view::remove_prefix() |
307 | // |
308 | // Removes the first `n` characters from the `string_view`. Note that the |
309 | // underlying std::string is not changed, only the view. |
310 | void remove_prefix(size_type n) { |
311 | assert(n <= length_); |
312 | ptr_ += n; |
313 | length_ -= n; |
314 | } |
315 | |
316 | // string_view::remove_suffix() |
317 | // |
318 | // Removes the last `n` characters from the `string_view`. Note that the |
319 | // underlying std::string is not changed, only the view. |
320 | void remove_suffix(size_type n) { |
321 | assert(n <= length_); |
322 | length_ -= n; |
323 | } |
324 | |
325 | // string_view::swap() |
326 | // |
327 | // Swaps this `string_view` with another `string_view`. |
328 | void swap(string_view& s) noexcept { |
329 | auto t = *this; |
330 | *this = s; |
331 | s = t; |
332 | } |
333 | |
334 | // Explicit conversion operators |
335 | |
336 | // Converts to `std::basic_string`. |
337 | template <typename A> |
338 | explicit operator std::basic_string<char, traits_type, A>() const { |
339 | if (!data()) return {}; |
340 | return std::basic_string<char, traits_type, A>(data(), size()); |
341 | } |
342 | |
343 | // string_view::copy() |
344 | // |
345 | // Copies the contents of the `string_view` at offset `pos` and length `n` |
346 | // into `buf`. |
347 | size_type copy(char* buf, size_type n, size_type pos = 0) const; |
348 | |
349 | // string_view::substr() |
350 | // |
351 | // Returns a "substring" of the `string_view` (at offset `pos` and length |
352 | // `n`) as another string_view. This function throws `std::out_of_bounds` if |
353 | // `pos > size`. |
354 | string_view substr(size_type pos, size_type n = npos) const { |
355 | if (ABSL_PREDICT_FALSE(pos > length_)) |
356 | base_internal::ThrowStdOutOfRange("absl::string_view::substr" ); |
357 | n = (std::min)(n, length_ - pos); |
358 | return string_view(ptr_ + pos, n); |
359 | } |
360 | |
361 | // string_view::compare() |
362 | // |
363 | // Performs a lexicographical comparison between the `string_view` and |
364 | // another `absl::string_view`, returning -1 if `this` is less than, 0 if |
365 | // `this` is equal to, and 1 if `this` is greater than the passed std::string |
366 | // view. Note that in the case of data equality, a further comparison is made |
367 | // on the respective sizes of the two `string_view`s to determine which is |
368 | // smaller, equal, or greater. |
369 | int compare(string_view x) const noexcept { |
370 | auto min_length = (std::min)(length_, x.length_); |
371 | if (min_length > 0) { |
372 | int r = memcmp(ptr_, x.ptr_, min_length); |
373 | if (r < 0) return -1; |
374 | if (r > 0) return 1; |
375 | } |
376 | if (length_ < x.length_) return -1; |
377 | if (length_ > x.length_) return 1; |
378 | return 0; |
379 | } |
380 | |
381 | // Overload of `string_view::compare()` for comparing a substring of the |
382 | // 'string_view` and another `absl::string_view`. |
383 | int compare(size_type pos1, size_type count1, string_view v) const { |
384 | return substr(pos1, count1).compare(v); |
385 | } |
386 | |
387 | // Overload of `string_view::compare()` for comparing a substring of the |
388 | // `string_view` and a substring of another `absl::string_view`. |
389 | int compare(size_type pos1, size_type count1, string_view v, size_type pos2, |
390 | size_type count2) const { |
391 | return substr(pos1, count1).compare(v.substr(pos2, count2)); |
392 | } |
393 | |
394 | // Overload of `string_view::compare()` for comparing a `string_view` and a |
395 | // a different C-style std::string `s`. |
396 | int compare(const char* s) const { return compare(string_view(s)); } |
397 | |
398 | // Overload of `string_view::compare()` for comparing a substring of the |
399 | // `string_view` and a different std::string C-style std::string `s`. |
400 | int compare(size_type pos1, size_type count1, const char* s) const { |
401 | return substr(pos1, count1).compare(string_view(s)); |
402 | } |
403 | |
404 | // Overload of `string_view::compare()` for comparing a substring of the |
405 | // `string_view` and a substring of a different C-style std::string `s`. |
406 | int compare(size_type pos1, size_type count1, const char* s, |
407 | size_type count2) const { |
408 | return substr(pos1, count1).compare(string_view(s, count2)); |
409 | } |
410 | |
411 | // Find Utilities |
412 | |
413 | // string_view::find() |
414 | // |
415 | // Finds the first occurrence of the substring `s` within the `string_view`, |
416 | // returning the position of the first character's match, or `npos` if no |
417 | // match was found. |
418 | size_type find(string_view s, size_type pos = 0) const noexcept; |
419 | |
420 | // Overload of `string_view::find()` for finding the given character `c` |
421 | // within the `string_view`. |
422 | size_type find(char c, size_type pos = 0) const noexcept; |
423 | |
424 | // string_view::rfind() |
425 | // |
426 | // Finds the last occurrence of a substring `s` within the `string_view`, |
427 | // returning the position of the first character's match, or `npos` if no |
428 | // match was found. |
429 | size_type rfind(string_view s, size_type pos = npos) const |
430 | noexcept; |
431 | |
432 | // Overload of `string_view::rfind()` for finding the last given character `c` |
433 | // within the `string_view`. |
434 | size_type rfind(char c, size_type pos = npos) const noexcept; |
435 | |
436 | // string_view::find_first_of() |
437 | // |
438 | // Finds the first occurrence of any of the characters in `s` within the |
439 | // `string_view`, returning the start position of the match, or `npos` if no |
440 | // match was found. |
441 | size_type find_first_of(string_view s, size_type pos = 0) const |
442 | noexcept; |
443 | |
444 | // Overload of `string_view::find_first_of()` for finding a character `c` |
445 | // within the `string_view`. |
446 | size_type find_first_of(char c, size_type pos = 0) const |
447 | noexcept { |
448 | return find(c, pos); |
449 | } |
450 | |
451 | // string_view::find_last_of() |
452 | // |
453 | // Finds the last occurrence of any of the characters in `s` within the |
454 | // `string_view`, returning the start position of the match, or `npos` if no |
455 | // match was found. |
456 | size_type find_last_of(string_view s, size_type pos = npos) const |
457 | noexcept; |
458 | |
459 | // Overload of `string_view::find_last_of()` for finding a character `c` |
460 | // within the `string_view`. |
461 | size_type find_last_of(char c, size_type pos = npos) const |
462 | noexcept { |
463 | return rfind(c, pos); |
464 | } |
465 | |
466 | // string_view::find_first_not_of() |
467 | // |
468 | // Finds the first occurrence of any of the characters not in `s` within the |
469 | // `string_view`, returning the start position of the first non-match, or |
470 | // `npos` if no non-match was found. |
471 | size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept; |
472 | |
473 | // Overload of `string_view::find_first_not_of()` for finding a character |
474 | // that is not `c` within the `string_view`. |
475 | size_type find_first_not_of(char c, size_type pos = 0) const noexcept; |
476 | |
477 | // string_view::find_last_not_of() |
478 | // |
479 | // Finds the last occurrence of any of the characters not in `s` within the |
480 | // `string_view`, returning the start position of the last non-match, or |
481 | // `npos` if no non-match was found. |
482 | size_type find_last_not_of(string_view s, |
483 | size_type pos = npos) const noexcept; |
484 | |
485 | // Overload of `string_view::find_last_not_of()` for finding a character |
486 | // that is not `c` within the `string_view`. |
487 | size_type find_last_not_of(char c, size_type pos = npos) const |
488 | noexcept; |
489 | |
490 | private: |
491 | static constexpr size_type kMaxSize = |
492 | (std::numeric_limits<difference_type>::max)(); |
493 | |
494 | static constexpr size_type CheckLengthInternal(size_type len) { |
495 | return ABSL_ASSERT(len <= kMaxSize), len; |
496 | } |
497 | |
498 | const char* ptr_; |
499 | size_type length_; |
500 | }; |
501 | |
// Equality comparison. Kept inline on purpose: in the fairly common case where
// one operand is a literal, the compiler can then fold away most of the checks
// below.
inline bool operator==(string_view x, string_view y) noexcept {
  const auto count = x.size();
  // Views of different lengths can never be equal.
  if (count != y.size()) return false;
  // Equal-length views match trivially when they start at the same address or
  // are both empty; the bytes only need comparing otherwise.
  if (x.data() == y.data() || count <= 0) return true;
  return memcmp(x.data(), y.data(), count) == 0;
}
514 | |
// Inequality comparison: the exact negation of `operator==`.
inline bool operator!=(string_view x, string_view y) noexcept {
  const bool same = (x == y);
  return !same;
}
518 | |
// Less-than comparison: orders by the bytes of the common prefix (compared with
// `memcmp`); when the common prefix is identical, the shorter view orders
// first.
inline bool operator<(string_view x, string_view y) noexcept {
  const auto common = (std::min)(x.size(), y.size());
  if (common != 0) {
    const int order = memcmp(x.data(), y.data(), common);
    // The first differing byte decides the result outright.
    if (order != 0) return order < 0;
  }
  return x.size() < y.size();
}
524 | |
// Greater-than comparison: `x > y` is `operator<` with the operands exchanged.
inline bool operator>(string_view x, string_view y) noexcept {
  const bool y_precedes_x = (y < x);
  return y_precedes_x;
}
526 | |
// Less-than-or-equal comparison: holds exactly when `y` does not order before
// `x`.
inline bool operator<=(string_view x, string_view y) noexcept {
  const bool y_precedes_x = (y < x);
  return !y_precedes_x;
}
530 | |
// Greater-than-or-equal comparison: holds exactly when `x` does not order
// before `y`.
inline bool operator>=(string_view x, string_view y) noexcept {
  const bool x_precedes_y = (x < y);
  return !x_precedes_y;
}
534 | |
// IO Insertion Operator
//
// Stream insertion of a `string_view` (`piece`) into the `std::ostream` `o`,
// returning a `std::ostream&`. Only the declaration appears in this header
// (which pulls in just `<iosfwd>`); the definition is not visible here.
std::ostream& operator<<(std::ostream& o, string_view piece);
537 | |
538 | } // namespace absl |
539 | |
540 | #endif // ABSL_HAVE_STD_STRING_VIEW |
541 | |
542 | namespace absl { |
543 | |
// ClippedSubstr()
//
// Behaves like `s.substr(pos, n)`, except that a `pos` beyond the end of `s` is
// first reduced to `s.size()`. Provided because `std::string_view::substr`
// throws when `pos > size()`; with the clipped position the result is simply
// an empty view in that case.
inline string_view ClippedSubstr(string_view s, size_t pos,
                                 size_t n = string_view::npos) {
  const size_t limit = static_cast<size_t>(s.size());
  const size_t start = (pos < limit) ? pos : limit;
  return s.substr(start, n);
}
553 | |
// NullSafeStringView()
//
// Builds an `absl::string_view` from a C-string pointer `p` that is allowed to
// be null: a null `p` yields a default-constructed (null) `string_view`, and
// any other `p` yields a view of the nul-terminated string it points to. Use
// this wherever a `string_view` must be made from a possibly-null pointer.
inline string_view NullSafeStringView(const char* p) {
  if (p == nullptr) {
    return string_view();
  }
  return string_view(p);
}
562 | |
563 | } // namespace absl |
564 | |
565 | #endif // ABSL_STRINGS_STRING_VIEW_H_ |
566 | |