1 | //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef LLVM_ADT_STRINGREF_H |
11 | #define LLVM_ADT_STRINGREF_H |
12 | |
13 | #include "llvm/ADT/STLExtras.h" |
14 | #include "llvm/ADT/iterator_range.h" |
15 | #include "llvm/Support/Compiler.h" |
16 | #include <algorithm> |
17 | #include <cassert> |
18 | #include <cstddef> |
19 | #include <cstring> |
20 | #include <limits> |
21 | #include <string> |
22 | #include <type_traits> |
23 | #include <utility> |
24 | |
25 | namespace llvm { |
26 | |
// Forward declarations, so this header can name these types without pulling
// in their full definitions.

// Output type of the arbitrary-width StringRef::getAsInteger overload.
class APInt;
// NOTE(review): not referenced in the visible part of this header; presumably
// used by a hashing helper declared further down -- confirm.
class hash_code;
// Output container type taken by the multi-way StringRef::split overloads.
template <typename T> class SmallVectorImpl;
// Declared early so the free helper functions below can take StringRef
// parameters before the class itself is defined.
class StringRef;
31 | |
/// Helper functions for StringRef::getAsInteger.
///
/// These parse \p Str in the given \p Radix and store the value in
/// \p Result. StringRef::getAsInteger treats a \c true return value as an
/// error, so \c false means the parse succeeded. \p Str is taken by value and
/// the caller's string is therefore left untouched.
bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                          unsigned long long &Result);

bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

/// Helper functions for StringRef::consumeInteger.
///
/// Same return convention as above (\c true signals an error to
/// StringRef::consumeInteger), but \p Str is taken by non-const reference so
/// the implementation can update the caller's string; consumeInteger documents
/// that the parsed digits are removed from its front.
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                            unsigned long long &Result);
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
41 | |
42 | /// StringRef - Represent a constant reference to a string, i.e. a character |
43 | /// array and a length, which need not be null terminated. |
44 | /// |
45 | /// This class does not own the string data, it is expected to be used in |
46 | /// situations where the character data resides in some other buffer, whose |
47 | /// lifetime extends past that of the StringRef. For this reason, it is not in |
48 | /// general safe to store a StringRef. |
49 | class StringRef { |
50 | public: |
51 | static const size_t npos = ~size_t(0); |
52 | |
53 | using iterator = const char *; |
54 | using const_iterator = const char *; |
55 | using size_type = size_t; |
56 | |
57 | private: |
58 | /// The start of the string, in an external buffer. |
59 | const char *Data = nullptr; |
60 | |
61 | /// The length of the string. |
62 | size_t Length = 0; |
63 | |
64 | // Workaround memcmp issue with null pointers (undefined behavior) |
65 | // by providing a specialized version |
66 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
67 | static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { |
68 | if (Length == 0) { return 0; } |
69 | return ::memcmp(Lhs,Rhs,Length); |
70 | } |
71 | |
72 | public: |
73 | /// @name Constructors |
74 | /// @{ |
75 | |
76 | /// Construct an empty string ref. |
77 | /*implicit*/ StringRef() = default; |
78 | |
79 | /// Disable conversion from nullptr. This prevents things like |
80 | /// if (S == nullptr) |
81 | StringRef(std::nullptr_t) = delete; |
82 | |
83 | /// Construct a string ref from a cstring. |
84 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
85 | /*implicit*/ StringRef(const char *Str) |
86 | : Data(Str), Length(Str ? ::strlen(Str) : 0) {} |
87 | |
88 | /// Construct a string ref from a pointer and length. |
89 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
90 | /*implicit*/ constexpr StringRef(const char *data, size_t length) |
91 | : Data(data), Length(length) {} |
92 | |
93 | /// Construct a string ref from an std::string. |
94 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
95 | /*implicit*/ StringRef(const std::string &Str) |
96 | : Data(Str.data()), Length(Str.length()) {} |
97 | |
98 | static StringRef withNullAsEmpty(const char *data) { |
99 | return StringRef(data ? data : "" ); |
100 | } |
101 | |
102 | /// @} |
103 | /// @name Iterators |
104 | /// @{ |
105 | |
106 | iterator begin() const { return Data; } |
107 | |
108 | iterator end() const { return Data + Length; } |
109 | |
110 | const unsigned char *bytes_begin() const { |
111 | return reinterpret_cast<const unsigned char *>(begin()); |
112 | } |
113 | const unsigned char *bytes_end() const { |
114 | return reinterpret_cast<const unsigned char *>(end()); |
115 | } |
116 | iterator_range<const unsigned char *> bytes() const { |
117 | return make_range(bytes_begin(), bytes_end()); |
118 | } |
119 | |
120 | /// @} |
121 | /// @name String Operations |
122 | /// @{ |
123 | |
124 | /// data - Get a pointer to the start of the string (which may not be null |
125 | /// terminated). |
126 | LLVM_NODISCARD |
127 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
128 | const char *data() const { return Data; } |
129 | |
130 | /// empty - Check if the string is empty. |
131 | LLVM_NODISCARD |
132 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
133 | bool empty() const { return Length == 0; } |
134 | |
135 | /// size - Get the string size. |
136 | LLVM_NODISCARD |
137 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
138 | size_t size() const { return Length; } |
139 | |
140 | /// front - Get the first character in the string. |
141 | LLVM_NODISCARD |
142 | char front() const { |
143 | assert(!empty()); |
144 | return Data[0]; |
145 | } |
146 | |
147 | /// back - Get the last character in the string. |
148 | LLVM_NODISCARD |
149 | char back() const { |
150 | assert(!empty()); |
151 | return Data[Length-1]; |
152 | } |
153 | |
154 | // copy - Allocate copy in Allocator and return StringRef to it. |
155 | template <typename Allocator> |
156 | LLVM_NODISCARD StringRef copy(Allocator &A) const { |
157 | // Don't request a length 0 copy from the allocator. |
158 | if (empty()) |
159 | return StringRef(); |
160 | char *S = A.template Allocate<char>(Length); |
161 | std::copy(begin(), end(), S); |
162 | return StringRef(S, Length); |
163 | } |
164 | |
165 | /// equals - Check for string equality, this is more efficient than |
166 | /// compare() when the relative ordering of inequal strings isn't needed. |
167 | LLVM_NODISCARD |
168 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
169 | bool equals(StringRef RHS) const { |
170 | return (Length == RHS.Length && |
171 | compareMemory(Data, RHS.Data, RHS.Length) == 0); |
172 | } |
173 | |
174 | /// equals_lower - Check for string equality, ignoring case. |
175 | LLVM_NODISCARD |
176 | bool equals_lower(StringRef RHS) const { |
177 | return Length == RHS.Length && compare_lower(RHS) == 0; |
178 | } |
179 | |
180 | /// compare - Compare two strings; the result is -1, 0, or 1 if this string |
181 | /// is lexicographically less than, equal to, or greater than the \p RHS. |
182 | LLVM_NODISCARD |
183 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
184 | int compare(StringRef RHS) const { |
185 | // Check the prefix for a mismatch. |
186 | if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) |
187 | return Res < 0 ? -1 : 1; |
188 | |
189 | // Otherwise the prefixes match, so we only need to check the lengths. |
190 | if (Length == RHS.Length) |
191 | return 0; |
192 | return Length < RHS.Length ? -1 : 1; |
193 | } |
194 | |
195 | /// compare_lower - Compare two strings, ignoring case. |
196 | LLVM_NODISCARD |
197 | int compare_lower(StringRef RHS) const; |
198 | |
199 | /// compare_numeric - Compare two strings, treating sequences of digits as |
200 | /// numbers. |
201 | LLVM_NODISCARD |
202 | int compare_numeric(StringRef RHS) const; |
203 | |
204 | /// Determine the edit distance between this string and another |
205 | /// string. |
206 | /// |
207 | /// \param Other the string to compare this string against. |
208 | /// |
209 | /// \param AllowReplacements whether to allow character |
210 | /// replacements (change one character into another) as a single |
211 | /// operation, rather than as two operations (an insertion and a |
212 | /// removal). |
213 | /// |
214 | /// \param MaxEditDistance If non-zero, the maximum edit distance that |
215 | /// this routine is allowed to compute. If the edit distance will exceed |
216 | /// that maximum, returns \c MaxEditDistance+1. |
217 | /// |
218 | /// \returns the minimum number of character insertions, removals, |
219 | /// or (if \p AllowReplacements is \c true) replacements needed to |
220 | /// transform one of the given strings into the other. If zero, |
221 | /// the strings are identical. |
222 | LLVM_NODISCARD |
223 | unsigned edit_distance(StringRef Other, bool AllowReplacements = true, |
224 | unsigned MaxEditDistance = 0) const; |
225 | |
226 | /// str - Get the contents as an std::string. |
227 | LLVM_NODISCARD |
228 | std::string str() const { |
229 | if (!Data) return std::string(); |
230 | return std::string(Data, Length); |
231 | } |
232 | |
233 | /// @} |
234 | /// @name Operator Overloads |
235 | /// @{ |
236 | |
237 | LLVM_NODISCARD |
238 | char operator[](size_t Index) const { |
239 | assert(Index < Length && "Invalid index!" ); |
240 | return Data[Index]; |
241 | } |
242 | |
243 | /// Disallow accidental assignment from a temporary std::string. |
244 | /// |
245 | /// The declaration here is extra complicated so that `stringRef = {}` |
246 | /// and `stringRef = "abc"` continue to select the move assignment operator. |
247 | template <typename T> |
248 | typename std::enable_if<std::is_same<T, std::string>::value, |
249 | StringRef>::type & |
250 | operator=(T &&Str) = delete; |
251 | |
252 | /// @} |
253 | /// @name Type Conversions |
254 | /// @{ |
255 | |
256 | operator std::string() const { |
257 | return str(); |
258 | } |
259 | |
260 | /// @} |
261 | /// @name String Predicates |
262 | /// @{ |
263 | |
264 | /// Check if this string starts with the given \p Prefix. |
265 | LLVM_NODISCARD |
266 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
267 | bool startswith(StringRef Prefix) const { |
268 | return Length >= Prefix.Length && |
269 | compareMemory(Data, Prefix.Data, Prefix.Length) == 0; |
270 | } |
271 | |
272 | /// Check if this string starts with the given \p Prefix, ignoring case. |
273 | LLVM_NODISCARD |
274 | bool startswith_lower(StringRef Prefix) const; |
275 | |
276 | /// Check if this string ends with the given \p Suffix. |
277 | LLVM_NODISCARD |
278 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
279 | bool endswith(StringRef Suffix) const { |
280 | return Length >= Suffix.Length && |
281 | compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; |
282 | } |
283 | |
284 | /// Check if this string ends with the given \p Suffix, ignoring case. |
285 | LLVM_NODISCARD |
286 | bool endswith_lower(StringRef Suffix) const; |
287 | |
288 | /// @} |
289 | /// @name String Searching |
290 | /// @{ |
291 | |
292 | /// Search for the first character \p C in the string. |
293 | /// |
294 | /// \returns The index of the first occurrence of \p C, or npos if not |
295 | /// found. |
296 | LLVM_NODISCARD |
297 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
298 | size_t find(char C, size_t From = 0) const { |
299 | size_t FindBegin = std::min(From, Length); |
300 | if (FindBegin < Length) { // Avoid calling memchr with nullptr. |
301 | // Just forward to memchr, which is faster than a hand-rolled loop. |
302 | if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin)) |
303 | return static_cast<const char *>(P) - Data; |
304 | } |
305 | return npos; |
306 | } |
307 | |
308 | /// Search for the first character \p C in the string, ignoring case. |
309 | /// |
310 | /// \returns The index of the first occurrence of \p C, or npos if not |
311 | /// found. |
312 | LLVM_NODISCARD |
313 | size_t find_lower(char C, size_t From = 0) const; |
314 | |
315 | /// Search for the first character satisfying the predicate \p F |
316 | /// |
317 | /// \returns The index of the first character satisfying \p F starting from |
318 | /// \p From, or npos if not found. |
319 | LLVM_NODISCARD |
320 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
321 | size_t find_if(function_ref<bool(char)> F, size_t From = 0) const { |
322 | StringRef S = drop_front(From); |
323 | while (!S.empty()) { |
324 | if (F(S.front())) |
325 | return size() - S.size(); |
326 | S = S.drop_front(); |
327 | } |
328 | return npos; |
329 | } |
330 | |
331 | /// Search for the first character not satisfying the predicate \p F |
332 | /// |
333 | /// \returns The index of the first character not satisfying \p F starting |
334 | /// from \p From, or npos if not found. |
335 | LLVM_NODISCARD |
336 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
337 | size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const { |
338 | return find_if([F](char c) { return !F(c); }, From); |
339 | } |
340 | |
341 | /// Search for the first string \p Str in the string. |
342 | /// |
343 | /// \returns The index of the first occurrence of \p Str, or npos if not |
344 | /// found. |
345 | LLVM_NODISCARD |
346 | size_t find(StringRef Str, size_t From = 0) const; |
347 | |
348 | /// Search for the first string \p Str in the string, ignoring case. |
349 | /// |
350 | /// \returns The index of the first occurrence of \p Str, or npos if not |
351 | /// found. |
352 | LLVM_NODISCARD |
353 | size_t find_lower(StringRef Str, size_t From = 0) const; |
354 | |
355 | /// Search for the last character \p C in the string. |
356 | /// |
357 | /// \returns The index of the last occurrence of \p C, or npos if not |
358 | /// found. |
359 | LLVM_NODISCARD |
360 | size_t rfind(char C, size_t From = npos) const { |
361 | From = std::min(From, Length); |
362 | size_t i = From; |
363 | while (i != 0) { |
364 | --i; |
365 | if (Data[i] == C) |
366 | return i; |
367 | } |
368 | return npos; |
369 | } |
370 | |
371 | /// Search for the last character \p C in the string, ignoring case. |
372 | /// |
373 | /// \returns The index of the last occurrence of \p C, or npos if not |
374 | /// found. |
375 | LLVM_NODISCARD |
376 | size_t rfind_lower(char C, size_t From = npos) const; |
377 | |
378 | /// Search for the last string \p Str in the string. |
379 | /// |
380 | /// \returns The index of the last occurrence of \p Str, or npos if not |
381 | /// found. |
382 | LLVM_NODISCARD |
383 | size_t rfind(StringRef Str) const; |
384 | |
385 | /// Search for the last string \p Str in the string, ignoring case. |
386 | /// |
387 | /// \returns The index of the last occurrence of \p Str, or npos if not |
388 | /// found. |
389 | LLVM_NODISCARD |
390 | size_t rfind_lower(StringRef Str) const; |
391 | |
392 | /// Find the first character in the string that is \p C, or npos if not |
393 | /// found. Same as find. |
394 | LLVM_NODISCARD |
395 | size_t find_first_of(char C, size_t From = 0) const { |
396 | return find(C, From); |
397 | } |
398 | |
399 | /// Find the first character in the string that is in \p Chars, or npos if |
400 | /// not found. |
401 | /// |
402 | /// Complexity: O(size() + Chars.size()) |
403 | LLVM_NODISCARD |
404 | size_t find_first_of(StringRef Chars, size_t From = 0) const; |
405 | |
406 | /// Find the first character in the string that is not \p C or npos if not |
407 | /// found. |
408 | LLVM_NODISCARD |
409 | size_t find_first_not_of(char C, size_t From = 0) const; |
410 | |
411 | /// Find the first character in the string that is not in the string |
412 | /// \p Chars, or npos if not found. |
413 | /// |
414 | /// Complexity: O(size() + Chars.size()) |
415 | LLVM_NODISCARD |
416 | size_t find_first_not_of(StringRef Chars, size_t From = 0) const; |
417 | |
418 | /// Find the last character in the string that is \p C, or npos if not |
419 | /// found. |
420 | LLVM_NODISCARD |
421 | size_t find_last_of(char C, size_t From = npos) const { |
422 | return rfind(C, From); |
423 | } |
424 | |
425 | /// Find the last character in the string that is in \p C, or npos if not |
426 | /// found. |
427 | /// |
428 | /// Complexity: O(size() + Chars.size()) |
429 | LLVM_NODISCARD |
430 | size_t find_last_of(StringRef Chars, size_t From = npos) const; |
431 | |
432 | /// Find the last character in the string that is not \p C, or npos if not |
433 | /// found. |
434 | LLVM_NODISCARD |
435 | size_t find_last_not_of(char C, size_t From = npos) const; |
436 | |
437 | /// Find the last character in the string that is not in \p Chars, or |
438 | /// npos if not found. |
439 | /// |
440 | /// Complexity: O(size() + Chars.size()) |
441 | LLVM_NODISCARD |
442 | size_t find_last_not_of(StringRef Chars, size_t From = npos) const; |
443 | |
444 | /// Return true if the given string is a substring of *this, and false |
445 | /// otherwise. |
446 | LLVM_NODISCARD |
447 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
448 | bool contains(StringRef Other) const { return find(Other) != npos; } |
449 | |
450 | /// Return true if the given character is contained in *this, and false |
451 | /// otherwise. |
452 | LLVM_NODISCARD |
453 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
454 | bool contains(char C) const { return find_first_of(C) != npos; } |
455 | |
456 | /// Return true if the given string is a substring of *this, and false |
457 | /// otherwise. |
458 | LLVM_NODISCARD |
459 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
460 | bool contains_lower(StringRef Other) const { |
461 | return find_lower(Other) != npos; |
462 | } |
463 | |
464 | /// Return true if the given character is contained in *this, and false |
465 | /// otherwise. |
466 | LLVM_NODISCARD |
467 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
468 | bool contains_lower(char C) const { return find_lower(C) != npos; } |
469 | |
470 | /// @} |
471 | /// @name Helpful Algorithms |
472 | /// @{ |
473 | |
474 | /// Return the number of occurrences of \p C in the string. |
475 | LLVM_NODISCARD |
476 | size_t count(char C) const { |
477 | size_t Count = 0; |
478 | for (size_t i = 0, e = Length; i != e; ++i) |
479 | if (Data[i] == C) |
480 | ++Count; |
481 | return Count; |
482 | } |
483 | |
484 | /// Return the number of non-overlapped occurrences of \p Str in |
485 | /// the string. |
486 | size_t count(StringRef Str) const; |
487 | |
488 | /// Parse the current string as an integer of the specified radix. If |
489 | /// \p Radix is specified as zero, this does radix autosensing using |
490 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |
491 | /// |
492 | /// If the string is invalid or if only a subset of the string is valid, |
493 | /// this returns true to signify the error. The string is considered |
494 | /// erroneous if empty or if it overflows T. |
495 | template <typename T> |
496 | typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type |
497 | getAsInteger(unsigned Radix, T &Result) const { |
498 | long long LLVal; |
499 | if (getAsSignedInteger(*this, Radix, LLVal) || |
500 | static_cast<T>(LLVal) != LLVal) |
501 | return true; |
502 | Result = LLVal; |
503 | return false; |
504 | } |
505 | |
506 | template <typename T> |
507 | typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type |
508 | getAsInteger(unsigned Radix, T &Result) const { |
509 | unsigned long long ULLVal; |
510 | // The additional cast to unsigned long long is required to avoid the |
511 | // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type |
512 | // 'unsigned __int64' when instantiating getAsInteger with T = bool. |
513 | if (getAsUnsignedInteger(*this, Radix, ULLVal) || |
514 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |
515 | return true; |
516 | Result = ULLVal; |
517 | return false; |
518 | } |
519 | |
520 | /// Parse the current string as an integer of the specified radix. If |
521 | /// \p Radix is specified as zero, this does radix autosensing using |
522 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |
523 | /// |
524 | /// If the string does not begin with a number of the specified radix, |
525 | /// this returns true to signify the error. The string is considered |
526 | /// erroneous if empty or if it overflows T. |
527 | /// The portion of the string representing the discovered numeric value |
528 | /// is removed from the beginning of the string. |
529 | template <typename T> |
530 | typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type |
531 | consumeInteger(unsigned Radix, T &Result) { |
532 | long long LLVal; |
533 | if (consumeSignedInteger(*this, Radix, LLVal) || |
534 | static_cast<long long>(static_cast<T>(LLVal)) != LLVal) |
535 | return true; |
536 | Result = LLVal; |
537 | return false; |
538 | } |
539 | |
540 | template <typename T> |
541 | typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type |
542 | consumeInteger(unsigned Radix, T &Result) { |
543 | unsigned long long ULLVal; |
544 | if (consumeUnsignedInteger(*this, Radix, ULLVal) || |
545 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |
546 | return true; |
547 | Result = ULLVal; |
548 | return false; |
549 | } |
550 | |
551 | /// Parse the current string as an integer of the specified \p Radix, or of |
552 | /// an autosensed radix if the \p Radix given is 0. The current value in |
553 | /// \p Result is discarded, and the storage is changed to be wide enough to |
554 | /// store the parsed integer. |
555 | /// |
556 | /// \returns true if the string does not solely consist of a valid |
557 | /// non-empty number in the appropriate base. |
558 | /// |
559 | /// APInt::fromString is superficially similar but assumes the |
560 | /// string is well-formed in the given radix. |
561 | bool getAsInteger(unsigned Radix, APInt &Result) const; |
562 | |
563 | /// Parse the current string as an IEEE double-precision floating |
564 | /// point value. The string must be a well-formed double. |
565 | /// |
566 | /// If \p AllowInexact is false, the function will fail if the string |
567 | /// cannot be represented exactly. Otherwise, the function only fails |
568 | /// in case of an overflow or underflow. |
569 | bool getAsDouble(double &Result, bool AllowInexact = true) const; |
570 | |
571 | /// @} |
572 | /// @name String Operations |
573 | /// @{ |
574 | |
575 | // Convert the given ASCII string to lowercase. |
576 | LLVM_NODISCARD |
577 | std::string lower() const; |
578 | |
579 | /// Convert the given ASCII string to uppercase. |
580 | LLVM_NODISCARD |
581 | std::string upper() const; |
582 | |
583 | /// @} |
584 | /// @name Substring Operations |
585 | /// @{ |
586 | |
587 | /// Return a reference to the substring from [Start, Start + N). |
588 | /// |
589 | /// \param Start The index of the starting character in the substring; if |
590 | /// the index is npos or greater than the length of the string then the |
591 | /// empty substring will be returned. |
592 | /// |
593 | /// \param N The number of characters to included in the substring. If N |
594 | /// exceeds the number of characters remaining in the string, the string |
595 | /// suffix (starting with \p Start) will be returned. |
596 | LLVM_NODISCARD |
597 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
598 | StringRef substr(size_t Start, size_t N = npos) const { |
599 | Start = std::min(Start, Length); |
600 | return StringRef(Data + Start, std::min(N, Length - Start)); |
601 | } |
602 | |
603 | /// Return a StringRef equal to 'this' but with only the first \p N |
604 | /// elements remaining. If \p N is greater than the length of the |
605 | /// string, the entire string is returned. |
606 | LLVM_NODISCARD |
607 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
608 | StringRef take_front(size_t N = 1) const { |
609 | if (N >= size()) |
610 | return *this; |
611 | return drop_back(size() - N); |
612 | } |
613 | |
614 | /// Return a StringRef equal to 'this' but with only the last \p N |
615 | /// elements remaining. If \p N is greater than the length of the |
616 | /// string, the entire string is returned. |
617 | LLVM_NODISCARD |
618 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
619 | StringRef take_back(size_t N = 1) const { |
620 | if (N >= size()) |
621 | return *this; |
622 | return drop_front(size() - N); |
623 | } |
624 | |
625 | /// Return the longest prefix of 'this' such that every character |
626 | /// in the prefix satisfies the given predicate. |
627 | LLVM_NODISCARD |
628 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
629 | StringRef take_while(function_ref<bool(char)> F) const { |
630 | return substr(0, find_if_not(F)); |
631 | } |
632 | |
633 | /// Return the longest prefix of 'this' such that no character in |
634 | /// the prefix satisfies the given predicate. |
635 | LLVM_NODISCARD |
636 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
637 | StringRef take_until(function_ref<bool(char)> F) const { |
638 | return substr(0, find_if(F)); |
639 | } |
640 | |
641 | /// Return a StringRef equal to 'this' but with the first \p N elements |
642 | /// dropped. |
643 | LLVM_NODISCARD |
644 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
645 | StringRef drop_front(size_t N = 1) const { |
646 | assert(size() >= N && "Dropping more elements than exist" ); |
647 | return substr(N); |
648 | } |
649 | |
650 | /// Return a StringRef equal to 'this' but with the last \p N elements |
651 | /// dropped. |
652 | LLVM_NODISCARD |
653 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
654 | StringRef drop_back(size_t N = 1) const { |
655 | assert(size() >= N && "Dropping more elements than exist" ); |
656 | return substr(0, size()-N); |
657 | } |
658 | |
659 | /// Return a StringRef equal to 'this', but with all characters satisfying |
660 | /// the given predicate dropped from the beginning of the string. |
661 | LLVM_NODISCARD |
662 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
663 | StringRef drop_while(function_ref<bool(char)> F) const { |
664 | return substr(find_if_not(F)); |
665 | } |
666 | |
667 | /// Return a StringRef equal to 'this', but with all characters not |
668 | /// satisfying the given predicate dropped from the beginning of the string. |
669 | LLVM_NODISCARD |
670 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
671 | StringRef drop_until(function_ref<bool(char)> F) const { |
672 | return substr(find_if(F)); |
673 | } |
674 | |
675 | /// Returns true if this StringRef has the given prefix and removes that |
676 | /// prefix. |
677 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
678 | bool consume_front(StringRef Prefix) { |
679 | if (!startswith(Prefix)) |
680 | return false; |
681 | |
682 | *this = drop_front(Prefix.size()); |
683 | return true; |
684 | } |
685 | |
686 | /// Returns true if this StringRef has the given suffix and removes that |
687 | /// suffix. |
688 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
689 | bool consume_back(StringRef Suffix) { |
690 | if (!endswith(Suffix)) |
691 | return false; |
692 | |
693 | *this = drop_back(Suffix.size()); |
694 | return true; |
695 | } |
696 | |
697 | /// Return a reference to the substring from [Start, End). |
698 | /// |
699 | /// \param Start The index of the starting character in the substring; if |
700 | /// the index is npos or greater than the length of the string then the |
701 | /// empty substring will be returned. |
702 | /// |
703 | /// \param End The index following the last character to include in the |
704 | /// substring. If this is npos or exceeds the number of characters |
705 | /// remaining in the string, the string suffix (starting with \p Start) |
706 | /// will be returned. If this is less than \p Start, an empty string will |
707 | /// be returned. |
708 | LLVM_NODISCARD |
709 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
710 | StringRef slice(size_t Start, size_t End) const { |
711 | Start = std::min(Start, Length); |
712 | End = std::min(std::max(Start, End), Length); |
713 | return StringRef(Data + Start, End - Start); |
714 | } |
715 | |
716 | /// Split into two substrings around the first occurrence of a separator |
717 | /// character. |
718 | /// |
719 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |
720 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |
721 | /// maximal. If \p Separator is not in the string, then the result is a |
722 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |
723 | /// |
724 | /// \param Separator The character to split on. |
725 | /// \returns The split substrings. |
726 | LLVM_NODISCARD |
727 | std::pair<StringRef, StringRef> split(char Separator) const { |
728 | return split(StringRef(&Separator, 1)); |
729 | } |
730 | |
731 | /// Split into two substrings around the first occurrence of a separator |
732 | /// string. |
733 | /// |
734 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |
735 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |
736 | /// maximal. If \p Separator is not in the string, then the result is a |
737 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |
738 | /// |
739 | /// \param Separator - The string to split on. |
740 | /// \return - The split substrings. |
741 | LLVM_NODISCARD |
742 | std::pair<StringRef, StringRef> split(StringRef Separator) const { |
743 | size_t Idx = find(Separator); |
744 | if (Idx == npos) |
745 | return std::make_pair(*this, StringRef()); |
746 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |
747 | } |
748 | |
749 | /// Split into two substrings around the last occurrence of a separator |
750 | /// string. |
751 | /// |
752 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |
753 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |
754 | /// minimal. If \p Separator is not in the string, then the result is a |
755 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |
756 | /// |
757 | /// \param Separator - The string to split on. |
758 | /// \return - The split substrings. |
759 | LLVM_NODISCARD |
760 | std::pair<StringRef, StringRef> rsplit(StringRef Separator) const { |
761 | size_t Idx = rfind(Separator); |
762 | if (Idx == npos) |
763 | return std::make_pair(*this, StringRef()); |
764 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |
765 | } |
766 | |
767 | /// Split into substrings around the occurrences of a separator string. |
768 | /// |
769 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |
770 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |
771 | /// elements are added to A. |
772 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |
773 | /// still count when considering \p MaxSplit |
774 | /// An useful invariant is that |
775 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |
776 | /// |
777 | /// \param A - Where to put the substrings. |
778 | /// \param Separator - The string to split on. |
779 | /// \param MaxSplit - The maximum number of times the string is split. |
780 | /// \param KeepEmpty - True if empty substring should be added. |
781 | void split(SmallVectorImpl<StringRef> &A, |
782 | StringRef Separator, int MaxSplit = -1, |
783 | bool KeepEmpty = true) const; |
784 | |
785 | /// Split into substrings around the occurrences of a separator character. |
786 | /// |
787 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |
788 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |
789 | /// elements are added to A. |
790 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |
791 | /// still count when considering \p MaxSplit |
792 | /// An useful invariant is that |
793 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |
794 | /// |
795 | /// \param A - Where to put the substrings. |
796 | /// \param Separator - The string to split on. |
797 | /// \param MaxSplit - The maximum number of times the string is split. |
798 | /// \param KeepEmpty - True if empty substring should be added. |
799 | void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, |
800 | bool KeepEmpty = true) const; |
801 | |
802 | /// Split into two substrings around the last occurrence of a separator |
803 | /// character. |
804 | /// |
805 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |
806 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |
807 | /// minimal. If \p Separator is not in the string, then the result is a |
808 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |
809 | /// |
810 | /// \param Separator - The character to split on. |
811 | /// \return - The split substrings. |
812 | LLVM_NODISCARD |
813 | std::pair<StringRef, StringRef> rsplit(char Separator) const { |
814 | return rsplit(StringRef(&Separator, 1)); |
815 | } |
816 | |
817 | /// Return string with consecutive \p Char characters starting from the |
818 | /// the left removed. |
819 | LLVM_NODISCARD |
820 | StringRef ltrim(char Char) const { |
821 | return drop_front(std::min(Length, find_first_not_of(Char))); |
822 | } |
823 | |
824 | /// Return string with consecutive characters in \p Chars starting from |
825 | /// the left removed. |
826 | LLVM_NODISCARD |
827 | StringRef ltrim(StringRef Chars = " \t\n\v\f\r" ) const { |
828 | return drop_front(std::min(Length, find_first_not_of(Chars))); |
829 | } |
830 | |
831 | /// Return string with consecutive \p Char characters starting from the |
832 | /// right removed. |
833 | LLVM_NODISCARD |
834 | StringRef rtrim(char Char) const { |
835 | return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); |
836 | } |
837 | |
838 | /// Return string with consecutive characters in \p Chars starting from |
839 | /// the right removed. |
840 | LLVM_NODISCARD |
841 | StringRef rtrim(StringRef Chars = " \t\n\v\f\r" ) const { |
842 | return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); |
843 | } |
844 | |
845 | /// Return string with consecutive \p Char characters starting from the |
846 | /// left and right removed. |
847 | LLVM_NODISCARD |
848 | StringRef trim(char Char) const { |
849 | return ltrim(Char).rtrim(Char); |
850 | } |
851 | |
852 | /// Return string with consecutive characters in \p Chars starting from |
853 | /// the left and right removed. |
854 | LLVM_NODISCARD |
855 | StringRef trim(StringRef Chars = " \t\n\v\f\r" ) const { |
856 | return ltrim(Chars).rtrim(Chars); |
857 | } |
858 | |
859 | /// @} |
860 | }; |
861 | |
/// A wrapper around a string literal that serves as a proxy for constructing
/// global tables of StringRefs with the length computed at compile time.
/// In order to avoid the invocation of a global constructor, StringLiteral
/// should *only* be used in a constexpr context, as such:
///
///   constexpr StringLiteral S("test");
///
class StringLiteral : public StringRef {
private:
  // Construct from a pointer and an explicit length, forwarding both to the
  // StringRef base unchanged. Private: only withInnerNUL() below uses it.
  constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
  }

public:
  // Construct from a character array of deduced size N; the resulting length
  // is N - 1, i.e. the final array element is not counted.
  //
  // When compiling with clang and the enable_if attribute is available, the
  // constructor is additionally constrained so that
  // __builtin_strlen(Str) == N - 1 must hold; otherwise it is rejected with
  // the message "invalid string literal". A literal with an embedded NUL
  // therefore cannot use this constructor under clang. The surrounding
  // pragmas silence clang's -Wgcc-compat diagnostic for the attribute.
  template <size_t N>
  constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
      __attribute((enable_if(__builtin_strlen(Str) == N - 1,
                             "invalid string literal" )))
#pragma clang diagnostic pop
#endif
      : StringRef(Str, N - 1) {
  }

  // Explicit construction for strings like "foo\0bar".
  // Goes through the private (pointer, length) constructor with length
  // N - 1, so the strlen-based constraint on the public constructor is not
  // applied.
  template <size_t N>
  static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
    return StringLiteral(Str, N - 1);
  }
};
893 | |
894 | /// @name StringRef Comparison Operators |
895 | /// @{ |
896 | |
897 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
898 | inline bool operator==(StringRef LHS, StringRef RHS) { |
899 | return LHS.equals(RHS); |
900 | } |
901 | |
902 | LLVM_ATTRIBUTE_ALWAYS_INLINE |
903 | inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } |
904 | |
905 | inline bool operator<(StringRef LHS, StringRef RHS) { |
906 | return LHS.compare(RHS) == -1; |
907 | } |
908 | |
909 | inline bool operator<=(StringRef LHS, StringRef RHS) { |
910 | return LHS.compare(RHS) != 1; |
911 | } |
912 | |
913 | inline bool operator>(StringRef LHS, StringRef RHS) { |
914 | return LHS.compare(RHS) == 1; |
915 | } |
916 | |
917 | inline bool operator>=(StringRef LHS, StringRef RHS) { |
918 | return LHS.compare(RHS) != -1; |
919 | } |
920 | |
921 | inline std::string &operator+=(std::string &buffer, StringRef string) { |
922 | return buffer.append(string.data(), string.size()); |
923 | } |
924 | |
925 | /// @} |
926 | |
/// Compute a hash_code for a StringRef.
///
/// Only the declaration appears here; hash_code is merely forward-declared
/// in this header, and the definition is provided elsewhere.
LLVM_NODISCARD
hash_code hash_value(StringRef S);
930 | |
931 | // StringRefs can be treated like a POD type. |
932 | template <typename T> struct isPodLike; |
933 | template <> struct isPodLike<StringRef> { static const bool value = true; }; |
934 | |
935 | } // end namespace llvm |
936 | |
937 | #endif // LLVM_ADT_STRINGREF_H |
938 | |