StringRef.h source code [llvm/llvm/include/llvm/ADT/StringRef.h]

1	//===- StringRef.h - Constant String Reference Wrapper ----------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_ADT_STRINGREF_H
10	#define LLVM_ADT_STRINGREF_H
11
12	#include "llvm/ADT/DenseMapInfo.h"
13	#include "llvm/ADT/STLFunctionalExtras.h"
14	#include "llvm/ADT/iterator_range.h"
15	#include "llvm/Support/Compiler.h"
16	#include <algorithm>
17	#include <cassert>
18	#include <cstddef>
19	#include <cstring>
20	#include <iterator>
21	#include <limits>
22	#include <string>
23	#include <string_view>
24	#include <type_traits>
25	#include <utility>
26
27	namespace llvm {
28
// Forward declarations of types that this header only names in declarations.
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
class StringRef;

/// Helper functions for StringRef::getAsInteger.
///
/// Each helper parses \p Str in the given \p Radix and stores the parsed value
/// in \p Result. StringRef::getAsInteger treats a \c true return value as a
/// parse failure.
bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
                          unsigned long long &Result);

bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

/// Helper functions for StringRef::consumeInteger.
///
/// These take \p Str by reference; StringRef::consumeInteger passes \c *this
/// and treats a \c true return value as a parse failure.
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                            unsigned long long &Result);
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
43
44	/// StringRef - Represent a constant reference to a string, i.e. a character
45	/// array and a length, which need not be null terminated.
46	///
47	/// This class does not own the string data, it is expected to be used in
48	/// situations where the character data resides in some other buffer, whose
49	/// lifetime extends past that of the StringRef. For this reason, it is not in
50	/// general safe to store a StringRef.
51	class LLVM_GSL_POINTER StringRef {
52	public:
53	static constexpr size_t npos = ~size_t(`0`);
54
55	using iterator = const char *;
56	using const_iterator = const char *;
57	using size_type = size_t;
58	using value_type = char;
59	using reverse_iterator = std::reverse_iterator<iterator>;
60	using const_reverse_iterator = std::reverse_iterator<const_iterator>;
61
62	private:
63	/// The start of the string, in an external buffer.
64	const char Data = nullptr*;
65
66	/// The length of the string.
67	size_t Length = `0`;
68
69	// Workaround memcmp issue with null pointers (undefined behavior)
70	// by providing a specialized version
71	static int compareMemory(const char Lhs, const* char *Rhs, size_t Length) {
72	if (Length == `0`) { return `0`; }
73	return ::memcmp(s1: Lhs,s2: Rhs,n: Length);
74	}
75
76	public:
77	/// @name Constructors
78	/// @{
79
80	/// Construct an empty string ref.
81	/implicit/ StringRef() = default;
82
83	/// Disable conversion from nullptr. This prevents things like
84	/// if (S == nullptr)
85	StringRef(std::nullptr_t) = delete;
86
87	/// Construct a string ref from a cstring.
88	/implicit/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND)
89	: Data(Str), Length(Str ?
90	// GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
91	#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
92	__builtin_strlen(Str)
93	#else
94	std::char_traits<char>::length(s: Str)
95	#endif
96	: `0`) {
97	}
98
99	/// Construct a string ref from a pointer and length.
100	/implicit/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND,
101	size_t length)
102	: Data(data), Length(length) {}
103
104	/// Construct a string ref from an std::string.
105	/implicit/ StringRef(const std::string &Str)
106	: Data(Str.data()), Length(Str.length()) {}
107
108	/// Construct a string ref from an std::string_view.
109	/implicit/ constexpr StringRef(std::string_view Str)
110	: Data(Str.data()), Length(Str.size()) {}
111
112	/// @}
113	/// @name Iterators
114	/// @{
115
116	iterator begin() const { return data(); }
117
118	iterator end() const { return data() + size(); }
119
120	reverse_iterator rbegin() const {
121	return std::make_reverse_iterator(i: end());
122	}
123
124	reverse_iterator rend() const {
125	return std::make_reverse_iterator(i: begin());
126	}
127
128	const unsigned char bytes_begin() const* {
129	return reinterpret_cast<const unsigned char *>(begin());
130	}
131	const unsigned char bytes_end() const* {
132	return reinterpret_cast<const unsigned char *>(end());
133	}
134	iterator_range<const unsigned char > bytes() const* {
135	return make_range(x: bytes_begin(), y: bytes_end());
136	}
137
138	/// @}
139	/// @name String Operations
140	/// @{
141
142	/// data - Get a pointer to the start of the string (which may not be null
143	/// terminated).
144	[[nodiscard]] constexpr const char data() const* { return Data; }
145
146	/// empty - Check if the string is empty.
147	[[nodiscard]] constexpr bool empty() const { return size() == `0`; }
148
149	/// size - Get the string size.
150	[[nodiscard]] constexpr size_t size() const { return Length; }
151
152	/// front - Get the first character in the string.
153	[[nodiscard]] char front() const {
154	assert(!empty());
155	return data()[`0`];
156	}
157
158	/// back - Get the last character in the string.
159	[[nodiscard]] char back() const {
160	assert(!empty());
161	return data()[size() - `1`];
162	}
163
164	// copy - Allocate copy in Allocator and return StringRef to it.
165	template <typename Allocator>
166	[[nodiscard]] StringRef copy(Allocator &A) const {
167	// Don't request a length 0 copy from the allocator.
168	if (empty())
169	return StringRef ();
170	char S = A.template Allocate<char*>(size());
171	std::copy(begin(), end(), S);
172	return StringRef (S, size());
173	}
174
175	/// Check for string equality, ignoring case.
176	[[nodiscard]] bool equals_insensitive(StringRef RHS) const {
177	return size() == RHS.size() && compare_insensitive(RHS) == `0`;
178	}
179
180	/// compare - Compare two strings; the result is negative, zero, or positive
181	/// if this string is lexicographically less than, equal to, or greater than
182	/// the \p RHS.
183	[[nodiscard]] int compare(StringRef RHS) const {
184	// Check the prefix for a mismatch.
185	if (int Res =
186	compareMemory(Lhs: data(), Rhs: RHS.data(), Length: std::min(a: size(), b: RHS.size())))
187	return Res < `0` ? -`1` : `1`;
188
189	// Otherwise the prefixes match, so we only need to check the lengths.
190	if (size() == RHS.size())
191	return `0`;
192	return size() < RHS.size() ? -`1` : `1`;
193	}
194
195	/// Compare two strings, ignoring case.
196	[[nodiscard]] int compare_insensitive(StringRef RHS) const;
197
198	/// compare_numeric - Compare two strings, treating sequences of digits as
199	/// numbers.
200	[[nodiscard]] int compare_numeric(StringRef RHS) const;
201
202	/// Determine the edit distance between this string and another
203	/// string.
204	///
205	/// \param Other the string to compare this string against.
206	///
207	/// \param AllowReplacements whether to allow character
208	/// replacements (change one character into another) as a single
209	/// operation, rather than as two operations (an insertion and a
210	/// removal).
211	///
212	/// \param MaxEditDistance If non-zero, the maximum edit distance that
213	/// this routine is allowed to compute. If the edit distance will exceed
214	/// that maximum, returns \c MaxEditDistance+1.
215	///
216	/// \returns the minimum number of character insertions, removals,
217	/// or (if \p AllowReplacements is \c true) replacements needed to
218	/// transform one of the given strings into the other. If zero,
219	/// the strings are identical.
220	[[nodiscard]] unsigned edit_distance(StringRef Other,
221	bool AllowReplacements = true,
222	unsigned MaxEditDistance = `0`) const;
223
224	[[nodiscard]] unsigned
225	edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
226	unsigned MaxEditDistance = `0`) const;
227
228	/// str - Get the contents as an std::string.
229	[[nodiscard]] std::string str() const {
230	if (!data())
231	return std::string ();
232	return std::string (data(), size());
233	}
234
235	/// @}
236	/// @name Operator Overloads
237	/// @{
238
239	[[nodiscard]] char operator[](size_t Index) const {
240	assert(Index < size() && "Invalid index!");
241	return data()[Index];
242	}
243
244	/// Disallow accidental assignment from a temporary std::string.
245	///
246	/// The declaration here is extra complicated so that `stringRef = {}`
247	/// and `stringRef = "abc"` continue to select the move assignment operator.
248	template <typename T>
249	std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
250	operator=(T &&Str) = delete;
251
252	/// @}
253	/// @name Type Conversions
254	/// @{
255
256	constexpr operator std::string_view() const {
257	return std::string_view (data(), size());
258	}
259
260	/// @}
261	/// @name String Predicates
262	/// @{
263
264	/// Check if this string starts with the given \p Prefix.
265	[[nodiscard]] bool starts_with(StringRef Prefix) const {
266	return size() >= Prefix.size() &&
267	compareMemory(Lhs: data(), Rhs: Prefix.data(), Length: Prefix.size()) == `0`;
268	}
269	[[nodiscard]] bool starts_with(char Prefix) const {
270	return !empty() && front() == Prefix;
271	}
272
273	/// Check if this string starts with the given \p Prefix, ignoring case.
274	[[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const;
275
276	/// Check if this string ends with the given \p Suffix.
277	[[nodiscard]] bool ends_with(StringRef Suffix) const {
278	return size() >= Suffix.size() &&
279	compareMemory(Lhs: end() - Suffix.size(), Rhs: Suffix.data(),
280	Length: Suffix.size()) == `0`;
281	}
282	[[nodiscard]] bool ends_with(char Suffix) const {
283	return !empty() && back() == Suffix;
284	}
285
286	/// Check if this string ends with the given \p Suffix, ignoring case.
287	[[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const;
288
289	/// @}
290	/// @name String Searching
291	/// @{
292
293	/// Search for the first character \p C in the string.
294	///
295	/// \returns The index of the first occurrence of \p C, or npos if not
296	/// found.
297	[[nodiscard]] size_t find(char C, size_t From = `0`) const {
298	return std::string_view(*this).find(c: C, pos: From);
299	}
300
301	/// Search for the first character \p C in the string, ignoring case.
302	///
303	/// \returns The index of the first occurrence of \p C, or npos if not
304	/// found.
305	[[nodiscard]] size_t find_insensitive(char C, size_t From = `0`) const;
306
307	/// Search for the first character satisfying the predicate \p F
308	///
309	/// \returns The index of the first character satisfying \p F starting from
310	/// \p From, or npos if not found.
311	[[nodiscard]] size_t find_if(function_ref<bool(char)> F,
312	size_t From = `0`) const {
313	StringRef S = drop_front(N: From);
314	while (!S.empty()) {
315	if (F (S.front()))
316	return size() - S.size();
317	S = S.drop_front();
318	}
319	return npos;
320	}
321
322	/// Search for the first character not satisfying the predicate \p F
323	///
324	/// \returns The index of the first character not satisfying \p F starting
325	/// from \p From, or npos if not found.
326	[[nodiscard]] size_t find_if_not(function_ref<bool(char)> F,
327	size_t From = `0`) const {
328	return find_if(F: [F](char c) { return !F (c); }, From);
329	}
330
331	/// Search for the first string \p Str in the string.
332	///
333	/// \returns The index of the first occurrence of \p Str, or npos if not
334	/// found.
335	[[nodiscard]] size_t find(StringRef Str, size_t From = `0`) const;
336
337	/// Search for the first string \p Str in the string, ignoring case.
338	///
339	/// \returns The index of the first occurrence of \p Str, or npos if not
340	/// found.
341	[[nodiscard]] size_t find_insensitive(StringRef Str, size_t From = `0`) const;
342
343	/// Search for the last character \p C in the string.
344	///
345	/// \returns The index of the last occurrence of \p C, or npos if not
346	/// found.
347	[[nodiscard]] size_t rfind(char C, size_t From = npos) const {
348	size_t I = std::min(a: From, b: size());
349	while (I) {
350	--I;
351	if (data()[I] == C)
352	return I;
353	}
354	return npos;
355	}
356
357	/// Search for the last character \p C in the string, ignoring case.
358	///
359	/// \returns The index of the last occurrence of \p C, or npos if not
360	/// found.
361	[[nodiscard]] size_t rfind_insensitive(char C, size_t From = npos) const;
362
363	/// Search for the last string \p Str in the string.
364	///
365	/// \returns The index of the last occurrence of \p Str, or npos if not
366	/// found.
367	[[nodiscard]] size_t rfind(StringRef Str) const;
368
369	/// Search for the last string \p Str in the string, ignoring case.
370	///
371	/// \returns The index of the last occurrence of \p Str, or npos if not
372	/// found.
373	[[nodiscard]] size_t rfind_insensitive(StringRef Str) const;
374
375	/// Find the first character in the string that is \p C, or npos if not
376	/// found. Same as find.
377	[[nodiscard]] size_t find_first_of(char C, size_t From = `0`) const {
378	return find(C, From);
379	}
380
381	/// Find the first character in the string that is in \p Chars, or npos if
382	/// not found.
383	///
384	/// Complexity: O(size() + Chars.size())
385	[[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = `0`) const;
386
387	/// Find the first character in the string that is not \p C or npos if not
388	/// found.
389	[[nodiscard]] size_t find_first_not_of(char C, size_t From = `0`) const;
390
391	/// Find the first character in the string that is not in the string
392	/// \p Chars, or npos if not found.
393	///
394	/// Complexity: O(size() + Chars.size())
395	[[nodiscard]] size_t find_first_not_of(StringRef Chars,
396	size_t From = `0`) const;
397
398	/// Find the last character in the string that is \p C, or npos if not
399	/// found.
400	[[nodiscard]] size_t find_last_of(char C, size_t From = npos) const {
401	return rfind(C, From);
402	}
403
404	/// Find the last character in the string that is in \p C, or npos if not
405	/// found.
406	///
407	/// Complexity: O(size() + Chars.size())
408	[[nodiscard]] size_t find_last_of(StringRef Chars,
409	size_t From = npos) const;
410
411	/// Find the last character in the string that is not \p C, or npos if not
412	/// found.
413	[[nodiscard]] size_t find_last_not_of(char C, size_t From = npos) const;
414
415	/// Find the last character in the string that is not in \p Chars, or
416	/// npos if not found.
417	///
418	/// Complexity: O(size() + Chars.size())
419	[[nodiscard]] size_t find_last_not_of(StringRef Chars,
420	size_t From = npos) const;
421
422	/// Return true if the given string is a substring of this, and false*
423	/// otherwise.
424	[[nodiscard]] bool contains(StringRef Other) const {
425	return find(Str: Other) != npos;
426	}
427
428	/// Return true if the given character is contained in this, and false*
429	/// otherwise.
430	[[nodiscard]] bool contains(char C) const {
431	return find_first_of(C) != npos;
432	}
433
434	/// Return true if the given string is a substring of this, and false*
435	/// otherwise.
436	[[nodiscard]] bool contains_insensitive(StringRef Other) const {
437	return find_insensitive(Str: Other) != npos;
438	}
439
440	/// Return true if the given character is contained in this, and false*
441	/// otherwise.
442	[[nodiscard]] bool contains_insensitive(char C) const {
443	return find_insensitive(C) != npos;
444	}
445
446	/// @}
447	/// @name Helpful Algorithms
448	/// @{
449
450	/// Return the number of occurrences of \p C in the string.
451	[[nodiscard]] size_t count(char C) const {
452	size_t Count = `0`;
453	for (size_t I = `0`; I != size(); ++I)
454	if (data()[I] == C)
455	++Count;
456	return Count;
457	}
458
459	/// Return the number of non-overlapped occurrences of \p Str in
460	/// the string.
461	size_t count(StringRef Str) const;
462
463	/// Parse the current string as an integer of the specified radix. If
464	/// \p Radix is specified as zero, this does radix autosensing using
465	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
466	///
467	/// If the string is invalid or if only a subset of the string is valid,
468	/// this returns true to signify the error. The string is considered
469	/// erroneous if empty or if it overflows T.
470	template <typename T> bool getAsInteger(unsigned Radix, T &Result) const {
471	if constexpr (std::numeric_limits<T>::is_signed) {
472	long long LLVal;
473	if (getAsSignedInteger(Str: *this, Radix, Result&: LLVal) \|\|
474	static_cast<T>(LLVal) != LLVal)
475	return true;
476	Result = LLVal;
477	} else {
478	unsigned long long ULLVal;
479	// The additional cast to unsigned long long is required to avoid the
480	// Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
481	// 'unsigned __int64' when instantiating getAsInteger with T = bool.
482	if (getAsUnsignedInteger(Str: *this, Radix, Result&: ULLVal) \|\|
483	static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
484	return true;
485	Result = ULLVal;
486	}
487	return false;
488	}
489
490	/// Parse the current string as an integer of the specified radix. If
491	/// \p Radix is specified as zero, this does radix autosensing using
492	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
493	///
494	/// If the string does not begin with a number of the specified radix,
495	/// this returns true to signify the error. The string is considered
496	/// erroneous if empty or if it overflows T.
497	/// The portion of the string representing the discovered numeric value
498	/// is removed from the beginning of the string.
499	template <typename T> bool consumeInteger(unsigned Radix, T &Result) {
500	if constexpr (std::numeric_limits<T>::is_signed) {
501	long long LLVal;
502	if (consumeSignedInteger(Str&: *this, Radix, Result&: LLVal) \|\|
503	static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
504	return true;
505	Result = LLVal;
506	} else {
507	unsigned long long ULLVal;
508	if (consumeUnsignedInteger(Str&: *this, Radix, Result&: ULLVal) \|\|
509	static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
510	return true;
511	Result = ULLVal;
512	}
513	return false;
514	}
515
516	/// Parse the current string as an integer of the specified \p Radix, or of
517	/// an autosensed radix if the \p Radix given is 0. The current value in
518	/// \p Result is discarded, and the storage is changed to be wide enough to
519	/// store the parsed integer.
520	///
521	/// \returns true if the string does not solely consist of a valid
522	/// non-empty number in the appropriate base.
523	///
524	/// APInt::fromString is superficially similar but assumes the
525	/// string is well-formed in the given radix.
526	bool getAsInteger(unsigned Radix, APInt &Result) const;
527
528	/// Parse the current string as an integer of the specified \p Radix. If
529	/// \p Radix is specified as zero, this does radix autosensing using
530	/// extended C rules: 0 is octal, 0x is hex, 0b is binary.
531	///
532	/// If the string does not begin with a number of the specified radix,
533	/// this returns true to signify the error. The string is considered
534	/// erroneous if empty.
535	/// The portion of the string representing the discovered numeric value
536	/// is removed from the beginning of the string.
537	bool consumeInteger(unsigned Radix, APInt &Result);
538
539	/// Parse the current string as an IEEE double-precision floating
540	/// point value. The string must be a well-formed double.
541	///
542	/// If \p AllowInexact is false, the function will fail if the string
543	/// cannot be represented exactly. Otherwise, the function only fails
544	/// in case of an overflow or underflow, or an invalid floating point
545	/// representation.
546	bool getAsDouble(double &Result, bool AllowInexact = true) const;
547
548	/// @}
549	/// @name String Operations
550	/// @{
551
552	// Convert the given ASCII string to lowercase.
553	[[nodiscard]] std::string lower() const;
554
555	/// Convert the given ASCII string to uppercase.
556	[[nodiscard]] std::string upper() const;
557
558	/// @}
559	/// @name Substring Operations
560	/// @{
561
562	/// Return a reference to the substring from [Start, Start + N).
563	///
564	/// \param Start The index of the starting character in the substring; if
565	/// the index is npos or greater than the length of the string then the
566	/// empty substring will be returned.
567	///
568	/// \param N The number of characters to included in the substring. If N
569	/// exceeds the number of characters remaining in the string, the string
570	/// suffix (starting with \p Start) will be returned.
571	[[nodiscard]] constexpr StringRef substr(size_t Start,
572	size_t N = npos) const {
573	Start = std::min(a: Start, b: size());
574	return StringRef (data() + Start, std::min(a: N, b: size() - Start));
575	}
576
577	/// Return a StringRef equal to 'this' but with only the first \p N
578	/// elements remaining. If \p N is greater than the length of the
579	/// string, the entire string is returned.
580	[[nodiscard]] StringRef take_front(size_t N = `1`) const {
581	if (N >= size())
582	return *this;
583	return drop_back(N: size() - N);
584	}
585
586	/// Return a StringRef equal to 'this' but with only the last \p N
587	/// elements remaining. If \p N is greater than the length of the
588	/// string, the entire string is returned.
589	[[nodiscard]] StringRef take_back(size_t N = `1`) const {
590	if (N >= size())
591	return *this;
592	return drop_front(N: size() - N);
593	}
594
595	/// Return the longest prefix of 'this' such that every character
596	/// in the prefix satisfies the given predicate.
597	[[nodiscard]] StringRef take_while(function_ref<bool(char)> F) const {
598	return substr(Start: `0`, N: find_if_not(F));
599	}
600
601	/// Return the longest prefix of 'this' such that no character in
602	/// the prefix satisfies the given predicate.
603	[[nodiscard]] StringRef take_until(function_ref<bool(char)> F) const {
604	return substr(Start: `0`, N: find_if(F));
605	}
606
607	/// Return a StringRef equal to 'this' but with the first \p N elements
608	/// dropped.
609	[[nodiscard]] StringRef drop_front(size_t N = `1`) const {
610	assert(size() >= N && "Dropping more elements than exist");
611	return substr(Start: N);
612	}
613
614	/// Return a StringRef equal to 'this' but with the last \p N elements
615	/// dropped.
616	[[nodiscard]] StringRef drop_back(size_t N = `1`) const {
617	assert(size() >= N && "Dropping more elements than exist");
618	return substr(Start: `0`, N: size()-N);
619	}
620
621	/// Return a StringRef equal to 'this', but with all characters satisfying
622	/// the given predicate dropped from the beginning of the string.
623	[[nodiscard]] StringRef drop_while(function_ref<bool(char)> F) const {
624	return substr(Start: find_if_not(F));
625	}
626
627	/// Return a StringRef equal to 'this', but with all characters not
628	/// satisfying the given predicate dropped from the beginning of the string.
629	[[nodiscard]] StringRef drop_until(function_ref<bool(char)> F) const {
630	return substr(Start: find_if(F));
631	}
632
633	/// Returns true if this StringRef has the given prefix and removes that
634	/// prefix.
635	bool consume_front(StringRef Prefix) {
636	if (!starts_with(Prefix))
637	return false;
638
639	*this = substr(Start: Prefix.size());
640	return true;
641	}
642
643	/// Returns true if this StringRef has the given prefix, ignoring case,
644	/// and removes that prefix.
645	bool consume_front_insensitive(StringRef Prefix) {
646	if (!starts_with_insensitive(Prefix))
647	return false;
648
649	*this = substr(Start: Prefix.size());
650	return true;
651	}
652
653	/// Returns true if this StringRef has the given suffix and removes that
654	/// suffix.
655	bool consume_back(StringRef Suffix) {
656	if (!ends_with(Suffix))
657	return false;
658
659	*this = substr(Start: `0`, N: size() - Suffix.size());
660	return true;
661	}
662
663	/// Returns true if this StringRef has the given suffix, ignoring case,
664	/// and removes that suffix.
665	bool consume_back_insensitive(StringRef Suffix) {
666	if (!ends_with_insensitive(Suffix))
667	return false;
668
669	*this = substr(Start: `0`, N: size() - Suffix.size());
670	return true;
671	}
672
673	/// Return a reference to the substring from [Start, End).
674	///
675	/// \param Start The index of the starting character in the substring; if
676	/// the index is npos or greater than the length of the string then the
677	/// empty substring will be returned.
678	///
679	/// \param End The index following the last character to include in the
680	/// substring. If this is npos or exceeds the number of characters
681	/// remaining in the string, the string suffix (starting with \p Start)
682	/// will be returned. If this is less than \p Start, an empty string will
683	/// be returned.
684	[[nodiscard]] StringRef slice(size_t Start, size_t End) const {
685	Start = std::min(a: Start, b: size());
686	End = std::clamp(val: End, lo: Start, hi: size());
687	return StringRef (data() + Start, End - Start);
688	}
689
690	/// Split into two substrings around the first occurrence of a separator
691	/// character.
692	///
693	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
694	/// such that (this == LHS + Separator + RHS) is true and RHS is*
695	/// maximal. If \p Separator is not in the string, then the result is a
696	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
697	///
698	/// \param Separator The character to split on.
699	/// \returns The split substrings.
700	[[nodiscard]] std::pair<StringRef, StringRef> split(char Separator) const {
701	return split(Separator: StringRef (&Separator, `1`));
702	}
703
704	/// Split into two substrings around the first occurrence of a separator
705	/// string.
706	///
707	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
708	/// such that (this == LHS + Separator + RHS) is true and RHS is*
709	/// maximal. If \p Separator is not in the string, then the result is a
710	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
711	///
712	/// \param Separator - The string to split on.
713	/// \return - The split substrings.
714	[[nodiscard]] std::pair<StringRef, StringRef>
715	split(StringRef Separator) const {
716	size_t Idx = find(Str: Separator);
717	if (Idx == npos)
718	return std::make_pair(x: *this, y: StringRef ());
719	return std::make_pair(x: slice(Start: `0`, End: Idx), y: substr(Start: Idx + Separator.size()));
720	}
721
722	/// Split into two substrings around the last occurrence of a separator
723	/// string.
724	///
725	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
726	/// such that (this == LHS + Separator + RHS) is true and RHS is*
727	/// minimal. If \p Separator is not in the string, then the result is a
728	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
729	///
730	/// \param Separator - The string to split on.
731	/// \return - The split substrings.
732	[[nodiscard]] std::pair<StringRef, StringRef>
733	rsplit(StringRef Separator) const {
734	size_t Idx = rfind(Str: Separator);
735	if (Idx == npos)
736	return std::make_pair(x: *this, y: StringRef ());
737	return std::make_pair(x: slice(Start: `0`, End: Idx), y: substr(Start: Idx + Separator.size()));
738	}
739
740	/// Split into substrings around the occurrences of a separator string.
741	///
742	/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
743	/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
744	/// elements are added to A.
745	/// If \p KeepEmpty is false, empty strings are not added to \p A. They
746	/// still count when considering \p MaxSplit
747	/// An useful invariant is that
748	/// Separator.join(A) == this if MaxSplit == -1 and KeepEmpty == true*
749	///
750	/// \param A - Where to put the substrings.
751	/// \param Separator - The string to split on.
752	/// \param MaxSplit - The maximum number of times the string is split.
753	/// \param KeepEmpty - True if empty substring should be added.
754	void split(SmallVectorImpl<StringRef> &A,
755	StringRef Separator, int MaxSplit = -`1`,
756	bool KeepEmpty = true) const;
757
758	/// Split into substrings around the occurrences of a separator character.
759	///
760	/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
761	/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
762	/// elements are added to A.
763	/// If \p KeepEmpty is false, empty strings are not added to \p A. They
764	/// still count when considering \p MaxSplit
765	/// An useful invariant is that
766	/// Separator.join(A) == this if MaxSplit == -1 and KeepEmpty == true*
767	///
768	/// \param A - Where to put the substrings.
769	/// \param Separator - The string to split on.
770	/// \param MaxSplit - The maximum number of times the string is split.
771	/// \param KeepEmpty - True if empty substring should be added.
772	void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -`1`,
773	bool KeepEmpty = true) const;
774
775	/// Split into two substrings around the last occurrence of a separator
776	/// character.
777	///
778	/// If \p Separator is in the string, then the result is a pair (LHS, RHS)
779	/// such that (this == LHS + Separator + RHS) is true and RHS is*
780	/// minimal. If \p Separator is not in the string, then the result is a
781	/// pair (LHS, RHS) where (this == LHS) and (RHS == "").*
782	///
783	/// \param Separator - The character to split on.
784	/// \return - The split substrings.
785	[[nodiscard]] std::pair<StringRef, StringRef> rsplit(char Separator) const {
786	return rsplit(Separator: StringRef (&Separator, `1`));
787	}
788
789	/// Return string with consecutive \p Char characters starting from the
790	/// the left removed.
791	[[nodiscard]] StringRef ltrim(char Char) const {
792	return drop_front(N: std::min(a: size(), b: find_first_not_of(C: Char)));
793	}
794
795	/// Return string with consecutive characters in \p Chars starting from
796	/// the left removed.
797	[[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
798	return drop_front(N: std::min(a: size(), b: find_first_not_of(Chars)));
799	}
800
801	/// Return string with consecutive \p Char characters starting from the
802	/// right removed.
803	[[nodiscard]] StringRef rtrim(char Char) const {
804	return drop_back(N: size() - std::min(a: size(), b: find_last_not_of(C: Char) + `1`));
805	}
806
807	/// Return string with consecutive characters in \p Chars starting from
808	/// the right removed.
809	[[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
810	return drop_back(N: size() - std::min(a: size(), b: find_last_not_of(Chars) + `1`));
811	}
812
813	/// Return string with consecutive \p Char characters starting from the
814	/// left and right removed.
815	[[nodiscard]] StringRef trim(char Char) const {
816	return ltrim(Char).rtrim(Char);
817	}
818
819	/// Return string with consecutive characters in \p Chars starting from
820	/// the left and right removed.
821	[[nodiscard]] StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
822	return ltrim(Chars).rtrim(Chars);
823	}
824
825	/// Detect the line ending style of the string.
826	///
827	/// If the string contains a line ending, return the line ending character
828	/// sequence that is detected. Otherwise return '\n' for unix line endings.
829	///
830	/// \return - The line ending character sequence.
831	[[nodiscard]] StringRef detectEOL() const {
832	size_t Pos = find(C: `'\r'`);
833	if (Pos == npos) {
834	// If there is no carriage return, assume unix
835	return "\n";
836	}
837	if (Pos + `1` < size() && data()[Pos + `1`] == `'\n'`)
838	return "\r\n"; // Windows
839	if (Pos > `0` && data()[Pos - `1`] == `'\n'`)
840	return "\n\r"; // You monster!
841	return "\r"; // Classic Mac
842	}
843	/// @}
844	};
845
846	/// A wrapper around a string literal that serves as a proxy for constructing
847	/// global tables of StringRefs with the length computed at compile time.
848	/// In order to avoid the invocation of a global constructor, StringLiteral
849	/// should only* be used in a constexpr context, as such:*
850	///
851	/// constexpr StringLiteral S("test");
852	///
853	class StringLiteral : public StringRef {
854	private:
855	constexpr StringLiteral(const char *Str, size_t N) : StringRef (Str, N) {
856	}
857
858	public:
859	template <size_t N>
860	constexpr StringLiteral(const char (&Str)[N])
861	#if defined(__clang__) && __has_attribute(enable_if)
862	#pragma clang diagnostic push
863	#pragma clang diagnostic ignored "-Wgcc-compat"
864	__attribute((enable_if(__builtin_strlen(Str) == N - `1`,
865	"invalid string literal")))
866	#pragma clang diagnostic pop
867	#endif
868	: StringRef(Str, N - `1`) {
869	}
870
871	// Explicit construction for strings like "foo\0bar".
872	template <size_t N>
873	static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
874	return StringLiteral(Str, N - `1`);
875	}
876	};
877
878	/// @name StringRef Comparison Operators
879	/// @{
880
881	inline bool operator==(StringRef LHS, StringRef RHS) {
882	if (LHS.size() != RHS.size())
883	return false;
884	if (LHS.empty())
885	return true;
886	return ::memcmp(s1: LHS.data(), s2: RHS.data(), n: LHS.size()) == `0`;
887	}
888
889	inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
890
891	inline bool operator<(StringRef LHS, StringRef RHS) {
892	return LHS.compare(RHS) < `0`;
893	}
894
895	inline bool operator<=(StringRef LHS, StringRef RHS) {
896	return LHS.compare(RHS) <= `0`;
897	}
898
899	inline bool operator>(StringRef LHS, StringRef RHS) {
900	return LHS.compare(RHS) > `0`;
901	}
902
903	inline bool operator>=(StringRef LHS, StringRef RHS) {
904	return LHS.compare(RHS) >= `0`;
905	}
906
907	inline std::string &operator+=(std::string &buffer, StringRef string) {
908	return buffer.append(s: string.data(), n: string.size());
909	}
910
911	/// @}
912
913	/// Compute a hash_code for a StringRef.
914	[[nodiscard]] hash_code hash_value(StringRef S);
915
916	// Provide DenseMapInfo for StringRefs.
917	template <> struct DenseMapInfo<StringRef, void> {
918	static inline StringRef getEmptyKey() {
919	return StringRef (
920	reinterpret_cast<const char >(~static_cast*<uintptr_t>(`0`)), `0`);
921	}
922
923	static inline StringRef getTombstoneKey() {
924	return StringRef (
925	reinterpret_cast<const char >(~static_cast*<uintptr_t>(`1`)), `0`);
926	}
927
928	static unsigned getHashValue(StringRef Val);
929
930	static bool isEqual(StringRef LHS, StringRef RHS) {
931	if (RHS.data() == getEmptyKey().data())
932	return LHS.data() == getEmptyKey().data();
933	if (RHS.data() == getTombstoneKey().data())
934	return LHS.data() == getTombstoneKey().data();
935	return LHS == RHS;
936	}
937	};
938
939	} // end namespace llvm
940
941	#endif // LLVM_ADT_STRINGREF_H
942

Browse the source code of llvm/llvm/include/llvm/ADT/StringRef.h