StringExtras.h source code [include/llvm-8/llvm/ADT/StringExtras.h]

1	//===- llvm/ADT/StringExtras.h - Useful string functions --------- C++ --===//
2	//
3	// The LLVM Compiler Infrastructure
4	//
5	// This file is distributed under the University of Illinois Open Source
6	// License. See LICENSE.TXT for details.
7	//
8	//===----------------------------------------------------------------------===//
9	//
10	// This file contains some functions that are useful when dealing with strings.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#ifndef LLVM_ADT_STRINGEXTRAS_H
15	#define LLVM_ADT_STRINGEXTRAS_H
16
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/SmallString.h"
19	#include "llvm/ADT/StringRef.h"
20	#include "llvm/ADT/Twine.h"
21	#include <cassert>
22	#include <cstddef>
23	#include <cstdint>
24	#include <cstdlib>
25	#include <cstring>
26	#include <iterator>
27	#include <string>
28	#include <utility>
29
30	namespace llvm {
31
// Forward declarations. Within this header these types are only named as
// reference parameters in function declarations (SplitString and the print*
// helpers), so their full definitions are not required here.
32	template<typename T> class SmallVectorImpl;
33	class raw_ostream;
34
/// hexdigit - Return the hexadecimal digit character for the value \p X.
///
/// \param X          value to convert; it should be less than 16 (the
///                   function does not check this).
/// \param LowerCase  when true, 10..15 map to 'a'..'f' instead of 'A'..'F'.
/// \returns '0'..'9' for \p X below 10, otherwise the matching letter.
inline char hexdigit(unsigned X, bool LowerCase = false) {
  const char HexChar = LowerCase ? 'a' : 'A';
  return X < 10 ? '0' + X : HexChar + X - 10;
}
41
42	/// Given an array of c-style strings terminated by a null pointer, construct
43	/// a vector of StringRefs representing the same strings without the terminating
44	/// null string.
45	inline std::vector<StringRef> toStringRefArray(const char *const *Strings) {
46	std::vector<StringRef> Result;
47	while (*Strings)
48	Result.push_back(*Strings++);
49	return Result;
50	}
51
52	/// Construct a string ref from a boolean.
53	inline StringRef toStringRef(bool B) { return StringRef (B ? "true" : "false"); }
54
55	/// Construct a string ref from an array ref of unsigned chars.
56	inline StringRef toStringRef(ArrayRef<uint8_t> Input) {
57	return StringRef (reinterpret_cast<const char *>(Input.begin()), Input.size());
58	}
59
60	/// Construct a string ref from an array ref of unsigned chars.
61	inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
62	return {Input.bytes_begin(), Input.bytes_end()};
63	}
64
/// Interpret the given character \p C as a hexadecimal digit and return its
/// value.
///
/// Both 'a'..'f' and 'A'..'F' are accepted for the values 10..15.
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
  if (C >= '0' && C <= '9') return C - '0';
  if (C >= 'a' && C <= 'f') return C - 'a' + 10U;
  if (C >= 'A' && C <= 'F') return C - 'A' + 10U;
  return -1U;
}
75
/// Checks if character \p C is one of the 10 decimal digits ('0'..'9').
inline bool isDigit(char C) { return C >= '0' && C <= '9'; }
78
79	/// Checks if character \p C is a hexadecimal numeric character.
80	inline bool isHexDigit(char C) { return hexDigitValue(C) != -`1U`; }
81
/// Checks if character \p C is a valid letter as classified by "C" locale,
/// i.e. lies in 'a'..'z' or 'A'..'Z'.
inline bool isAlpha(char C) {
  return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z');
}
86
87	/// Checks whether character \p C is either a decimal digit or an uppercase or
88	/// lowercase letter as classified by "C" locale.
89	inline bool isAlnum(char C) { return isAlpha(C) \|\| isDigit(C); }
90
/// Checks whether character \p C is valid ASCII (high bit is zero).
/// The comparison is done on the unsigned value so the result does not depend
/// on whether plain char is signed.
inline bool isASCII(char C) { return static_cast<unsigned char>(C) <= 127; }
93
94	/// Checks whether all characters in S are ASCII.
95	inline bool isASCII(llvm::StringRef S) {
96	for (char C : S)
97	if (LLVM_UNLIKELY(!isASCII(C)))
98	return false;
99	return true;
100	}
101
/// Checks whether character \p C is printable.
///
/// Locale-independent version of the C standard library isprint whose results
/// may differ on different platforms. Here "printable" means the unsigned
/// value of \p C lies in the inclusive range 0x20 (space) .. 0x7E ('~').
inline bool isPrint(char C) {
  unsigned char UC = static_cast<unsigned char>(C);
  return (0x20 <= UC) && (UC <= 0x7E);
}
110
/// Returns the corresponding lowercase character if \p x is uppercase
/// ('A'..'Z'); any other character is returned unchanged.
inline char toLower(char x) {
  if (x >= 'A' && x <= 'Z')
    return x - 'A' + 'a';
  return x;
}
117
/// Returns the corresponding uppercase character if \p x is lowercase
/// ('a'..'z'); any other character is returned unchanged.
inline char toUpper(char x) {
  if (x >= 'a' && x <= 'z')
    return x - 'a' + 'A';
  return x;
}
124
125	inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
126	char Buffer[`17`];
127	char *BufPtr = std::end(Buffer);
128
129	if (X == `0`) *--BufPtr = `'0'`;
130
131	while (X) {
132	unsigned char Mod = static_cast<unsigned char>(X) & `15`;
133	*--BufPtr = hexdigit(Mod, LowerCase);
134	X >>= `4`;
135	}
136
137	return std::string (BufPtr, std::end(Buffer));
138	}
139
140	/// Convert buffer \p Input to its hexadecimal representation.
141	/// The returned string is double the size of \p Input.
142	inline std::string toHex(StringRef Input, bool LowerCase = false) {
143	static const char *const LUT = "0123456789ABCDEF";
144	const uint8_t Offset = LowerCase ? `32` : `0`;
145	size_t Length = Input.size();
146
147	std::string Output;
148	Output.reserve(`2` * Length);
149	for (size_t i = `0`; i < Length; ++i) {
150	const unsigned char c = Input [i];
151	Output.push_back(LUT[c >> `4`] \| Offset);
152	Output.push_back(LUT[c & `15`] \| Offset);
153	}
154	return Output;
155	}
156
157	inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
158	return toHex(toStringRef(Input), LowerCase);
159	}
160
161	inline uint8_t hexFromNibbles(char MSB, char LSB) {
162	unsigned U1 = hexDigitValue(MSB);
163	unsigned U2 = hexDigitValue(LSB);
164	assert(U1 != -`1U` && U2 != -`1U`);
165
166	return static_cast<uint8_t>((U1 << `4`) \| U2);
167	}
168
169	/// Convert hexadecimal string \p Input to its binary representation.
170	/// The return string is half the size of \p Input.
171	inline std::string fromHex(StringRef Input) {
172	if (Input.empty())
173	return std::string ();
174
175	std::string Output;
176	Output.reserve((Input.size() + `1`) / `2`);
177	if (Input.size() % `2` == `1`) {
178	Output.push_back(hexFromNibbles(`'0'`, Input.front()));
179	Input = Input.drop_front();
180	}
181
182	assert(Input.size() % `2` == `0`);
183	while (!Input.empty()) {
184	uint8_t Hex = hexFromNibbles(Input [`0`], Input [`1`]);
185	Output.push_back(Hex);
186	Input = Input.drop_front(`2`);
187	}
188	return Output;
189	}
190
191	/// Convert the string \p S to an integer of the specified type using
192	/// the radix \p Base. If \p Base is 0, auto-detects the radix.
193	/// Returns true if the number was successfully converted, false otherwise.
194	template <typename N> bool to_integer(StringRef S, N &Num, unsigned Base = `0`) {
195	return !S.getAsInteger(Base, Num);
196	}
197
198	namespace detail {
199	template <typename N>
200	inline bool to_float(const Twine &T, N &Num, N (StrTo)(const* char , char* **)) {
201	SmallString<`32`> Storage;
202	StringRef S = T.toNullTerminatedStringRef(Storage);
203	char *End;
204	N Temp = StrTo(S.data(), &End);
205	if (*End != `'\0'`)
206	return false;
207	Num = Temp;
208	return true;
209	}
210	}
211
212	inline bool to_float(const Twine &T, float &Num) {
213	return detail::to_float(T, Num, strtof);
214	}
215
216	inline bool to_float(const Twine &T, double &Num) {
217	return detail::to_float(T, Num, strtod);
218	}
219
220	inline bool to_float(const Twine &T, long double &Num) {
221	return detail::to_float(T, Num, strtold);
222	}
223
/// Convert the unsigned value \p X to its decimal string representation.
///
/// \param X      magnitude to print.
/// \param isNeg  when true a '-' is prepended (also when \p X is zero).
inline std::string utostr(uint64_t X, bool isNeg = false) {
  // 20 digits for the largest uint64_t plus one slot for the sign. The buffer
  // is filled backwards from its end and is never NUL-terminated.
  char Buffer[21];
  char *BufPtr = std::end(Buffer);

  if (X == 0) *--BufPtr = '0';  // Handle special case...

  while (X) {
    *--BufPtr = '0' + char(X % 10);
    X /= 10;
  }

  if (isNeg) *--BufPtr = '-';   // Add negative sign...
  return std::string(BufPtr, std::end(Buffer));
}

/// Convert the signed value \p X to its decimal string representation.
inline std::string itostr(int64_t X) {
  if (X < 0)
    // Negate in the unsigned domain: evaluating -X on the signed value
    // overflows (undefined behavior) when X is the minimum int64_t.
    return utostr(static_cast<uint64_t>(1) + ~static_cast<uint64_t>(X), true);
  else
    return utostr(static_cast<uint64_t>(X));
}
245
246	/// StrInStrNoCase - Portable version of strcasestr. Case-insensitively
247	/// locates the first occurrence of string 's2' within string 's1'. Returns
248	/// the offset of s2 in s1 or npos if s2 cannot be found.
/// NOTE(review): the previous wording named the two operands in the opposite
/// order in its first sentence, contradicting the return-value sentence. The
/// definition is not part of this header; confirm which argument is the
/// searched string against the implementation.
249	StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2);
250
251	/// getToken - This function extracts one token from \p Source, ignoring any
252	/// leading characters that appear in the \p Delimiters string, and ending the
253	/// token at any of the characters that appear in the \p Delimiters string. If
254	/// there are no tokens in the source string, an empty string is returned.
255	/// The function returns a pair containing the extracted token and the
256	/// remaining tail string.
/// By default the delimiters are space, tab, newline, vertical tab, form feed
/// and carriage return. The definition lives outside this header.
257	std::pair<StringRef, StringRef> getToken(StringRef Source,
258	StringRef Delimiters = " \t\n\v\f\r");
259
260	/// SplitString - Split up the string \p Source according to the specified
261	/// \p Delimiters, appending the result fragments to \p OutFragments.
/// By default the delimiters are space, tab, newline, vertical tab, form feed
/// and carriage return. The definition lives outside this header.
262	void SplitString(StringRef Source,
263	SmallVectorImpl<StringRef> &OutFragments,
264	StringRef Delimiters = " \t\n\v\f\r");
265
266	/// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).
267	inline StringRef getOrdinalSuffix(unsigned Val) {
268	// It is critically important that we do this perfectly for
269	// user-written sequences with over 100 elements.
270	switch (Val % `100`) {
271	case `11`:
272	case `12`:
273	case `13`:
274	return "th";
275	default:
276	switch (Val % `10`) {
277	case `1`: return "st";
278	case `2`: return "nd";
279	case `3`: return "rd";
280	default: return "th";
281	}
282	}
283	}
284
285	/// Print each character of the string \p Name to \p Out, escaping it if it
286	/// is not printable or if it is an escape char.
/// The exact escape syntax is determined by the out-of-line definition, which
/// is not part of this header.
287	void printEscapedString(StringRef Name, raw_ostream &Out);
288
289	/// Print each character of the string \p String to \p Out, escaping HTML
290	/// special characters.
/// Which characters count as special is determined by the out-of-line
/// definition, which is not part of this header.
291	void printHTMLEscaped(StringRef String, raw_ostream &Out);
292
293	/// printLowerCase - Print each character of \p String to \p Out as lowercase if it is uppercase.
/// Defined out of line; the definition is not part of this header.
294	void printLowerCase(StringRef String, raw_ostream &Out);
295
296	namespace detail {
297
298	template <typename IteratorT>
299	inline std::string join_impl(IteratorT Begin, IteratorT End,
300	StringRef Separator, std::input_iterator_tag) {
301	std::string S;
302	if (Begin == End)
303	return S;
304
305	S += (*Begin);
306	while (++Begin != End) {
307	S += Separator;
308	S += (*Begin);
309	}
310	return S;
311	}
312
313	template <typename IteratorT>
314	inline std::string join_impl(IteratorT Begin, IteratorT End,
315	StringRef Separator, std::forward_iterator_tag) {
316	std::string S;
317	if (Begin == End)
318	return S;
319
320	size_t Len = (std::distance(Begin, End) - `1`) * Separator.size();
321	for (IteratorT I = Begin; I != End; ++I)
322	Len += (*Begin).size();
323	S.reserve(Len);
324	S += (*Begin);
325	while (++Begin != End) {
326	S += Separator;
327	S += (*Begin);
328	}
329	return S;
330	}
331
/// Recursion terminator for an empty item list: nothing is appended.
template <typename Sep>
inline void join_items_impl(std::string & /*Result*/, Sep /*Separator*/) {}

/// Last item of the list: append it without a trailing separator.
template <typename Sep, typename Arg>
inline void join_items_impl(std::string &Result, Sep /*Separator*/,
                            const Arg &Item) {
  Result += Item;
}

/// General case: append the first item followed by the separator, then
/// recurse on the remaining items.
template <typename Sep, typename Arg1, typename... Args>
inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1,
                            Args &&... Items) {
  (Result += A1) += Separator;
  join_items_impl(Result, Separator, std::forward<Args>(Items)...);
}
348
/// Size contribution of a single character item: always 1.
inline size_t join_one_item_size(char C) { return 1; }
/// Size contribution of a C string: its strlen, or 0 for a null pointer.
inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; }

/// Size contribution of any other item: whatever its size() member reports.
template <typename T> inline size_t join_one_item_size(const T &Str) {
  return Str.size();
}
355
/// Total size of an empty item list.
inline size_t join_items_size() { return 0; }

/// Total size of a single item.
template <typename A1> inline size_t join_items_size(const A1 &A) {
  return join_one_item_size(A);
}
/// Total size of several items: the first item's size plus, recursively, the
/// size of the rest. Separators are not counted here.
template <typename A1, typename... Args>
inline size_t join_items_size(const A1 &A, Args &&... Items) {
  return join_one_item_size(A) + join_items_size(std::forward<Args>(Items)...);
}
365
366	} // end namespace detail
367
368	/// Joins the strings in the range [Begin, End), adding Separator between
369	/// the elements.
370	template <typename IteratorT>
371	inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
372	using tag = typename std::iterator_traits<IteratorT>::iterator_category;
373	return detail::join_impl(Begin, End, Separator, tag());
374	}
375
376	/// Joins the strings in the range [R.begin(), R.end()), adding Separator
377	/// between the elements.
378	template <typename Range>
379	inline std::string join(Range &&R, StringRef Separator) {
380	return join(R.begin(), R.end(), Separator);
381	}
382
383	/// Joins the strings in the parameter pack \p Items, adding \p Separator
384	/// between the elements. All arguments must be implicitly convertible to
385	/// std::string, or there should be an overload of std::string::operator+=()
386	/// that accepts the argument explicitly.
387	template <typename Sep, typename... Args>
388	inline std::string join_items(Sep Separator, Args &&... Items) {
389	std::string Result;
390	if (sizeof...(Items) == `0`)
391	return Result;
392
393	size_t NS = detail::join_one_item_size(Separator);
394	size_t NI = detail::join_items_size(std::forward<Args>(Items)...);
395	Result.reserve(NI + (sizeof...(Items) - `1`) * NS + `1`);
396	detail::join_items_impl(Result, Separator, std::forward<Args>(Items)...);
397	return Result;
398	}
399
400	} // end namespace llvm
401
402	#endif // LLVM_ADT_STRINGEXTRAS_H
403

Browse the source code of include/llvm-8/llvm/ADT/StringExtras.h