sstring.h source code [CoreCLR/inc/sstring.h]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4	// ---------------------------------------------------------------------------
5	// SString.h (Safe String)
6	//
7
8	// ---------------------------------------------------------------------------
9
10	// ------------------------------------------------------------------------------------------
11	// SString is the "standard" string representation for the EE. It has two purposes:
12	// (1) it provides an easy-to-use, relatively efficient, string class for APIs to standardize
13	// on.
14	// (2) it completely encapsulates all "unsafe" string operations - that is, string operations
15	// which yield possible buffer overrun bugs. Typesafe use of this API should help guarantee
16	// safety.
17	//
18	// A SString is conceptually unicode, although the internal conversion might be delayed as long as possible
19	// Basically it's up to the implementation whether conversion takes place immediately or is delayed, and if
20	// delayed, what operations trigger the conversion.
21	//
22	// Note that anywhere you express a "position" in a string, it is in terms of the Unicode representation of the
23	// string.
24	//
25	// If you need a direct non-unicode representation, you will have to provide a fresh SString which can
26	// receive a conversion operation if necessary.
27	//
28	// The alternate encodings available are:
29	// 1. ASCII - string consisting entirely of ASCII (7 bit) characters. This is the only 1 byte encoding
30	// guaranteed to be fixed width. Such a string is also a valid instance of all the other 1 byte string
31	// representations, and we take advantage of this fact.
32	// 2. UTF-8 - standard multibyte unicode encoding.
33	// 3. ANSI - Potentially multibyte encoding using the ANSI page determined by GetACP().
34	//
35	// @todo: Note that we could also provide support for several other cases (but currently do not.)
36	// - Page specified by GetOEMCP() (OEM page)
37	// - Arbitrary page support
38	//
39	// @todo: argument & overflow/underflow checking needs to be added
40	// ------------------------------------------------------------------------------------------
41
42
43	#ifndef _SSTRING_H_
44	#define _SSTRING_H_
45
46	#include "utilcode.h"
47	#include "sbuffer.h"
48
49	// ==========================================================================================
50	// Documentational typedefs: use these to indicate specific representations of 8 bit strings:
51	// ==========================================================================================
52
53	// Note that LPCSTR means ASCII (7-bit) only!
54
55	typedef CHAR ASCII;
56	typedef ASCII *LPASCII;
57	typedef const ASCII *LPCASCII;
58
59	typedef CHAR ANSI;
60	typedef ANSI *LPANSI;
61	typedef const ANSI *LPCANSI;
62
63	typedef CHAR UTF8;
64	typedef UTF8 *LPUTF8;
65	typedef const UTF8 *LPCUTF8;
66
67	// ==========================================================================================
68	// SString is the base class for safe strings.
69	// ==========================================================================================
70
71
72	typedef DPTR(class SString) PTR_SString;
73	class SString : private SBuffer
74	{
75	friend struct _DacGlobals;
76
77	private:
78	enum Representation
79	{
80	// Note: bits are meaningful: xVS V == Variable? S == Single byte width?
81	REPRESENTATION_EMPTY = `0x00`, // 000
82	REPRESENTATION_UNICODE = `0x04`, // 100
83	REPRESENTATION_ASCII = `0x01`, // 001
84	REPRESENTATION_UTF8 = `0x03`, // 011
85	REPRESENTATION_ANSI = `0x07`, // 111
86
87	REPRESENTATION_VARIABLE_MASK = `0x02`,
88	REPRESENTATION_SINGLE_MASK = `0x01`,
89	REPRESENTATION_MASK = `0x07`,
90	};
91
92	// Minimum guess for Printf buffer size
93	const static COUNT_T MINIMUM_GUESS = `20`;
94
95
96	#ifdef _DEBUG
97	// Used to have a public ctor of this form - made it too easy to lose
98	// utf8 info by accident. Now you have to specify the representation type
99	// explicitly - this privator ctor prevents reinsertion of this ctor.
100	explicit SString(const ASCII *)
101	{
102	_ASSERTE(!"Don't call this.");
103	}
104	#endif
105
106	protected:
107	class Index;
108	class UIndex;
109
110	friend class Index;
111	friend class UIndex;
112
113	public:
114
115	// UIterator is character-level assignable.
116	class UIterator;
117
118	// CIterators/Iterator'string must be modified by SString APIs.
119	class CIterator;
120	class Iterator;
121
122	// Tokens for constructor overloads
123	enum tagUTF8Literal { Utf8Literal };
124	enum tagLiteral { Literal };
125	enum tagUTF8 { Utf8 };
126	enum tagANSI { Ansi };
127	enum tagASCII {Ascii };
128	#ifdef SSTRING_CONSOLECODEPAGE
129	enum tagCONSOLE { Console };
130	#endif
131
132	static void Startup();
133	static CHECK CheckStartup();
134
135	static const SString &Empty();
136
137	SString();
138
139	explicit SString(const SString &s);
140
141	SString(const SString &s1, const SString &s2);
142	SString(const SString &s1, const SString &s2, const SString &s3);
143	SString(const SString &s1, const SString &s2, const SString &s3, const SString &s4);
144	SString(const SString &s, const CIterator &i, COUNT_T length);
145	SString(const SString &s, const CIterator &start, const CIterator &end);
146	SString(const WCHAR *string);
147	SString(const WCHAR *string, COUNT_T count);
148	SString(enum tagASCII dummyTag, const ASCII *string);
149	SString(enum tagASCII dummyTag, const ASCII *string, COUNT_T count);
150	SString(enum tagUTF8 dummytag, const UTF8 *string);
151	SString(enum tagUTF8 dummytag, const UTF8 *string, COUNT_T count);
152	SString(enum tagANSI dummytag, const ANSI *string);
153	SString(enum tagANSI dummytag, const ANSI *string, COUNT_T count);
154	#ifdef SSTRING_CONSOLECODEPAGE
155	SString(enum tagCONSOLE dummytag, const CONSOLE *string);
156	SString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count);
157	#endif
158	SString(WCHAR character);
159
160	// NOTE: Literals MUST be read-only never-freed strings.
161	SString(enum tagLiteral dummytag, const CHAR *literal);
162	SString(enum tagUTF8Literal dummytag, const UTF8 *literal);
163	SString(enum tagLiteral dummytag, const WCHAR *literal);
164	SString(enum tagLiteral dummytag, const WCHAR *literal, COUNT_T count);
165
166	// Set this string to the concatenation of s1,s2,s3,s4
167	void Set(const SString &s);
168	void Set(const SString &s1, const SString &s2);
169	void Set(const SString &s1, const SString &s2, const SString &s3);
170	void Set(const SString &s1, const SString &s2, const SString &s3, const SString &s4);
171
172	// Set this string to the substring of s, starting at i, of length characters.
173	void Set(const SString &s, const CIterator &i, COUNT_T length);
174
175	// Set this string to the substring of s, starting at start and ending at end (exclusive)
176	void Set(const SString &s, const CIterator &start, const CIterator &end);
177
178	// Set this string to a copy of the given string
179	void Set(const WCHAR *string);
180	void SetASCII(const ASCII *string);
181	void SetUTF8(const UTF8 *string);
182	void SetANSI(const ANSI *string);
183	#ifdef SSTRING_CONSOLECODEPAGE
184	void SetConsole(const CONSOLE *string);
185	#endif
186
187	// Set this string to a copy of the first count chars of the given string
188	void Set(const WCHAR *string, COUNT_T count);
189
190	// Set this string to a prellocated copy of a given string.
191	// The caller is the owner of the bufffer and has to coordinate its lifetime.
192	void SetPreallocated(const WCHAR *string, COUNT_T count);
193
194	void SetASCII(const ASCII *string, COUNT_T count);
195
196	void SetUTF8(const UTF8 *string, COUNT_T count);
197	void SetANSI(const ANSI *string, COUNT_T count);
198	#ifdef SSTRING_CONSOLECODEPAGE
199	void SetConsole(const CONSOLE *string, COUNT_T count);
200	#endif
201
202	// Set this string to the unicode character
203	void Set(WCHAR character);
204
205	// Set this string to the UTF8 character
206	void SetUTF8(CHAR character);
207
208	// This this string to the given literal. We share the mem and don't make a copy.
209	void SetLiteral(const CHAR *literal);
210	void SetLiteral(const WCHAR *literal);
211
212	// ------------------------------------------------------------------
213	// Public operations
214	// ------------------------------------------------------------------
215
216	// Normalizes the string representation to unicode. This can be used to
217	// make basic read-only operations non-failing.
218	void Normalize() const;
219
220	// Return the number of characters in the string (excluding the terminating NULL).
221	COUNT_T GetCount() const;
222	BOOL IsEmpty() const;
223
224	// Return whether a single byte string has all characters which fit in the ASCII set.
225	// (Note that this will return FALSE if the string has been converted to unicode for any
226	// reason.)
227	BOOL IsASCII() const;
228
229	// !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!
230	//
231	// THIS IS NOT SUPPORTED FULLY ON WIN9x
232	// SString case-insensitive comparison is based off LCMapString,
233	// which does not work on characters outside the current OS code page.
234	//
235	// Case insensitive code in SString is primarily targeted at
236	// supporting path comparisons, which is supported correctly on 9x,
237	// since file system names are limited to the OS code page.
238	//
239	// !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!
240
241	// Compute a content-based hash value
242	ULONG Hash() const;
243	ULONG HashCaseInsensitive() const;
244	ULONG HashCaseInsensitive(LocaleID locale) const;
245
246	// Do a string comparison. Return 0 if the strings
247	// have the same value, -1 if this is "less than" s, or 1 if
248	// this is "greater than" s.
249	int Compare(const SString &s) const;
250	int CompareCaseInsensitive(const SString &s) const; // invariant locale
251	int CompareCaseInsensitive(const SString &s, LocaleID locale) const;
252
253	// Do a case sensitive string comparison. Return TRUE if the strings
254	// have the same value FALSE if not.
255	BOOL Equals(const SString &s) const;
256	BOOL EqualsCaseInsensitive(const SString &s) const; // invariant locale
257	BOOL EqualsCaseInsensitive(const SString &s, LocaleID locale) const;
258
259	// Match s to a portion of the string starting at the position.
260	// Return TRUE if the strings have the same value
261	// (regardless of representation), FALSE if not.
262	BOOL Match(const CIterator &i, const SString &s) const;
263	BOOL MatchCaseInsensitive(const CIterator &i, const SString &s) const; // invariant locale
264	BOOL MatchCaseInsensitive(const CIterator &i, const SString &s, LocaleID locale) const;
265
266	BOOL Match(const CIterator &i, WCHAR c) const;
267	BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c) const; // invariant locale
268	BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c, LocaleID locale) const;
269
270	// Like match, but advances the iterator past the match
271	// if successful
272	BOOL Skip(CIterator &i, const SString &s) const;
273	BOOL Skip(CIterator &i, WCHAR c) const;
274
275	// Start searching for a match of the given string, starting at
276	// the given iterator point.
277	// If a match exists, move the iterator to point to the nearest
278	// occurence of s in the string and return TRUE.
279	// If no match exists, return FALSE and leave the iterator unchanged.
280	BOOL Find(CIterator &i, const SString &s) const;
281	BOOL Find(CIterator &i, const WCHAR s) const*;
282	BOOL FindASCII(CIterator &i, const ASCII s) const*;
283	BOOL FindUTF8(CIterator &i, const UTF8 s) const*;
284	BOOL Find(CIterator &i, WCHAR c) const;
285
286	BOOL FindBack(CIterator &i, const SString &s) const;
287	BOOL FindBack(CIterator &i, const WCHAR s) const*;
288	BOOL FindBackASCII(CIterator &i, const ASCII s) const*;
289	BOOL FindBackUTF8(CIterator &i, const UTF8 s) const*;
290	BOOL FindBack(CIterator &i, WCHAR c) const;
291
292	// Returns TRUE if this string begins with the contents of s
293	BOOL BeginsWith(const SString &s) const;
294	BOOL BeginsWithCaseInsensitive(const SString &s) const; // invariant locale
295	BOOL BeginsWithCaseInsensitive(const SString &s, LocaleID locale) const;
296
297	// Returns TRUE if this string ends with the contents of s
298	BOOL EndsWith(const SString &s) const;
299	BOOL EndsWithCaseInsensitive(const SString &s) const; // invariant locale
300	BOOL EndsWithCaseInsensitive(const SString &s, LocaleID locale) const;
301
302	// Sets this string to an empty string "".
303	void Clear();
304
305	// Truncate the string to the iterator position
306	void Truncate(const Iterator &i);
307
308	// Append s to the end of this string.
309	void Append(const SString &s);
310	void Append(const WCHAR *s);
311	void AppendASCII(const CHAR *s);
312	void AppendUTF8(const CHAR *s);
313
314	// Append char c to the end of this string.
315	void Append(const WCHAR c);
316	void AppendUTF8(const CHAR c);
317
318	// Insert s into this string at the 'position'th character.
319	void Insert(const Iterator &i, const SString &s);
320	void Insert(const Iterator &i, const WCHAR *s);
321	void InsertASCII(const Iterator &i, const CHAR *s);
322	void InsertUTF8(const Iterator &i, const CHAR *s);
323
324	// Delete substring position + length
325	void Delete(const Iterator &i, COUNT_T length);
326
327	// Replace character at i with c
328	void Replace(const Iterator &i, WCHAR c);
329
330	// Replace substring at (i,i+length) with s
331	void Replace(const Iterator &i, COUNT_T length, const SString &s);
332
333	// Make sure that string buffer has room to grow
334	void Preallocate(COUNT_T characters) const;
335
336	// Shrink buffer size as much as possible (reallocate if necessary.)
337	void Trim() const;
338
339	// ------------------------------------------------------------------
340	// Iterators:
341	// ------------------------------------------------------------------
342
343	// SString splits iterators into two categories.
344	//
345	// CIterator and Iterator are cheap to create, but allow only read-only
346	// access to the string.
347	//
348	// UIterator forces a unicode conversion, but allows
349	// assignment to individual string characters. They are also a bit more
350	// efficient once created.
351
352	// ------------------------------------------------------------------
353	// UIterator:
354	// ------------------------------------------------------------------
355
356	protected:
357
358	class UIndex : public SBuffer::Index
359	{
360	friend class SString;
361	friend class Indexer<WCHAR, UIterator>;
362
363	protected:
364
365	UIndex();
366	UIndex(SString *string, SCOUNT_T index);
367	WCHAR &GetAt(SCOUNT_T delta) const;
368	void Skip(SCOUNT_T delta);
369	SCOUNT_T Subtract(const UIndex &i) const;
370	CHECK DoCheck(SCOUNT_T delta) const;
371
372	WCHAR GetUnicode() const*;
373	};
374
375	public:
376
377	class UIterator : public UIndex, public Indexer<WCHAR, UIterator>
378	{
379	friend class SString;
380
381	public:
382	UIterator()
383	{
384	}
385
386	UIterator(SString string, int* index)
387	: UIndex (string, index)
388	{
389	}
390	};
391
392	UIterator BeginUnicode();
393	UIterator EndUnicode();
394
395	// For CIterator & Iterator, we try our best to iterate the string without
396	// modifying it. (Currently, we do require an ASCII or Unicode string
397	// for simple WCHAR retrival, but you could imagine being more flexible
398	// going forward - perhaps even supporting iterating multibyte encodings
399	// directly.)
400	//
401	// Because of the runtime-changable nature of the string, CIterators
402	// require an extra member to record the character size. They also
403	// are unable to properly implement GetAt as required by the template
404	// (since there may not be a direct WCHAR pointer), so they provide
405	// further customization in a subclass.
406	//
407	// Normally the user expects to cast Iterators to CIterators transparently, so
408	// we provide a constructor on CIterator to support this.
409
410	protected:
411
412	class Index : public SBuffer::Index
413	{
414	friend class SString;
415
416	friend class Indexer<const WCHAR, CIterator>;
417	friend class Indexer<WCHAR, Iterator>;
418
419	protected:
420	int m_characterSizeShift;
421
422	Index();
423	Index(SString *string, SCOUNT_T index);
424	BYTE &GetAt(SCOUNT_T delta) const;
425	void Skip(SCOUNT_T delta);
426	SCOUNT_T Subtract(const Index &i) const;
427	CHECK DoCheck(SCOUNT_T delta) const;
428
429	void Resync(const SString string, BYTE ptr) const;
430
431	const WCHAR GetUnicode() const*;
432	const CHAR GetASCII() const*;
433
434	public:
435	// Note these should supercede the Indexer versions
436	// since this class comes first in the inheritence list
437	WCHAR operator() const*;
438	void operator->() const;
439	WCHAR operator[](int index) const;
440	};
441
442	public:
443
444	class CIterator : public Index, public Indexer<const WCHAR, CIterator>
445	{
446	friend class SString;
447
448	public:
449	const Iterator &ConstCast() const
450	{
451	return (const* Iterator )this*;
452	}
453
454	Iterator &ConstCast()
455	{
456	return (Iterator )this;
457	}
458
459	operator const SBuffer::CIterator &() const
460	{
461	return (const* SBuffer::CIterator )this*;
462	}
463
464	operator SBuffer::CIterator &()
465	{
466	return (SBuffer::CIterator )this;
467	}
468
469	CIterator()
470	{
471	}
472
473	CIterator(const SString string, int* index)
474	: Index (const_cast<SString *>(string), index)
475	{
476	}
477
478	// explicitly resolve these for gcc
479	WCHAR operator() const* { return Index::operator*(); }
480	void operator->() const { Index::operator->(); }
481	WCHAR operator[](int index) const { return Index::operator[](index); }
482	};
483
484	class Iterator : public Index, public Indexer<WCHAR, Iterator>
485	{
486	friend class SString;
487
488	public:
489	operator const CIterator &() const
490	{
491	return (const* CIterator )this*;
492	}
493
494	operator CIterator &()
495	{
496	return (CIterator )this;
497	}
498
499	operator const SBuffer::Iterator &() const
500	{
501	return (const* SBuffer::Iterator )this*;
502	}
503
504	operator SBuffer::Iterator &()
505	{
506	return (SBuffer::Iterator )this;
507	}
508
509	Iterator()
510	{
511	}
512
513	Iterator(SString string, int* index)
514	: Index (string, index)
515	{
516	SUPPORTS_DAC;
517	}
518
519	// explicitly resolve these for gcc
520	WCHAR operator() const* { return Index::operator*(); }
521	void operator->() const { Index::operator->(); }
522	WCHAR operator[](int index) const { return Index::operator[](index); }
523	};
524
525	CIterator Begin() const;
526	CIterator End() const;
527
528	Iterator Begin();
529	Iterator End();
530
531	// ------------------------------------------------------------------
532	// Conversion:
533	// ------------------------------------------------------------------
534
535	// Get a const pointer to the string in the current representation.
536	// This pointer can not be cached because it will become invalid if
537	// the SString changes representation or reallocates its buffer.
538
539	// You can always get a unicode string. This will force a conversion
540	// if necessary.
541	const WCHAR GetUnicode() const*;
542	const WCHAR GetUnicode(const* CIterator &i) const;
543
544	void LowerCase();
545	void UpperCase();
546
547	// Helper function to convert string in-place to lower-case (no allocation overhead for SString instance)
548	static void LowerCase(__inout_z LPWSTR wszString);
549
550	// These routines will use the given scratch string if necessary
551	// to perform a conversion to the desired representation
552
553	// Use a local declaration of InlineScratchBuffer or StackScratchBuffer for parameters of
554	// AbstractScratchBuffer.
555	class AbstractScratchBuffer;
556
557	// These routines will use the given scratch buffer if necessary
558	// to perform a conversion to the desired representation. Note that
559	// the lifetime of the pointer return is limited by BOTH the
560	// scratch string and the source (this) string.
561	//
562	// Typical usage:
563	//
564	// SString s = ...;*
565	// {
566	// StackScratchBuffer buffer;
567	// const UTF8 utf8 = s->GetUTF8(buffer);*
568	// CallFoo(utf8);
569	// }
570	// // No more pointers to returned buffer allowed.
571
572	const UTF8 GetUTF8(AbstractScratchBuffer &scratch) const*;
573	const UTF8 GetUTF8(AbstractScratchBuffer &scratch, COUNT_T pcbUtf8) const;
574	const ANSI GetANSI(AbstractScratchBuffer &scratch) const*;
575	#ifdef SSTRING_CONSOLECODEPAGE
576	const CONSOLE GetConsole(AbstractScratchBuffer &scratch) const*;
577	#endif
578
579	// Used when the representation is known, throws if the representation doesn't match
580	const UTF8 GetUTF8NoConvert() const*;
581
582	// Converts/copies into the given output string
583	void ConvertToUnicode(SString &dest) const;
584	void ConvertToANSI(SString &dest) const;
585	COUNT_T ConvertToUTF8(SString &dest) const;
586	#ifdef SSTRING_CONSOLECODEPAGE
587	void ConvertToConsole(SString &dest) const;
588	#endif
589
590	//-------------------------------------------------------------------
591	// Accessing the string contents directly
592	//-------------------------------------------------------------------
593
594	// To write directly to the SString's underlying buffer:
595	// 1) Call OpenXXXBuffer() and pass it the count of characters
596	// you need. (Not including the null-terminator).
597	// 2) That returns a pointer to the raw buffer which you can write to.
598	// 3) When you are done writing to the pointer, call CloseBuffer()
599	// and pass it the count of characters you actually wrote (not including
600	// the null). The pointer from step 1 is now invalid.
601
602	// example usage:
603	// void GetName(SString & str) {
604	// char p = str.OpenANSIBuffer(3);*
605	// strcpy(p, "Cat");
606	// str.CloseBuffer();
607	// }
608
609	// Regarding the null-terminator:
610	// 1) Note that we wrote 4 characters (3 + a null). That's ok. OpenBuffer
611	// allocates 1 extra byte for the null.
612	// 2) If we only wrote 3 characters and no null, that's ok too. CloseBuffer()
613	// will add a null-terminator.
614
615	// You should open the buffer, write the data, and immediately close it.
616	// No sstring operations are valid while the buffer is opened.
617	//
618	// In a debug build, Open/Close will do lots of little checks to make sure
619	// you don't buffer overflow while it's opened. In a retail build, this
620	// is a very streamlined action.
621
622
623	// Open the raw buffer for writing countChars characters (not including the null).
624	WCHAR *OpenUnicodeBuffer(COUNT_T maxCharCount);
625	UTF8 *OpenUTF8Buffer(COUNT_T maxSingleCharCount);
626	ANSI *OpenANSIBuffer(COUNT_T maxSingleCharCount);
627
628	//Returns the unicode string, the caller is reponsible for lifetime of the string
629	WCHAR *GetCopyOfUnicodeString();
630
631	// Get the max size that can be passed to OpenUnicodeBuffer without causing allocations.
632	COUNT_T GetUnicodeAllocation();
633
634	// Call after OpenXXXBuffer().
635
636	// Provide the count of characters actually used (not including the
637	// null terminator). This will make sure the SString's size is correct
638	// and that we have a null-terminator.
639	void CloseBuffer(COUNT_T finalCount);
640
641	// Close the buffer. Assumes that we completely filled the buffer
642	// that OpenBuffer() gave back. If we didn't write all the characters,
643	// call CloseBuffer(int) instead.
644	void CloseBuffer();
645
646	#ifdef DACCESS_COMPILE
647	// DAC access to string functions.
648	// Note that other accessors above are not DAC-safe and will return TARGET pointers into
649	// the string instead of copying the string over to the host.
650	// @dbgtodo dac support: Prevent usage of such DAC-unsafe SString APIs in DAC code
651
652	// Instantiate a copy of the raw buffer in the host and return a pointer to it
653	void * DacGetRawContent() const;
654
655	// Instantiate a copy of the raw buffer in the host. Requires that the underlying
656	// representation is already unicode.
657	const WCHAR * DacGetRawUnicode() const;
658
659	// Copy the string from the target into the provided buffer, converting to unicode if necessary
660	bool DacGetUnicode(COUNT_T bufChars,
661	__out_z __inout_ecount(bufChars) WCHAR * buffer,
662	COUNT_T * needChars) const;
663
664	void EnumMemoryRegions(CLRDataEnumMemoryFlags flags) const
665	{
666	SUPPORTS_DAC;
667	SBuffer::EnumMemoryRegions(flags);
668	}
669	#endif
670
671	//---------------------------------------------------------------------
672	// Utilities
673	//---------------------------------------------------------------------
674
675	// WARNING: The MBCS version of printf function are factory for globalization
676	// issues when used to format Unicode strings (%S). The Unicode versions are
677	// preffered in this case.
678	void Printf(const CHAR *format, ...);
679	void VPrintf(const CHAR *format, va_list args);
680
681	void Printf(const WCHAR *format, ...);
682	void PPrintf(const WCHAR *format, ...);
683	void VPrintf(const WCHAR *format, va_list args);
684
685	void PVPrintf(const WCHAR *format, va_list args);
686
687	void AppendPrintf(const CHAR *format, ...);
688	void AppendVPrintf(const CHAR *format, va_list args);
689
690	void AppendPrintf(const WCHAR *format, ...);
691	void AppendVPrintf(const WCHAR *format, va_list args);
692
693	BOOL LoadResource(CCompRC::ResourceCategory eCategory, int resourceID);
694	HRESULT LoadResourceAndReturnHR(CCompRC::ResourceCategory eCategory, int resourceID);
695	HRESULT LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::ResourceCategory eCategory, int resourceID);
696	BOOL FormatMessage(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId,
697	const SString &arg1 = Empty(), const SString &arg2 = Empty(),
698	const SString &arg3 = Empty(), const SString &arg4 = Empty(),
699	const SString &arg5 = Empty(), const SString &arg6 = Empty(),
700	const SString &arg7 = Empty(), const SString &arg8 = Empty(),
701	const SString &arg9 = Empty(), const SString &arg10 = Empty());
702
703	#if 1
704	// @todo - get rid of this and move it outside of SString
705	void MakeFullNamespacePath(const SString &nameSpace, const SString &name);
706	#endif
707
708	//--------------------------------------------------------------------
709	// Operators
710	//--------------------------------------------------------------------
711
712	operator const WCHAR * () const { WRAPPER_NO_CONTRACT; return GetUnicode(); }
713
714	WCHAR operator[](int index) { WRAPPER_NO_CONTRACT; return Begin()[index]; }
715	WCHAR operator[](int index) const { WRAPPER_NO_CONTRACT; return Begin()[index]; }
716
717	SString &operator= (const SString &s) { WRAPPER_NO_CONTRACT; Set(s); return *this; }
718	SString &operator+= (const SString &s) { WRAPPER_NO_CONTRACT; Append(s); return *this; }
719
720	// -------------------------------------------------------------------
721	// Check functions
722	// -------------------------------------------------------------------
723
724	CHECK CheckIteratorRange(const CIterator &i) const;
725	CHECK CheckIteratorRange(const CIterator &i, COUNT_T length) const;
726	CHECK CheckEmpty() const;
727
728	static CHECK CheckCount(COUNT_T count);
729	static CHECK CheckRepresentation(int representation);
730
731	#if CHECK_INVARIANTS
732	static CHECK CheckASCIIString(const ASCII *string);
733	static CHECK CheckASCIIString(const ASCII *string, COUNT_T count);
734
735	CHECK Check() const;
736	CHECK Invariant() const;
737	CHECK InternalInvariant() const;
738	#endif // CHECK_INVARIANTS
739
740	// Helpers for CRT function equivalance.
741	static int __cdecl _stricmp(const CHAR buffer1, const* CHAR *buffer2);
742	static int __cdecl _strnicmp(const CHAR buffer1, const* CHAR *buffer2, COUNT_T count);
743
744	static int __cdecl _wcsicmp(const WCHAR buffer1, const* WCHAR *buffer2);
745	static int __cdecl _wcsnicmp(const WCHAR buffer1, const* WCHAR *buffer2, COUNT_T count);
746
747	// C++ convenience overloads
748	static int _tstricmp(const CHAR buffer1, const* CHAR *buffer2);
749	static int _tstricmp(const WCHAR buffer1, const* WCHAR *buffer2);
750
751	static int _tstrnicmp(const CHAR buffer1, const* CHAR *buffer2, COUNT_T count);
752	static int _tstrnicmp(const WCHAR buffer1, const* WCHAR *buffer2, COUNT_T count);
753
754	// -------------------------------------------------------------------
755	// Internal routines
756	// -------------------------------------------------------------------
757
758
759	protected:
760	// Use this via InlineSString<X>
761	SString(void *buffer, COUNT_T size);
762
763	private:
764	static int CaseCompareHelperA(const CHAR buffer1, const* CHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);
765	static int CaseCompareHelper(const WCHAR buffer1, const* WCHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);
766
767	// Internal helpers:
768
769	static const BYTE s_EmptyBuffer[`2`];
770
771	static UINT s_ACP;
772	SVAL_DECL(BOOL, s_IsANSIMultibyte);
773
774	#ifdef SSTRING_CONSOLECODEPAGE
775	static UINT s_ConsoleCP;
776	static BOOL s_IsConsoleMultibyte;
777	#endif
778
779	const static LocaleID s_defaultLCID;
780
781	SPTR_DECL(SString,s_Empty);
782
783	COUNT_T GetRawCount() const;
784
785	// Get buffer as appropriate string rep
786	ASCII GetRawASCII() const*;
787	UTF8 GetRawUTF8() const*;
788	ANSI GetRawANSI() const*;
789	WCHAR GetRawUnicode() const*;
790	#ifdef SSTRING_CONSOLECODEPAGE
791	CONSOLE GetRawConsole() const*;
792	#endif
793
794	void InitEmpty();
795
796	Representation GetRepresentation() const;
797	void SetRepresentation(Representation representation);
798	BOOL IsRepresentation(Representation representation) const;
799	BOOL IsFixedSize() const;
800	BOOL IsIteratable() const;
801	BOOL IsSingleByte() const;
802
803	int GetCharacterSizeShift() const;
804
805	COUNT_T SizeToCount(COUNT_T size) const;
806	COUNT_T CountToSize(COUNT_T count) const;
807
808	COUNT_T GetBufferSizeInCharIncludeNullChar() const;
809
810	BOOL IsLiteral() const;
811	BOOL IsAllocated() const;
812	BOOL IsBufferOpen() const;
813	BOOL IsASCIIScanned() const;
814	void SetASCIIScanned() const;
815	void SetNormalized() const;
816	BOOL IsNormalized() const;
817	void ClearNormalized() const;
818
819	void EnsureWritable() const;
820	void ConvertToFixed() const;
821	void ConvertToIteratable() const;
822
823	void ConvertASCIIToUnicode(SString &dest) const;
824	void ConvertToUnicode() const;
825	void ConvertToUnicode(const CIterator &i) const;
826
827	const SString &GetCompatibleString(const SString &s, SString &scratch) const;
828	const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
829	BOOL ScanASCII() const;
830	void NullTerminate();
831
832	void Resize(COUNT_T count, Representation representation,
833	Preserve preserve = DONT_PRESERVE);
834
835	void OpenBuffer(Representation representation, COUNT_T countChars);
836	};
837
838	// ===========================================================================
839	// InlineSString is used for stack allocation of strings, or when the string contents
840	// are expected or known to be small. Note that it still supports expandability via
841	// heap allocation if necessary.
842	// ===========================================================================
843
844	template <COUNT_T MEMSIZE>
845	class InlineSString : public SString
846	{
847	private:
848	BYTE m_inline[SBUFFER_PADDED_SIZE(MEMSIZE)];
849
850	public:
851	FORCEINLINE InlineSString()
852	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
853	{
854	WRAPPER_NO_CONTRACT;
855	}
856
857	FORCEINLINE InlineSString(const SString &s)
858	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
859	{
860	WRAPPER_NO_CONTRACT;
861	Set(s);
862	}
863
864	FORCEINLINE InlineSString(const SString &s1, const SString &s2)
865	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
866	{
867	WRAPPER_NO_CONTRACT;
868	Set(s1, s2);
869	}
870
871	FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3)
872	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
873	{
874	WRAPPER_NO_CONTRACT;
875	Set(s1, s2, s3);
876	}
877
878	FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3, const SString &s4)
879	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
880	{
881	WRAPPER_NO_CONTRACT;
882	Set(s1, s2, s3, s4);
883	}
884
885	FORCEINLINE InlineSString(const SString &s, const CIterator &start, const CIterator &end)
886	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
887	{
888	WRAPPER_NO_CONTRACT;
889	Set(s, start, end);
890	}
891
892	FORCEINLINE InlineSString(const SString &s, const CIterator &i, COUNT_T length)
893	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
894	{
895	WRAPPER_NO_CONTRACT;
896	Set(s, i, length);
897	}
898
899	FORCEINLINE InlineSString(const WCHAR *string)
900	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
901	{
902	WRAPPER_NO_CONTRACT;
903	Set(string);
904	}
905
906	FORCEINLINE InlineSString(const WCHAR *string, COUNT_T count)
907	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
908	{
909	WRAPPER_NO_CONTRACT;
910	Set(string, count);
911	}
912
913	FORCEINLINE InlineSString(enum tagASCII, const CHAR *string)
914	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
915	{
916	WRAPPER_NO_CONTRACT;
917	SetASCII(string);
918	}
919
920	FORCEINLINE InlineSString(enum tagASCII, const CHAR *string, COUNT_T count)
921	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
922	{
923	WRAPPER_NO_CONTRACT;
924	SetASCII(string, count);
925	}
926
927	FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string)
928	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
929	{
930	WRAPPER_NO_CONTRACT;
931	SetUTF8(string);
932	}
933
934	FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string, COUNT_T count)
935	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
936	{
937	WRAPPER_NO_CONTRACT;
938	SetUTF8(string, count);
939	}
940
941	FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string)
942	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
943	{
944	WRAPPER_NO_CONTRACT;
945	SetANSI(string);
946	}
947
948	FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string, COUNT_T count)
949	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
950	{
951	WRAPPER_NO_CONTRACT;
952	SetANSI(string, count);
953	}
954
955	#ifdef SSTRING_CONSOLECODEPAGE
956	FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string)
957	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
958	{
959	WRAPPER_NO_CONTRACT;
960	SetCONSOLE(string);
961	}
962
963	FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count)
964	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
965	{
966	WRAPPER_NO_CONTRACT;
967	SetCONSOLE(string, count);
968	}
969	#endif
970
971	FORCEINLINE InlineSString(WCHAR character)
972	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
973	{
974	WRAPPER_NO_CONTRACT;
975	Set(character);
976	}
977
978	FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 character)
979	: SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
980	{
981	WRAPPER_NO_CONTRACT;
982	SetUTF8(character);
983	}
984
985	FORCEINLINE InlineSString<MEMSIZE> &operator= (const SString &s)
986	{
987	WRAPPER_NO_CONTRACT;
988	Set(s);
989	return *this;
990	}
991
992	FORCEINLINE InlineSString<MEMSIZE> &operator= (const InlineSString<MEMSIZE> &s)
993	{
994	WRAPPER_NO_CONTRACT;
995	Set(s);
996	return *this;
997	}
998	};
999
1000	// ================================================================================
1001	// StackSString is a lot like CQuickBytes. Use it to create an SString object
1002	// using some stack space as a preallocated buffer.
1003	// ================================================================================
1004
1005	typedef InlineSString<`512`> StackSString;
1006
1007	// This is a smaller version for when it is known that the string that's going to
1008	// be needed is small and it's preferable not to take up the stack space.
1009	typedef InlineSString<`32`> SmallStackSString;
1010
1011	// To be used specifically for path strings.
1012	#ifdef _DEBUG
1013	// This is a smaller version for debug builds to exercise the buffer allocation path
1014	typedef InlineSString<`32`> PathString;
1015	typedef InlineSString<`2` * `32`> LongPathString;
1016	#else
1017	// Set it to the current MAX_PATH
1018	typedef InlineSString<`260`> PathString;
1019	typedef InlineSString<`2` * `260`> LongPathString;
1020	#endif
1021
// ================================================================================
// SL(...) expands to an SString constructed with the SString::Literal tag followed
// by the given literal.
// usage:
//        s = SL("My literal String");
// ================================================================================

#define SL(literalText) SString(SString::Literal, literalText)
1029
1030	// ================================================================================
1031	// ScratchBuffer classes are used by the GetXXX() routines to allocate scratch space in.
1032	// ================================================================================
1033
1034	class SString::AbstractScratchBuffer : private SString
1035	{
1036	protected:
1037	// Do not use this class directly - use
1038	// ScratchBuffer or StackScratchBuffer.
1039	AbstractScratchBuffer(void *buffer, COUNT_T size);
1040	};
1041
1042	template <COUNT_T MEMSIZE>
1043	class ScratchBuffer : public SString::AbstractScratchBuffer
1044	{
1045	private:
1046	BYTE m_inline[MEMSIZE];
1047
1048	public:
1049	ScratchBuffer()
1050	: AbstractScratchBuffer((void *)m_inline, MEMSIZE)
1051	{
1052	WRAPPER_NO_CONTRACT;
1053	}
1054	};
1055
1056	typedef ScratchBuffer<`256`> StackScratchBuffer;
1057
1058	// ================================================================================
1059	// Special contract definition - THROWS_UNLESS_NORMALIZED
1060	// this is used for operations which might fail for generalized strings but
1061	// not if the string has already been converted to unicode. Rather than just
1062	// setting this on all conversions to unicode, we only set it when explicitly
1063	// asked. This should expose more potential problems.
1064	// ================================================================================
1065
// Expands to NOTHROW when IsNormalized() is true for the current object, THROWS otherwise.
#define THROWS_UNLESS_NORMALIZED \
    if (IsNormalized()) NOTHROW; else THROWS

// As above, but both the current object and s must be normalized for NOTHROW.
// The argument is parenthesized so that an expression such as *p can be passed.
#define THROWS_UNLESS_BOTH_NORMALIZED(s) \
    if (IsNormalized() && (s).IsNormalized()) NOTHROW; else THROWS

// Expands to FORBID_FAULT when IsNormalized() is true, INJECT_FAULT(stmt) otherwise.
#define FAULTS_UNLESS_NORMALIZED(stmt) \
    if (IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)

// As above, but both the current object and s must be normalized for FORBID_FAULT.
// The argument s is parenthesized so that an expression such as *p can be passed.
#define FAULTS_UNLESS_BOTH_NORMALIZED(s, stmt) \
    if (IsNormalized() && (s).IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)
1077
1078	// ================================================================================
1079	// Inline definitions
1080	// ================================================================================
1081
1082	#include <sstring.inl>
1083
1084	#endif // _SSTRING_H_
1085

Browse the source code of CoreCLR/inc/sstring.h