1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4// ---------------------------------------------------------------------------
5// SString.h (Safe String)
6//
7
8// ---------------------------------------------------------------------------
9
10// ------------------------------------------------------------------------------------------
// SString is the "standard" string representation for the EE. It has two purposes.
12// (1) it provides an easy-to-use, relatively efficient, string class for APIs to standardize
13// on.
14// (2) it completely encapsulates all "unsafe" string operations - that is, string operations
15// which yield possible buffer overrun bugs. Typesafe use of this API should help guarantee
16// safety.
17//
// A SString is conceptually unicode, although the internal conversion might be delayed as long as possible.
// Basically it's up to the implementation whether conversion takes place immediately or is delayed, and if
// delayed, what operations trigger the conversion.
21//
22// Note that anywhere you express a "position" in a string, it is in terms of the Unicode representation of the
23// string.
24//
// If you need a direct non-unicode representation, you will have to provide a fresh SString which can
// receive a conversion operation if necessary.
27//
28// The alternate encodings available are:
29// 1. ASCII - string consisting entirely of ASCII (7 bit) characters. This is the only 1 byte encoding
30// guaranteed to be fixed width. Such a string is also a valid instance of all the other 1 byte string
31// representations, and we take advantage of this fact.
32// 2. UTF-8 - standard multibyte unicode encoding.
33// 3. ANSI - Potentially multibyte encoding using the ANSI page determined by GetACP().
34//
35// @todo: Note that we could also provide support for several other cases (but currently do not.)
36// - Page specified by GetOEMCP() (OEM page)
37// - Arbitrary page support
38//
39// @todo: argument & overflow/underflow checking needs to be added
40// ------------------------------------------------------------------------------------------
41
42
43#ifndef _SSTRING_H_
44#define _SSTRING_H_
45
46#include "utilcode.h"
47#include "sbuffer.h"
48
49// ==========================================================================================
50// Documentational typedefs: use these to indicate specific representations of 8 bit strings:
51// ==========================================================================================
52
53// Note that LPCSTR means ASCII (7-bit) only!
54
55typedef CHAR ASCII;
56typedef ASCII *LPASCII;
57typedef const ASCII *LPCASCII;
58
59typedef CHAR ANSI;
60typedef ANSI *LPANSI;
61typedef const ANSI *LPCANSI;
62
63typedef CHAR UTF8;
64typedef UTF8 *LPUTF8;
65typedef const UTF8 *LPCUTF8;
66
67// ==========================================================================================
68// SString is the base class for safe strings.
69// ==========================================================================================
70
71
72typedef DPTR(class SString) PTR_SString;
73class SString : private SBuffer
74{
75 friend struct _DacGlobals;
76
77private:
78 enum Representation
79 {
80 // Note: bits are meaningful: xVS V == Variable? S == Single byte width?
81 REPRESENTATION_EMPTY = 0x00, // 000
82 REPRESENTATION_UNICODE = 0x04, // 100
83 REPRESENTATION_ASCII = 0x01, // 001
84 REPRESENTATION_UTF8 = 0x03, // 011
85 REPRESENTATION_ANSI = 0x07, // 111
86
87 REPRESENTATION_VARIABLE_MASK = 0x02,
88 REPRESENTATION_SINGLE_MASK = 0x01,
89 REPRESENTATION_MASK = 0x07,
90 };
91
92 // Minimum guess for Printf buffer size
93 const static COUNT_T MINIMUM_GUESS = 20;
94
95
96#ifdef _DEBUG
97 // Used to have a public ctor of this form - made it too easy to lose
98 // utf8 info by accident. Now you have to specify the representation type
99 // explicitly - this privator ctor prevents reinsertion of this ctor.
100 explicit SString(const ASCII *)
101 {
102 _ASSERTE(!"Don't call this.");
103 }
104#endif
105
106 protected:
107 class Index;
108 class UIndex;
109
110 friend class Index;
111 friend class UIndex;
112
113 public:
114
115 // UIterator is character-level assignable.
116 class UIterator;
117
118 // CIterators/Iterator'string must be modified by SString APIs.
119 class CIterator;
120 class Iterator;
121
122 // Tokens for constructor overloads
123 enum tagUTF8Literal { Utf8Literal };
124 enum tagLiteral { Literal };
125 enum tagUTF8 { Utf8 };
126 enum tagANSI { Ansi };
127 enum tagASCII {Ascii };
128#ifdef SSTRING_CONSOLECODEPAGE
129 enum tagCONSOLE { Console };
130#endif
131
132 static void Startup();
133 static CHECK CheckStartup();
134
135 static const SString &Empty();
136
137 SString();
138
139 explicit SString(const SString &s);
140
141 SString(const SString &s1, const SString &s2);
142 SString(const SString &s1, const SString &s2, const SString &s3);
143 SString(const SString &s1, const SString &s2, const SString &s3, const SString &s4);
144 SString(const SString &s, const CIterator &i, COUNT_T length);
145 SString(const SString &s, const CIterator &start, const CIterator &end);
146 SString(const WCHAR *string);
147 SString(const WCHAR *string, COUNT_T count);
148 SString(enum tagASCII dummyTag, const ASCII *string);
149 SString(enum tagASCII dummyTag, const ASCII *string, COUNT_T count);
150 SString(enum tagUTF8 dummytag, const UTF8 *string);
151 SString(enum tagUTF8 dummytag, const UTF8 *string, COUNT_T count);
152 SString(enum tagANSI dummytag, const ANSI *string);
153 SString(enum tagANSI dummytag, const ANSI *string, COUNT_T count);
154#ifdef SSTRING_CONSOLECODEPAGE
155 SString(enum tagCONSOLE dummytag, const CONSOLE *string);
156 SString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count);
157#endif
158 SString(WCHAR character);
159
160 // NOTE: Literals MUST be read-only never-freed strings.
161 SString(enum tagLiteral dummytag, const CHAR *literal);
162 SString(enum tagUTF8Literal dummytag, const UTF8 *literal);
163 SString(enum tagLiteral dummytag, const WCHAR *literal);
164 SString(enum tagLiteral dummytag, const WCHAR *literal, COUNT_T count);
165
166 // Set this string to the concatenation of s1,s2,s3,s4
167 void Set(const SString &s);
168 void Set(const SString &s1, const SString &s2);
169 void Set(const SString &s1, const SString &s2, const SString &s3);
170 void Set(const SString &s1, const SString &s2, const SString &s3, const SString &s4);
171
172 // Set this string to the substring of s, starting at i, of length characters.
173 void Set(const SString &s, const CIterator &i, COUNT_T length);
174
175 // Set this string to the substring of s, starting at start and ending at end (exclusive)
176 void Set(const SString &s, const CIterator &start, const CIterator &end);
177
178 // Set this string to a copy of the given string
179 void Set(const WCHAR *string);
180 void SetASCII(const ASCII *string);
181 void SetUTF8(const UTF8 *string);
182 void SetANSI(const ANSI *string);
183#ifdef SSTRING_CONSOLECODEPAGE
184 void SetConsole(const CONSOLE *string);
185#endif
186
187 // Set this string to a copy of the first count chars of the given string
188 void Set(const WCHAR *string, COUNT_T count);
189
190 // Set this string to a prellocated copy of a given string.
191 // The caller is the owner of the bufffer and has to coordinate its lifetime.
192 void SetPreallocated(const WCHAR *string, COUNT_T count);
193
194 void SetASCII(const ASCII *string, COUNT_T count);
195
196 void SetUTF8(const UTF8 *string, COUNT_T count);
197 void SetANSI(const ANSI *string, COUNT_T count);
198#ifdef SSTRING_CONSOLECODEPAGE
199 void SetConsole(const CONSOLE *string, COUNT_T count);
200#endif
201
202 // Set this string to the unicode character
203 void Set(WCHAR character);
204
205 // Set this string to the UTF8 character
206 void SetUTF8(CHAR character);
207
208 // This this string to the given literal. We share the mem and don't make a copy.
209 void SetLiteral(const CHAR *literal);
210 void SetLiteral(const WCHAR *literal);
211
212 // ------------------------------------------------------------------
213 // Public operations
214 // ------------------------------------------------------------------
215
216 // Normalizes the string representation to unicode. This can be used to
217 // make basic read-only operations non-failing.
218 void Normalize() const;
219
220 // Return the number of characters in the string (excluding the terminating NULL).
221 COUNT_T GetCount() const;
222 BOOL IsEmpty() const;
223
224 // Return whether a single byte string has all characters which fit in the ASCII set.
225 // (Note that this will return FALSE if the string has been converted to unicode for any
226 // reason.)
227 BOOL IsASCII() const;
228
229 // !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!
230 //
231 // THIS IS NOT SUPPORTED FULLY ON WIN9x
232 // SString case-insensitive comparison is based off LCMapString,
233 // which does not work on characters outside the current OS code page.
234 //
235 // Case insensitive code in SString is primarily targeted at
236 // supporting path comparisons, which is supported correctly on 9x,
237 // since file system names are limited to the OS code page.
238 //
239 // !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!
240
241 // Compute a content-based hash value
242 ULONG Hash() const;
243 ULONG HashCaseInsensitive() const;
244 ULONG HashCaseInsensitive(LocaleID locale) const;
245
246 // Do a string comparison. Return 0 if the strings
247 // have the same value, -1 if this is "less than" s, or 1 if
248 // this is "greater than" s.
249 int Compare(const SString &s) const;
250 int CompareCaseInsensitive(const SString &s) const; // invariant locale
251 int CompareCaseInsensitive(const SString &s, LocaleID locale) const;
252
253 // Do a case sensitive string comparison. Return TRUE if the strings
254 // have the same value FALSE if not.
255 BOOL Equals(const SString &s) const;
256 BOOL EqualsCaseInsensitive(const SString &s) const; // invariant locale
257 BOOL EqualsCaseInsensitive(const SString &s, LocaleID locale) const;
258
259 // Match s to a portion of the string starting at the position.
260 // Return TRUE if the strings have the same value
261 // (regardless of representation), FALSE if not.
262 BOOL Match(const CIterator &i, const SString &s) const;
263 BOOL MatchCaseInsensitive(const CIterator &i, const SString &s) const; // invariant locale
264 BOOL MatchCaseInsensitive(const CIterator &i, const SString &s, LocaleID locale) const;
265
266 BOOL Match(const CIterator &i, WCHAR c) const;
267 BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c) const; // invariant locale
268 BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c, LocaleID locale) const;
269
270 // Like match, but advances the iterator past the match
271 // if successful
272 BOOL Skip(CIterator &i, const SString &s) const;
273 BOOL Skip(CIterator &i, WCHAR c) const;
274
275 // Start searching for a match of the given string, starting at
276 // the given iterator point.
277 // If a match exists, move the iterator to point to the nearest
278 // occurence of s in the string and return TRUE.
279 // If no match exists, return FALSE and leave the iterator unchanged.
280 BOOL Find(CIterator &i, const SString &s) const;
281 BOOL Find(CIterator &i, const WCHAR *s) const;
282 BOOL FindASCII(CIterator &i, const ASCII *s) const;
283 BOOL FindUTF8(CIterator &i, const UTF8 *s) const;
284 BOOL Find(CIterator &i, WCHAR c) const;
285
286 BOOL FindBack(CIterator &i, const SString &s) const;
287 BOOL FindBack(CIterator &i, const WCHAR *s) const;
288 BOOL FindBackASCII(CIterator &i, const ASCII *s) const;
289 BOOL FindBackUTF8(CIterator &i, const UTF8 *s) const;
290 BOOL FindBack(CIterator &i, WCHAR c) const;
291
292 // Returns TRUE if this string begins with the contents of s
293 BOOL BeginsWith(const SString &s) const;
294 BOOL BeginsWithCaseInsensitive(const SString &s) const; // invariant locale
295 BOOL BeginsWithCaseInsensitive(const SString &s, LocaleID locale) const;
296
297 // Returns TRUE if this string ends with the contents of s
298 BOOL EndsWith(const SString &s) const;
299 BOOL EndsWithCaseInsensitive(const SString &s) const; // invariant locale
300 BOOL EndsWithCaseInsensitive(const SString &s, LocaleID locale) const;
301
302 // Sets this string to an empty string "".
303 void Clear();
304
305 // Truncate the string to the iterator position
306 void Truncate(const Iterator &i);
307
308 // Append s to the end of this string.
309 void Append(const SString &s);
310 void Append(const WCHAR *s);
311 void AppendASCII(const CHAR *s);
312 void AppendUTF8(const CHAR *s);
313
314 // Append char c to the end of this string.
315 void Append(const WCHAR c);
316 void AppendUTF8(const CHAR c);
317
318 // Insert s into this string at the 'position'th character.
319 void Insert(const Iterator &i, const SString &s);
320 void Insert(const Iterator &i, const WCHAR *s);
321 void InsertASCII(const Iterator &i, const CHAR *s);
322 void InsertUTF8(const Iterator &i, const CHAR *s);
323
324 // Delete substring position + length
325 void Delete(const Iterator &i, COUNT_T length);
326
327 // Replace character at i with c
328 void Replace(const Iterator &i, WCHAR c);
329
330 // Replace substring at (i,i+length) with s
331 void Replace(const Iterator &i, COUNT_T length, const SString &s);
332
333 // Make sure that string buffer has room to grow
334 void Preallocate(COUNT_T characters) const;
335
336 // Shrink buffer size as much as possible (reallocate if necessary.)
337 void Trim() const;
338
339 // ------------------------------------------------------------------
340 // Iterators:
341 // ------------------------------------------------------------------
342
343 // SString splits iterators into two categories.
344 //
345 // CIterator and Iterator are cheap to create, but allow only read-only
346 // access to the string.
347 //
348 // UIterator forces a unicode conversion, but allows
349 // assignment to individual string characters. They are also a bit more
350 // efficient once created.
351
352 // ------------------------------------------------------------------
353 // UIterator:
354 // ------------------------------------------------------------------
355
356 protected:
357
358 class UIndex : public SBuffer::Index
359 {
360 friend class SString;
361 friend class Indexer<WCHAR, UIterator>;
362
363 protected:
364
365 UIndex();
366 UIndex(SString *string, SCOUNT_T index);
367 WCHAR &GetAt(SCOUNT_T delta) const;
368 void Skip(SCOUNT_T delta);
369 SCOUNT_T Subtract(const UIndex &i) const;
370 CHECK DoCheck(SCOUNT_T delta) const;
371
372 WCHAR *GetUnicode() const;
373 };
374
375 public:
376
377 class UIterator : public UIndex, public Indexer<WCHAR, UIterator>
378 {
379 friend class SString;
380
381 public:
382 UIterator()
383 {
384 }
385
386 UIterator(SString *string, int index)
387 : UIndex(string, index)
388 {
389 }
390 };
391
392 UIterator BeginUnicode();
393 UIterator EndUnicode();
394
395 // For CIterator & Iterator, we try our best to iterate the string without
396 // modifying it. (Currently, we do require an ASCII or Unicode string
397 // for simple WCHAR retrival, but you could imagine being more flexible
398 // going forward - perhaps even supporting iterating multibyte encodings
399 // directly.)
400 //
401 // Because of the runtime-changable nature of the string, CIterators
402 // require an extra member to record the character size. They also
403 // are unable to properly implement GetAt as required by the template
404 // (since there may not be a direct WCHAR pointer), so they provide
405 // further customization in a subclass.
406 //
407 // Normally the user expects to cast Iterators to CIterators transparently, so
408 // we provide a constructor on CIterator to support this.
409
410 protected:
411
412 class Index : public SBuffer::Index
413 {
414 friend class SString;
415
416 friend class Indexer<const WCHAR, CIterator>;
417 friend class Indexer<WCHAR, Iterator>;
418
419 protected:
420 int m_characterSizeShift;
421
422 Index();
423 Index(SString *string, SCOUNT_T index);
424 BYTE &GetAt(SCOUNT_T delta) const;
425 void Skip(SCOUNT_T delta);
426 SCOUNT_T Subtract(const Index &i) const;
427 CHECK DoCheck(SCOUNT_T delta) const;
428
429 void Resync(const SString *string, BYTE *ptr) const;
430
431 const WCHAR *GetUnicode() const;
432 const CHAR *GetASCII() const;
433
434 public:
435 // Note these should supercede the Indexer versions
436 // since this class comes first in the inheritence list
437 WCHAR operator*() const;
438 void operator->() const;
439 WCHAR operator[](int index) const;
440 };
441
442 public:
443
444 class CIterator : public Index, public Indexer<const WCHAR, CIterator>
445 {
446 friend class SString;
447
448 public:
449 const Iterator &ConstCast() const
450 {
451 return *(const Iterator *)this;
452 }
453
454 Iterator &ConstCast()
455 {
456 return *(Iterator *)this;
457 }
458
459 operator const SBuffer::CIterator &() const
460 {
461 return *(const SBuffer::CIterator *)this;
462 }
463
464 operator SBuffer::CIterator &()
465 {
466 return *(SBuffer::CIterator *)this;
467 }
468
469 CIterator()
470 {
471 }
472
473 CIterator(const SString *string, int index)
474 : Index(const_cast<SString *>(string), index)
475 {
476 }
477
478 // explicitly resolve these for gcc
479 WCHAR operator*() const { return Index::operator*(); }
480 void operator->() const { Index::operator->(); }
481 WCHAR operator[](int index) const { return Index::operator[](index); }
482 };
483
484 class Iterator : public Index, public Indexer<WCHAR, Iterator>
485 {
486 friend class SString;
487
488 public:
489 operator const CIterator &() const
490 {
491 return *(const CIterator *)this;
492 }
493
494 operator CIterator &()
495 {
496 return *(CIterator *)this;
497 }
498
499 operator const SBuffer::Iterator &() const
500 {
501 return *(const SBuffer::Iterator *)this;
502 }
503
504 operator SBuffer::Iterator &()
505 {
506 return *(SBuffer::Iterator *)this;
507 }
508
509 Iterator()
510 {
511 }
512
513 Iterator(SString *string, int index)
514 : Index(string, index)
515 {
516 SUPPORTS_DAC;
517 }
518
519 // explicitly resolve these for gcc
520 WCHAR operator*() const { return Index::operator*(); }
521 void operator->() const { Index::operator->(); }
522 WCHAR operator[](int index) const { return Index::operator[](index); }
523 };
524
525 CIterator Begin() const;
526 CIterator End() const;
527
528 Iterator Begin();
529 Iterator End();
530
531 // ------------------------------------------------------------------
532 // Conversion:
533 // ------------------------------------------------------------------
534
535 // Get a const pointer to the string in the current representation.
536 // This pointer can not be cached because it will become invalid if
537 // the SString changes representation or reallocates its buffer.
538
539 // You can always get a unicode string. This will force a conversion
540 // if necessary.
541 const WCHAR *GetUnicode() const;
542 const WCHAR *GetUnicode(const CIterator &i) const;
543
544 void LowerCase();
545 void UpperCase();
546
547 // Helper function to convert string in-place to lower-case (no allocation overhead for SString instance)
548 static void LowerCase(__inout_z LPWSTR wszString);
549
550 // These routines will use the given scratch string if necessary
551 // to perform a conversion to the desired representation
552
553 // Use a local declaration of InlineScratchBuffer or StackScratchBuffer for parameters of
554 // AbstractScratchBuffer.
555 class AbstractScratchBuffer;
556
557 // These routines will use the given scratch buffer if necessary
558 // to perform a conversion to the desired representation. Note that
559 // the lifetime of the pointer return is limited by BOTH the
560 // scratch string and the source (this) string.
561 //
562 // Typical usage:
563 //
564 // SString *s = ...;
565 // {
566 // StackScratchBuffer buffer;
567 // const UTF8 *utf8 = s->GetUTF8(buffer);
568 // CallFoo(utf8);
569 // }
570 // // No more pointers to returned buffer allowed.
571
572 const UTF8 *GetUTF8(AbstractScratchBuffer &scratch) const;
573 const UTF8 *GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const;
574 const ANSI *GetANSI(AbstractScratchBuffer &scratch) const;
575#ifdef SSTRING_CONSOLECODEPAGE
576 const CONSOLE *GetConsole(AbstractScratchBuffer &scratch) const;
577#endif
578
579 // Used when the representation is known, throws if the representation doesn't match
580 const UTF8 *GetUTF8NoConvert() const;
581
582 // Converts/copies into the given output string
583 void ConvertToUnicode(SString &dest) const;
584 void ConvertToANSI(SString &dest) const;
585 COUNT_T ConvertToUTF8(SString &dest) const;
586#ifdef SSTRING_CONSOLECODEPAGE
587 void ConvertToConsole(SString &dest) const;
588#endif
589
590 //-------------------------------------------------------------------
591 // Accessing the string contents directly
592 //-------------------------------------------------------------------
593
594 // To write directly to the SString's underlying buffer:
595 // 1) Call OpenXXXBuffer() and pass it the count of characters
596 // you need. (Not including the null-terminator).
597 // 2) That returns a pointer to the raw buffer which you can write to.
598 // 3) When you are done writing to the pointer, call CloseBuffer()
599 // and pass it the count of characters you actually wrote (not including
600 // the null). The pointer from step 1 is now invalid.
601
602 // example usage:
603 // void GetName(SString & str) {
604 // char * p = str.OpenANSIBuffer(3);
605 // strcpy(p, "Cat");
606 // str.CloseBuffer();
607 // }
608
609 // Regarding the null-terminator:
610 // 1) Note that we wrote 4 characters (3 + a null). That's ok. OpenBuffer
611 // allocates 1 extra byte for the null.
612 // 2) If we only wrote 3 characters and no null, that's ok too. CloseBuffer()
613 // will add a null-terminator.
614
615 // You should open the buffer, write the data, and immediately close it.
616 // No sstring operations are valid while the buffer is opened.
617 //
618 // In a debug build, Open/Close will do lots of little checks to make sure
619 // you don't buffer overflow while it's opened. In a retail build, this
620 // is a very streamlined action.
621
622
623 // Open the raw buffer for writing countChars characters (not including the null).
624 WCHAR *OpenUnicodeBuffer(COUNT_T maxCharCount);
625 UTF8 *OpenUTF8Buffer(COUNT_T maxSingleCharCount);
626 ANSI *OpenANSIBuffer(COUNT_T maxSingleCharCount);
627
628 //Returns the unicode string, the caller is reponsible for lifetime of the string
629 WCHAR *GetCopyOfUnicodeString();
630
631 // Get the max size that can be passed to OpenUnicodeBuffer without causing allocations.
632 COUNT_T GetUnicodeAllocation();
633
634 // Call after OpenXXXBuffer().
635
636 // Provide the count of characters actually used (not including the
637 // null terminator). This will make sure the SString's size is correct
638 // and that we have a null-terminator.
639 void CloseBuffer(COUNT_T finalCount);
640
641 // Close the buffer. Assumes that we completely filled the buffer
642 // that OpenBuffer() gave back. If we didn't write all the characters,
643 // call CloseBuffer(int) instead.
644 void CloseBuffer();
645
646#ifdef DACCESS_COMPILE
647 // DAC access to string functions.
648 // Note that other accessors above are not DAC-safe and will return TARGET pointers into
649 // the string instead of copying the string over to the host.
650 // @dbgtodo dac support: Prevent usage of such DAC-unsafe SString APIs in DAC code
651
652 // Instantiate a copy of the raw buffer in the host and return a pointer to it
653 void * DacGetRawContent() const;
654
655 // Instantiate a copy of the raw buffer in the host. Requires that the underlying
656 // representation is already unicode.
657 const WCHAR * DacGetRawUnicode() const;
658
659 // Copy the string from the target into the provided buffer, converting to unicode if necessary
660 bool DacGetUnicode(COUNT_T bufChars,
661 __out_z __inout_ecount(bufChars) WCHAR * buffer,
662 COUNT_T * needChars) const;
663
664 void EnumMemoryRegions(CLRDataEnumMemoryFlags flags) const
665 {
666 SUPPORTS_DAC;
667 SBuffer::EnumMemoryRegions(flags);
668 }
669#endif
670
671 //---------------------------------------------------------------------
672 // Utilities
673 //---------------------------------------------------------------------
674
675 // WARNING: The MBCS version of printf function are factory for globalization
676 // issues when used to format Unicode strings (%S). The Unicode versions are
677 // preffered in this case.
678 void Printf(const CHAR *format, ...);
679 void VPrintf(const CHAR *format, va_list args);
680
681 void Printf(const WCHAR *format, ...);
682 void PPrintf(const WCHAR *format, ...);
683 void VPrintf(const WCHAR *format, va_list args);
684
685 void PVPrintf(const WCHAR *format, va_list args);
686
687 void AppendPrintf(const CHAR *format, ...);
688 void AppendVPrintf(const CHAR *format, va_list args);
689
690 void AppendPrintf(const WCHAR *format, ...);
691 void AppendVPrintf(const WCHAR *format, va_list args);
692
693 BOOL LoadResource(CCompRC::ResourceCategory eCategory, int resourceID);
694 HRESULT LoadResourceAndReturnHR(CCompRC::ResourceCategory eCategory, int resourceID);
695 HRESULT LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::ResourceCategory eCategory, int resourceID);
696 BOOL FormatMessage(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId,
697 const SString &arg1 = Empty(), const SString &arg2 = Empty(),
698 const SString &arg3 = Empty(), const SString &arg4 = Empty(),
699 const SString &arg5 = Empty(), const SString &arg6 = Empty(),
700 const SString &arg7 = Empty(), const SString &arg8 = Empty(),
701 const SString &arg9 = Empty(), const SString &arg10 = Empty());
702
703#if 1
704 // @todo - get rid of this and move it outside of SString
705 void MakeFullNamespacePath(const SString &nameSpace, const SString &name);
706#endif
707
708 //--------------------------------------------------------------------
709 // Operators
710 //--------------------------------------------------------------------
711
712 operator const WCHAR * () const { WRAPPER_NO_CONTRACT; return GetUnicode(); }
713
714 WCHAR operator[](int index) { WRAPPER_NO_CONTRACT; return Begin()[index]; }
715 WCHAR operator[](int index) const { WRAPPER_NO_CONTRACT; return Begin()[index]; }
716
717 SString &operator= (const SString &s) { WRAPPER_NO_CONTRACT; Set(s); return *this; }
718 SString &operator+= (const SString &s) { WRAPPER_NO_CONTRACT; Append(s); return *this; }
719
720 // -------------------------------------------------------------------
721 // Check functions
722 // -------------------------------------------------------------------
723
724 CHECK CheckIteratorRange(const CIterator &i) const;
725 CHECK CheckIteratorRange(const CIterator &i, COUNT_T length) const;
726 CHECK CheckEmpty() const;
727
728 static CHECK CheckCount(COUNT_T count);
729 static CHECK CheckRepresentation(int representation);
730
731#if CHECK_INVARIANTS
732 static CHECK CheckASCIIString(const ASCII *string);
733 static CHECK CheckASCIIString(const ASCII *string, COUNT_T count);
734
735 CHECK Check() const;
736 CHECK Invariant() const;
737 CHECK InternalInvariant() const;
738#endif // CHECK_INVARIANTS
739
740 // Helpers for CRT function equivalance.
741 static int __cdecl _stricmp(const CHAR *buffer1, const CHAR *buffer2);
742 static int __cdecl _strnicmp(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count);
743
744 static int __cdecl _wcsicmp(const WCHAR *buffer1, const WCHAR *buffer2);
745 static int __cdecl _wcsnicmp(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count);
746
747 // C++ convenience overloads
748 static int _tstricmp(const CHAR *buffer1, const CHAR *buffer2);
749 static int _tstricmp(const WCHAR *buffer1, const WCHAR *buffer2);
750
751 static int _tstrnicmp(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count);
752 static int _tstrnicmp(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count);
753
754 // -------------------------------------------------------------------
755 // Internal routines
756 // -------------------------------------------------------------------
757
758
759 protected:
760 // Use this via InlineSString<X>
761 SString(void *buffer, COUNT_T size);
762
763 private:
764 static int CaseCompareHelperA(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);
765 static int CaseCompareHelper(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);
766
767 // Internal helpers:
768
769 static const BYTE s_EmptyBuffer[2];
770
771 static UINT s_ACP;
772 SVAL_DECL(BOOL, s_IsANSIMultibyte);
773
774#ifdef SSTRING_CONSOLECODEPAGE
775 static UINT s_ConsoleCP;
776 static BOOL s_IsConsoleMultibyte;
777#endif
778
779 const static LocaleID s_defaultLCID;
780
781 SPTR_DECL(SString,s_Empty);
782
783 COUNT_T GetRawCount() const;
784
785 // Get buffer as appropriate string rep
786 ASCII *GetRawASCII() const;
787 UTF8 *GetRawUTF8() const;
788 ANSI *GetRawANSI() const;
789 WCHAR *GetRawUnicode() const;
790#ifdef SSTRING_CONSOLECODEPAGE
791 CONSOLE *GetRawConsole() const;
792#endif
793
794 void InitEmpty();
795
796 Representation GetRepresentation() const;
797 void SetRepresentation(Representation representation);
798 BOOL IsRepresentation(Representation representation) const;
799 BOOL IsFixedSize() const;
800 BOOL IsIteratable() const;
801 BOOL IsSingleByte() const;
802
803 int GetCharacterSizeShift() const;
804
805 COUNT_T SizeToCount(COUNT_T size) const;
806 COUNT_T CountToSize(COUNT_T count) const;
807
808 COUNT_T GetBufferSizeInCharIncludeNullChar() const;
809
810 BOOL IsLiteral() const;
811 BOOL IsAllocated() const;
812 BOOL IsBufferOpen() const;
813 BOOL IsASCIIScanned() const;
814 void SetASCIIScanned() const;
815 void SetNormalized() const;
816 BOOL IsNormalized() const;
817 void ClearNormalized() const;
818
819 void EnsureWritable() const;
820 void ConvertToFixed() const;
821 void ConvertToIteratable() const;
822
823 void ConvertASCIIToUnicode(SString &dest) const;
824 void ConvertToUnicode() const;
825 void ConvertToUnicode(const CIterator &i) const;
826
827 const SString &GetCompatibleString(const SString &s, SString &scratch) const;
828 const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
829 BOOL ScanASCII() const;
830 void NullTerminate();
831
832 void Resize(COUNT_T count, Representation representation,
833 Preserve preserve = DONT_PRESERVE);
834
835 void OpenBuffer(Representation representation, COUNT_T countChars);
836};
837
838// ===========================================================================
839// InlineSString is used for stack allocation of strings, or when the string contents
840// are expected or known to be small. Note that it still supports expandability via
841// heap allocation if necessary.
842// ===========================================================================
843
844template <COUNT_T MEMSIZE>
845class InlineSString : public SString
846{
847private:
848 BYTE m_inline[SBUFFER_PADDED_SIZE(MEMSIZE)];
849
850public:
851 FORCEINLINE InlineSString()
852 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
853 {
854 WRAPPER_NO_CONTRACT;
855 }
856
857 FORCEINLINE InlineSString(const SString &s)
858 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
859 {
860 WRAPPER_NO_CONTRACT;
861 Set(s);
862 }
863
864 FORCEINLINE InlineSString(const SString &s1, const SString &s2)
865 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
866 {
867 WRAPPER_NO_CONTRACT;
868 Set(s1, s2);
869 }
870
871 FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3)
872 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
873 {
874 WRAPPER_NO_CONTRACT;
875 Set(s1, s2, s3);
876 }
877
878 FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3, const SString &s4)
879 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
880 {
881 WRAPPER_NO_CONTRACT;
882 Set(s1, s2, s3, s4);
883 }
884
885 FORCEINLINE InlineSString(const SString &s, const CIterator &start, const CIterator &end)
886 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
887 {
888 WRAPPER_NO_CONTRACT;
889 Set(s, start, end);
890 }
891
892 FORCEINLINE InlineSString(const SString &s, const CIterator &i, COUNT_T length)
893 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
894 {
895 WRAPPER_NO_CONTRACT;
896 Set(s, i, length);
897 }
898
899 FORCEINLINE InlineSString(const WCHAR *string)
900 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
901 {
902 WRAPPER_NO_CONTRACT;
903 Set(string);
904 }
905
906 FORCEINLINE InlineSString(const WCHAR *string, COUNT_T count)
907 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
908 {
909 WRAPPER_NO_CONTRACT;
910 Set(string, count);
911 }
912
913 FORCEINLINE InlineSString(enum tagASCII, const CHAR *string)
914 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
915 {
916 WRAPPER_NO_CONTRACT;
917 SetASCII(string);
918 }
919
920 FORCEINLINE InlineSString(enum tagASCII, const CHAR *string, COUNT_T count)
921 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
922 {
923 WRAPPER_NO_CONTRACT;
924 SetASCII(string, count);
925 }
926
927 FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string)
928 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
929 {
930 WRAPPER_NO_CONTRACT;
931 SetUTF8(string);
932 }
933
934 FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string, COUNT_T count)
935 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
936 {
937 WRAPPER_NO_CONTRACT;
938 SetUTF8(string, count);
939 }
940
941 FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string)
942 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
943 {
944 WRAPPER_NO_CONTRACT;
945 SetANSI(string);
946 }
947
948 FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string, COUNT_T count)
949 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
950 {
951 WRAPPER_NO_CONTRACT;
952 SetANSI(string, count);
953 }
954
955#ifdef SSTRING_CONSOLECODEPAGE
956 FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string)
957 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
958 {
959 WRAPPER_NO_CONTRACT;
960 SetCONSOLE(string);
961 }
962
963 FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count)
964 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
965 {
966 WRAPPER_NO_CONTRACT;
967 SetCONSOLE(string, count);
968 }
969#endif
970
971 FORCEINLINE InlineSString(WCHAR character)
972 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
973 {
974 WRAPPER_NO_CONTRACT;
975 Set(character);
976 }
977
978 FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 character)
979 : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE))
980 {
981 WRAPPER_NO_CONTRACT;
982 SetUTF8(character);
983 }
984
985 FORCEINLINE InlineSString<MEMSIZE> &operator= (const SString &s)
986 {
987 WRAPPER_NO_CONTRACT;
988 Set(s);
989 return *this;
990 }
991
992 FORCEINLINE InlineSString<MEMSIZE> &operator= (const InlineSString<MEMSIZE> &s)
993 {
994 WRAPPER_NO_CONTRACT;
995 Set(s);
996 return *this;
997 }
998};
999
1000// ================================================================================
1001// StackSString is a lot like CQuickBytes. Use it to create an SString object
1002// using some stack space as a preallocated buffer.
1003// ================================================================================
1004
// General-purpose variant: instantiates InlineSString with MEMSIZE = 512,
// the value that sizes the inline BYTE array (via SBUFFER_PADDED_SIZE).
typedef InlineSString<512> StackSString;

// This is a smaller version for when it is known that the string that's going to
// be needed is small and it's preferable not to take up the stack space.
// Instantiates InlineSString with MEMSIZE = 32.
typedef InlineSString<32> SmallStackSString;
1010
1011// To be used specifically for path strings.
1012#ifdef _DEBUG
1013// This is a smaller version for debug builds to exercise the buffer allocation path
1014typedef InlineSString<32> PathString;
1015typedef InlineSString<2 * 32> LongPathString;
1016#else
1017// Set it to the current MAX_PATH
1018typedef InlineSString<260> PathString;
1019typedef InlineSString<2 * 260> LongPathString;
1020#endif
1021
1022// ================================================================================
1023// Quick macro to create an SString around a literal string.
1024// usage:
1025// s = SL("My literal String");
1026// ================================================================================
1027
// SL(x) expands to a temporary SString built with the SString::Literal tag
// and x as the second constructor argument.
#define SL(literalText) SString(SString::Literal, literalText)
1029
1030// ================================================================================
1031// ScratchBuffer classes are used by the GetXXX() routines to allocate scratch space in.
1032// ================================================================================
1033
// Base class for the scratch buffer types. It is a nested class of SString
// and derives from SString privately, so the SString interface is not
// accessible through a scratch buffer object from outside code.
class SString::AbstractScratchBuffer : private SString
{
  protected:
    // Do not use this class directly - use
    // ScratchBuffer or StackScratchBuffer.
    //
    // The constructor is protected, so only derived classes can create
    // instances. It receives caller-provided storage and its size; the
    // ScratchBuffer template passes its embedded BYTE array and MEMSIZE.
    // The definition is not in this part of the file.
    AbstractScratchBuffer(void *buffer, COUNT_T size);
};
1041
1042template <COUNT_T MEMSIZE>
1043class ScratchBuffer : public SString::AbstractScratchBuffer
1044{
1045 private:
1046 BYTE m_inline[MEMSIZE];
1047
1048 public:
1049 ScratchBuffer()
1050 : AbstractScratchBuffer((void *)m_inline, MEMSIZE)
1051 {
1052 WRAPPER_NO_CONTRACT;
1053 }
1054};
1055
// Default scratch buffer: ScratchBuffer with 256 bytes of embedded storage.
typedef ScratchBuffer<256> StackScratchBuffer;
1057
1058// ================================================================================
1059// Special contract definition - THROWS_UNLESS_NORMALIZED
1060// this is used for operations which might fail for generalized strings but
1061// not if the string has already been converted to unicode. Rather than just
1062// setting this on all conversions to unicode, we only set it when explicitly
1063// asked. This should expose more potential problems.
1064// ================================================================================
1065
// Contract clause: selects NOTHROW when IsNormalized() returns true and
// THROWS otherwise. IsNormalized() is called unqualified, so the macro must be
// used where that name resolves (e.g. inside an SString member function).
// The expansion has no trailing semicolon; the use site supplies it.
#define THROWS_UNLESS_NORMALIZED \
    if (IsNormalized()) NOTHROW; else THROWS
1068
// Contract clause: selects NOTHROW only when both the current object
// (unqualified IsNormalized()) and 's' report IsNormalized(); otherwise THROWS.
// 's' is parenthesized in the expansion so that expression arguments such as
// '*ptr' bind correctly before the member access.
// The expansion has no trailing semicolon; the use site supplies it.
#define THROWS_UNLESS_BOTH_NORMALIZED(s) \
    if (IsNormalized() && (s).IsNormalized()) NOTHROW; else THROWS
1071
// Contract clause: selects FORBID_FAULT when IsNormalized() returns true and
// INJECT_FAULT(stmt) otherwise. 'stmt' is passed through to INJECT_FAULT
// unchanged. IsNormalized() is called unqualified, so the macro must be used
// where that name resolves. The expansion has no trailing semicolon.
#define FAULTS_UNLESS_NORMALIZED(stmt) \
    if (IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)
1074
// Contract clause: selects FORBID_FAULT only when both the current object
// (unqualified IsNormalized()) and 's' report IsNormalized(); otherwise
// INJECT_FAULT(stmt). 's' is parenthesized in the expansion so that
// expression arguments such as '*ptr' bind correctly before the member
// access. 'stmt' is passed through to INJECT_FAULT unchanged.
// The expansion has no trailing semicolon; the use site supplies it.
#define FAULTS_UNLESS_BOTH_NORMALIZED(s, stmt) \
    if (IsNormalized() && (s).IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)
1077
1078// ================================================================================
1079// Inline definitions
1080// ================================================================================
1081
1082#include <sstring.inl>
1083
1084#endif // _SSTRING_H_
1085