1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // --------------------------------------------------------------------------- |
5 | // SString.h (Safe String) |
6 | // |
7 | |
8 | // --------------------------------------------------------------------------- |
9 | |
10 | // ------------------------------------------------------------------------------------------ |
// SString is the "standard" string representation for the EE. It has two purposes.
12 | // (1) it provides an easy-to-use, relatively efficient, string class for APIs to standardize |
13 | // on. |
14 | // (2) it completely encapsulates all "unsafe" string operations - that is, string operations |
15 | // which yield possible buffer overrun bugs. Typesafe use of this API should help guarantee |
16 | // safety. |
17 | // |
// An SString is conceptually unicode, although the internal conversion might be delayed as long as possible.
// Basically it's up to the implementation whether conversion takes place immediately or is delayed, and if
// delayed, what operations trigger the conversion.
21 | // |
22 | // Note that anywhere you express a "position" in a string, it is in terms of the Unicode representation of the |
23 | // string. |
24 | // |
// If you need a direct non-unicode representation, you will have to provide a fresh SString which can
// receive a conversion operation if necessary.
27 | // |
28 | // The alternate encodings available are: |
29 | // 1. ASCII - string consisting entirely of ASCII (7 bit) characters. This is the only 1 byte encoding |
30 | // guaranteed to be fixed width. Such a string is also a valid instance of all the other 1 byte string |
31 | // representations, and we take advantage of this fact. |
32 | // 2. UTF-8 - standard multibyte unicode encoding. |
33 | // 3. ANSI - Potentially multibyte encoding using the ANSI page determined by GetACP(). |
34 | // |
35 | // @todo: Note that we could also provide support for several other cases (but currently do not.) |
36 | // - Page specified by GetOEMCP() (OEM page) |
37 | // - Arbitrary page support |
38 | // |
39 | // @todo: argument & overflow/underflow checking needs to be added |
40 | // ------------------------------------------------------------------------------------------ |
41 | |
42 | |
43 | #ifndef _SSTRING_H_ |
44 | #define _SSTRING_H_ |
45 | |
46 | #include "utilcode.h" |
47 | #include "sbuffer.h" |
48 | |
49 | // ========================================================================================== |
50 | // Documentational typedefs: use these to indicate specific representations of 8 bit strings: |
51 | // ========================================================================================== |
52 | |
53 | // Note that LPCSTR means ASCII (7-bit) only! |
54 | |
// 7-bit ASCII text. Note that ASCII, ANSI and UTF8 are all aliases of the same CHAR type,
// so the compiler does not tell them apart; they only document which encoding a given
// 8-bit string is expected to carry.
typedef CHAR ASCII;
typedef ASCII *LPASCII;
typedef const ASCII *LPCASCII;

// Text in the ANSI code page (potentially multibyte).
typedef CHAR ANSI;
typedef ANSI *LPANSI;
typedef const ANSI *LPCANSI;

// UTF-8 encoded text (multibyte unicode encoding).
typedef CHAR UTF8;
typedef UTF8 *LPUTF8;
typedef const UTF8 *LPCUTF8;
66 | |
67 | // ========================================================================================== |
68 | // SString is the base class for safe strings. |
69 | // ========================================================================================== |
70 | |
71 | |
72 | typedef DPTR(class SString) PTR_SString; |
73 | class SString : private SBuffer |
74 | { |
75 | friend struct _DacGlobals; |
76 | |
77 | private: |
// Tags the encoding currently held by the string.
//
// The numeric values are chosen so that the low bits act as flags:
//   bit 0 (REPRESENTATION_SINGLE_MASK)   - set for the 8-bit-unit encodings
//   bit 1 (REPRESENTATION_VARIABLE_MASK) - set for the variable-width encodings
// REPRESENTATION_MASK covers all three bits used by the tag.
enum Representation
{
    // Flag / extraction masks.
    REPRESENTATION_SINGLE_MASK   = 0x01,
    REPRESENTATION_VARIABLE_MASK = 0x02,
    REPRESENTATION_MASK          = 0x07,

    // Encodings, listed in ascending numeric order.
    REPRESENTATION_EMPTY         = 0x00, // 000
    REPRESENTATION_ASCII         = 0x01, // 001 : single
    REPRESENTATION_UTF8          = 0x03, // 011 : single + variable
    REPRESENTATION_UNICODE       = 0x04, // 100
    REPRESENTATION_ANSI          = 0x07  // 111 : single + variable
};
91 | |
// Minimum guess for the Printf buffer size.
// NOTE(review): presumably a character count used as the starting size when the Printf
// family formats into the string - confirm against the implementation.
const static COUNT_T MINIMUM_GUESS = 20;
94 | |
95 | |
#ifdef _DEBUG
// There used to be a public constructor of this form, but it made it too easy to lose
// UTF-8 information by accident. Callers now have to state the representation
// explicitly (see the tagged constructors); this private, debug-only overload exists
// to prevent the untagged form from being reintroduced.
explicit SString(const ASCII *)
{
    // The asserted expression is always false: reaching this body is a caller bug.
    _ASSERTE(!"Don't call this." );
}
#endif
105 | |
106 | protected: |
// Base classes of the string iterators (Index for CIterator/Iterator, UIndex for
// UIterator). Both are defined later in this class.
class Index;
class UIndex;

// Friendship gives the index classes access to SString's non-public members.
friend class Index;
friend class UIndex;
112 | |
113 | public: |
114 | |
// UIterator is character-level assignable.
class UIterator;

// A string referenced by a CIterator/Iterator must be modified through SString APIs;
// these iterators themselves only give read access to the characters.
class CIterator;
class Iterator;
121 | |
// Tokens for constructor overloads. Each enum has a single enumerator whose only job
// is to select the matching tagged SString constructor (e.g. SString(SString::Utf8, p)).
enum tagUTF8Literal { Utf8Literal };
enum tagLiteral { Literal };
enum tagUTF8 { Utf8 };
enum tagANSI { Ansi };
enum tagASCII {Ascii };
#ifdef SSTRING_CONSOLECODEPAGE
// Only available when console code page support is compiled in.
enum tagCONSOLE { Console };
#endif
131 | |
// Static startup hook and its CHECK-style validator.
// NOTE(review): definitions are not in this header; presumably Startup() initializes the
// static state declared in the private section (s_ACP, s_Empty, ...) - confirm.
static void Startup();
static CHECK CheckStartup();

// Returns a reference to a const SString. It is used below as the default value for the
// optional FormatMessage arguments.
// NOTE(review): presumably the shared empty string behind s_Empty - confirm.
static const SString &Empty();
136 | |
// Default constructor.
SString();

// Copy constructor. It is explicit, so copy-initialization syntax (SString a = b;) is
// not accepted; use direct initialization instead.
explicit SString(const SString &s);

// The remaining constructors take the same arguments as the Set()/SetXXX()/SetLiteral()
// members declared further down.
// NOTE(review): presumably each one initializes the string exactly as the matching
// setter would (concatenation, substring, copy, ...) - confirm in the implementation.
SString(const SString &s1, const SString &s2);
SString(const SString &s1, const SString &s2, const SString &s3);
SString(const SString &s1, const SString &s2, const SString &s3, const SString &s4);
SString(const SString &s, const CIterator &i, COUNT_T length);
SString(const SString &s, const CIterator &start, const CIterator &end);
SString(const WCHAR *string);
SString(const WCHAR *string, COUNT_T count);

// 8-bit sources: the leading tag argument carries no data, it only selects the overload
// and thereby names the encoding of the string that follows.
SString(enum tagASCII dummyTag, const ASCII *string);
SString(enum tagASCII dummyTag, const ASCII *string, COUNT_T count);
SString(enum tagUTF8 dummytag, const UTF8 *string);
SString(enum tagUTF8 dummytag, const UTF8 *string, COUNT_T count);
SString(enum tagANSI dummytag, const ANSI *string);
SString(enum tagANSI dummytag, const ANSI *string, COUNT_T count);
#ifdef SSTRING_CONSOLECODEPAGE
SString(enum tagCONSOLE dummytag, const CONSOLE *string);
SString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count);
#endif
// Single-character source.
SString(WCHAR character);

// NOTE: Literals MUST be read-only never-freed strings.
SString(enum tagLiteral dummytag, const CHAR *literal);
SString(enum tagUTF8Literal dummytag, const UTF8 *literal);
SString(enum tagLiteral dummytag, const WCHAR *literal);
SString(enum tagLiteral dummytag, const WCHAR *literal, COUNT_T count);
165 | |
// Set this string to a copy of s, or to the concatenation of s1..s4
// (as many of them as the overload takes).
void Set(const SString &s);
void Set(const SString &s1, const SString &s2);
void Set(const SString &s1, const SString &s2, const SString &s3);
void Set(const SString &s1, const SString &s2, const SString &s3, const SString &s4);

// Set this string to the substring of s, starting at i, of length characters.
void Set(const SString &s, const CIterator &i, COUNT_T length);

// Set this string to the substring of s, starting at start and ending at end (exclusive)
void Set(const SString &s, const CIterator &start, const CIterator &end);

// Set this string to a copy of the given string. The SetXXX name states the
// encoding of the 8-bit source.
void Set(const WCHAR *string);
void SetASCII(const ASCII *string);
void SetUTF8(const UTF8 *string);
void SetANSI(const ANSI *string);
#ifdef SSTRING_CONSOLECODEPAGE
void SetConsole(const CONSOLE *string);
#endif

// Set this string to a copy of the first count chars of the given string
void Set(const WCHAR *string, COUNT_T count);

// Set this string to a preallocated copy of a given string.
// The caller is the owner of the buffer and has to coordinate its lifetime.
void SetPreallocated(const WCHAR *string, COUNT_T count);

// Counted variants of the 8-bit setters.
// NOTE(review): presumably count is in 8-bit units of the source encoding - confirm.
void SetASCII(const ASCII *string, COUNT_T count);

void SetUTF8(const UTF8 *string, COUNT_T count);
void SetANSI(const ANSI *string, COUNT_T count);
#ifdef SSTRING_CONSOLECODEPAGE
void SetConsole(const CONSOLE *string, COUNT_T count);
#endif

// Set this string to the unicode character
void Set(WCHAR character);

// Set this string to the UTF8 character
void SetUTF8(CHAR character);

// Set this string to the given literal. We share the memory and don't make a copy.
void SetLiteral(const CHAR *literal);
void SetLiteral(const WCHAR *literal);
211 | |
212 | // ------------------------------------------------------------------ |
213 | // Public operations |
214 | // ------------------------------------------------------------------ |
215 | |
// Normalizes the string representation to unicode. This can be used to
// make basic read-only operations non-failing.
// (Declared const: the conversion is not treated as a logical modification.)
void Normalize() const;

// Return the number of characters in the string (excluding the terminating NULL).
COUNT_T GetCount() const;
// NOTE(review): presumably TRUE when the string holds no characters - confirm.
BOOL IsEmpty() const;

// Return whether a single byte string has all characters which fit in the ASCII set.
// (Note that this will return FALSE if the string has been converted to unicode for any
// reason.)
BOOL IsASCII() const;
228 | |
// !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!
//
//                 THIS IS NOT SUPPORTED FULLY ON WIN9x
//      SString case-insensitive comparison is based off LCMapString,
//      which does not work on characters outside the current OS code page.
//
//      Case insensitive code in SString is primarily targeted at
//      supporting path comparisons, which is supported correctly on 9x,
//      since file system names are limited to the OS code page.
//
// !!!!!!!!!!!!!! WARNING about case insensitive operations !!!!!!!!!!!!!!!

// Compute a content-based hash value
ULONG Hash() const;
ULONG HashCaseInsensitive() const;
ULONG HashCaseInsensitive(LocaleID locale) const;

// Do a string comparison. Return 0 if the strings
// have the same value,  -1 if this is "less than" s, or 1 if
// this is "greater than" s.
int Compare(const SString &s) const;
int CompareCaseInsensitive(const SString &s) const; // invariant locale
int CompareCaseInsensitive(const SString &s, LocaleID locale) const;

// Do a string equality test (case sensitive for Equals, case insensitive for the
// EqualsCaseInsensitive overloads). Return TRUE if the strings
// have the same value, FALSE if not.
BOOL Equals(const SString &s) const;
BOOL EqualsCaseInsensitive(const SString &s) const; // invariant locale
BOOL EqualsCaseInsensitive(const SString &s, LocaleID locale) const;

// Match s to a portion of the string starting at the position.
// Return TRUE if the strings have the same value
// (regardless of representation), FALSE if not.
BOOL Match(const CIterator &i, const SString &s) const;
BOOL MatchCaseInsensitive(const CIterator &i, const SString &s) const; // invariant locale
BOOL MatchCaseInsensitive(const CIterator &i, const SString &s, LocaleID locale) const;

// Single-character forms of Match.
BOOL Match(const CIterator &i, WCHAR c) const;
BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c) const; // invariant locale
BOOL MatchCaseInsensitive(const CIterator &i, WCHAR c, LocaleID locale) const;

// Like match, but advances the iterator past the match
// if successful
BOOL Skip(CIterator &i, const SString &s) const;
BOOL Skip(CIterator &i, WCHAR c) const;

// Start searching for a match of the given string, starting at
// the given iterator point.
// If a match exists, move the iterator to point to the nearest
// occurrence of s in the string and return TRUE.
// If no match exists, return FALSE and leave the iterator unchanged.
BOOL Find(CIterator &i, const SString &s) const;
BOOL Find(CIterator &i, const WCHAR *s) const;
BOOL FindASCII(CIterator &i, const ASCII *s) const;
BOOL FindUTF8(CIterator &i, const UTF8 *s) const;
BOOL Find(CIterator &i, WCHAR c) const;

// NOTE(review): presumably the same contract as Find, searching backwards from i -
// confirm in the implementation.
BOOL FindBack(CIterator &i, const SString &s) const;
BOOL FindBack(CIterator &i, const WCHAR *s) const;
BOOL FindBackASCII(CIterator &i, const ASCII *s) const;
BOOL FindBackUTF8(CIterator &i, const UTF8 *s) const;
BOOL FindBack(CIterator &i, WCHAR c) const;

// Returns TRUE if this string begins with the contents of s
BOOL BeginsWith(const SString &s) const;
BOOL BeginsWithCaseInsensitive(const SString &s) const; // invariant locale
BOOL BeginsWithCaseInsensitive(const SString &s, LocaleID locale) const;

// Returns TRUE if this string ends with the contents of s
BOOL EndsWith(const SString &s) const;
BOOL EndsWithCaseInsensitive(const SString &s) const; // invariant locale
BOOL EndsWithCaseInsensitive(const SString &s, LocaleID locale) const;
301 | |
// Sets this string to an empty string "".
void Clear();

// Truncate the string to the iterator position
void Truncate(const Iterator &i);

// Append s to the end of this string. The AppendXXX name states the encoding
// of the 8-bit source.
void Append(const SString &s);
void Append(const WCHAR *s);
void AppendASCII(const CHAR *s);
void AppendUTF8(const CHAR *s);

// Append char c to the end of this string.
void Append(const WCHAR c);
void AppendUTF8(const CHAR c);

// Insert s into this string at the position given by iterator i.
void Insert(const Iterator &i, const SString &s);
void Insert(const Iterator &i, const WCHAR *s);
void InsertASCII(const Iterator &i, const CHAR *s);
void InsertUTF8(const Iterator &i, const CHAR *s);

// Delete the substring of the given length that starts at i
void Delete(const Iterator &i, COUNT_T length);

// Replace character at i with c
void Replace(const Iterator &i, WCHAR c);

// Replace substring at (i,i+length) with s
void Replace(const Iterator &i, COUNT_T length, const SString &s);

// Make sure that string buffer has room to grow
void Preallocate(COUNT_T characters) const;

// Shrink buffer size as much as possible (reallocate if necessary.)
void Trim() const;
338 | |
339 | // ------------------------------------------------------------------ |
340 | // Iterators: |
341 | // ------------------------------------------------------------------ |
342 | |
343 | // SString splits iterators into two categories. |
344 | // |
345 | // CIterator and Iterator are cheap to create, but allow only read-only |
346 | // access to the string. |
347 | // |
348 | // UIterator forces a unicode conversion, but allows |
349 | // assignment to individual string characters. They are also a bit more |
350 | // efficient once created. |
351 | |
352 | // ------------------------------------------------------------------ |
353 | // UIterator: |
354 | // ------------------------------------------------------------------ |
355 | |
356 | protected: |
357 | |
// Position-tracking base class of UIterator, built on SBuffer::Index.
// Everything here is protected: it is reached only through the friends below.
// NOTE(review): Indexer<WCHAR, UIterator> is presumably a friend because the Indexer
// template calls the GetAt/Skip/Subtract/DoCheck primitives - confirm against Indexer.
class UIndex : public SBuffer::Index
{
    friend class SString;
    friend class Indexer<WCHAR, UIterator>;

  protected:

    UIndex();
    UIndex(SString *string, SCOUNT_T index);

    // Primitives expressed in terms of a signed character delta from the
    // current position. GetAt returns a mutable WCHAR reference.
    WCHAR &GetAt(SCOUNT_T delta) const;
    void Skip(SCOUNT_T delta);
    SCOUNT_T Subtract(const UIndex &i) const;
    CHECK DoCheck(SCOUNT_T delta) const;

    // Raw WCHAR pointer form of this index.
    WCHAR *GetUnicode() const;
};
374 | |
375 | public: |
376 | |
377 | class UIterator : public UIndex, public Indexer<WCHAR, UIterator> |
378 | { |
379 | friend class SString; |
380 | |
381 | public: |
382 | UIterator() |
383 | { |
384 | } |
385 | |
386 | UIterator(SString *string, int index) |
387 | : UIndex(string, index) |
388 | { |
389 | } |
390 | }; |
391 | |
// Obtain UIterators for this string. As described in the iterator overview above,
// a UIterator forces a unicode conversion of the string.
// NOTE(review): presumably positioned at the first character and one past the last
// character respectively - confirm.
UIterator BeginUnicode();
UIterator EndUnicode();
394 | |
// For CIterator & Iterator, we try our best to iterate the string without
// modifying it.  (Currently, we do require an ASCII or Unicode string
// for simple WCHAR retrieval, but you could imagine being more flexible
// going forward - perhaps even supporting iterating multibyte encodings
// directly.)
400 | // |
// Because of the runtime-changeable nature of the string, CIterators
// require an extra member to record the character size. They also
// are unable to properly implement GetAt as required by the template
// (since there may not be a direct WCHAR pointer), so they provide
// further customization in a subclass.
406 | // |
407 | // Normally the user expects to cast Iterators to CIterators transparently, so |
408 | // we provide a constructor on CIterator to support this. |
409 | |
410 | protected: |
411 | |
// Position-tracking base class shared by CIterator and Iterator, built on
// SBuffer::Index. Unlike UIndex it does not assume WCHAR storage: it records a
// per-character size shift and exposes characters by value.
class Index : public SBuffer::Index
{
    friend class SString;

    friend class Indexer<const WCHAR, CIterator>;
    friend class Indexer<WCHAR, Iterator>;

  protected:
    // Records the character size of the string being iterated.
    // NOTE(review): presumably log2 of the bytes per character (0 for 8-bit,
    // 1 for WCHAR) - confirm against GetCharacterSizeShift().
    int m_characterSizeShift;

    Index();
    Index(SString *string, SCOUNT_T index);

    // Primitives expressed in terms of a signed character delta. GetAt yields a BYTE
    // reference rather than a WCHAR one, since there may be no direct WCHAR pointer.
    BYTE &GetAt(SCOUNT_T delta) const;
    void Skip(SCOUNT_T delta);
    SCOUNT_T Subtract(const Index &i) const;
    CHECK DoCheck(SCOUNT_T delta) const;

    // NOTE(review): presumably re-bases this index after the string's buffer or
    // representation changed - confirm in the implementation.
    void Resync(const SString *string, BYTE *ptr) const;

    // Raw pointer forms of this index for the two directly iterable representations.
    const WCHAR *GetUnicode() const;
    const CHAR *GetASCII() const;

  public:
    // Note these should supersede the Indexer versions
    // since this class comes first in the inheritance list
    WCHAR operator*() const;
    void operator->() const;
    WCHAR operator[](int index) const;
};
441 | |
442 | public: |
443 | |
444 | class CIterator : public Index, public Indexer<const WCHAR, CIterator> |
445 | { |
446 | friend class SString; |
447 | |
448 | public: |
449 | const Iterator &ConstCast() const |
450 | { |
451 | return *(const Iterator *)this; |
452 | } |
453 | |
454 | Iterator &ConstCast() |
455 | { |
456 | return *(Iterator *)this; |
457 | } |
458 | |
459 | operator const SBuffer::CIterator &() const |
460 | { |
461 | return *(const SBuffer::CIterator *)this; |
462 | } |
463 | |
464 | operator SBuffer::CIterator &() |
465 | { |
466 | return *(SBuffer::CIterator *)this; |
467 | } |
468 | |
469 | CIterator() |
470 | { |
471 | } |
472 | |
473 | CIterator(const SString *string, int index) |
474 | : Index(const_cast<SString *>(string), index) |
475 | { |
476 | } |
477 | |
478 | // explicitly resolve these for gcc |
479 | WCHAR operator*() const { return Index::operator*(); } |
480 | void operator->() const { Index::operator->(); } |
481 | WCHAR operator[](int index) const { return Index::operator[](index); } |
482 | }; |
483 | |
484 | class Iterator : public Index, public Indexer<WCHAR, Iterator> |
485 | { |
486 | friend class SString; |
487 | |
488 | public: |
489 | operator const CIterator &() const |
490 | { |
491 | return *(const CIterator *)this; |
492 | } |
493 | |
494 | operator CIterator &() |
495 | { |
496 | return *(CIterator *)this; |
497 | } |
498 | |
499 | operator const SBuffer::Iterator &() const |
500 | { |
501 | return *(const SBuffer::Iterator *)this; |
502 | } |
503 | |
504 | operator SBuffer::Iterator &() |
505 | { |
506 | return *(SBuffer::Iterator *)this; |
507 | } |
508 | |
509 | Iterator() |
510 | { |
511 | } |
512 | |
513 | Iterator(SString *string, int index) |
514 | : Index(string, index) |
515 | { |
516 | SUPPORTS_DAC; |
517 | } |
518 | |
519 | // explicitly resolve these for gcc |
520 | WCHAR operator*() const { return Index::operator*(); } |
521 | void operator->() const { Index::operator->(); } |
522 | WCHAR operator[](int index) const { return Index::operator[](index); } |
523 | }; |
524 | |
// Iterator factories. The const overloads hand out CIterators, the non-const
// overloads hand out Iterators.
// NOTE(review): presumably Begin() is positioned at the first character and End() one
// past the last - confirm.
CIterator Begin() const;
CIterator End() const;

Iterator Begin();
Iterator End();
530 | |
531 | // ------------------------------------------------------------------ |
532 | // Conversion: |
533 | // ------------------------------------------------------------------ |
534 | |
// Get a const pointer to the string in the current representation.
// This pointer can not be cached because it will become invalid if
// the SString changes representation or reallocates its buffer.

// You can always get a unicode string.  This will force a conversion
// if necessary.
const WCHAR *GetUnicode() const;
// NOTE(review): presumably returns the unicode pointer at the position of i - confirm.
const WCHAR *GetUnicode(const CIterator &i) const;

// Change the case of this string.
// NOTE(review): the locale used for the mapping is not visible here - confirm.
void LowerCase();
void UpperCase();

// Helper function to convert string in-place to lower-case (no allocation overhead for SString instance)
static void LowerCase(__inout_z LPWSTR wszString);
549 | |
// Use a local declaration of InlineScratchBuffer or StackScratchBuffer for parameters of
// AbstractScratchBuffer.
class AbstractScratchBuffer;

// These routines will use the given scratch buffer if necessary
// to perform a conversion to the desired representation.  Note that
// the lifetime of the pointer returned is limited by BOTH the
// scratch buffer and the source (this) string.
//
// Typical usage:
//
// SString *s = ...;
// {
//   StackScratchBuffer buffer;
//   const UTF8 *utf8 = s->GetUTF8(buffer);
//   CallFoo(utf8);
// }
// // No more pointers to returned buffer allowed.

const UTF8 *GetUTF8(AbstractScratchBuffer &scratch) const;
// NOTE(review): pcbUtf8 is presumably an out-parameter receiving the UTF-8 size in
// bytes; whether that includes the terminator is not visible here - confirm.
const UTF8 *GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const;
const ANSI *GetANSI(AbstractScratchBuffer &scratch) const;
#ifdef SSTRING_CONSOLECODEPAGE
const CONSOLE *GetConsole(AbstractScratchBuffer &scratch) const;
#endif
578 | |
// Used when the representation is known, throws if the representation doesn't match
const UTF8 *GetUTF8NoConvert() const;

// Converts/copies into the given output string. This string is const and the
// result is written to dest.
void ConvertToUnicode(SString &dest) const;
void ConvertToANSI(SString &dest) const;
// NOTE(review): the meaning of the returned COUNT_T (presumably the size of the
// UTF-8 result) is not visible here - confirm.
COUNT_T ConvertToUTF8(SString &dest) const;
#ifdef SSTRING_CONSOLECODEPAGE
void ConvertToConsole(SString &dest) const;
#endif
589 | |
590 | //------------------------------------------------------------------- |
591 | // Accessing the string contents directly |
592 | //------------------------------------------------------------------- |
593 | |
// To write directly to the SString's underlying buffer:
// 1) Call OpenXXXBuffer() and pass it the count of characters
// you need. (Not including the null-terminator).
// 2) That returns a pointer to the raw buffer which you can write to.
// 3) When you are done writing to the pointer, call CloseBuffer()
// and pass it the count of characters you actually wrote (not including
// the null). The pointer from step 1 is now invalid.

// example usage:
// void GetName(SString & str) {
//      char * p = str.OpenANSIBuffer(3);
//      strcpy(p, "Cat");
//      str.CloseBuffer();
// }

// Regarding the null-terminator:
// 1) Note that we wrote 4 characters (3 + a null). That's ok. OpenBuffer
// allocates 1 extra byte for the null.
// 2) If we only wrote 3 characters and no null, that's ok too. CloseBuffer()
// will add a null-terminator.

// You should open the buffer, write the data, and immediately close it.
// No sstring operations are valid while the buffer is opened.
//
// In a debug build, Open/Close will do lots of little checks to make sure
// you don't buffer overflow while it's opened. In a retail build, this
// is a very streamlined action.


// Open the raw buffer for writing up to the given number of characters
// (not including the null).
WCHAR *OpenUnicodeBuffer(COUNT_T maxCharCount);
UTF8 *OpenUTF8Buffer(COUNT_T maxSingleCharCount);
ANSI *OpenANSIBuffer(COUNT_T maxSingleCharCount);

// Returns the unicode string; the caller is responsible for the lifetime of the string
WCHAR *GetCopyOfUnicodeString();

// Get the max size that can be passed to OpenUnicodeBuffer without causing allocations.
COUNT_T GetUnicodeAllocation();

// Call after OpenXXXBuffer().

// Provide the count of characters actually used (not including the
// null terminator). This will make sure the SString's size is correct
// and that we have a null-terminator.
void CloseBuffer(COUNT_T finalCount);

// Close the buffer. Assumes that we completely filled the buffer
// that OpenBuffer() gave back. If we didn't write all the characters,
// call CloseBuffer(COUNT_T) instead.
void CloseBuffer();
645 | |
#ifdef DACCESS_COMPILE
// DAC access to string functions.
// Note that other accessors above are not DAC-safe and will return TARGET pointers into
// the string instead of copying the string over to the host.
// @dbgtodo  dac support: Prevent usage of such DAC-unsafe SString APIs in DAC code

// Instantiate a copy of the raw buffer in the host and return a pointer to it
void * DacGetRawContent() const;

// Instantiate a copy of the raw buffer in the host.  Requires that the underlying
// representation is already unicode.
const WCHAR * DacGetRawUnicode() const;

// Copy the string from the target into the provided buffer, converting to unicode if necessary
// NOTE(review): needChars is presumably an out-parameter receiving the required buffer
// size in characters, and the return value presumably reports success - confirm.
bool DacGetUnicode(COUNT_T                                   bufChars,
                   __out_z __inout_ecount(bufChars) WCHAR *  buffer,
                   COUNT_T *                                 needChars) const;

// Forwards memory enumeration to the SBuffer base, which owns the storage.
void EnumMemoryRegions(CLRDataEnumMemoryFlags flags) const
{
    SUPPORTS_DAC;
    SBuffer::EnumMemoryRegions(flags);
}
#endif
670 | |
671 | //--------------------------------------------------------------------- |
672 | // Utilities |
673 | //--------------------------------------------------------------------- |
674 | |
// WARNING: The MBCS versions of the printf functions are a factory for globalization
// issues when used to format Unicode strings (%S). The Unicode versions are
// preferred in this case.
void Printf(const CHAR *format, ...);
void VPrintf(const CHAR *format, va_list args);

// Unicode format-string versions.
// NOTE(review): how the P-prefixed variants (PPrintf/PVPrintf) differ from
// Printf/VPrintf is not visible in this header - confirm in the implementation.
void Printf(const WCHAR *format, ...);
void PPrintf(const WCHAR *format, ...);
void VPrintf(const WCHAR *format, va_list args);

void PVPrintf(const WCHAR *format, va_list args);

// NOTE(review): presumably format and append to the existing contents instead of
// replacing them - confirm.
void AppendPrintf(const CHAR *format, ...);
void AppendVPrintf(const CHAR *format, va_list args);

void AppendPrintf(const WCHAR *format, ...);
void AppendVPrintf(const WCHAR *format, va_list args);

// Resource loading. The first form reports success as a BOOL, the
// LoadResourceAndReturnHR forms return an HRESULT instead.
BOOL LoadResource(CCompRC::ResourceCategory eCategory, int resourceID);
HRESULT LoadResourceAndReturnHR(CCompRC::ResourceCategory eCategory, int resourceID);
HRESULT LoadResourceAndReturnHR(CCompRC* pResourceDLL, CCompRC::ResourceCategory eCategory, int resourceID);

// Message formatting with up to ten optional SString arguments; every omitted
// argument defaults to Empty().
BOOL FormatMessage(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId,
                   const SString &arg1 = Empty(), const SString &arg2 = Empty(),
                   const SString &arg3 = Empty(), const SString &arg4 = Empty(),
                   const SString &arg5 = Empty(), const SString &arg6 = Empty(),
                   const SString &arg7 = Empty(), const SString &arg8 = Empty(),
                   const SString &arg9 = Empty(), const SString &arg10 = Empty());

#if 1
// @todo - get rid of this and move it outside of SString
void MakeFullNamespacePath(const SString &nameSpace, const SString &name);
#endif
707 | |
708 | //-------------------------------------------------------------------- |
709 | // Operators |
710 | //-------------------------------------------------------------------- |
711 | |
712 | operator const WCHAR * () const { WRAPPER_NO_CONTRACT; return GetUnicode(); } |
713 | |
714 | WCHAR operator[](int index) { WRAPPER_NO_CONTRACT; return Begin()[index]; } |
715 | WCHAR operator[](int index) const { WRAPPER_NO_CONTRACT; return Begin()[index]; } |
716 | |
717 | SString &operator= (const SString &s) { WRAPPER_NO_CONTRACT; Set(s); return *this; } |
718 | SString &operator+= (const SString &s) { WRAPPER_NO_CONTRACT; Append(s); return *this; } |
719 | |
720 | // ------------------------------------------------------------------- |
721 | // Check functions |
722 | // ------------------------------------------------------------------- |
723 | |
// CHECK-returning validators.
// NOTE(review): presumably CheckIteratorRange validates that i (and i + length) lies
// within this string - confirm in the implementation.
CHECK CheckIteratorRange(const CIterator &i) const;
CHECK CheckIteratorRange(const CIterator &i, COUNT_T length) const;
CHECK CheckEmpty() const;

// Static validators for a character count and for a representation tag.
static CHECK CheckCount(COUNT_T count);
static CHECK CheckRepresentation(int representation);

// The remaining checks are only declared when CHECK_INVARIANTS is enabled.
#if CHECK_INVARIANTS
static CHECK CheckASCIIString(const ASCII *string);
static CHECK CheckASCIIString(const ASCII *string, COUNT_T count);

CHECK Check() const;
CHECK Invariant() const;
CHECK InternalInvariant() const;
#endif  // CHECK_INVARIANTS
739 | |
// Helpers for CRT function equivalence: case-insensitive comparisons with the same
// names and argument order as the CRT routines, for 8-bit and WCHAR buffers.
static int __cdecl _stricmp(const CHAR *buffer1, const CHAR *buffer2);
static int __cdecl _strnicmp(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count);

static int __cdecl _wcsicmp(const WCHAR *buffer1, const WCHAR *buffer2);
static int __cdecl _wcsnicmp(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count);

// C++ convenience overloads: one name for both character widths, selected by
// the argument type.
static int _tstricmp(const CHAR *buffer1, const CHAR *buffer2);
static int _tstricmp(const WCHAR *buffer1, const WCHAR *buffer2);

static int _tstrnicmp(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count);
static int _tstrnicmp(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count);
753 | |
754 | // ------------------------------------------------------------------- |
755 | // Internal routines |
756 | // ------------------------------------------------------------------- |
757 | |
758 | |
759 | protected: |
// Use this via InlineSString<X>
// NOTE(review): presumably adopts the caller-supplied buffer of `size` bytes as the
// initial storage - confirm against InlineSString and the SBuffer constructor.
SString(void *buffer, COUNT_T size);
762 | |
763 | private: |
// Shared back ends for the case-insensitive comparisons (8-bit and WCHAR versions).
// NOTE(review): stopOnNull / stopOnCount presumably select whether the comparison ends
// at a terminator and/or after count characters - confirm in the implementation.
static int CaseCompareHelperA(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);
static int CaseCompareHelper(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount);

// Internal helpers:

// Static two-byte buffer.
// NOTE(review): presumably the storage (a WCHAR-sized null) behind empty strings - confirm.
static const BYTE s_EmptyBuffer[2];

// NOTE(review): presumably the ANSI code page as reported by GetACP(), and whether
// that code page is multibyte - confirm in Startup().
static UINT s_ACP;
SVAL_DECL(BOOL, s_IsANSIMultibyte);

#ifdef SSTRING_CONSOLECODEPAGE
static UINT s_ConsoleCP;
static BOOL s_IsConsoleMultibyte;
#endif

const static LocaleID s_defaultLCID;

SPTR_DECL(SString,s_Empty);

COUNT_T GetRawCount() const;

// Get buffer as appropriate string rep
ASCII *GetRawASCII() const;
UTF8 *GetRawUTF8() const;
ANSI *GetRawANSI() const;
WCHAR *GetRawUnicode() const;
#ifdef SSTRING_CONSOLECODEPAGE
CONSOLE *GetRawConsole() const;
#endif

void InitEmpty();

// Representation tag accessors and predicates derived from it.
Representation GetRepresentation() const;
void SetRepresentation(Representation representation);
BOOL IsRepresentation(Representation representation) const;
BOOL IsFixedSize() const;
BOOL IsIteratable() const;
BOOL IsSingleByte() const;

int GetCharacterSizeShift() const;

// Conversions between a buffer size and a character count.
// NOTE(review): presumably based on GetCharacterSizeShift() - confirm.
COUNT_T SizeToCount(COUNT_T size) const;
COUNT_T CountToSize(COUNT_T count) const;

COUNT_T GetBufferSizeInCharIncludeNullChar() const;

// State flag queries and setters. The setters are const, so the flags they touch
// are not treated as part of the string's logical value.
BOOL IsLiteral() const;
BOOL IsAllocated() const;
BOOL IsBufferOpen() const;
BOOL IsASCIIScanned() const;
void SetASCIIScanned() const;
void SetNormalized() const;
BOOL IsNormalized() const;
void ClearNormalized() const;

// In-place representation changes; const for the same reason as above.
void EnsureWritable() const;
void ConvertToFixed() const;
void ConvertToIteratable() const;

void ConvertASCIIToUnicode(SString &dest) const;
void ConvertToUnicode() const;
void ConvertToUnicode(const CIterator &i) const;

// NOTE(review): presumably returns s itself, or a converted copy placed in scratch,
// in a representation that can be compared directly with this string - confirm.
const SString &GetCompatibleString(const SString &s, SString &scratch) const;
const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
829 | BOOL ScanASCII() const; |
830 | void NullTerminate(); |
831 | |
832 | void Resize(COUNT_T count, Representation representation, |
833 | Preserve preserve = DONT_PRESERVE); |
834 | |
835 | void OpenBuffer(Representation representation, COUNT_T countChars); |
836 | }; |
837 | |
838 | // =========================================================================== |
839 | // InlineSString is used for stack allocation of strings, or when the string contents |
840 | // are expected or known to be small. Note that it still supports expandability via |
841 | // heap allocation if necessary. |
842 | // =========================================================================== |
843 | |
844 | template <COUNT_T MEMSIZE> |
845 | class InlineSString : public SString |
846 | { |
847 | private: |
848 | BYTE m_inline[SBUFFER_PADDED_SIZE(MEMSIZE)]; |
849 | |
850 | public: |
851 | FORCEINLINE InlineSString() |
852 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
853 | { |
854 | WRAPPER_NO_CONTRACT; |
855 | } |
856 | |
857 | FORCEINLINE InlineSString(const SString &s) |
858 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
859 | { |
860 | WRAPPER_NO_CONTRACT; |
861 | Set(s); |
862 | } |
863 | |
864 | FORCEINLINE InlineSString(const SString &s1, const SString &s2) |
865 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
866 | { |
867 | WRAPPER_NO_CONTRACT; |
868 | Set(s1, s2); |
869 | } |
870 | |
871 | FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3) |
872 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
873 | { |
874 | WRAPPER_NO_CONTRACT; |
875 | Set(s1, s2, s3); |
876 | } |
877 | |
878 | FORCEINLINE InlineSString(const SString &s1, const SString &s2, const SString &s3, const SString &s4) |
879 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
880 | { |
881 | WRAPPER_NO_CONTRACT; |
882 | Set(s1, s2, s3, s4); |
883 | } |
884 | |
885 | FORCEINLINE InlineSString(const SString &s, const CIterator &start, const CIterator &end) |
886 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
887 | { |
888 | WRAPPER_NO_CONTRACT; |
889 | Set(s, start, end); |
890 | } |
891 | |
892 | FORCEINLINE InlineSString(const SString &s, const CIterator &i, COUNT_T length) |
893 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
894 | { |
895 | WRAPPER_NO_CONTRACT; |
896 | Set(s, i, length); |
897 | } |
898 | |
899 | FORCEINLINE InlineSString(const WCHAR *string) |
900 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
901 | { |
902 | WRAPPER_NO_CONTRACT; |
903 | Set(string); |
904 | } |
905 | |
906 | FORCEINLINE InlineSString(const WCHAR *string, COUNT_T count) |
907 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
908 | { |
909 | WRAPPER_NO_CONTRACT; |
910 | Set(string, count); |
911 | } |
912 | |
913 | FORCEINLINE InlineSString(enum tagASCII, const CHAR *string) |
914 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
915 | { |
916 | WRAPPER_NO_CONTRACT; |
917 | SetASCII(string); |
918 | } |
919 | |
920 | FORCEINLINE InlineSString(enum tagASCII, const CHAR *string, COUNT_T count) |
921 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
922 | { |
923 | WRAPPER_NO_CONTRACT; |
924 | SetASCII(string, count); |
925 | } |
926 | |
927 | FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string) |
928 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
929 | { |
930 | WRAPPER_NO_CONTRACT; |
931 | SetUTF8(string); |
932 | } |
933 | |
934 | FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 *string, COUNT_T count) |
935 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
936 | { |
937 | WRAPPER_NO_CONTRACT; |
938 | SetUTF8(string, count); |
939 | } |
940 | |
941 | FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string) |
942 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
943 | { |
944 | WRAPPER_NO_CONTRACT; |
945 | SetANSI(string); |
946 | } |
947 | |
948 | FORCEINLINE InlineSString(enum tagANSI dummytag, const ANSI *string, COUNT_T count) |
949 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
950 | { |
951 | WRAPPER_NO_CONTRACT; |
952 | SetANSI(string, count); |
953 | } |
954 | |
955 | #ifdef SSTRING_CONSOLECODEPAGE |
956 | FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string) |
957 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
958 | { |
959 | WRAPPER_NO_CONTRACT; |
960 | SetCONSOLE(string); |
961 | } |
962 | |
963 | FORCEINLINE InlineSString(enum tagCONSOLE dummytag, const CONSOLE *string, COUNT_T count) |
964 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
965 | { |
966 | WRAPPER_NO_CONTRACT; |
967 | SetCONSOLE(string, count); |
968 | } |
969 | #endif |
970 | |
971 | FORCEINLINE InlineSString(WCHAR character) |
972 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
973 | { |
974 | WRAPPER_NO_CONTRACT; |
975 | Set(character); |
976 | } |
977 | |
978 | FORCEINLINE InlineSString(tagUTF8 dummytag, const UTF8 character) |
979 | : SString(m_inline, SBUFFER_PADDED_SIZE(MEMSIZE)) |
980 | { |
981 | WRAPPER_NO_CONTRACT; |
982 | SetUTF8(character); |
983 | } |
984 | |
985 | FORCEINLINE InlineSString<MEMSIZE> &operator= (const SString &s) |
986 | { |
987 | WRAPPER_NO_CONTRACT; |
988 | Set(s); |
989 | return *this; |
990 | } |
991 | |
992 | FORCEINLINE InlineSString<MEMSIZE> &operator= (const InlineSString<MEMSIZE> &s) |
993 | { |
994 | WRAPPER_NO_CONTRACT; |
995 | Set(s); |
996 | return *this; |
997 | } |
998 | }; |
999 | |
1000 | // ================================================================================ |
1001 | // StackSString is a lot like CQuickBytes. Use it to create an SString object |
1002 | // using some stack space as a preallocated buffer. |
1003 | // ================================================================================ |
1004 | |
// InlineSString instantiated with MEMSIZE = 512. MEMSIZE sizes the inline
// BYTE array (through SBUFFER_PADDED_SIZE), so 512 is a BYTE count rather
// than a character count.
typedef InlineSString<512> StackSString;
1006 | |
// This is a smaller version (MEMSIZE = 32) for when it is known that the
// string that's going to be needed is small and it's preferable not to take
// up the stack space.
typedef InlineSString<32> SmallStackSString;
1010 | |
// To be used specifically for path strings. LongPathString always gets twice
// the inline size of PathString.
// NOTE(review): the template argument sizes InlineSString's inline BYTE array
// (via SBUFFER_PADDED_SIZE), so these values are BYTE counts - confirm whether
// a MAX_PATH *character* capacity was intended for the release typedefs.
#ifdef _DEBUG
// This is a smaller version for debug builds to exercise the buffer allocation path
typedef InlineSString<32> PathString;
typedef InlineSString<2 * 32> LongPathString;
#else
// Set it to the current MAX_PATH
typedef InlineSString<260> PathString;
typedef InlineSString<2 * 260> LongPathString;
#endif
1021 | |
1022 | // ================================================================================ |
1023 | // Quick macro to create an SString around a literal string. |
1024 | // usage: |
1025 | // s = SL("My literal String"); |
1026 | // ================================================================================ |
1027 | |
// Expands to a temporary SString constructed with the SString::Literal tag
// followed by the macro argument.
#define SL(_literal) SString(SString::Literal, _literal)
1029 | |
1030 | // ================================================================================ |
1031 | // ScratchBuffer classes are used by the GetXXX() routines to allocate scratch space in. |
1032 | // ================================================================================ |
1033 | |
// Base class of the ScratchBuffer<> template. It derives privately from
// SString and exposes only a protected constructor, so it can be created
// solely through a derived class.
class SString::AbstractScratchBuffer : private SString
{
  protected:
    // Do not use this class directly - use
    // ScratchBuffer or StackScratchBuffer.
    //
    // buffer / size: caller-supplied storage and its size (ScratchBuffer
    // passes its inline BYTE array and that array's element count).
    AbstractScratchBuffer(void *buffer, COUNT_T size);
};
1041 | |
1042 | template <COUNT_T MEMSIZE> |
1043 | class ScratchBuffer : public SString::AbstractScratchBuffer |
1044 | { |
1045 | private: |
1046 | BYTE m_inline[MEMSIZE]; |
1047 | |
1048 | public: |
1049 | ScratchBuffer() |
1050 | : AbstractScratchBuffer((void *)m_inline, MEMSIZE) |
1051 | { |
1052 | WRAPPER_NO_CONTRACT; |
1053 | } |
1054 | }; |
1055 | |
// ScratchBuffer with 256 BYTEs of inline storage.
typedef ScratchBuffer<256> StackScratchBuffer;
1057 | |
1058 | // ================================================================================ |
1059 | // Special contract definition - THROWS_UNLESS_NORMALIZED |
1060 | // this is used for operations which might fail for generalized strings but |
1061 | // not if the string has already been converted to unicode. Rather than just |
1062 | // setting this on all conversions to unicode, we only set it when explicitly |
1063 | // asked. This should expose more potential problems. |
1064 | // ================================================================================ |
1065 | |
// Contract clause: expands to NOTHROW when IsNormalized() - looked up in the
// scope where the macro is used - is true, and to THROWS otherwise. The
// expansion is an unbraced if/else and supplies no semicolon after the else
// branch.
#define THROWS_UNLESS_NORMALIZED \
    if (IsNormalized()) NOTHROW; else THROWS
1068 | |
// Contract clause: expands to NOTHROW when both IsNormalized() (looked up in
// the scope where the macro is used) and (s).IsNormalized() are true, and to
// THROWS otherwise. The expansion is an unbraced if/else and supplies no
// semicolon after the else branch.
//
// s is parenthesized so that an expression argument such as *p binds as
// (*p).IsNormalized() rather than *(p.IsNormalized()).
#define THROWS_UNLESS_BOTH_NORMALIZED(s) \
    if (IsNormalized() && (s).IsNormalized()) NOTHROW; else THROWS
1071 | |
// Contract clause: expands to FORBID_FAULT when IsNormalized() - looked up in
// the scope where the macro is used - is true, and to INJECT_FAULT(stmt)
// otherwise. The expansion is an unbraced if/else and supplies no semicolon
// after the else branch.
#define FAULTS_UNLESS_NORMALIZED(stmt) \
    if (IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)
1074 | |
// Contract clause: expands to FORBID_FAULT when both IsNormalized() (looked
// up in the scope where the macro is used) and (s).IsNormalized() are true,
// and to INJECT_FAULT(stmt) otherwise. The expansion is an unbraced if/else
// and supplies no semicolon after the else branch.
//
// s is parenthesized so that an expression argument such as *p binds as
// (*p).IsNormalized() rather than *(p.IsNormalized()).
#define FAULTS_UNLESS_BOTH_NORMALIZED(s, stmt) \
    if (IsNormalized() && (s).IsNormalized()) FORBID_FAULT; else INJECT_FAULT(stmt)
1077 | |
1078 | // ================================================================================ |
1079 | // Inline definitions |
1080 | // ================================================================================ |
1081 | |
1082 | #include <sstring.inl> |
1083 | |
1084 | #endif // _SSTRING_H_ |
1085 | |