1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4// ---------------------------------------------------------------------------
5// SString.cpp
6//
7
8// ---------------------------------------------------------------------------
9
10#include "stdafx.h"
11#include "sstring.h"
12#include "ex.h"
13#include "holder.h"
14
15
16#if defined(_MSC_VER)
17#pragma inline_depth (25)
18#endif
19
20//-----------------------------------------------------------------------------
21// Static variables
22//-----------------------------------------------------------------------------
23
24// Have one internal, well-known, literal for the empty string.
25const BYTE SString::s_EmptyBuffer[2] = { 0 };
26
27// @todo: these need to be initialized by calling GetACP() or GetConsoleOutputCP()
28// followed by a GetCPInfo to see if the max character size is 1.
29
30UINT SString::s_ACP = 0;
31SVAL_IMPL_INIT(BOOL, SString, s_IsANSIMultibyte, TRUE);
32
33#ifndef DACCESS_COMPILE
34static BYTE s_EmptySpace[sizeof(SString)] = { 0 };
35#endif // DACCESS_COMPILE
36
37#if FEATURE_USE_LCID
38const LocaleID SString::s_defaultLCID = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
39#else
40const LocaleID SString::s_defaultLCID = NULL;
41#endif
42
43SPTR_IMPL(SString,SString,s_Empty);
44
45void SString::Startup()
46{
47 STATIC_CONTRACT_NOTHROW;
48 STATIC_CONTRACT_GC_NOTRIGGER;
49
50 if (s_ACP == 0)
51 {
52 CPINFO info;
53
54 UINT ACP = GetACP();
55 if (GetCPInfo(ACP, &info) && info.MaxCharSize == 1)
56 s_IsANSIMultibyte = FALSE;
57
58#ifndef DACCESS_COMPILE
59 s_Empty = PTR_SString(new (s_EmptySpace) SString());
60 s_Empty->SetNormalized();
61#endif // DACCESS_COMPILE
62
63 MemoryBarrier();
64 s_ACP = ACP;
65 }
66}
67
68CHECK SString::CheckStartup()
69{
70 WRAPPER_NO_CONTRACT;
71
72 CHECK(s_Empty != NULL);
73 CHECK_OK;
74}
75
76//-----------------------------------------------------------------------------
77// Case insensitive helpers.
78//-----------------------------------------------------------------------------
79
80static WCHAR MapChar(WCHAR wc, DWORD dwFlags, LocaleID lcid)
81{
82 WRAPPER_NO_CONTRACT;
83
84 WCHAR wTmp;
85
86#ifndef FEATURE_PAL
87
88#ifdef FEATURE_USE_LCID
89 int iRet = WszLCMapString(lcid, dwFlags, &wc, 1, &wTmp, 1);
90#else
91 // TODO: Uncertain if this is the best behavior. Caller should specify locale name
92 if (lcid == NULL || lcid[0]==W('!')) lcid = W("");
93 int iRet = ::LCMapStringEx(lcid, dwFlags, &wc, 1, &wTmp, 1, NULL, NULL, 0);
94#endif
95 if (!iRet) {
96 // This can fail in non-exceptional cases becauseof unknown unicode characters.
97 wTmp = wc;
98 }
99
100#else // !FEATURE_PAL
101 // For PAL, no locale specific processing is done
102
103 if (dwFlags == LCMAP_UPPERCASE)
104 {
105 wTmp = toupper(wc);
106 }
107 else
108 {
109 _ASSERTE(dwFlags == LCMAP_LOWERCASE);
110 wTmp = tolower(wc);
111 }
112#endif // !FEATURE_PAL
113
114 return wTmp;
115}
116
117#define IS_UPPER_A_TO_Z(x) (((x) >= W('A')) && ((x) <= W('Z')))
118#define IS_LOWER_A_TO_Z(x) (((x) >= W('a')) && ((x) <= W('z')))
119#define CAN_SIMPLE_UPCASE(x) (((x)&~0x7f) == 0)
120#define CAN_SIMPLE_DOWNCASE(x) (((x)&~0x7f) == 0)
121#define SIMPLE_UPCASE(x) (IS_LOWER_A_TO_Z(x) ? ((x) - W('a') + W('A')) : (x))
122#define SIMPLE_DOWNCASE(x) (IS_UPPER_A_TO_Z(x) ? ((x) - W('A') + W('a')) : (x))
123
124/* static */
125int SString::CaseCompareHelper(const WCHAR *buffer1, const WCHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount)
126{
127 LIMITED_METHOD_CONTRACT;
128
129 _ASSERTE(stopOnNull || stopOnCount);
130
131 const WCHAR *buffer1End = buffer1 + count;
132 int diff = 0;
133
134 while (!stopOnCount || (buffer1 < buffer1End))
135 {
136 WCHAR ch1 = *buffer1++;
137 WCHAR ch2 = *buffer2++;
138 diff = ch1 - ch2;
139 if ((ch1 == 0) || (ch2 == 0))
140 {
141 if (diff != 0 || stopOnNull)
142 {
143 break;
144 }
145 }
146 else
147 {
148 if (diff != 0)
149 {
150 diff = ((CAN_SIMPLE_UPCASE(ch1) ? SIMPLE_UPCASE(ch1) : MapChar(ch1, LCMAP_UPPERCASE, lcid))
151 - (CAN_SIMPLE_UPCASE(ch2) ? SIMPLE_UPCASE(ch2) : MapChar(ch2, LCMAP_UPPERCASE, lcid)));
152 }
153 if (diff != 0)
154 {
155 break;
156 }
157 }
158 }
159
160 return diff;
161}
162
// Single-byte (CHAR) counterparts of the simple case helpers.
//  IS_LOWER_A_TO_Z_ANSI   - true for 'a'..'z'
//  CAN_SIMPLE_UPCASE_ANSI - true for bytes in 0x20..0x7f, which can be
//                           upper-cased without any code page lookup
//  SIMPLE_UPCASE_ANSI     - maps 'a'..'z' to 'A'..'Z', leaves other values as-is
#define IS_LOWER_A_TO_Z_ANSI(x) (((x) >= 'a') && ((x) <= 'z'))
#define CAN_SIMPLE_UPCASE_ANSI(x) (((x) >= 0x20) && ((x) <= 0x7f))
#define SIMPLE_UPCASE_ANSI(x) (IS_LOWER_A_TO_Z_ANSI(x) ? ((x) - 'a' + 'A') : (x))
166
167// TODO: Need to get rid of LocaleID and use a LPCWSTR locale name instead.
168int GetCaseInsensitiveValueA(LocaleID lcid, const CHAR *buffer, int length) {
169 LIMITED_METHOD_CONTRACT;
170 _ASSERTE(buffer != NULL);
171 _ASSERTE(length == 1 || ((length == 2) && IsDBCSLeadByte(*buffer)));
172
173 WCHAR wideCh;
174 int sortValue;
175 int conversionReturn = MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, buffer, length, &wideCh, 1);
176 if (conversionReturn == 0)
177 {
178 // An invalid sequence should only compare equal to itself, so use a negative mapping.
179 if (length == 1)
180 {
181 sortValue = -((int)((unsigned char)(*buffer)));
182 }
183 else
184 {
185 sortValue = -(((((int)((unsigned char)(*buffer))) << 8) | ((int)((unsigned char)(*(buffer + 1))))));
186 }
187 }
188 else
189 {
190 _ASSERTE(conversionReturn == 1);
191 sortValue = MapChar(wideCh, LCMAP_UPPERCASE, lcid);
192 }
193 return sortValue;
194}
195
196/* static */
197int SString::CaseCompareHelperA(const CHAR *buffer1, const CHAR *buffer2, COUNT_T count, LocaleID lcid, BOOL stopOnNull, BOOL stopOnCount)
198{
199 LIMITED_METHOD_CONTRACT;
200
201 _ASSERTE(stopOnNull || stopOnCount);
202
203 const CHAR *buffer1End = buffer1 + count;
204 int diff = 0;
205
206 while (!stopOnCount || (buffer1 < buffer1End))
207 {
208 CHAR ch1 = *buffer1;
209 CHAR ch2 = *buffer2;
210 if ((ch1 == 0) || (ch2 == 0))
211 {
212 diff = ch1 - ch2;
213 if (diff != 0 || stopOnNull)
214 {
215 break;
216 }
217 buffer1++;
218 buffer2++;
219 }
220 else if (CAN_SIMPLE_UPCASE_ANSI(ch1) && CAN_SIMPLE_UPCASE_ANSI(ch2))
221 {
222 diff = ch1 - ch2;
223 if (diff != 0)
224 {
225 diff = (SIMPLE_UPCASE_ANSI(ch1) - SIMPLE_UPCASE_ANSI(ch2));
226 if (diff != 0)
227 {
228 break;
229 }
230 }
231 buffer1++;
232 buffer2++;
233 }
234 else
235 {
236 int length = 1;
237 if (s_IsANSIMultibyte
238 && IsDBCSLeadByte(ch1)
239 && IsDBCSLeadByte(ch2)
240 && (!stopOnCount || ((buffer1 + 1) < buffer1End)))
241 {
242 length = 2;
243 }
244 int sortValue1 = GetCaseInsensitiveValueA(lcid, buffer1, length);
245 int sortValue2 = GetCaseInsensitiveValueA(lcid, buffer2, length);
246 diff = sortValue1 - sortValue2;
247 if (diff != 0)
248 {
249 break;
250 }
251 buffer1 += length;
252 buffer2 += length;
253 }
254 }
255 return diff;
256}
257
258
259int CaseHashHelper(const WCHAR *buffer, COUNT_T count, LocaleID lcid)
260{
261 LIMITED_METHOD_CONTRACT;
262
263 const WCHAR *bufferEnd = buffer + count;
264 ULONG hash = 5381;
265
266 while (buffer < bufferEnd)
267 {
268 WCHAR ch = *buffer++;
269 ch = CAN_SIMPLE_UPCASE(ch) ? SIMPLE_UPCASE(ch) : MapChar(ch, LCMAP_UPPERCASE, lcid);
270
271 hash = (((hash << 5) + hash) ^ ch);
272 }
273
274 return hash;
275}
276
277static int CaseHashHelperA(const CHAR *buffer, COUNT_T count)
278{
279 LIMITED_METHOD_CONTRACT;
280
281 const CHAR *bufferEnd = buffer + count;
282 ULONG hash = 5381;
283
284 while (buffer < bufferEnd)
285 {
286 CHAR ch = *buffer++;
287 ch = SIMPLE_UPCASE_ANSI(ch);
288
289 hash = (((hash << 5) + hash) ^ ch);
290 }
291
292 return hash;
293}
294
295//-----------------------------------------------------------------------------
296// Set this string to a copy of the unicode string
297//-----------------------------------------------------------------------------
298void SString::Set(const WCHAR *string)
299{
300 CONTRACT_VOID
301 {
302 INSTANCE_CHECK;
303 PRECONDITION(CheckPointer(string, NULL_OK));
304 THROWS;
305 GC_NOTRIGGER;
306 SUPPORTS_DAC_HOST_ONLY;
307 }
308 CONTRACT_END;
309
310 if (string == NULL || *string == 0)
311 Clear();
312 else
313 {
314 Resize((COUNT_T) wcslen(string), REPRESENTATION_UNICODE);
315 wcscpy_s(GetRawUnicode(), GetBufferSizeInCharIncludeNullChar(), string);
316 }
317
318 RETURN;
319}
320
321//-----------------------------------------------------------------------------
322// Set this string to a copy of the first count characters of the given
323// unicode string.
324//-----------------------------------------------------------------------------
325void SString::Set(const WCHAR *string, COUNT_T count)
326{
327 SS_CONTRACT_VOID
328 {
329 INSTANCE_CHECK;
330 PRECONDITION(CheckPointer(string, NULL_OK));
331 PRECONDITION(CheckCount(count));
332 THROWS;
333 GC_NOTRIGGER;
334 }
335 SS_CONTRACT_END;
336
337 if (count == 0)
338 Clear();
339 else
340 {
341 Resize(count, REPRESENTATION_UNICODE);
342 wcsncpy_s(GetRawUnicode(), GetBufferSizeInCharIncludeNullChar(), string, count);
343 GetRawUnicode()[count] = 0;
344 }
345
346 SS_RETURN;
347}
348
349//-----------------------------------------------------------------------------
350// Set this string to a point to the first count characters of the given
351// preallocated unicode string (shallow copy).
352//-----------------------------------------------------------------------------
353void SString::SetPreallocated(const WCHAR *string, COUNT_T count)
354{
355 SS_CONTRACT_VOID
356 {
357 INSTANCE_CHECK;
358 PRECONDITION(CheckPointer(string, NULL_OK));
359 PRECONDITION(CheckCount(count));
360 SS_POSTCONDITION(IsEmpty());
361 GC_NOTRIGGER;
362 NOTHROW;
363 SUPPORTS_DAC_HOST_ONLY;
364 }
365 SS_CONTRACT_END;
366
367 SetImmutable();
368 SetImmutable((BYTE*) string, count*2);
369 ClearAllocated();
370 SetRepresentation(REPRESENTATION_UNICODE);
371
372 SS_RETURN;
373}
374
375//-----------------------------------------------------------------------------
376// Set this string to a copy of the given ansi string
377//-----------------------------------------------------------------------------
378void SString::SetASCII(const ASCII *string)
379{
380 SS_CONTRACT_VOID
381 {
382 INSTANCE_CHECK;
383 PRECONDITION(CheckPointer(string, NULL_OK));
384 PRECONDITION(CheckASCIIString(string));
385 THROWS;
386 GC_NOTRIGGER;
387 }
388 SS_CONTRACT_END;
389
390 if (string == NULL || *string == 0)
391 Clear();
392 else
393 {
394 Resize((COUNT_T) strlen(string), REPRESENTATION_ASCII);
395 strcpy_s(GetRawUTF8(), GetBufferSizeInCharIncludeNullChar(), string);
396 }
397
398 SS_RETURN;
399}
400
401//-----------------------------------------------------------------------------
402// Set this string to a copy of the first count characters of the given
403// ascii string
404//-----------------------------------------------------------------------------
405void SString::SetASCII(const ASCII *string, COUNT_T count)
406{
407 SS_CONTRACT_VOID
408 {
409 INSTANCE_CHECK;
410 PRECONDITION(CheckPointer(string, NULL_OK));
411 PRECONDITION(CheckASCIIString(string, count));
412 PRECONDITION(CheckCount(count));
413 THROWS;
414 GC_NOTRIGGER;
415 }
416 SS_CONTRACT_END;
417
418 if (count == 0)
419 Clear();
420 else
421 {
422 Resize(count, REPRESENTATION_ASCII);
423 strncpy_s(GetRawASCII(), GetBufferSizeInCharIncludeNullChar(), string, count);
424 GetRawASCII()[count] = 0;
425 }
426
427 SS_RETURN;
428}
429
430//-----------------------------------------------------------------------------
431// Set this string to a copy of the given UTF8 string
432//-----------------------------------------------------------------------------
433void SString::SetUTF8(const UTF8 *string)
434{
435 SS_CONTRACT_VOID
436 {
437 // !!! Check for illegal UTF8 encoding?
438 INSTANCE_CHECK;
439 PRECONDITION(CheckPointer(string, NULL_OK));
440 THROWS;
441 GC_NOTRIGGER;
442 SUPPORTS_DAC_HOST_ONLY;
443 }
444 SS_CONTRACT_END;
445
446 if (string == NULL || *string == 0)
447 Clear();
448 else
449 {
450 Resize((COUNT_T) strlen(string), REPRESENTATION_UTF8);
451 strcpy_s(GetRawUTF8(), GetBufferSizeInCharIncludeNullChar(), string);
452 }
453
454 SS_RETURN;
455}
456
457//-----------------------------------------------------------------------------
458// Set this string to a copy of the first count characters of the given
459// UTF8 string.
460//-----------------------------------------------------------------------------
461void SString::SetUTF8(const UTF8 *string, COUNT_T count)
462{
463 SS_CONTRACT_VOID
464 {
465 // !!! Check for illegal UTF8 encoding?
466 INSTANCE_CHECK;
467 PRECONDITION(CheckPointer(string, NULL_OK));
468 PRECONDITION(CheckCount(count));
469 THROWS;
470 GC_NOTRIGGER;
471 }
472 SS_CONTRACT_END;
473
474 if (count == 0)
475 Clear();
476 else
477 {
478 Resize(count, REPRESENTATION_UTF8);
479 strncpy_s(GetRawUTF8(), GetBufferSizeInCharIncludeNullChar(), string, count);
480 GetRawUTF8()[count] = 0;
481 }
482
483 SS_RETURN;
484}
485
486//-----------------------------------------------------------------------------
487// Set this string to a copy of the given ANSI string
488//-----------------------------------------------------------------------------
489void SString::SetANSI(const ANSI *string)
490{
491 SS_CONTRACT_VOID
492 {
493 INSTANCE_CHECK;
494 PRECONDITION(CheckPointer(string, NULL_OK));
495 THROWS;
496 GC_NOTRIGGER;
497 }
498 SS_CONTRACT_END;
499
500 if (string == NULL || *string == 0)
501 Clear();
502 else
503 {
504 Resize((COUNT_T) strlen(string), REPRESENTATION_ANSI);
505 strcpy_s(GetRawANSI(), GetBufferSizeInCharIncludeNullChar(), string);
506 }
507
508 SS_RETURN;
509}
510
511//-----------------------------------------------------------------------------
512// Set this string to a copy of the first count characters of the given
513// ANSI string.
514//-----------------------------------------------------------------------------
515void SString::SetANSI(const ANSI *string, COUNT_T count)
516{
517 SS_CONTRACT_VOID
518 {
519 INSTANCE_CHECK;
520 PRECONDITION(CheckPointer(string, NULL_OK));
521 PRECONDITION(CheckCount(count));
522 THROWS;
523 GC_NOTRIGGER;
524 }
525 SS_CONTRACT_END;
526
527 if (count == 0)
528 Clear();
529 else
530 {
531 Resize(count, REPRESENTATION_ANSI);
532 strncpy_s(GetRawANSI(), GetBufferSizeInCharIncludeNullChar(), string, count);
533 GetRawANSI()[count] = 0;
534 }
535
536 SS_RETURN;
537}
538
539//-----------------------------------------------------------------------------
540// Set this string to the given unicode character
541//-----------------------------------------------------------------------------
542void SString::Set(WCHAR character)
543{
544 SS_CONTRACT_VOID
545 {
546 INSTANCE_CHECK;
547 THROWS;
548 GC_NOTRIGGER;
549 SUPPORTS_DAC_HOST_ONLY;
550 }
551 SS_CONTRACT_END;
552
553 if (character == 0)
554 Clear();
555 else
556 {
557 Resize(1, REPRESENTATION_UNICODE);
558 GetRawUnicode()[0] = character;
559 GetRawUnicode()[1] = 0;
560 }
561
562 SS_RETURN;
563}
564
565//-----------------------------------------------------------------------------
566// Set this string to the given UTF8 character
567//-----------------------------------------------------------------------------
568void SString::SetUTF8(CHAR character)
569{
570 SS_CONTRACT_VOID
571 {
572 INSTANCE_CHECK;
573 THROWS;
574 GC_NOTRIGGER;
575 }
576 SS_CONTRACT_END;
577
578 if (character == 0)
579 Clear();
580 else
581 {
582 Resize(1, REPRESENTATION_UTF8);
583 GetRawUTF8()[0] = character;
584 GetRawUTF8()[1] = 0;
585 }
586
587 SS_RETURN;
588}
589
590
591//-----------------------------------------------------------------------------
592// Set this string to the given ansi literal.
593// This will share the memory and not make a copy.
594//-----------------------------------------------------------------------------
595void SString::SetLiteral(const ASCII *literal)
596{
597 SS_CONTRACT_VOID
598 {
599 INSTANCE_CHECK;
600 PRECONDITION(CheckPointer(literal));
601 PRECONDITION(CheckASCIIString(literal));
602 THROWS;
603 GC_NOTRIGGER;
604 }
605 SS_CONTRACT_END;
606
607 SString s(Literal, literal);
608 Set(s);
609
610 SS_RETURN;
611}
612
613//-----------------------------------------------------------------------------
614// Set this string to the given unicode literal.
615// This will share the memory and not make a copy.
616//-----------------------------------------------------------------------------
617void SString::SetLiteral(const WCHAR *literal)
618{
619 SS_CONTRACT_VOID
620 {
621 INSTANCE_CHECK;
622 PRECONDITION(CheckPointer(literal));
623 THROWS;
624 GC_NOTRIGGER;
625 }
626 SS_CONTRACT_END;
627
628 SString s(Literal, literal);
629 Set(s);
630
631 SS_RETURN;
632}
633
634//-----------------------------------------------------------------------------
635// Hash the string contents
636//-----------------------------------------------------------------------------
637ULONG SString::Hash() const
638{
639 SS_CONTRACT(ULONG)
640 {
641 INSTANCE_CHECK;
642 THROWS_UNLESS_NORMALIZED;
643 GC_NOTRIGGER;
644 }
645 SS_CONTRACT_END;
646
647 ConvertToUnicode();
648
649 SS_RETURN HashString(GetRawUnicode());
650}
651
652//-----------------------------------------------------------------------------
653// Hash the string contents
654//-----------------------------------------------------------------------------
655ULONG SString::HashCaseInsensitive(LocaleID lcid) const
656{
657 SS_CONTRACT(ULONG)
658 {
659 INSTANCE_CHECK;
660 THROWS_UNLESS_NORMALIZED;
661 GC_NOTRIGGER;
662 }
663 SS_CONTRACT_END;
664
665 ConvertToIteratable();
666
667 ULONG result;
668
669 switch (GetRepresentation())
670 {
671 case REPRESENTATION_UNICODE:
672 case REPRESENTATION_EMPTY:
673 result = CaseHashHelper(GetRawUnicode(), GetRawCount(), lcid);
674 break;
675
676 case REPRESENTATION_ASCII:
677 result = CaseHashHelperA(GetRawASCII(), GetRawCount());
678 break;
679
680 default:
681 UNREACHABLE();
682 }
683
684 SS_RETURN result;
685}
686
687//-----------------------------------------------------------------------------
688// Truncate this string to count characters.
689//-----------------------------------------------------------------------------
690void SString::Truncate(const Iterator &i)
691{
692 SS_CONTRACT_VOID
693 {
694 INSTANCE_CHECK;
695 PRECONDITION(CheckIteratorRange(i));
696 SS_POSTCONDITION(GetRawCount() == i - Begin());
697 THROWS;
698 GC_NOTRIGGER;
699 SUPPORTS_DAC_HOST_ONLY;
700 }
701 SS_CONTRACT_END;
702
703 CONSISTENCY_CHECK(IsFixedSize());
704
705 COUNT_T size = i - Begin();
706
707 Resize(size, GetRepresentation(), PRESERVE);
708
709 i.Resync(this, (BYTE *) (GetRawUnicode() + size));
710
711 SS_RETURN;
712}
713
714//-----------------------------------------------------------------------------
715// Convert the ASCII representation for this String to Unicode. We can do this
716// quickly and in-place (if this == &dest), which is why it is optimized.
717//-----------------------------------------------------------------------------
718void SString::ConvertASCIIToUnicode(SString &dest) const
719{
720 CONTRACT_VOID
721 {
722 PRECONDITION(IsRepresentation(REPRESENTATION_ASCII));
723 POSTCONDITION(dest.IsRepresentation(REPRESENTATION_UNICODE));
724 THROWS;
725 GC_NOTRIGGER;
726 SUPPORTS_DAC_HOST_ONLY;
727 }
728 CONTRACT_END;
729
730 // Handle the empty case.
731 if (IsEmpty())
732 {
733 dest.Clear();
734 RETURN;
735 }
736
737 CONSISTENCY_CHECK(CheckPointer(GetRawASCII()));
738 CONSISTENCY_CHECK(GetRawCount() > 0);
739
740 // If dest is the same as this, then we need to preserve on resize.
741 dest.Resize(GetRawCount(), REPRESENTATION_UNICODE,
742 this == &dest ? PRESERVE : DONT_PRESERVE);
743
744 // Make sure the buffer is big enough.
745 CONSISTENCY_CHECK(dest.GetAllocation() > (GetRawCount() * sizeof(WCHAR)));
746
747 // This is a poor man's widen. Since we know that the representation is ASCII,
748 // we can just pad the string with a bunch of zero-value bytes. Of course,
749 // we move from the end of the string to the start so that we can convert in
750 // place (in the case that &dest == this).
751 WCHAR *outBuf = dest.GetRawUnicode() + dest.GetRawCount();
752 ASCII *inBuf = GetRawASCII() + GetRawCount();
753
754 while (GetRawASCII() <= inBuf)
755 {
756 CONSISTENCY_CHECK(dest.GetRawUnicode() <= outBuf);
757 // The casting zero-extends the value, thus giving us the zero-valued byte.
758 *outBuf = (WCHAR) *inBuf;
759 outBuf--;
760 inBuf--;
761 }
762
763 RETURN;
764}
765
766//-----------------------------------------------------------------------------
767// Convert the internal representation for this String to Unicode.
768//-----------------------------------------------------------------------------
769void SString::ConvertToUnicode() const
770{
771 CONTRACT_VOID
772 {
773 POSTCONDITION(IsRepresentation(REPRESENTATION_UNICODE));
774 if (IsRepresentation(REPRESENTATION_UNICODE)) NOTHROW; else THROWS;
775 GC_NOTRIGGER;
776 SUPPORTS_DAC_HOST_ONLY;
777 }
778 CONTRACT_END;
779
780 if (!IsRepresentation(REPRESENTATION_UNICODE))
781 {
782 if (IsRepresentation(REPRESENTATION_ASCII))
783 {
784 ConvertASCIIToUnicode(*(const_cast<SString *>(this)));
785 }
786 else
787 {
788 StackSString s;
789 ConvertToUnicode(s);
790 PREFIX_ASSUME(!s.IsImmutable());
791 (const_cast<SString*>(this))->Set(s);
792 }
793 }
794
795 RETURN;
796}
797
798//-----------------------------------------------------------------------------
799// Convert the internal representation for this String to Unicode, while
800// preserving the iterator if the conversion is done.
801//-----------------------------------------------------------------------------
802void SString::ConvertToUnicode(const CIterator &i) const
803{
804 CONTRACT_VOID
805 {
806 PRECONDITION(i.Check());
807 POSTCONDITION(IsRepresentation(REPRESENTATION_UNICODE));
808 if (IsRepresentation(REPRESENTATION_UNICODE)) NOTHROW; else THROWS;
809 GC_NOTRIGGER;
810 SUPPORTS_DAC_HOST_ONLY;
811 }
812 CONTRACT_END;
813
814 if (!IsRepresentation(REPRESENTATION_UNICODE))
815 {
816 CONSISTENCY_CHECK(IsFixedSize());
817
818 COUNT_T index = 0;
819 // Get the current index of the iterator
820 if (i.m_ptr != NULL)
821 {
822 CONSISTENCY_CHECK(GetCharacterSizeShift() == 0);
823 index = (COUNT_T) (i.m_ptr - m_buffer);
824 }
825
826 if (IsRepresentation(REPRESENTATION_ASCII))
827 {
828 ConvertASCIIToUnicode(*(const_cast<SString *>(this)));
829 }
830 else
831 {
832 StackSString s;
833 ConvertToUnicode(s);
834 (const_cast<SString*>(this))->Set(s);
835 }
836
837 // Move the iterator to the new location.
838 if (i.m_ptr != NULL)
839 {
840 i.Resync(this, (BYTE *) (GetRawUnicode() + index));
841 }
842 }
843
844 RETURN;
845}
846
847//-----------------------------------------------------------------------------
848// Set s to be a copy of this string's contents, but in the unicode format.
849//-----------------------------------------------------------------------------
850void SString::ConvertToUnicode(SString &s) const
851{
852 CONTRACT_VOID
853 {
854 PRECONDITION(s.Check());
855 POSTCONDITION(s.IsRepresentation(REPRESENTATION_UNICODE));
856 THROWS;
857 GC_NOTRIGGER;
858 SUPPORTS_DAC_HOST_ONLY;
859 }
860 CONTRACT_END;
861
862 int page = 0;
863
864 switch (GetRepresentation())
865 {
866 case REPRESENTATION_EMPTY:
867 s.Clear();
868 RETURN;
869
870 case REPRESENTATION_UNICODE:
871 s.Set(*this);
872 RETURN;
873
874 case REPRESENTATION_UTF8:
875 page = CP_UTF8;
876 break;
877
878 case REPRESENTATION_ASCII:
879 ConvertASCIIToUnicode(s);
880 RETURN;
881
882 case REPRESENTATION_ANSI:
883 page = CP_ACP;
884 break;
885
886 default:
887 UNREACHABLE();
888 }
889
890 COUNT_T length = WszMultiByteToWideChar(page, 0, GetRawANSI(), GetRawCount()+1, 0, 0);
891 if (length == 0)
892 ThrowLastError();
893
894 s.Resize(length-1, REPRESENTATION_UNICODE);
895
896 length = WszMultiByteToWideChar(page, 0, GetRawANSI(), GetRawCount()+1, s.GetRawUnicode(), length);
897 if (length == 0)
898 ThrowLastError();
899
900 RETURN;
901}
902
903//-----------------------------------------------------------------------------
904// Set s to be a copy of this string's contents, but in the ANSI format.
905//-----------------------------------------------------------------------------
906void SString::ConvertToANSI(SString &s) const
907{
908 CONTRACT_VOID
909 {
910 PRECONDITION(s.Check());
911 POSTCONDITION(s.IsRepresentation(REPRESENTATION_ANSI));
912 THROWS;
913 GC_NOTRIGGER;
914 }
915 CONTRACT_END;
916
917 switch (GetRepresentation())
918 {
919 case REPRESENTATION_EMPTY:
920 s.Clear();
921 RETURN;
922
923 case REPRESENTATION_ASCII:
924 case REPRESENTATION_ANSI:
925 s.Set(*this);
926 RETURN;
927
928 case REPRESENTATION_UTF8:
929 // No direct conversion to ANSI
930 ConvertToUnicode();
931 // fall through
932
933 case REPRESENTATION_UNICODE:
934 break;
935
936 default:
937 UNREACHABLE();
938 }
939
940 // @todo: use WC_NO_BEST_FIT_CHARS
941 COUNT_T length = WszWideCharToMultiByte(CP_ACP, 0, GetRawUnicode(), GetRawCount()+1,
942 NULL, 0, NULL, NULL);
943
944 s.Resize(length-1, REPRESENTATION_ANSI);
945
946 // @todo: use WC_NO_BEST_FIT_CHARS
947 length = WszWideCharToMultiByte(CP_ACP, 0, GetRawUnicode(), GetRawCount()+1,
948 s.GetRawANSI(), length, NULL, NULL);
949 if (length == 0)
950 ThrowLastError();
951
952 RETURN;
953}
954
955//-----------------------------------------------------------------------------
956// Set s to be a copy of this string's contents, but in the utf8 format.
957//-----------------------------------------------------------------------------
958COUNT_T SString::ConvertToUTF8(SString &s) const
959{
960 CONTRACT(COUNT_T)
961 {
962 PRECONDITION(s.Check());
963 POSTCONDITION(s.IsRepresentation(REPRESENTATION_UTF8));
964 THROWS;
965 GC_NOTRIGGER;
966 }
967 CONTRACT_END;
968
969 switch (GetRepresentation())
970 {
971 case REPRESENTATION_EMPTY:
972 s.Clear();
973 RETURN 1;
974
975 case REPRESENTATION_ASCII:
976 case REPRESENTATION_UTF8:
977 s.Set(*this);
978 RETURN s.GetRawCount()+1;
979
980 case REPRESENTATION_ANSI:
981 // No direct conversion from ANSI to UTF8
982 ConvertToUnicode();
983 // fall through
984
985 case REPRESENTATION_UNICODE:
986 break;
987
988 default:
989 UNREACHABLE();
990 }
991
992 // <TODO> @todo: use WC_NO_BEST_FIT_CHARS </TODO>
993 bool allAscii;
994 DWORD length;
995
996 HRESULT hr = FString::Unicode_Utf8_Length(GetRawUnicode(), & allAscii, & length);
997
998 if (SUCCEEDED(hr))
999 {
1000 s.Resize(length, REPRESENTATION_UTF8);
1001
1002 //FString::Unicode_Utf8 expects an array all the time
1003 //we optimize the empty string by replacing it with null for SString above in Resize
1004 if (length > 0)
1005 {
1006 hr = FString::Unicode_Utf8(GetRawUnicode(), allAscii, (LPSTR) s.GetRawUTF8(), length);
1007 }
1008 }
1009
1010 IfFailThrow(hr);
1011
1012 RETURN length + 1;
1013}
1014
1015//-----------------------------------------------------------------------------
1016// Replace a single character with another character.
1017//-----------------------------------------------------------------------------
1018void SString::Replace(const Iterator &i, WCHAR c)
1019{
1020 CONTRACT_VOID
1021 {
1022 INSTANCE_CHECK;
1023 PRECONDITION(CheckIteratorRange(i, 1));
1024 POSTCONDITION(Match(i, c));
1025 THROWS;
1026 GC_NOTRIGGER;
1027 }
1028 CONTRACT_END;
1029
1030 if (IsRepresentation(REPRESENTATION_ASCII) && ((c&~0x7f) == 0))
1031 {
1032 *(BYTE*)i.m_ptr = (BYTE) c;
1033 }
1034 else
1035 {
1036 ConvertToUnicode(i);
1037
1038 *(USHORT*)i.m_ptr = c;
1039 }
1040
1041 RETURN;
1042}
1043
1044//-----------------------------------------------------------------------------
1045// Replace the substring specified by position, length with the given string s.
1046//-----------------------------------------------------------------------------
1047void SString::Replace(const Iterator &i, COUNT_T length, const SString &s)
1048{
1049 CONTRACT_VOID
1050 {
1051 INSTANCE_CHECK;
1052 PRECONDITION(CheckIteratorRange(i, length));
1053 PRECONDITION(s.Check());
1054 POSTCONDITION(Match(i, s));
1055 THROWS;
1056 GC_NOTRIGGER;
1057 SUPPORTS_DAC_HOST_ONLY;
1058 }
1059 CONTRACT_END;
1060
1061 Representation representation = GetRepresentation();
1062 if (representation == REPRESENTATION_EMPTY)
1063 {
1064 // This special case contains some optimizations (like literal sharing).
1065 Set(s);
1066 ConvertToIteratable();
1067 i.Resync(this, m_buffer);
1068 }
1069 else
1070 {
1071 StackSString temp;
1072 const SString &source = GetCompatibleString(s, temp, i);
1073
1074 COUNT_T deleteSize = length<<GetCharacterSizeShift();
1075 COUNT_T insertSize = source.GetRawCount()<<source.GetCharacterSizeShift();
1076
1077 SBuffer::Replace(i, deleteSize, insertSize);
1078 SBuffer::Copy(i, source.m_buffer, insertSize);
1079 }
1080
1081 RETURN;
1082}
1083
1084//-----------------------------------------------------------------------------
1085// Find s in this string starting at i. Return TRUE & update iterator if found.
1086//-----------------------------------------------------------------------------
1087BOOL SString::Find(CIterator &i, const SString &s) const
1088{
1089 CONTRACT(BOOL)
1090 {
1091 INSTANCE_CHECK;
1092 PRECONDITION(CheckIteratorRange(i));
1093 PRECONDITION(s.Check());
1094 POSTCONDITION(RETVAL == Match(i, s));
1095 THROWS_UNLESS_BOTH_NORMALIZED(s);
1096 GC_NOTRIGGER;
1097 }
1098 CONTRACT_END;
1099
1100 // Get a compatible string from s
1101 StackSString temp;
1102 const SString &source = GetCompatibleString(s, temp, i);
1103
1104 switch (GetRepresentation())
1105 {
1106 case REPRESENTATION_UNICODE:
1107 {
1108 COUNT_T count = source.GetRawCount();
1109 const WCHAR *start = i.GetUnicode();
1110 const WCHAR *end = GetUnicode() + GetRawCount() - count;
1111 while (start <= end)
1112 {
1113 if (wcsncmp(start, source.GetRawUnicode(), count) == 0)
1114 {
1115 i.Resync(this, (BYTE*) start);
1116 RETURN TRUE;
1117 }
1118 start++;
1119 }
1120 }
1121 break;
1122
1123 case REPRESENTATION_ANSI:
1124 case REPRESENTATION_ASCII:
1125 {
1126 COUNT_T count = source.GetRawCount();
1127 const CHAR *start = i.GetASCII();
1128 const CHAR *end = GetRawASCII() + GetRawCount() - count;
1129 while (start <= end)
1130 {
1131 if (strncmp(start, source.GetRawASCII(), count) == 0)
1132 {
1133 i.Resync(this, (BYTE*) start);
1134 RETURN TRUE;
1135 }
1136 start++;
1137 }
1138 }
1139 break;
1140
1141 case REPRESENTATION_EMPTY:
1142 {
1143 if (source.GetRawCount() == 0)
1144 RETURN TRUE;
1145 }
1146 break;
1147
1148 case REPRESENTATION_UTF8:
1149 default:
1150 UNREACHABLE();
1151 }
1152
1153 RETURN FALSE;
1154}
1155
1156//-----------------------------------------------------------------------------
1157// Find s in this string starting at i. Return TRUE & update iterator if found.
1158//-----------------------------------------------------------------------------
1159BOOL SString::Find(CIterator &i, WCHAR c) const
1160{
1161 CONTRACT(BOOL)
1162 {
1163 INSTANCE_CHECK;
1164 PRECONDITION(CheckIteratorRange(i));
1165 POSTCONDITION(RETVAL == Match(i, c));
1166 THROWS_UNLESS_NORMALIZED;
1167 GC_NOTRIGGER;
1168 }
1169 CONTRACT_END;
1170
1171 // Get a compatible string
1172 if (c & ~0x7f)
1173 ConvertToUnicode(i);
1174
1175 switch (GetRepresentation())
1176 {
1177 case REPRESENTATION_UNICODE:
1178 {
1179 const WCHAR *start = i.GetUnicode();
1180 const WCHAR *end = GetUnicode() + GetRawCount() - 1;
1181 while (start <= end)
1182 {
1183 if (*start == c)
1184 {
1185 i.Resync(this, (BYTE*) start);
1186 RETURN TRUE;
1187 }
1188 start++;
1189 }
1190 }
1191 break;
1192
1193 case REPRESENTATION_ANSI:
1194 case REPRESENTATION_ASCII:
1195 {
1196 const CHAR *start = i.GetASCII();
1197 const CHAR *end = GetRawASCII() + GetRawCount() - 1;
1198 while (start <= end)
1199 {
1200 if (*start == c)
1201 {
1202 i.Resync(this, (BYTE*) start);
1203 RETURN TRUE;
1204 }
1205 start++;
1206 }
1207 }
1208 break;
1209
1210 case REPRESENTATION_EMPTY:
1211 break;
1212
1213 case REPRESENTATION_UTF8:
1214 default:
1215 UNREACHABLE();
1216 }
1217
1218 RETURN FALSE;
1219}
1220
1221//-----------------------------------------------------------------------------
1222// Find s in this string, working backwards staring at i.
1223// Return TRUE and update iterator if found.
1224//-----------------------------------------------------------------------------
1225BOOL SString::FindBack(CIterator &i, const SString &s) const
1226{
1227 CONTRACT(BOOL)
1228 {
1229 INSTANCE_CHECK;
1230 PRECONDITION(CheckIteratorRange(i));
1231 PRECONDITION(s.Check());
1232 POSTCONDITION(RETVAL == Match(i, s));
1233 THROWS_UNLESS_BOTH_NORMALIZED(s);
1234 GC_NOTRIGGER;
1235 }
1236 CONTRACT_END;
1237
1238 // Get a compatible string from s
1239 StackSString temp;
1240 const SString &source = GetCompatibleString(s, temp, i);
1241
1242 switch (GetRepresentation())
1243 {
1244 case REPRESENTATION_UNICODE:
1245 {
1246 COUNT_T count = source.GetRawCount();
1247 const WCHAR *start = GetRawUnicode() + GetRawCount() - count;
1248 if (start > i.GetUnicode())
1249 start = i.GetUnicode();
1250 const WCHAR *end = GetRawUnicode();
1251
1252 while (start >= end)
1253 {
1254 if (wcsncmp(start, source.GetRawUnicode(), count) == 0)
1255 {
1256 i.Resync(this, (BYTE*) start);
1257 RETURN TRUE;
1258 }
1259 start--;
1260 }
1261 }
1262 break;
1263
1264 case REPRESENTATION_ANSI:
1265 case REPRESENTATION_ASCII:
1266 {
1267 COUNT_T count = source.GetRawCount();
1268 const CHAR *start = GetRawASCII() + GetRawCount() - count;
1269 if (start > i.GetASCII())
1270 start = i.GetASCII();
1271 const CHAR *end = GetRawASCII();
1272
1273 while (start >= end)
1274 {
1275 if (strncmp(start, source.GetRawASCII(), count) == 0)
1276 {
1277 i.Resync(this, (BYTE*) start);
1278 RETURN TRUE;
1279 }
1280 start--;
1281 }
1282 }
1283 break;
1284
1285 case REPRESENTATION_EMPTY:
1286 {
1287 if (source.GetRawCount() == 0)
1288 RETURN TRUE;
1289 }
1290 break;
1291
1292 case REPRESENTATION_UTF8:
1293 default:
1294 UNREACHABLE();
1295 }
1296
1297 RETURN FALSE;
1298}
1299
1300//-----------------------------------------------------------------------------
1301// Find s in this string, working backwards staring at i.
1302// Return TRUE and update iterator if found.
1303//-----------------------------------------------------------------------------
1304BOOL SString::FindBack(CIterator &i, WCHAR c) const
1305{
1306 CONTRACT(BOOL)
1307 {
1308 INSTANCE_CHECK;
1309 PRECONDITION(CheckIteratorRange(i));
1310 POSTCONDITION(RETVAL == Match(i, c));
1311 THROWS_UNLESS_NORMALIZED;
1312 GC_NOTRIGGER;
1313 }
1314 CONTRACT_END;
1315
1316 // Get a compatible string from s
1317 if (c & ~0x7f)
1318 ConvertToUnicode(i);
1319
1320 switch (GetRepresentation())
1321 {
1322 case REPRESENTATION_UNICODE:
1323 {
1324 const WCHAR *start = GetRawUnicode() + GetRawCount() - 1;
1325 if (start > i.GetUnicode())
1326 start = i.GetUnicode();
1327 const WCHAR *end = GetRawUnicode();
1328
1329 while (start >= end)
1330 {
1331 if (*start == c)
1332 {
1333 i.Resync(this, (BYTE*) start);
1334 RETURN TRUE;
1335 }
1336 start--;
1337 }
1338 }
1339 break;
1340
1341 case REPRESENTATION_ANSI:
1342 case REPRESENTATION_ASCII:
1343 {
1344 const CHAR *start = GetRawASCII() + GetRawCount() - 1;
1345 if (start > i.GetASCII())
1346 start = i.GetASCII();
1347 const CHAR *end = GetRawASCII();
1348
1349 while (start >= end)
1350 {
1351 if (*start == c)
1352 {
1353 i.Resync(this, (BYTE*) start);
1354 RETURN TRUE;
1355 }
1356 start--;
1357 }
1358 }
1359 break;
1360
1361 case REPRESENTATION_EMPTY:
1362 break;
1363
1364 case REPRESENTATION_UTF8:
1365 default:
1366 UNREACHABLE();
1367 }
1368
1369 RETURN FALSE;
1370}
1371
1372//-----------------------------------------------------------------------------
1373// Returns TRUE if this string begins with the contents of s
1374//-----------------------------------------------------------------------------
1375BOOL SString::BeginsWith(const SString &s) const
1376{
1377 WRAPPER_NO_CONTRACT;
1378
1379 return Match(Begin(), s);
1380}
1381
1382//-----------------------------------------------------------------------------
1383// Returns TRUE if this string begins with the contents of s
1384//-----------------------------------------------------------------------------
1385BOOL SString::BeginsWithCaseInsensitive(const SString &s, LocaleID locale) const
1386{
1387 WRAPPER_NO_CONTRACT;
1388
1389 return MatchCaseInsensitive(Begin(), s, locale);
1390}
1391
1392//-----------------------------------------------------------------------------
1393// Returns TRUE if this string begins with the contents of s. Invariant locale.
1394//-----------------------------------------------------------------------------
1395BOOL SString::BeginsWithCaseInsensitive(const SString &s) const
1396{
1397 WRAPPER_NO_CONTRACT;
1398
1399 return BeginsWithCaseInsensitive(s, s_defaultLCID);
1400}
1401
1402//-----------------------------------------------------------------------------
1403// Returns TRUE if this string ends with the contents of s
1404//-----------------------------------------------------------------------------
1405BOOL SString::EndsWith(const SString &s) const
1406{
1407 WRAPPER_NO_CONTRACT;
1408
1409 // Need this check due to iterator arithmetic below.
1410 if (GetCount() < s.GetCount())
1411 {
1412 return FALSE;
1413 }
1414
1415 return Match(End() - s.GetCount(), s);
1416}
1417
1418//-----------------------------------------------------------------------------
1419// Returns TRUE if this string ends with the contents of s
1420//-----------------------------------------------------------------------------
1421BOOL SString::EndsWithCaseInsensitive(const SString &s, LocaleID locale) const
1422{
1423 WRAPPER_NO_CONTRACT;
1424
1425 // Need this check due to iterator arithmetic below.
1426 if (GetCount() < s.GetCount())
1427 {
1428 return FALSE;
1429 }
1430
1431 return MatchCaseInsensitive(End() - s.GetCount(), s, locale);
1432}
1433
1434//-----------------------------------------------------------------------------
1435// Returns TRUE if this string ends with the contents of s. Invariant locale.
1436//-----------------------------------------------------------------------------
1437BOOL SString::EndsWithCaseInsensitive(const SString &s) const
1438{
1439 WRAPPER_NO_CONTRACT;
1440
1441 return EndsWithCaseInsensitive(s, s_defaultLCID);
1442}
1443
1444//-----------------------------------------------------------------------------
1445// Compare this string's contents to s's contents.
1446// The comparison does not take into account localization issues like case folding.
1447// Return 0 if equal, <0 if this < s, >0 is this > s. (same as strcmp).
1448//-----------------------------------------------------------------------------
1449int SString::Compare(const SString &s) const
1450{
1451 CONTRACT(int)
1452 {
1453 INSTANCE_CHECK;
1454 PRECONDITION(s.Check());
1455 THROWS_UNLESS_BOTH_NORMALIZED(s);
1456 GC_NOTRIGGER;
1457 }
1458 CONTRACT_END;
1459
1460 StackSString temp;
1461 const SString &source = GetCompatibleString(s, temp);
1462
1463 COUNT_T smaller;
1464 int equals = 0;
1465 int result = 0;
1466
1467 if (GetRawCount() < source.GetRawCount())
1468 {
1469 smaller = GetRawCount();
1470 equals = -1;
1471 }
1472 else if (GetRawCount() > source.GetRawCount())
1473 {
1474 smaller = source.GetRawCount();
1475 equals = 1;
1476 }
1477 else
1478 {
1479 smaller = GetRawCount();
1480 equals = 0;
1481 }
1482
1483 switch (GetRepresentation())
1484 {
1485 case REPRESENTATION_UNICODE:
1486 result = wcsncmp(GetRawUnicode(), source.GetRawUnicode(), smaller);
1487 break;
1488
1489 case REPRESENTATION_ASCII:
1490 case REPRESENTATION_ANSI:
1491 result = strncmp(GetRawASCII(), source.GetRawASCII(), smaller);
1492 break;
1493
1494 case REPRESENTATION_EMPTY:
1495 result = 0;
1496 break;
1497
1498 default:
1499 case REPRESENTATION_UTF8:
1500 UNREACHABLE();
1501 }
1502
1503 if (result == 0)
1504 RETURN equals;
1505 else
1506 RETURN result;
1507}
1508
1509//-----------------------------------------------------------------------------
1510// Compare this string's contents to s's contents.
1511// Return 0 if equal, <0 if this < s, >0 is this > s. (same as strcmp).
1512//-----------------------------------------------------------------------------
1513
1514int SString::CompareCaseInsensitive(const SString &s, LocaleID lcid) const
1515{
1516 CONTRACT(int)
1517 {
1518 INSTANCE_CHECK;
1519 PRECONDITION(s.Check());
1520 THROWS_UNLESS_BOTH_NORMALIZED(s);
1521 GC_NOTRIGGER;
1522 }
1523 CONTRACT_END;
1524
1525 StackSString temp;
1526 const SString &source = GetCompatibleString(s, temp);
1527
1528 COUNT_T smaller;
1529 int equals = 0;
1530 int result = 0;
1531
1532 if (GetRawCount() < source.GetRawCount())
1533 {
1534 smaller = GetRawCount();
1535 equals = -1;
1536 }
1537 else if (GetRawCount() > source.GetRawCount())
1538 {
1539 smaller = source.GetRawCount();
1540 equals = 1;
1541 }
1542 else
1543 {
1544 smaller = GetRawCount();
1545 equals = 0;
1546 }
1547
1548 switch (GetRepresentation())
1549 {
1550 case REPRESENTATION_UNICODE:
1551 result = CaseCompareHelper(GetRawUnicode(), source.GetRawUnicode(), smaller, lcid, FALSE, TRUE);
1552 break;
1553
1554 case REPRESENTATION_ASCII:
1555 case REPRESENTATION_ANSI:
1556 result = CaseCompareHelperA(GetRawASCII(), source.GetRawASCII(), smaller, lcid, FALSE, TRUE);
1557 break;
1558
1559 case REPRESENTATION_EMPTY:
1560 result = 0;
1561 break;
1562
1563 default:
1564 case REPRESENTATION_UTF8:
1565 UNREACHABLE();
1566 }
1567
1568 if (result == 0)
1569 RETURN equals;
1570 else
1571 RETURN result;
1572}
1573
1574//-----------------------------------------------------------------------------
1575// Compare this string's contents to s's contents.
1576// The comparison does not take into account localization issues like case folding.
1577// Return 1 if equal, 0 if not.
1578//-----------------------------------------------------------------------------
1579BOOL SString::Equals(const SString &s) const
1580{
1581 CONTRACT(BOOL)
1582 {
1583 INSTANCE_CHECK;
1584 PRECONDITION(s.Check());
1585 THROWS_UNLESS_BOTH_NORMALIZED(s);
1586 FAULTS_UNLESS_BOTH_NORMALIZED(s, ThrowOutOfMemory());
1587 GC_NOTRIGGER;
1588 }
1589 CONTRACT_END;
1590
1591 StackSString temp;
1592 const SString &source = GetCompatibleString(s, temp);
1593
1594 COUNT_T count = GetRawCount();
1595
1596 if (count != source.GetRawCount())
1597 RETURN FALSE;
1598
1599 switch (GetRepresentation())
1600 {
1601 case REPRESENTATION_UNICODE:
1602 RETURN (wcsncmp(GetRawUnicode(), source.GetRawUnicode(), count) == 0);
1603
1604 case REPRESENTATION_ASCII:
1605 case REPRESENTATION_ANSI:
1606 RETURN (strncmp(GetRawASCII(), source.GetRawASCII(), count) == 0);
1607
1608 case REPRESENTATION_EMPTY:
1609 RETURN TRUE;
1610
1611 default:
1612 case REPRESENTATION_UTF8:
1613 UNREACHABLE();
1614 }
1615
1616 RETURN FALSE;
1617}
1618
1619//-----------------------------------------------------------------------------
1620// Compare this string's contents case insensitively to s's contents.
1621// Return 1 if equal, 0 if not.
1622//-----------------------------------------------------------------------------
1623BOOL SString::EqualsCaseInsensitive(const SString &s, LocaleID lcid) const
1624{
1625 CONTRACT(BOOL)
1626 {
1627 INSTANCE_CHECK;
1628 PRECONDITION(s.Check());
1629 THROWS_UNLESS_BOTH_NORMALIZED(s);
1630 FAULTS_UNLESS_BOTH_NORMALIZED(s, ThrowOutOfMemory());
1631 GC_NOTRIGGER;
1632 }
1633 CONTRACT_END;
1634
1635 StackSString temp;
1636 const SString &source = GetCompatibleString(s, temp);
1637
1638 COUNT_T count = GetRawCount();
1639
1640 if (count != source.GetRawCount())
1641 RETURN FALSE;
1642
1643 switch (GetRepresentation())
1644 {
1645 case REPRESENTATION_UNICODE:
1646 RETURN (CaseCompareHelper(GetRawUnicode(), source.GetRawUnicode(), count, lcid, FALSE, TRUE) == 0);
1647
1648 case REPRESENTATION_ASCII:
1649 case REPRESENTATION_ANSI:
1650 RETURN (CaseCompareHelperA(GetRawASCII(), source.GetRawASCII(), count, lcid, FALSE, TRUE) == 0);
1651
1652 case REPRESENTATION_EMPTY:
1653 RETURN TRUE;
1654
1655 default:
1656 case REPRESENTATION_UTF8:
1657 UNREACHABLE();
1658 }
1659
1660 RETURN FALSE;
1661}
1662
1663//-----------------------------------------------------------------------------
1664// Compare s's contents to the substring starting at position
1665// The comparison does not take into account localization issues like case folding.
1666// Return TRUE if equal, FALSE if not
1667//-----------------------------------------------------------------------------
1668BOOL SString::Match(const CIterator &i, const SString &s) const
1669{
1670 CONTRACT(BOOL)
1671 {
1672 INSTANCE_CHECK;
1673 PRECONDITION(CheckIteratorRange(i));
1674 PRECONDITION(s.Check());
1675 THROWS_UNLESS_BOTH_NORMALIZED(s);
1676 GC_NOTRIGGER;
1677 }
1678 CONTRACT_END;
1679
1680 StackSString temp;
1681 const SString &source = GetCompatibleString(s, temp, i);
1682
1683 COUNT_T remaining = End() - i;
1684 COUNT_T count = source.GetRawCount();
1685
1686 if (remaining < count)
1687 RETURN FALSE;
1688
1689 switch (GetRepresentation())
1690 {
1691 case REPRESENTATION_UNICODE:
1692 RETURN (wcsncmp(i.GetUnicode(), source.GetRawUnicode(), count) == 0);
1693
1694 case REPRESENTATION_ASCII:
1695 case REPRESENTATION_ANSI:
1696 RETURN (strncmp(i.GetASCII(), source.GetRawASCII(), count) == 0);
1697
1698 case REPRESENTATION_EMPTY:
1699 RETURN TRUE;
1700
1701 default:
1702 case REPRESENTATION_UTF8:
1703 UNREACHABLE();
1704 }
1705
1706 RETURN FALSE;
1707}
1708
1709//-----------------------------------------------------------------------------
1710// Compare s's contents case insensitively to the substring starting at position
1711// Return TRUE if equal, FALSE if not
1712//-----------------------------------------------------------------------------
1713BOOL SString::MatchCaseInsensitive(const CIterator &i, const SString &s, LocaleID lcid) const
1714{
1715 CONTRACT(BOOL)
1716 {
1717 INSTANCE_CHECK;
1718 PRECONDITION(CheckIteratorRange(i));
1719 PRECONDITION(s.Check());
1720 THROWS_UNLESS_BOTH_NORMALIZED(s);
1721 GC_NOTRIGGER;
1722 }
1723 CONTRACT_END;
1724
1725 StackSString temp;
1726 const SString &source = GetCompatibleString(s, temp, i);
1727
1728 COUNT_T remaining = End() - i;
1729 COUNT_T count = source.GetRawCount();
1730
1731 if (remaining < count)
1732 RETURN FALSE;
1733
1734 switch (GetRepresentation())
1735 {
1736 case REPRESENTATION_UNICODE:
1737 case REPRESENTATION_ANSI:
1738 RETURN (CaseCompareHelper(i.GetUnicode(), source.GetRawUnicode(), count, lcid, FALSE, TRUE) == 0);
1739
1740 case REPRESENTATION_ASCII:
1741 RETURN (CaseCompareHelperA(i.GetASCII(), source.GetRawASCII(), count, lcid, FALSE, TRUE) == 0);
1742
1743 case REPRESENTATION_EMPTY:
1744 RETURN TRUE;
1745
1746 default:
1747 case REPRESENTATION_UTF8:
1748 UNREACHABLE();
1749 }
1750
1751 RETURN FALSE;
1752}
1753
1754//-----------------------------------------------------------------------------
1755// Compare c case insensitively to the character at position
1756// Return TRUE if equal, FALSE if not
1757//-----------------------------------------------------------------------------
1758BOOL SString::MatchCaseInsensitive(const CIterator &i, WCHAR c, LocaleID lcid) const
1759{
1760 SS_CONTRACT(BOOL)
1761 {
1762 GC_NOTRIGGER;
1763 INSTANCE_CHECK;
1764 PRECONDITION(CheckIteratorRange(i));
1765 NOTHROW;
1766 }
1767 SS_CONTRACT_END;
1768
1769 // End() will not throw here
1770 CONTRACT_VIOLATION(ThrowsViolation);
1771 if (i >= End())
1772 SS_RETURN FALSE;
1773
1774 WCHAR test = i[0];
1775
1776 SS_RETURN (test == c
1777 || ((CAN_SIMPLE_UPCASE(test) ? SIMPLE_UPCASE(test) : MapChar(test, LCMAP_UPPERCASE, lcid))
1778 == (CAN_SIMPLE_UPCASE(c) ? SIMPLE_UPCASE(c) : MapChar(c, LCMAP_UPPERCASE, lcid))));
1779}
1780
1781//-----------------------------------------------------------------------------
1782// Convert string to unicode lowercase using the invariant culture
1783// Note: Please don't use it in PATH as multiple character can map to the same
1784// lower case symbol
1785//-----------------------------------------------------------------------------
1786void SString::LowerCase()
1787{
1788 SS_CONTRACT_VOID
1789 {
1790 GC_NOTRIGGER;
1791 PRECONDITION(CheckPointer(this));
1792 SS_POSTCONDITION(CheckPointer(RETVAL));
1793 if (IsRepresentation(REPRESENTATION_UNICODE)) NOTHROW; else THROWS;
1794 SUPPORTS_DAC;
1795 }
1796 SS_CONTRACT_END;
1797
1798 ConvertToUnicode();
1799
1800 for (WCHAR *pwch = GetRawUnicode(); pwch < GetRawUnicode() + GetRawCount(); ++pwch)
1801 {
1802 *pwch = (CAN_SIMPLE_DOWNCASE(*pwch) ? SIMPLE_DOWNCASE(*pwch) : MapChar(*pwch, LCMAP_LOWERCASE, s_defaultLCID));
1803 }
1804}
1805
1806//-----------------------------------------------------------------------------
1807// Convert null-terminated string to lowercase using the invariant culture
1808//-----------------------------------------------------------------------------
1809//static
1810void SString::LowerCase(__inout_z LPWSTR wszString)
1811{
1812 SS_CONTRACT_VOID
1813 {
1814 GC_NOTRIGGER;
1815 NOTHROW;
1816 SUPPORTS_DAC;
1817 }
1818 SS_CONTRACT_END;
1819
1820 if (wszString == NULL)
1821 {
1822 return;
1823 }
1824
1825 for (WCHAR * pwch = wszString; *pwch != '\0'; ++pwch)
1826 {
1827 *pwch = (CAN_SIMPLE_DOWNCASE(*pwch) ? SIMPLE_DOWNCASE(*pwch) : MapChar(*pwch, LCMAP_LOWERCASE, s_defaultLCID));
1828 }
1829}
1830
1831//-----------------------------------------------------------------------------
1832// Convert string to unicode uppercase using the invariant culture
1833// Note: Please don't use it in PATH as multiple character can map to the same
1834// upper case symbol
1835//-----------------------------------------------------------------------------
1836void SString::UpperCase()
1837{
1838 SS_CONTRACT_VOID
1839 {
1840 GC_NOTRIGGER;
1841 PRECONDITION(CheckPointer(this));
1842 SS_POSTCONDITION(CheckPointer(RETVAL));
1843 if (IsRepresentation(REPRESENTATION_UNICODE)) NOTHROW; else THROWS;
1844 GC_NOTRIGGER;
1845 SUPPORTS_DAC;
1846 }
1847 SS_CONTRACT_END;
1848
1849 ConvertToUnicode();
1850
1851 for (WCHAR *pwch = GetRawUnicode(); pwch < GetRawUnicode() + GetRawCount(); ++pwch)
1852 {
1853 *pwch = (CAN_SIMPLE_UPCASE(*pwch) ? SIMPLE_UPCASE(*pwch) : MapChar(*pwch, LCMAP_UPPERCASE, s_defaultLCID));
1854 }
1855}
1856
1857//-----------------------------------------------------------------------------
1858// Get a const pointer to the internal buffer as an ANSI string.
1859//-----------------------------------------------------------------------------
1860const CHAR *SString::GetANSI(AbstractScratchBuffer &scratch) const
1861{
1862 SS_CONTRACT(const CHAR *)
1863 {
1864 INSTANCE_CHECK_NULL;
1865 THROWS;
1866 GC_NOTRIGGER;
1867 }
1868 SS_CONTRACT_END;
1869
1870 if (this == NULL)
1871 SS_RETURN NULL;
1872
1873 if (IsRepresentation(REPRESENTATION_ANSI))
1874 SS_RETURN GetRawANSI();
1875
1876 ConvertToANSI((SString&)scratch);
1877 SS_RETURN ((SString&)scratch).GetRawANSI();
1878}
1879
1880//-----------------------------------------------------------------------------
1881// Get a const pointer to the internal buffer as a UTF8 string.
1882//-----------------------------------------------------------------------------
1883const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch) const
1884{
1885 CONTRACT(const UTF8 *)
1886 {
1887 INSTANCE_CHECK_NULL;
1888 THROWS;
1889 GC_NOTRIGGER;
1890 }
1891 CONTRACT_END;
1892
1893 if (this == NULL)
1894 RETURN NULL;
1895
1896 if (IsRepresentation(REPRESENTATION_UTF8))
1897 RETURN GetRawUTF8();
1898
1899 ConvertToUTF8((SString&)scratch);
1900 RETURN ((SString&)scratch).GetRawUTF8();
1901}
1902
1903const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const
1904{
1905 CONTRACT(const UTF8 *)
1906 {
1907 INSTANCE_CHECK_NULL;
1908 THROWS;
1909 GC_NOTRIGGER;
1910 }
1911 CONTRACT_END;
1912
1913 if (this == NULL)
1914 RETURN NULL;
1915
1916 if (IsRepresentation(REPRESENTATION_UTF8))
1917 {
1918 *pcbUtf8 = GetRawCount() + 1;
1919 RETURN GetRawUTF8();
1920 }
1921
1922 *pcbUtf8 = ConvertToUTF8((SString&)scratch);
1923 RETURN ((SString&)scratch).GetRawUTF8();
1924}
1925
1926//-----------------------------------------------------------------------------
1927// Get a const pointer to the internal buffer which must already be a UTF8 string.
1928// This avoids the need to create a scratch buffer we know will never be used.
1929//-----------------------------------------------------------------------------
1930const UTF8 *SString::GetUTF8NoConvert() const
1931{
1932 CONTRACT(const UTF8 *)
1933 {
1934 INSTANCE_CHECK_NULL;
1935 THROWS;
1936 GC_NOTRIGGER;
1937 }
1938 CONTRACT_END;
1939
1940 if (this == NULL)
1941 RETURN NULL;
1942
1943 if (IsRepresentation(REPRESENTATION_UTF8))
1944 RETURN GetRawUTF8();
1945
1946 ThrowHR(E_INVALIDARG);
1947}
1948
1949//-----------------------------------------------------------------------------
1950// Safe version of sprintf.
1951// Prints formatted ansi text w/ var args to this buffer.
1952//-----------------------------------------------------------------------------
1953void SString::Printf(const CHAR *format, ...)
1954{
1955 WRAPPER_NO_CONTRACT;
1956
1957 va_list args;
1958 va_start(args, format);
1959 VPrintf(format, args);
1960 va_end(args);
1961}
1962
1963#ifdef _DEBUG
1964//
1965// Check the Printf use for potential globalization bugs. %S formatting
1966// specifier does Unicode->Ansi or Ansi->Unicode conversion using current
1967// C-locale. This almost always means globalization bug in the CLR codebase.
1968//
1969// Ideally, we would elimitate %S from all format strings. Unfortunately,
1970// %S is too widespread in non-shipping code that such cleanup is not feasible.
1971//
1972static void CheckForFormatStringGlobalizationIssues(const SString &format, const SString &result)
1973{
1974 CONTRACTL
1975 {
1976 THROWS;
1977 GC_NOTRIGGER;
1978 DEBUG_ONLY;
1979 }
1980 CONTRACTL_END;
1981
1982 BOOL fDangerousFormat = FALSE;
1983
1984 // Check whether the format string contains the %S formatting specifier
1985 SString::CIterator itrFormat = format.Begin();
1986 while (*itrFormat)
1987 {
1988 if (*itrFormat++ == '%')
1989 {
1990 // <TODO>Handle the complex format strings like %blahS</TODO>
1991 if (*itrFormat++ == 'S')
1992 {
1993 fDangerousFormat = TRUE;
1994 break;
1995 }
1996 }
1997 }
1998
1999 if (fDangerousFormat)
2000 {
2001 BOOL fNonAsciiUsed = FALSE;
2002
2003 // Now check whether there are any non-ASCII characters in the output.
2004
2005 // Check whether the result contains non-Ascii characters
2006 SString::CIterator itrResult = format.Begin();
2007 while (*itrResult)
2008 {
2009 if (*itrResult++ > 127)
2010 {
2011 fNonAsciiUsed = TRUE;
2012 break;
2013 }
2014 }
2015
2016 CONSISTENCY_CHECK_MSGF(!fNonAsciiUsed,
2017 ("Non-ASCII string was produced by %%S format specifier. This is likely globalization bug."
2018 "To fix this, change the format string to %%s and do the correct encoding at the Printf callsite"));
2019 }
2020}
2021#endif
2022
// Fallback values for the errno codes that the VPrintf/PVPrintf retry loops
// test for, used only when no earlier header has defined them.
#ifndef EBADF
#define EBADF 9
#endif

#ifndef ENOMEM
#define ENOMEM 12
#endif

#ifndef ERANGE
#define ERANGE 34
#endif

// Under MSVC, replace whatever va_copy was defined as with a plain
// assignment of the source va_list to the destination.
#if defined(_MSC_VER)
#undef va_copy
#define va_copy(dest,src) (dest = src)
#endif
2039
2040void SString::VPrintf(const CHAR *format, va_list args)
2041{
2042 CONTRACT_VOID
2043 {
2044 INSTANCE_CHECK;
2045 PRECONDITION(CheckPointer(format));
2046 THROWS;
2047 GC_NOTRIGGER;
2048 }
2049 CONTRACT_END;
2050
2051 va_list ap;
2052 // sprintf gives us no means to know how many characters are written
2053 // other than guessing and trying
2054
2055 if (GetRawCount() > 0)
2056 {
2057 // First, try to use the existing buffer
2058 va_copy(ap, args);
2059 int result = _vsnprintf_s(GetRawANSI(), GetRawCount()+1, _TRUNCATE, format, ap);
2060 va_end(ap);
2061
2062 if (result >=0)
2063 {
2064 // Succeeded in writing. Now resize -
2065 Resize(result, REPRESENTATION_ANSI, PRESERVE);
2066 SString sss(Ansi, format);
2067 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2068 RETURN;
2069 }
2070 }
2071
2072 // Make a guess how long the result will be (note this will be doubled)
2073
2074 COUNT_T guess = (COUNT_T) strlen(format)+1;
2075 if (guess < GetRawCount())
2076 guess = GetRawCount();
2077 if (guess < MINIMUM_GUESS)
2078 guess = MINIMUM_GUESS;
2079
2080 while (TRUE)
2081 {
2082 // Double the previous guess - eventually we will get enough space
2083 guess *= 2;
2084 Resize(guess, REPRESENTATION_ANSI);
2085
2086 // Clear errno to avoid false alarms
2087 errno = 0;
2088
2089 va_copy(ap, args);
2090 int result = _vsnprintf_s(GetRawANSI(), GetRawCount()+1, _TRUNCATE, format, ap);
2091 va_end(ap);
2092
2093 if (result >= 0)
2094 {
2095 // Succeed in writing. Shrink the buffer to fit exactly.
2096 Resize(result, REPRESENTATION_ANSI, PRESERVE);
2097 SString sss(Ansi, format);
2098 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2099 RETURN;
2100 }
2101
2102 if (errno==ENOMEM)
2103 {
2104 ThrowOutOfMemory();
2105 }
2106 else
2107 if (errno!=0 && errno!=EBADF && errno!=ERANGE)
2108 {
2109 CONSISTENCY_CHECK_MSG(FALSE, "_vsnprintf_s failed. Potential globalization bug.");
2110 ThrowHR(HRESULT_FROM_WIN32(ERROR_NO_UNICODE_TRANSLATION));
2111 }
2112 }
2113 RETURN;
2114}
2115
2116void SString::Printf(const WCHAR *format, ...)
2117{
2118 WRAPPER_NO_CONTRACT;
2119
2120 va_list args;
2121 va_start(args, format);
2122 VPrintf(format, args);
2123 va_end(args);
2124}
2125
2126void SString::PPrintf(const WCHAR *format, ...)
2127{
2128 CONTRACT_VOID
2129 {
2130 INSTANCE_CHECK;
2131 PRECONDITION(CheckPointer(format));
2132 THROWS;
2133 GC_NOTRIGGER;
2134 }
2135 CONTRACT_END;
2136
2137 va_list argItr;
2138 va_start(argItr, format);
2139 PVPrintf(format, argItr);
2140 va_end(argItr);
2141
2142 RETURN;
2143}
2144
2145void SString::VPrintf(const WCHAR *format, va_list args)
2146{
2147 CONTRACT_VOID
2148 {
2149 INSTANCE_CHECK;
2150 PRECONDITION(CheckPointer(format));
2151 THROWS;
2152 GC_NOTRIGGER;
2153 }
2154 CONTRACT_END;
2155
2156 va_list ap;
2157 // sprintf gives us no means to know how many characters are written
2158 // other than guessing and trying
2159
2160 if (GetRawCount() > 0)
2161 {
2162 // First, try to use the existing buffer
2163 va_copy(ap, args);
2164 int result = _vsnwprintf_s(GetRawUnicode(), GetRawCount()+1, _TRUNCATE, format, ap);
2165 va_end(ap);
2166
2167 if (result >= 0)
2168 {
2169 // succeeded
2170 Resize(result, REPRESENTATION_UNICODE, PRESERVE);
2171 SString sss(format);
2172 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2173 RETURN;
2174 }
2175 }
2176
2177 // Make a guess how long the result will be (note this will be doubled)
2178
2179 COUNT_T guess = (COUNT_T) wcslen(format)+1;
2180 if (guess < GetRawCount())
2181 guess = GetRawCount();
2182 if (guess < MINIMUM_GUESS)
2183 guess = MINIMUM_GUESS;
2184
2185 while (TRUE)
2186 {
2187 // Double the previous guess - eventually we will get enough space
2188 guess *= 2;
2189 Resize(guess, REPRESENTATION_UNICODE);
2190
2191 // Clear errno to avoid false alarms
2192 errno = 0;
2193
2194 va_copy(ap, args);
2195 int result = _vsnwprintf_s(GetRawUnicode(), GetRawCount()+1, _TRUNCATE, format, ap);
2196 va_end(ap);
2197
2198 if (result >= 0)
2199 {
2200 Resize(result, REPRESENTATION_UNICODE, PRESERVE);
2201 SString sss(format);
2202 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2203 RETURN;
2204 }
2205
2206 if (errno==ENOMEM)
2207 {
2208 ThrowOutOfMemory();
2209 }
2210 else
2211 if (errno!=0 && errno!=EBADF && errno!=ERANGE)
2212 {
2213 CONSISTENCY_CHECK_MSG(FALSE, "_vsnwprintf_s failed. Potential globalization bug.");
2214 ThrowHR(HRESULT_FROM_WIN32(ERROR_NO_UNICODE_TRANSLATION));
2215 }
2216 }
2217 RETURN;
2218}
2219
2220void SString::PVPrintf(const WCHAR *format, va_list args)
2221{
2222 CONTRACT_VOID
2223 {
2224 INSTANCE_CHECK;
2225 PRECONDITION(CheckPointer(format));
2226 THROWS;
2227 GC_NOTRIGGER;
2228 }
2229 CONTRACT_END;
2230
2231 va_list ap;
2232 // sprintf gives us no means to know how many characters are written
2233 // other than guessing and trying
2234
2235 if (GetRawCount() > 0)
2236 {
2237 // First, try to use the existing buffer
2238 va_copy(ap, args);
2239#if defined(FEATURE_CORESYSTEM)
2240 int result = _vsnwprintf_s(GetRawUnicode(), GetRawCount()+1, _TRUNCATE, format, ap);
2241#else
2242 int result = _vswprintf_p(GetRawUnicode(), GetRawCount()+1, format, ap);
2243#endif
2244 va_end(ap);
2245 if (result >= 0)
2246 {
2247 // succeeded
2248 Resize(result, REPRESENTATION_UNICODE, PRESERVE);
2249 SString sss(format);
2250 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2251 RETURN;
2252 }
2253 }
2254
2255 // Make a guess how long the result will be (note this will be doubled)
2256
2257 COUNT_T guess = (COUNT_T) wcslen(format)+1;
2258 if (guess < GetRawCount())
2259 guess = GetRawCount();
2260 if (guess < MINIMUM_GUESS)
2261 guess = MINIMUM_GUESS;
2262
2263 while (TRUE)
2264 {
2265 // Double the previous guess - eventually we will get enough space
2266 guess *= 2;
2267 Resize(guess, REPRESENTATION_UNICODE, DONT_PRESERVE);
2268
2269 // Clear errno to avoid false alarms
2270 errno = 0;
2271
2272 va_copy(ap, args);
2273#if defined(FEATURE_CORESYSTEM)
2274 int result = _vsnwprintf_s(GetRawUnicode(), GetRawCount()+1, _TRUNCATE, format, ap);
2275#else
2276 int result = _vswprintf_p(GetRawUnicode(), GetRawCount()+1, format, ap);
2277#endif
2278 va_end(ap);
2279
2280 if (result >= 0)
2281 {
2282 Resize(result, REPRESENTATION_UNICODE, PRESERVE);
2283 SString sss(format);
2284 INDEBUG(CheckForFormatStringGlobalizationIssues(sss, *this));
2285 RETURN;
2286 }
2287
2288 if (errno==ENOMEM)
2289 {
2290 ThrowOutOfMemory();
2291 }
2292 else
2293 if (errno!=0 && errno!=EBADF && errno!=ERANGE)
2294 {
2295 CONSISTENCY_CHECK_MSG(FALSE, "_vsnwprintf_s failed. Potential globalization bug.");
2296 ThrowHR(HRESULT_FROM_WIN32(ERROR_NO_UNICODE_TRANSLATION));
2297 }
2298 }
2299 RETURN;
2300}
2301
2302void SString::AppendPrintf(const CHAR *format, ...)
2303{
2304 WRAPPER_NO_CONTRACT;
2305
2306 va_list args;
2307 va_start(args, format);
2308 AppendVPrintf(format, args);
2309 va_end(args);
2310}
2311
2312void SString::AppendVPrintf(const CHAR *format, va_list args)
2313{
2314 WRAPPER_NO_CONTRACT;
2315
2316 StackSString s;
2317 s.VPrintf(format, args);
2318 Append(s);
2319}
2320
2321void SString::AppendPrintf(const WCHAR *format, ...)
2322{
2323 WRAPPER_NO_CONTRACT;
2324
2325 va_list args;
2326 va_start(args, format);
2327 AppendVPrintf(format, args);
2328 va_end(args);
2329}
2330
2331void SString::AppendVPrintf(const WCHAR *format, va_list args)
2332{
2333 WRAPPER_NO_CONTRACT;
2334
2335 StackSString s;
2336 s.VPrintf(format, args);
2337 Append(s);
2338}
2339
2340//----------------------------------------------------------------------------
2341// LoadResource - moved to sstring_com.cpp
2342//----------------------------------------------------------------------------
2343
2344//----------------------------------------------------------------------------
2345// Format the message and put the contents in this string
2346//----------------------------------------------------------------------------
2347
2348BOOL SString::FormatMessage(DWORD dwFlags, LPCVOID lpSource, DWORD dwMessageId, DWORD dwLanguageId,
2349 const SString &arg1, const SString &arg2,
2350 const SString &arg3, const SString &arg4,
2351 const SString &arg5, const SString &arg6,
2352 const SString &arg7, const SString &arg8,
2353 const SString &arg9, const SString &arg10)
2354{
2355 CONTRACT(BOOL)
2356 {
2357 INSTANCE_CHECK;
2358 THROWS;
2359 GC_NOTRIGGER;
2360 }
2361 CONTRACT_END;
2362
2363 const WCHAR *args[] = {arg1.GetUnicode(), arg2.GetUnicode(), arg3.GetUnicode(), arg4.GetUnicode(),
2364 arg5.GetUnicode(), arg6.GetUnicode(), arg7.GetUnicode(), arg8.GetUnicode(),
2365 arg9.GetUnicode(), arg10.GetUnicode()};
2366
2367 if (GetRawCount() > 0)
2368 {
2369 // First, try to use our existing buffer to hold the result.
2370 Resize(GetRawCount(), REPRESENTATION_UNICODE);
2371
2372 DWORD result = ::WszFormatMessage(dwFlags | FORMAT_MESSAGE_ARGUMENT_ARRAY,
2373 lpSource, dwMessageId, dwLanguageId,
2374 GetRawUnicode(), GetRawCount()+1, (va_list*)args);
2375
2376 // Although we cannot directly detect truncation, we can tell if we
2377 // used up all the space (in which case we will assume truncation.)
2378
2379 if (result != 0 && result < GetRawCount())
2380 {
2381 if (GetRawUnicode()[result-1] == W(' '))
2382 {
2383 GetRawUnicode()[result-1] = W('\0');
2384 result -= 1;
2385 }
2386 Resize(result, REPRESENTATION_UNICODE, PRESERVE);
2387 RETURN TRUE;
2388 }
2389 }
2390
2391 // We don't have enough space in our buffer, do dynamic allocation.
2392 LocalAllocHolder<WCHAR> string;
2393
2394 DWORD result = ::WszFormatMessage(dwFlags | FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_ARGUMENT_ARRAY,
2395 lpSource, dwMessageId, dwLanguageId,
2396 (LPWSTR)(LPWSTR*)&string, 0, (va_list*)args);
2397
2398 if (result == 0)
2399 RETURN FALSE;
2400 else
2401 {
2402 if (string[result-1] == W(' '))
2403 string[result-1] = W('\0');
2404
2405 Set(string);
2406 RETURN TRUE;
2407 }
2408}
2409
2410#if 1
2411//----------------------------------------------------------------------------
2412// Helper
2413//----------------------------------------------------------------------------
2414
2415// @todo -this should be removed and placed outside of SString
2416void SString::MakeFullNamespacePath(const SString &nameSpace, const SString &name)
2417{
2418 CONTRACT_VOID
2419 {
2420 INSTANCE_CHECK;
2421 THROWS;
2422 GC_NOTRIGGER;
2423 }
2424 CONTRACT_END;
2425
2426 if (nameSpace.GetRepresentation() == REPRESENTATION_UTF8
2427 && name.GetRepresentation() == REPRESENTATION_UTF8)
2428 {
2429 const UTF8 *ns = nameSpace.GetRawUTF8();
2430 const UTF8 *n = name.GetRawUTF8();
2431 COUNT_T count = ns::GetFullLength(ns, n)-1;
2432 Resize(count, REPRESENTATION_UTF8);
2433 if (count > 0)
2434 ns::MakePath(GetRawUTF8(), count+1, ns, n);
2435 }
2436 else
2437 {
2438 const WCHAR *ns = nameSpace;
2439 const WCHAR *n = name;
2440 COUNT_T count = ns::GetFullLength(ns, n)-1;
2441 Resize(count, REPRESENTATION_UNICODE);
2442 if (count > 0)
2443 ns::MakePath(GetRawUnicode(), count+1, ns, n);
2444 }
2445
2446 RETURN;
2447}
2448#endif
2449
2450
2451
2452//----------------------------------------------------------------------------
2453// Private helper.
2454// Check to see if the string fits the suggested representation
2455//----------------------------------------------------------------------------
2456BOOL SString::IsRepresentation(Representation representation) const
2457{
2458 CONTRACT(BOOL)
2459 {
2460 PRECONDITION(CheckRepresentation(representation));
2461 NOTHROW;
2462 GC_NOTRIGGER;
2463 SUPPORTS_DAC;
2464 }
2465 CONTRACT_END;
2466
2467 Representation currentRepresentation = GetRepresentation();
2468
2469 // If representations are the same, cool.
2470 if (currentRepresentation == representation)
2471 RETURN TRUE;
2472
2473 // If we have an empty representation, we match everything
2474 if (currentRepresentation == REPRESENTATION_EMPTY)
2475 RETURN TRUE;
2476
2477 // If we're a 1 byte charset, there are some more chances to match
2478 if (currentRepresentation != REPRESENTATION_UNICODE
2479 && representation != REPRESENTATION_UNICODE)
2480 {
2481 // If we're ASCII, we can be any 1 byte rep
2482 if (currentRepresentation == REPRESENTATION_ASCII)
2483 RETURN TRUE;
2484
2485 // We really want to be ASCII - scan to see if we qualify
2486 if (ScanASCII())
2487 RETURN TRUE;
2488 }
2489
2490 // Sorry, must convert.
2491 RETURN FALSE;
2492}
2493
2494//----------------------------------------------------------------------------
2495// Private helper.
2496// Get the contents of the given string in a form which is compatible with our
2497// string (and is in a fixed character set.) Updates the given iterator
2498// if necessary to keep it in sync.
2499//----------------------------------------------------------------------------
2500const SString &SString::GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const
2501{
2502 CONTRACTL
2503 {
2504 PRECONDITION(s.Check());
2505 PRECONDITION(scratch.Check());
2506 PRECONDITION(scratch.CheckEmpty());
2507 THROWS_UNLESS_BOTH_NORMALIZED(s);
2508 GC_NOTRIGGER;
2509 SUPPORTS_DAC;
2510 }
2511 CONTRACTL_END;
2512
2513 // Since we have an iterator, we should be fixed size already
2514 CONSISTENCY_CHECK(IsFixedSize());
2515
2516 switch (GetRepresentation())
2517 {
2518 case REPRESENTATION_EMPTY:
2519 return s;
2520
2521 case REPRESENTATION_ASCII:
2522 if (s.IsRepresentation(REPRESENTATION_ASCII))
2523 return s;
2524
2525 // We can't in general convert to ASCII, so try unicode.
2526 ConvertToUnicode(i);
2527 // fall through
2528
2529 case REPRESENTATION_UNICODE:
2530 if (s.IsRepresentation(REPRESENTATION_UNICODE))
2531 return s;
2532
2533 // @todo: we could convert s to unicode - is that a good policy????
2534 s.ConvertToUnicode(scratch);
2535 return scratch;
2536
2537 case REPRESENTATION_UTF8:
2538 case REPRESENTATION_ANSI:
2539 // These should all be impossible since we have an CIterator on us.
2540 default:
2541 UNREACHABLE_MSG("Unexpected string representation");
2542 }
2543
2544 return s;
2545}
2546
2547//----------------------------------------------------------------------------
2548// Private helper.
2549// Get the contents of the given string in a form which is compatible with our
2550// string (and is in a fixed character set.)
2551// May convert our string to unicode.
2552//----------------------------------------------------------------------------
2553const SString &SString::GetCompatibleString(const SString &s, SString &scratch) const
2554{
2555 CONTRACTL
2556 {
2557 PRECONDITION(s.Check());
2558 PRECONDITION(scratch.Check());
2559 PRECONDITION(scratch.CheckEmpty());
2560 THROWS_UNLESS_BOTH_NORMALIZED(s);
2561 GC_NOTRIGGER;
2562 }
2563 CONTRACTL_END;
2564
2565 // First, make sure we have a fixed size.
2566 ConvertToFixed();
2567
2568 switch (GetRepresentation())
2569 {
2570 case REPRESENTATION_EMPTY:
2571 return s;
2572
2573 case REPRESENTATION_ANSI:
2574 if (s.IsRepresentation(REPRESENTATION_ANSI))
2575 return s;
2576
2577 s.ConvertToANSI(scratch);
2578 return scratch;
2579
2580 case REPRESENTATION_ASCII:
2581 if (s.IsRepresentation(REPRESENTATION_ASCII))
2582 return s;
2583
2584 // We can't in general convert to ASCII, so try unicode.
2585 ConvertToUnicode();
2586 // fall through
2587
2588 case REPRESENTATION_UNICODE:
2589 if (s.IsRepresentation(REPRESENTATION_UNICODE))
2590 return s;
2591
2592 // @todo: we could convert s to unicode in place - is that a good policy????
2593 s.ConvertToUnicode(scratch);
2594 return scratch;
2595
2596 case REPRESENTATION_UTF8:
2597 default:
2598 UNREACHABLE();
2599 }
2600
2601 return s;
2602}
2603
2604//----------------------------------------------------------------------------
2605// Private helper.
2606// If we have a 1 byte representation, scan the buffer to see if we can gain
2607// some conversion flexibility by labelling it ASCII
2608//----------------------------------------------------------------------------
2609BOOL SString::ScanASCII() const
2610{
2611 CONTRACT(BOOL)
2612 {
2613 POSTCONDITION(IsRepresentation(REPRESENTATION_ASCII) || IsASCIIScanned());
2614 NOTHROW;
2615 GC_NOTRIGGER;
2616 SUPPORTS_DAC;
2617 }
2618 CONTRACT_END;
2619
2620 if (!IsASCIIScanned())
2621 {
2622 const CHAR *c = GetRawANSI();
2623 const CHAR *cEnd = c + GetRawCount();
2624 while (c < cEnd)
2625 {
2626 if (*c & 0x80)
2627 break;
2628 c++;
2629 }
2630 if (c == cEnd)
2631 {
2632 const_cast<SString *>(this)->SetRepresentation(REPRESENTATION_ASCII);
2633 RETURN TRUE;
2634 }
2635 else
2636 const_cast<SString *>(this)->SetASCIIScanned();
2637 }
2638 RETURN FALSE;
2639}
2640
2641//----------------------------------------------------------------------------
2642// Private helper.
2643// Resize updates the geometry of the string and ensures that
2644// the space can be written to.
2645// count - number of characters (not including null) to hold
2646// preserve - if we realloc, do we copy data from old to new?
2647//----------------------------------------------------------------------------
2648
2649void SString::Resize(COUNT_T count, SString::Representation representation, Preserve preserve)
2650{
2651 CONTRACT_VOID
2652 {
2653 PRECONDITION(CountToSize(count) >= count);
2654 POSTCONDITION(IsRepresentation(representation));
2655 POSTCONDITION(GetRawCount() == count);
2656 if (count == 0) NOTHROW; else THROWS;
2657 GC_NOTRIGGER;
2658 SUPPORTS_DAC_HOST_ONLY;
2659 }
2660 CONTRACT_END;
2661
2662 // If we are resizing to zero, Clear is more efficient
2663 if (count == 0)
2664 {
2665 Clear();
2666 }
2667 else
2668 {
2669 SetRepresentation(representation);
2670
2671 COUNT_T size = CountToSize(count);
2672
2673 // detect overflow
2674 if (size < count)
2675 ThrowOutOfMemory();
2676
2677 ClearNormalized();
2678
2679 SBuffer::Resize(size, preserve);
2680
2681 if (IsImmutable())
2682 EnsureMutable();
2683
2684 NullTerminate();
2685 }
2686
2687 RETURN;
2688}
2689
2690//-----------------------------------------------------------------------------
2691// This is essentially a specialized version of the above for size 0
2692//-----------------------------------------------------------------------------
2693void SString::Clear()
2694{
2695 CONTRACT_VOID
2696 {
2697 INSTANCE_CHECK;
2698 POSTCONDITION(IsEmpty());
2699 NOTHROW;
2700 GC_NOTRIGGER;
2701 SO_TOLERANT;
2702 SUPPORTS_DAC_HOST_ONLY;
2703 }
2704 CONTRACT_END;
2705
2706 SetRepresentation(REPRESENTATION_EMPTY);
2707
2708 if (IsImmutable())
2709 {
2710 // Use shared empty string rather than allocating a new buffer
2711 SBuffer::SetImmutable(s_EmptyBuffer, sizeof(s_EmptyBuffer));
2712 }
2713 else
2714 {
2715 // Leave allocated buffer for future growth
2716 SBuffer::TweakSize(sizeof(WCHAR));
2717 GetRawUnicode()[0] = 0;
2718 }
2719
2720 RETURN;
2721}
2722
2723
2724#ifdef DACCESS_COMPILE
2725
2726//---------------------------------------------------------------------------------------
2727//
2728// Return a pointer to the raw buffer
2729//
2730// Returns:
2731// A pointer to the raw string buffer.
2732//
2733void * SString::DacGetRawContent() const
2734{
2735 if (IsEmpty())
2736 {
2737 return NULL;
2738 }
2739
2740 switch (GetRepresentation())
2741 {
2742 case REPRESENTATION_EMPTY:
2743 return NULL;
2744
2745 case REPRESENTATION_UNICODE:
2746 case REPRESENTATION_UTF8:
2747 case REPRESENTATION_ASCII:
2748 case REPRESENTATION_ANSI:
2749 // Note: no need to call DacInstantiateString because we know the exact length already.
2750 return SBuffer::DacGetRawContent();
2751
2752 default:
2753 DacNotImpl();
2754 return NULL;
2755 }
2756}
2757
2758//---------------------------------------------------------------------------------------
2759//
2760// Return a pointer to the raw buffer as a pointer to a unicode string. Does not
2761// do conversion, and thus requires that the representation already be in unicode.
2762//
2763// Returns:
2764// A pointer to the raw string buffer as a unicode string.
2765//
2766const WCHAR * SString::DacGetRawUnicode() const
2767{
2768 if (IsEmpty() || (GetRepresentation() == REPRESENTATION_EMPTY))
2769 {
2770 return W("");
2771 }
2772
2773 if (GetRepresentation() != REPRESENTATION_UNICODE)
2774 {
2775 DacError(E_UNEXPECTED);
2776 }
2777
2778 HRESULT status = S_OK;
2779 WCHAR* wszBuf = NULL;
2780 EX_TRY
2781 {
2782 wszBuf = static_cast<WCHAR*>(SBuffer::DacGetRawContent());
2783 }
2784 EX_CATCH_HRESULT(status);
2785
2786 if (SUCCEEDED(status))
2787 {
2788 return wszBuf;
2789 }
2790 else
2791 {
2792 return NULL;
2793 }
2794}
2795
2796//---------------------------------------------------------------------------------------
2797//
2798// Copy the string from the target into the provided buffer, converting to unicode if necessary
2799//
2800// Arguments:
2801// cBufChars - size of pBuffer in count of unicode characters.
2802// pBuffer - a buffer of cBufChars unicode chars.
2803// pcNeedChars - space to store the number of unicode chars in the SString.
2804//
2805// Returns:
2806// true if successful - and buffer is filled with the unicode representation of
2807// the string.
2808// false if unsuccessful.
2809//
2810bool SString::DacGetUnicode(COUNT_T cBufChars,
2811 __out_z __inout_ecount(cBufChars) WCHAR * pBuffer,
2812 COUNT_T * pcNeedChars) const
2813{
2814 SUPPORTS_DAC;
2815
2816 PVOID pContent = NULL;
2817 int iPage = CP_ACP;
2818
2819 if (IsEmpty() || (GetRepresentation() == REPRESENTATION_EMPTY))
2820 {
2821 if (pcNeedChars)
2822 {
2823 *pcNeedChars = 1;
2824 }
2825 if (pBuffer && cBufChars)
2826 {
2827 pBuffer[0] = 0;
2828 }
2829 return true;
2830 }
2831
2832 HRESULT status = S_OK;
2833 EX_TRY
2834 {
2835 pContent = SBuffer::DacGetRawContent();
2836 }
2837 EX_CATCH_HRESULT(status);
2838
2839 if (SUCCEEDED(status) && pContent != NULL)
2840 {
2841 switch (GetRepresentation())
2842 {
2843
2844 case REPRESENTATION_UNICODE:
2845
2846 if (pcNeedChars)
2847 {
2848 *pcNeedChars = GetCount() + 1;
2849 }
2850
2851 if (pBuffer && cBufChars)
2852 {
2853 if (cBufChars > GetCount() + 1)
2854 {
2855 cBufChars = GetCount() + 1;
2856 }
2857 memcpy(pBuffer, pContent, cBufChars * sizeof(*pBuffer));
2858 pBuffer[cBufChars - 1] = 0;
2859 }
2860
2861 return true;
2862
2863 case REPRESENTATION_UTF8:
2864 iPage = CP_UTF8;
2865 case REPRESENTATION_ASCII:
2866 case REPRESENTATION_ANSI:
2867 // iPage defaults to CP_ACP.
2868 if (pcNeedChars)
2869 {
2870 *pcNeedChars = WszMultiByteToWideChar(iPage, 0, reinterpret_cast<PSTR>(pContent), -1, NULL, 0);
2871 }
2872 if (pBuffer && cBufChars)
2873 {
2874 if (!WszMultiByteToWideChar(iPage, 0, reinterpret_cast<PSTR>(pContent), -1, pBuffer, cBufChars))
2875 {
2876 return false;
2877 }
2878 }
2879 return true;
2880
2881 default:
2882 DacNotImpl();
2883 return false;
2884 }
2885 }
2886 return false;
2887}
2888
2889#endif //DACCESS_COMPILE
2890