1 | //************************************ bs::framework - Copyright 2018 Marko Pintera **************************************// |
2 | //*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********// |
3 | #pragma once |
4 | |
5 | #include "Prerequisites/BsPrerequisitesUtil.h" |
6 | |
7 | namespace bs |
8 | { |
9 | /** @addtogroup String |
10 | * @{ |
11 | */ |
12 | |
13 | /** Provides methods for converting between UTF-8 character encoding and other popular encodings. */ |
14 | class BS_UTILITY_EXPORT UTF8 |
15 | { |
16 | public: |
17 | /** |
18 | * Converts from an ANSI encoding in the specified locale into UTF-8. |
19 | * |
20 | * @param[in] input Narrow string encoded as ANSI characters. Characters are expected to be in the code page |
21 | * specified by @p locale. |
22 | * @param[in] locale Locale that determines how the ANSI characters are interpreted. Defaults to std::locale(""). |
23 | * @return UTF-8 encoded string. |
24 | */ |
25 | static String fromANSI(const String& input, const std::locale& locale = std::locale("" )); |
26 | |
27 | /** |
28 | * Converts from a UTF-8 encoding into ANSI encoding in the specified locale. |
29 | * |
30 | * @param[in] input Narrow string encoded as UTF-8 characters. |
31 | * @param[in] locale Locale that determines which code page the ANSI characters are generated for. Defaults to std::locale(""). |
32 | * @param[in] invalidChar Character that will be used when a Unicode character cannot be represented using |
33 | * the selected ANSI code page. Defaults to 0. |
34 | * @return ANSI encoded string in the specified locale. |
35 | */ |
36 | static String toANSI(const String& input, const std::locale& locale = std::locale("" ), char invalidChar = 0); |
37 | |
38 | /** |
39 | * Converts from a system-specific wide character encoding into UTF-8. |
40 | * |
41 | * @param[in] input Wide string to convert. Actual encoding is system specific but can be assumed to be UTF-16 on |
42 | * Windows and UTF-32 on Unix. |
43 | * @return UTF-8 encoded string. |
44 | */ |
45 | static String fromWide(const WString& input); |
46 | |
47 | /** |
48 | * Converts from a UTF-8 encoding into a system-specific wide character encoding. |
49 | * |
50 | * @param[in] input Narrow string encoded as UTF-8 characters. |
51 | * @return Wide string encoded in a system-specific manner. Actual encoding can be assumed to be UTF-16 |
52 | * on Windows and UTF-32 on Unix. |
53 | */ |
54 | static WString toWide(const String& input); |
55 | |
56 | /** |
57 | * Converts from a UTF-16 encoding into UTF-8. |
58 | * |
59 | * @param[in] input String encoded as UTF-16. |
60 | * @return UTF-8 encoded string. |
61 | */ |
62 | static String fromUTF16(const U16String& input); |
63 | |
64 | /** |
65 | * Converts from a UTF-8 encoding into UTF-16. |
66 | * |
67 | * @param[in] input String encoded as UTF-8. |
68 | * @return UTF-16 encoded string. |
69 | */ |
70 | static U16String toUTF16(const String& input); |
71 | |
72 | /** |
73 | * Converts from a UTF-32 encoding into UTF-8. |
74 | * |
75 | * @param[in] input String encoded as UTF-32. |
76 | * @return UTF-8 encoded string. |
77 | */ |
78 | static String fromUTF32(const U32String& input); |
79 | |
80 | /** |
81 | * Converts from a UTF-8 encoding into UTF-32. |
82 | * |
83 | * @param[in] input String encoded as UTF-8. |
84 | * @return UTF-32 encoded string. |
85 | */ |
86 | static U32String toUTF32(const String& input); |
87 | |
88 | /** Counts the number of characters in the provided UTF-8 input string. */ |
89 | static UINT32 count(const String& input); |
90 | |
91 | /** Converts the provided UTF-8 encoded string to lowercase. */ |
92 | static String toLower(const String& input); |
93 | |
94 | /** Converts the provided UTF-8 encoded string to uppercase. */ |
95 | static String toUpper(const String& input); |
96 | |
97 | /** |
98 | * Returns the byte index at which the character with the specified index starts. The string is expected to be in |
99 | * UTF-8 encoding. If @p charIdx is out of range the method returns the index past the last byte in the string (same |
100 | * as the string length in bytes). |
101 | */ |
102 | static UINT32 charToByteIndex(const String& input, UINT32 charIdx); |
103 | |
104 | /** Calculates the number of bytes taken up by the character at the specified position. */ |
105 | static UINT32 charByteCount(const String& input, UINT32 charIdx); |
106 | }; |
107 | |
108 | /** @} */ |
109 | } |