//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
3#pragma once
4
5#include "Prerequisites/BsPrerequisitesUtil.h"
6
7namespace bs
8{
9 /** @addtogroup String
10 * @{
11 */
12
13 /** Provides methods to converting between UTF-8 character encoding and other popular encodings. */
14 class BS_UTILITY_EXPORT UTF8
15 {
16 public:
17 /**
18 * Converts from an ANSI encoding in the specified locale into UTF-8.
19 *
20 * @param[in] input Narrow string encoded as ANSI characters. Characters are expected to be in the code page
21 * specified by @p locale.
22 * @param[in] locale Locale that determines how are the ANSI characters interpreted.
23 * @return UTF-8 encoded string.
24 */
25 static String fromANSI(const String& input, const std::locale& locale = std::locale(""));
26
27 /**
28 * Converts from an UTF-8 encoding into ANSI encoding in the specified locale.
29 *
30 * @param[in] input Narrow string encoded as UTF-8 characters.
31 * @param[in] locale Locale that determines from which code page to generate the ANSI characters.
32 * @param[in] invalidChar Character that will be used when an Unicode character cannot be represented using
33 * the selected ANSI code page.
34 * @return ANSI encoded string in the specified locale.
35 */
36 static String toANSI(const String& input, const std::locale& locale = std::locale(""), char invalidChar = 0);
37
38 /**
39 * Converts from a system-specific wide character encoding into UTF-8.
40 *
41 * @param[in] input Wide string to convert. Actual encoding is system specific be can be assumed to be UTF-16 on
42 * Windows and UTF-32 on Unix.
43 * @return UTF-8 encoded string.
44 */
45 static String fromWide(const WString& input);
46
47 /**
48 * Converts from an UTF-8 encoding into system-specific wide character encoding.
49 *
50 * @param[in] input Narrow string encoded as UTF-8 characters.
51 * @return Wide string encoded in a system-specific manner. Actual encoding can be assumed to be UTF-16
52 * on Windows and UTF-32 and Unix.
53 */
54 static WString toWide(const String& input);
55
56 /**
57 * Converts from an UTF-16 encoding into UTF-8.
58 *
59 * @param[in] input String encoded as UTF-16.
60 * @return UTF-8 encoded string.
61 */
62 static String fromUTF16(const U16String& input);
63
64 /**
65 * Converts from an UTF-8 encoding into UTF-16.
66 *
67 * @param[in] input String encoded as UTF-8.
68 * @return UTF-16 encoded string.
69 */
70 static U16String toUTF16(const String& input);
71
72 /**
73 * Converts from an UTF-32 encoding into UTF-8.
74 *
75 * @param[in] input String encoded as UTF-32.
76 * @return UTF-8 encoded string.
77 */
78 static String fromUTF32(const U32String& input);
79
80 /**
81 * Converts from an UTF-8 encoding into UTF-32.
82 *
83 * @param[in] input String encoded as UTF-8.
84 * @return UTF-32 encoded string.
85 */
86 static U32String toUTF32(const String& input);
87
88 /** Counts the number of characters in the provided UTF-8 input string. */
89 static UINT32 count(const String& input);
90
91 /** Converts the provided UTF8 encoded string to lowercase. */
92 static String toLower(const String& input);
93
94 /** Converts the provided UTF8 encoded string to uppercase. */
95 static String toUpper(const String& input);
96
97 /**
98 * Returns the byte at which the character with the specified index starts. The string is expected to be in UTF-8
99 * encoding. If @p charIdx is out of range the method returns the index past the last byte in the string (same
100 * as the string length in bytes).
101 */
102 static UINT32 charToByteIndex(const String& input, UINT32 charIdx);
103
104 /** Calculates the number of bytes taken up by the character at the specified position. */
105 static UINT32 charByteCount(const String& input, UINT32 charIdx);
106 };
107
108 /** @} */
109}