| 1 | //************************************ bs::framework - Copyright 2018 Marko Pintera **************************************// |
| 2 | //*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********// |
| 3 | #pragma once |
| 4 | |
| 5 | #include "Prerequisites/BsPrerequisitesUtil.h" |
| 6 | |
| 7 | namespace bs |
| 8 | { |
| 9 | /** @addtogroup String |
| 10 | * @{ |
| 11 | */ |
| 12 | |
| 13 | /** Provides methods for converting between UTF-8 character encoding and other popular encodings. */ |
| 14 | class BS_UTILITY_EXPORT UTF8 |
| 15 | { |
| 16 | public: |
| 17 | /** |
| 18 | * Converts from an ANSI encoding in the specified locale into UTF-8. |
| 19 | * |
| 20 | * @param[in] input Narrow string encoded as ANSI characters. Characters are expected to be in the code page |
| 21 | * specified by @p locale. |
| 22 | * @param[in] locale Locale that determines how the ANSI characters are interpreted. |
| 23 | * @return UTF-8 encoded string. |
| 24 | */ |
| 25 | static String fromANSI(const String& input, const std::locale& locale = std::locale("" )); |
| 26 | |
| 27 | /** |
| 28 | * Converts from a UTF-8 encoding into ANSI encoding in the specified locale. |
| 29 | * |
| 30 | * @param[in] input Narrow string encoded as UTF-8 characters. |
| 31 | * @param[in] locale Locale that determines from which code page to generate the ANSI characters. |
| 32 | * @param[in] invalidChar Character that will be used when a Unicode character cannot be represented using |
| 33 | * the selected ANSI code page. |
| 34 | * @return ANSI encoded string in the specified locale. |
| 35 | */ |
| 36 | static String toANSI(const String& input, const std::locale& locale = std::locale("" ), char invalidChar = 0); |
| 37 | |
| 38 | /** |
| 39 | * Converts from a system-specific wide character encoding into UTF-8. |
| 40 | * |
| 41 | * @param[in] input Wide string to convert. Actual encoding is system specific but can be assumed to be UTF-16 on |
| 42 | * Windows and UTF-32 on Unix. |
| 43 | * @return UTF-8 encoded string. |
| 44 | */ |
| 45 | static String fromWide(const WString& input); |
| 46 | |
| 47 | /** |
| 48 | * Converts from a UTF-8 encoding into system-specific wide character encoding. |
| 49 | * |
| 50 | * @param[in] input Narrow string encoded as UTF-8 characters. |
| 51 | * @return Wide string encoded in a system-specific manner. Actual encoding can be assumed to be UTF-16 |
| 52 | * on Windows and UTF-32 on Unix. |
| 53 | */ |
| 54 | static WString toWide(const String& input); |
| 55 | |
| 56 | /** |
| 57 | * Converts from a UTF-16 encoding into UTF-8. |
| 58 | * |
| 59 | * @param[in] input String encoded as UTF-16. |
| 60 | * @return UTF-8 encoded string. |
| 61 | */ |
| 62 | static String fromUTF16(const U16String& input); |
| 63 | |
| 64 | /** |
| 65 | * Converts from a UTF-8 encoding into UTF-16. |
| 66 | * |
| 67 | * @param[in] input String encoded as UTF-8. |
| 68 | * @return UTF-16 encoded string. |
| 69 | */ |
| 70 | static U16String toUTF16(const String& input); |
| 71 | |
| 72 | /** |
| 73 | * Converts from a UTF-32 encoding into UTF-8. |
| 74 | * |
| 75 | * @param[in] input String encoded as UTF-32. |
| 76 | * @return UTF-8 encoded string. |
| 77 | */ |
| 78 | static String fromUTF32(const U32String& input); |
| 79 | |
| 80 | /** |
| 81 | * Converts from a UTF-8 encoding into UTF-32. |
| 82 | * |
| 83 | * @param[in] input String encoded as UTF-8. |
| 84 | * @return UTF-32 encoded string. |
| 85 | */ |
| 86 | static U32String toUTF32(const String& input); |
| 87 | |
| 88 | /** Counts the number of characters in the provided UTF-8 input string. */ |
| 89 | static UINT32 count(const String& input); |
| 90 | |
| 91 | /** Converts the provided UTF-8 encoded string to lowercase. */ |
| 92 | static String toLower(const String& input); |
| 93 | |
| 94 | /** Converts the provided UTF-8 encoded string to uppercase. */ |
| 95 | static String toUpper(const String& input); |
| 96 | |
| 97 | /** |
| 98 | * Returns the byte at which the character with the specified index starts. The string is expected to be in UTF-8 |
| 99 | * encoding. If @p charIdx is out of range the method returns the index past the last byte in the string (same |
| 100 | * as the string length in bytes). |
| 101 | */ |
| 102 | static UINT32 charToByteIndex(const String& input, UINT32 charIdx); |
| 103 | |
| 104 | /** Calculates the number of bytes taken up by the character at the position given by @p charIdx. */ |
| 105 | static UINT32 charByteCount(const String& input, UINT32 charIdx); |
| 106 | }; |
| 107 | |
| 108 | /** @} */ |
| 109 | } |