| 1 | // |
| 2 | // UTF8Encoding.h |
| 3 | // |
| 4 | // Library: Foundation |
| 5 | // Package: Text |
| 6 | // Module: UTF8Encoding |
| 7 | // |
| 8 | // Definition of the UTF8Encoding class. |
| 9 | // |
| 10 | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. |
| 11 | // and Contributors. |
| 12 | // |
| 13 | // SPDX-License-Identifier: BSL-1.0 |
| 14 | // |
| 15 | |
| 16 | |
| 17 | #ifndef Foundation_UTF8Encoding_INCLUDED |
| 18 | #define Foundation_UTF8Encoding_INCLUDED |
| 19 | |
| 20 | |
| 21 | #include "Poco/Foundation.h" |
| 22 | #include "Poco/TextEncoding.h" |
| 23 | |
| 24 | |
| 25 | namespace Poco { |
| 26 | |
| 27 | |
| 28 | class Foundation_API UTF8Encoding: public TextEncoding |
| 29 | /// UTF-8 text encoding, as defined in RFC 2279 (since obsoleted by RFC 3629, which caps UTF-8 sequences at 4 bytes). |
| 30 | { |
| 31 | public: |
| 32 | UTF8Encoding(); |
| 33 | ~UTF8Encoding(); |
| 34 | const char* canonicalName() const; |
| 35 | bool isA(const std::string& encodingName) const; |
| 36 | const CharacterMap& characterMap() const; |
| 37 | int convert(const unsigned char* bytes) const; |
| 38 | int convert(int ch, unsigned char* bytes, int length) const; |
| 39 | int queryConvert(const unsigned char* bytes, int length) const; |
| 40 | int sequenceLength(const unsigned char* bytes, int length) const; |
| 41 | |
| 42 | static bool isLegal(const unsigned char *bytes, int length); |
| 43 | /// Reports whether a sequence of bytes is legal UTF-8. |
| 44 | /// The caller must pass the length already determined from the first byte. |
| 45 | /// If fewer bytes than that are available, the sequence is |
| 46 | /// illegal right away. A length > 4 always makes this function return false, |
| 47 | /// since the Unicode definition of UTF-8 only goes up to 4-byte sequences. |
| 48 | /// |
| 49 | /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c |
| 50 | /// Copyright 2001-2004 Unicode, Inc. |
| 51 | |
| 52 | private: |
| 53 | static const char* _names[]; |
| 54 | static const CharacterMap _charMap; |
| 55 | }; |
| 56 | |
| 57 | |
| 58 | } // namespace Poco |
| 59 | |
| 60 | |
| 61 | #endif // Foundation_UTF8Encoding_INCLUDED |
| 62 | |