1 | // |
2 | // UTF8Encoding.h |
3 | // |
4 | // Library: Foundation |
5 | // Package: Text |
6 | // Module: UTF8Encoding |
7 | // |
8 | // Definition of the UTF8Encoding class. |
9 | // |
10 | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Foundation_UTF8Encoding_INCLUDED |
18 | #define Foundation_UTF8Encoding_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Foundation.h" |
22 | #include "Poco/TextEncoding.h" |
23 | |
24 | |
25 | namespace Poco { |
26 | |
27 | |
class Foundation_API UTF8Encoding: public TextEncoding
	/// UTF-8 text encoding, as defined in RFC 2279.
	///
	/// NOTE(review): RFC 2279 has been obsoleted by RFC 3629, which limits
	/// UTF-8 to sequences of at most 4 bytes. The isLegal() documentation
	/// below also speaks of a 4-byte maximum; confirm against the
	/// implementation which definition is actually followed.
	///
	/// This header only declares the members; their definitions live
	/// elsewhere and are not visible here.
{
public:
	UTF8Encoding();
		/// Creates the encoding object. Takes no arguments.

	~UTF8Encoding();
		/// Destroys the encoding object.

	const char* canonicalName() const;
		/// Presumably returns the primary name of this encoding as a
		/// C string -- TODO confirm against Poco/TextEncoding.h.

	bool isA(const std::string& encodingName) const;
		/// Presumably tells whether encodingName is one of the names this
		/// encoding is known under -- TODO confirm the matching rules
		/// (e.g. case sensitivity) in the implementation.

	const CharacterMap& characterMap() const;
		/// Returns a reference to a CharacterMap. Presumably this is the
		/// static _charMap member declared below -- TODO confirm.

	int convert(const unsigned char* bytes) const;
		/// Presumably decodes the UTF-8 sequence starting at bytes and
		/// returns the resulting code point. No length is passed, so the
		/// caller is presumably responsible for providing a complete
		/// sequence -- TODO confirm the contract and error return value.

	int convert(int ch, unsigned char* bytes, int length) const;
		/// Presumably encodes the code point ch into the buffer bytes,
		/// which has room for length bytes -- TODO confirm what is
		/// returned and what happens when the buffer is too small.

	int queryConvert(const unsigned char* bytes, int length) const;
		/// Presumably a length-checked variant of the single-argument
		/// convert(), examining at most length bytes -- TODO confirm the
		/// meaning of the return value for incomplete or invalid input.

	int sequenceLength(const unsigned char* bytes, int length) const;
		/// Presumably reports how many bytes the sequence starting at
		/// bytes occupies, given that length bytes are available --
		/// TODO confirm the return values for incomplete or invalid input.

	static bool isLegal(const unsigned char *bytes, int length);
		/// Utility routine to tell whether a sequence of bytes is legal UTF-8.
		/// This must be called with the length pre-determined by the first byte.
		/// The sequence is illegal right away if there aren't enough bytes
		/// available. If presented with a length > 4, this function returns false.
		/// The Unicode definition of UTF-8 goes up to 4-byte sequences.
		///
		/// Being static, it can be called without a UTF8Encoding instance.
		///
		/// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
		/// Copyright 2001-2004 Unicode, Inc.

private:
	static const char* _names[];
		/// Array of C strings shared by all instances; its size is fixed
		/// by the out-of-line definition. Presumably the names this
		/// encoding answers to -- TODO confirm.

	static const CharacterMap _charMap;
		/// Constant CharacterMap shared by all instances, defined out of
		/// line. Presumably the table returned by characterMap() --
		/// TODO confirm.
};
56 | |
57 | |
58 | } // namespace Poco |
59 | |
60 | |
61 | #endif // Foundation_UTF8Encoding_INCLUDED |
62 | |