| 1 | // | 
|---|
| 2 | // UTF8Encoding.h | 
|---|
| 3 | // | 
|---|
| 4 | // Library: Foundation | 
|---|
| 5 | // Package: Text | 
|---|
| 6 | // Module:  UTF8Encoding | 
|---|
| 7 | // | 
|---|
| 8 | // Definition of the UTF8Encoding class. | 
|---|
| 9 | // | 
|---|
| 10 | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. | 
|---|
| 11 | // and Contributors. | 
|---|
| 12 | // | 
|---|
| 13 | // SPDX-License-Identifier:	BSL-1.0 | 
|---|
| 14 | // | 
|---|
| 15 |  | 
|---|
| 16 |  | 
|---|
| 17 | #ifndef Foundation_UTF8Encoding_INCLUDED | 
|---|
| 18 | #define Foundation_UTF8Encoding_INCLUDED | 
|---|
| 19 |  | 
|---|
| 20 |  | 
|---|
| 21 | #include "Poco/Foundation.h" | 
|---|
| 22 | #include "Poco/TextEncoding.h" | 
|---|
| 23 |  | 
|---|
| 24 |  | 
|---|
| 25 | namespace Poco { | 
|---|
| 26 |  | 
|---|
| 27 |  | 
|---|
| 28 | class Foundation_API UTF8Encoding: public TextEncoding | 
|---|
| 29 | /// UTF-8 text encoding, as defined in RFC 2279. | 
|---|
| 30 | { | 
|---|
| 31 | public: | 
|---|
| 32 | UTF8Encoding(); | 
|---|
| 33 | ~UTF8Encoding(); | 
|---|
| 34 | const char* canonicalName() const; | 
|---|
| 35 | bool isA(const std::string& encodingName) const; | 
|---|
| 36 | const CharacterMap& characterMap() const; | 
|---|
| 37 | int convert(const unsigned char* bytes) const; | 
|---|
| 38 | int convert(int ch, unsigned char* bytes, int length) const; | 
|---|
| 39 | int queryConvert(const unsigned char* bytes, int length) const; | 
|---|
| 40 | int sequenceLength(const unsigned char* bytes, int length) const; | 
|---|
| 41 |  | 
|---|
| 42 | static bool isLegal(const unsigned char *bytes, int length); | 
|---|
| 43 | /// Utility routine to tell whether a sequence of bytes is legal UTF-8. | 
|---|
| 44 | /// This must be called with the length pre-determined by the first byte. | 
|---|
| 45 | /// The sequence is illegal right away if there aren't enough bytes | 
|---|
| 46 | /// available. If presented with a length > 4, this function returns false. | 
|---|
| 47 | /// The Unicode definition of UTF-8 goes up to 4-byte sequences. | 
|---|
| 48 | /// | 
|---|
| 49 | /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c | 
|---|
| 50 | /// Copyright 2001-2004 Unicode, Inc. | 
|---|
| 51 |  | 
|---|
| 52 | private: | 
|---|
| 53 | static const char* _names[]; | 
|---|
| 54 | static const CharacterMap _charMap; | 
|---|
| 55 | }; | 
|---|
| 56 |  | 
|---|
| 57 |  | 
|---|
| 58 | } // namespace Poco | 
|---|
| 59 |  | 
|---|
| 60 |  | 
|---|
| 61 | #endif // Foundation_UTF8Encoding_INCLUDED | 
|---|
| 62 |  | 
|---|