1//
2// UTF8Encoding.h
3//
4// Library: Foundation
5// Package: Text
6// Module: UTF8Encoding
7//
8// Definition of the UTF8Encoding class.
9//
10// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
11// and Contributors.
12//
13// SPDX-License-Identifier: BSL-1.0
14//
15
16
17#ifndef Foundation_UTF8Encoding_INCLUDED
18#define Foundation_UTF8Encoding_INCLUDED
19
20
21#include "Poco/Foundation.h"
22#include "Poco/TextEncoding.h"
23
24
25namespace Poco {
26
27
28class Foundation_API UTF8Encoding: public TextEncoding
29 /// UTF-8 text encoding, as defined in RFC 2279.
30{
31public:
32 UTF8Encoding();
33 ~UTF8Encoding();
34 const char* canonicalName() const;
35 bool isA(const std::string& encodingName) const;
36 const CharacterMap& characterMap() const;
37 int convert(const unsigned char* bytes) const;
38 int convert(int ch, unsigned char* bytes, int length) const;
39 int queryConvert(const unsigned char* bytes, int length) const;
40 int sequenceLength(const unsigned char* bytes, int length) const;
41
42 static bool isLegal(const unsigned char *bytes, int length);
43 /// Utility routine to tell whether a sequence of bytes is legal UTF-8.
44 /// This must be called with the length pre-determined by the first byte.
45 /// The sequence is illegal right away if there aren't enough bytes
46 /// available. If presented with a length > 4, this function returns false.
47 /// The Unicode definition of UTF-8 goes up to 4-byte sequences.
48 ///
49 /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
50 /// Copyright 2001-2004 Unicode, Inc.
51
52private:
53 static const char* _names[];
54 static const CharacterMap _charMap;
55};
56
57
58} // namespace Poco
59
60
61#endif // Foundation_UTF8Encoding_INCLUDED
62