1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 2005-2015, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | */ |
9 | |
10 | #ifndef __CSR2022_H |
11 | #define __CSR2022_H |
12 | |
13 | #include "unicode/utypes.h" |
14 | |
15 | #if !UCONFIG_NO_CONVERSION |
16 | |
17 | #include "csrecog.h" |
18 | |
19 | U_NAMESPACE_BEGIN |
20 | |
21 | class CharsetMatch; |
22 | |
23 | /** |
24 | * class CharsetRecog_2022 part of the ICU charset detection imlementation. |
25 | * This is a superclass for the individual detectors for |
26 | * each of the detectable members of the ISO 2022 family |
27 | * of encodings. |
28 | * |
29 | * The separate classes are nested within this class. |
30 | * |
31 | * @internal |
32 | */ |
33 | class CharsetRecog_2022 : public CharsetRecognizer |
34 | { |
35 | |
36 | public: |
37 | virtual ~CharsetRecog_2022() = 0; |
38 | |
39 | protected: |
40 | |
41 | /** |
42 | * Matching function shared among the 2022 detectors JP, CN and KR |
43 | * Counts up the number of legal an unrecognized escape sequences in |
44 | * the sample of text, and computes a score based on the total number & |
45 | * the proportion that fit the encoding. |
46 | * |
47 | * |
48 | * @param text the byte buffer containing text to analyse |
49 | * @param textLen the size of the text in the byte. |
50 | * @param escapeSequences the byte escape sequences to test for. |
51 | * @return match quality, in the range of 0-100. |
52 | */ |
53 | int32_t match_2022(const uint8_t *text, |
54 | int32_t textLen, |
55 | const uint8_t escapeSequences[][5], |
56 | int32_t escapeSequences_length) const; |
57 | |
58 | }; |
59 | |
60 | class CharsetRecog_2022JP :public CharsetRecog_2022 |
61 | { |
62 | public: |
63 | virtual ~CharsetRecog_2022JP(); |
64 | |
65 | const char *getName() const; |
66 | |
67 | UBool match(InputText *textIn, CharsetMatch *results) const; |
68 | }; |
69 | |
70 | #if !UCONFIG_ONLY_HTML_CONVERSION |
71 | class CharsetRecog_2022KR :public CharsetRecog_2022 { |
72 | public: |
73 | virtual ~CharsetRecog_2022KR(); |
74 | |
75 | const char *getName() const; |
76 | |
77 | UBool match(InputText *textIn, CharsetMatch *results) const; |
78 | |
79 | }; |
80 | |
81 | class CharsetRecog_2022CN :public CharsetRecog_2022 |
82 | { |
83 | public: |
84 | virtual ~CharsetRecog_2022CN(); |
85 | |
86 | const char* getName() const; |
87 | |
88 | UBool match(InputText *textIn, CharsetMatch *results) const; |
89 | }; |
90 | #endif |
91 | |
92 | U_NAMESPACE_END |
93 | |
94 | #endif |
95 | #endif /* __CSR2022_H */ |
96 | |