1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2010-2012, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * file name: idna.h |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2010mar05 |
14 | * created by: Markus W. Scherer |
15 | */ |
16 | |
17 | #ifndef __IDNA_H__ |
18 | #define __IDNA_H__ |
19 | |
20 | /** |
21 | * \file |
22 | * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) |
23 | */ |
24 | |
25 | #include "unicode/utypes.h" |
26 | |
27 | #if U_SHOW_CPLUSPLUS_API |
28 | |
29 | #if !UCONFIG_NO_IDNA |
30 | |
31 | #include "unicode/bytestream.h" |
32 | #include "unicode/stringpiece.h" |
33 | #include "unicode/uidna.h" |
34 | #include "unicode/unistr.h" |
35 | |
36 | U_NAMESPACE_BEGIN |
37 | |
38 | class IDNAInfo; |
39 | |
40 | /** |
41 | * Abstract base class for IDNA processing. |
42 | * See http://www.unicode.org/reports/tr46/ |
43 | * and http://www.ietf.org/rfc/rfc3490.txt |
44 | * |
45 | * The IDNA class is not intended for public subclassing. |
46 | * Instances are obtained from createUTS46Instance(). The four UnicodeString conversion methods are pure virtual; their UTF-8 counterparts are virtual but not pure. |
47 | * This C++ API currently only implements UTS #46. |
48 | * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) |
49 | * and IDNA2003 (functions that do not use a service object). |
50 | * @stable ICU 4.6 |
51 | */ |
52 | class U_COMMON_API IDNA : public UObject { |
53 | public: |
54 | /** |
55 | * Destructor. |
56 | * @stable ICU 4.6 |
57 | */ |
58 | ~IDNA(); |
59 | |
60 | /** |
61 | * Returns an IDNA instance which implements UTS #46. |
62 | * Returns an unmodifiable instance, owned by the caller. |
63 | * Cache it for multiple operations, and delete it when done. |
64 | * The instance is thread-safe, that is, it can be used concurrently. |
65 | * |
66 | * UTS #46 defines Unicode IDNA Compatibility Processing, |
67 | * updated to the latest version of Unicode and compatible with both |
68 | * IDNA2003 and IDNA2008. |
69 | * |
70 | * The worker functions use transitional processing, including deviation mappings, |
71 | * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE |
72 | * is used, in which case the deviation characters are passed through without change. |
73 | * |
74 | * Disallowed characters are mapped to U+FFFD. |
75 | * |
76 | * For available options see the uidna.h header. |
77 | * Operations with the UTS #46 instance do not support the |
78 | * UIDNA_ALLOW_UNASSIGNED option. |
79 | * |
80 | * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). |
81 | * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than |
82 | * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. |
83 | * |
84 | * @param options Bit set to modify the processing and error checking. |
85 | * See option bit set values in uidna.h. |
86 | * @param errorCode Standard ICU error code. Its input value must |
87 | * pass the U_SUCCESS() test, or else the function returns |
88 | * immediately. Check for U_FAILURE() on output or use with |
89 | * function chaining. (See User Guide for details.) |
90 | * @return the UTS #46 IDNA instance, if successful; the caller owns it and deletes it when done |
91 | * @stable ICU 4.6 |
92 | */ |
93 | static IDNA * |
94 | createUTS46Instance(uint32_t options, UErrorCode &errorCode); |
95 | |
96 | /** |
97 | * Converts a single domain name label into its ASCII form for DNS lookup. |
98 | * If any processing step fails, then info.hasErrors() will be TRUE and |
99 | * the result might not be an ASCII string. |
100 | * The label might be modified according to the types of errors. |
101 | * Labels with severe errors will be left in (or turned into) their Unicode form. |
102 | * |
103 | * The UErrorCode indicates an error only in exceptional cases, |
104 | * such as a U_MEMORY_ALLOCATION_ERROR. |
105 | * |
106 | * @param label Input domain name label |
107 | * @param dest Destination string object |
108 | * @param info Output container of IDNA processing details. |
109 | * @param errorCode Standard ICU error code. Its input value must |
110 | * pass the U_SUCCESS() test, or else the function returns |
111 | * immediately. Check for U_FAILURE() on output or use with |
112 | * function chaining. (See User Guide for details.) |
113 | * @return dest |
114 | * @stable ICU 4.6 |
115 | */ |
116 | virtual UnicodeString & |
117 | labelToASCII(const UnicodeString &label, UnicodeString &dest, |
118 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
119 | |
120 | /** |
121 | * Converts a single domain name label into its Unicode form for human-readable display. |
122 | * If any processing step fails, then info.hasErrors() will be TRUE. |
123 | * The label might be modified according to the types of errors. |
124 | * |
125 | * The UErrorCode indicates an error only in exceptional cases, |
126 | * such as a U_MEMORY_ALLOCATION_ERROR. |
127 | * |
128 | * @param label Input domain name label |
129 | * @param dest Destination string object |
130 | * @param info Output container of IDNA processing details. |
131 | * @param errorCode Standard ICU error code. Its input value must |
132 | * pass the U_SUCCESS() test, or else the function returns |
133 | * immediately. Check for U_FAILURE() on output or use with |
134 | * function chaining. (See User Guide for details.) |
135 | * @return dest |
136 | * @stable ICU 4.6 |
137 | */ |
138 | virtual UnicodeString & |
139 | labelToUnicode(const UnicodeString &label, UnicodeString &dest, |
140 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
141 | |
142 | /** |
143 | * Converts a whole domain name into its ASCII form for DNS lookup. |
144 | * If any processing step fails, then info.hasErrors() will be TRUE and |
145 | * the result might not be an ASCII string. |
146 | * The domain name might be modified according to the types of errors. |
147 | * Labels with severe errors will be left in (or turned into) their Unicode form. |
148 | * |
149 | * The UErrorCode indicates an error only in exceptional cases, |
150 | * such as a U_MEMORY_ALLOCATION_ERROR. |
151 | * |
152 | * @param name Input domain name |
153 | * @param dest Destination string object |
154 | * @param info Output container of IDNA processing details. |
155 | * @param errorCode Standard ICU error code. Its input value must |
156 | * pass the U_SUCCESS() test, or else the function returns |
157 | * immediately. Check for U_FAILURE() on output or use with |
158 | * function chaining. (See User Guide for details.) |
159 | * @return dest |
160 | * @stable ICU 4.6 |
161 | */ |
162 | virtual UnicodeString & |
163 | nameToASCII(const UnicodeString &name, UnicodeString &dest, |
164 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
165 | |
166 | /** |
167 | * Converts a whole domain name into its Unicode form for human-readable display. |
168 | * If any processing step fails, then info.hasErrors() will be TRUE. |
169 | * The domain name might be modified according to the types of errors. |
170 | * |
171 | * The UErrorCode indicates an error only in exceptional cases, |
172 | * such as a U_MEMORY_ALLOCATION_ERROR. |
173 | * |
174 | * @param name Input domain name |
175 | * @param dest Destination string object |
176 | * @param info Output container of IDNA processing details. |
177 | * @param errorCode Standard ICU error code. Its input value must |
178 | * pass the U_SUCCESS() test, or else the function returns |
179 | * immediately. Check for U_FAILURE() on output or use with |
180 | * function chaining. (See User Guide for details.) |
181 | * @return dest |
182 | * @stable ICU 4.6 |
183 | */ |
184 | virtual UnicodeString & |
185 | nameToUnicode(const UnicodeString &name, UnicodeString &dest, |
186 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
187 | |
188 | // UTF-8 versions of the processing methods (StringPiece input, ByteSink output; virtual but not pure) --- *** |
189 | |
190 | /** |
191 | * Converts a single domain name label into its ASCII form for DNS lookup. |
192 | * UTF-8 version of labelToASCII(), same behavior, but declared virtual rather than pure virtual. |
193 | * |
194 | * @param label Input domain name label |
195 | * @param dest Destination byte sink; Flush()ed if successful |
196 | * @param info Output container of IDNA processing details. |
197 | * @param errorCode Standard ICU error code. Its input value must |
198 | * pass the U_SUCCESS() test, or else the function returns |
199 | * immediately. Check for U_FAILURE() on output or use with |
200 | * function chaining. (See User Guide for details.) |
201 | * @note Declared void: the result is written to dest, nothing is returned. |
202 | * @stable ICU 4.6 |
203 | */ |
204 | virtual void |
205 | labelToASCII_UTF8(StringPiece label, ByteSink &dest, |
206 | IDNAInfo &info, UErrorCode &errorCode) const; |
207 | |
208 | /** |
209 | * Converts a single domain name label into its Unicode form for human-readable display. |
210 | * UTF-8 version of labelToUnicode(), same behavior, but declared virtual rather than pure virtual. |
211 | * |
212 | * @param label Input domain name label |
213 | * @param dest Destination byte sink; Flush()ed if successful |
214 | * @param info Output container of IDNA processing details. |
215 | * @param errorCode Standard ICU error code. Its input value must |
216 | * pass the U_SUCCESS() test, or else the function returns |
217 | * immediately. Check for U_FAILURE() on output or use with |
218 | * function chaining. (See User Guide for details.) |
219 | * @note Declared void: the result is written to dest, nothing is returned. |
220 | * @stable ICU 4.6 |
221 | */ |
222 | virtual void |
223 | labelToUnicodeUTF8(StringPiece label, ByteSink &dest, |
224 | IDNAInfo &info, UErrorCode &errorCode) const; |
225 | |
226 | /** |
227 | * Converts a whole domain name into its ASCII form for DNS lookup. |
228 | * UTF-8 version of nameToASCII(), same behavior, but declared virtual rather than pure virtual. |
229 | * |
230 | * @param name Input domain name |
231 | * @param dest Destination byte sink; Flush()ed if successful |
232 | * @param info Output container of IDNA processing details. |
233 | * @param errorCode Standard ICU error code. Its input value must |
234 | * pass the U_SUCCESS() test, or else the function returns |
235 | * immediately. Check for U_FAILURE() on output or use with |
236 | * function chaining. (See User Guide for details.) |
237 | * @note Declared void: the result is written to dest, nothing is returned. |
238 | * @stable ICU 4.6 |
239 | */ |
240 | virtual void |
241 | nameToASCII_UTF8(StringPiece name, ByteSink &dest, |
242 | IDNAInfo &info, UErrorCode &errorCode) const; |
243 | |
244 | /** |
245 | * Converts a whole domain name into its Unicode form for human-readable display. |
246 | * UTF-8 version of nameToUnicode(), same behavior, but declared virtual rather than pure virtual. |
247 | * |
248 | * @param name Input domain name |
249 | * @param dest Destination byte sink; Flush()ed if successful |
250 | * @param info Output container of IDNA processing details. |
251 | * @param errorCode Standard ICU error code. Its input value must |
252 | * pass the U_SUCCESS() test, or else the function returns |
253 | * immediately. Check for U_FAILURE() on output or use with |
254 | * function chaining. (See User Guide for details.) |
255 | * @note Declared void: the result is written to dest, nothing is returned. |
256 | * @stable ICU 4.6 |
257 | */ |
258 | virtual void |
259 | nameToUnicodeUTF8(StringPiece name, ByteSink &dest, |
260 | IDNAInfo &info, UErrorCode &errorCode) const; |
261 | }; |
262 | |
263 | class UTS46; |
264 | |
265 | /** |
266 | * Output container for IDNA processing errors. |
267 | * The IDNAInfo class is not suitable for subclassing. It is also not copyable (the copy constructor and assignment operator are private), and only the friend class UTS46 can reach reset() and the data members. |
268 | * @stable ICU 4.6 |
269 | */ |
270 | class U_COMMON_API IDNAInfo : public UMemory { |
271 | public: |
272 | /** |
273 | * Constructor for stack allocation. The new object reports no errors: errors and labelErrors start at 0, isTransDiff and isBiDi at FALSE, and isOkBiDi at TRUE (the same state that the private reset() restores). |
274 | * @stable ICU 4.6 |
275 | */ |
276 | IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} |
277 | /** |
278 | * Were there IDNA processing errors? |
279 | * @return TRUE if there were processing errors, that is, if getErrors() is not 0 |
280 | * @stable ICU 4.6 |
281 | */ |
282 | UBool hasErrors() const { return errors!=0; } |
283 | /** |
284 | * Returns a bit set indicating IDNA processing errors. |
285 | * See UIDNA_ERROR_... constants in uidna.h. |
286 | * @return bit set of processing errors; 0 when hasErrors() is FALSE |
287 | * @stable ICU 4.6 |
288 | */ |
289 | uint32_t getErrors() const { return errors; } |
290 | /** |
291 | * Returns TRUE if transitional and nontransitional processing produce different results. |
292 | * This is the case when the input label or domain name contains |
293 | * one or more deviation characters outside a Punycode label (see UTS #46). |
294 | * <ul> |
295 | * <li>With nontransitional processing, such characters are |
296 | * copied to the destination string. |
297 | * <li>With transitional processing, such characters are |
298 | * mapped (sharp s/sigma) or removed (joiner/nonjoiner). |
299 | * </ul> |
300 | * @return TRUE if transitional and nontransitional processing produce different results |
301 | * @stable ICU 4.6 |
302 | */ |
303 | UBool isTransitionalDifferent() const { return isTransDiff; } |
304 | |
305 | private: |
306 | friend class UTS46; |
307 | |
308 | IDNAInfo(const IDNAInfo &other); // private, with no definition in this header: no copying |
309 | IDNAInfo &operator=(const IDNAInfo &other); // private, with no definition in this header: no copying |
310 | |
311 | void reset() { |
312 | errors=labelErrors=0; |
313 | isTransDiff=FALSE; |
314 | isBiDi=FALSE; |
315 | isOkBiDi=TRUE; |
316 | } |
317 | |
318 | uint32_t errors, labelErrors; |
319 | UBool isTransDiff; |
320 | UBool isBiDi; |
321 | UBool isOkBiDi; |
322 | }; |
323 | |
324 | U_NAMESPACE_END |
325 | |
326 | #endif // UCONFIG_NO_IDNA |
327 | |
328 | #endif /* U_SHOW_CPLUSPLUS_API */ |
329 | |
330 | #endif // __IDNA_H__ |
331 | |