// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: idna.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010mar05
* created by: Markus W. Scherer
*/

17#ifndef __IDNA_H__
18#define __IDNA_H__
19
20/**
21 * \file
22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23 */
24
25#include "unicode/utypes.h"
26
27#if U_SHOW_CPLUSPLUS_API
28
29#if !UCONFIG_NO_IDNA
30
31#include "unicode/bytestream.h"
32#include "unicode/stringpiece.h"
33#include "unicode/uidna.h"
34#include "unicode/unistr.h"
35
36U_NAMESPACE_BEGIN
37
38class IDNAInfo;
39
40/**
41 * Abstract base class for IDNA processing.
42 * See http://www.unicode.org/reports/tr46/
43 * and http://www.ietf.org/rfc/rfc3490.txt
44 *
45 * The IDNA class is not intended for public subclassing.
46 *
47 * This C++ API currently only implements UTS #46.
48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
49 * and IDNA2003 (functions that do not use a service object).
50 * @stable ICU 4.6
51 */
52class U_COMMON_API IDNA : public UObject {
53public:
54 /**
55 * Destructor.
56 * @stable ICU 4.6
57 */
58 ~IDNA();
59
60 /**
61 * Returns an IDNA instance which implements UTS #46.
62 * Returns an unmodifiable instance, owned by the caller.
63 * Cache it for multiple operations, and delete it when done.
64 * The instance is thread-safe, that is, it can be used concurrently.
65 *
66 * UTS #46 defines Unicode IDNA Compatibility Processing,
67 * updated to the latest version of Unicode and compatible with both
68 * IDNA2003 and IDNA2008.
69 *
70 * The worker functions use transitional processing, including deviation mappings,
71 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
72 * is used in which case the deviation characters are passed through without change.
73 *
74 * Disallowed characters are mapped to U+FFFD.
75 *
76 * For available options see the uidna.h header.
77 * Operations with the UTS #46 instance do not support the
78 * UIDNA_ALLOW_UNASSIGNED option.
79 *
80 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
81 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
82 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
83 *
84 * @param options Bit set to modify the processing and error checking.
85 * See option bit set values in uidna.h.
86 * @param errorCode Standard ICU error code. Its input value must
87 * pass the U_SUCCESS() test, or else the function returns
88 * immediately. Check for U_FAILURE() on output or use with
89 * function chaining. (See User Guide for details.)
90 * @return the UTS #46 IDNA instance, if successful
91 * @stable ICU 4.6
92 */
93 static IDNA *
94 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
95
96 /**
97 * Converts a single domain name label into its ASCII form for DNS lookup.
98 * If any processing step fails, then info.hasErrors() will be TRUE and
99 * the result might not be an ASCII string.
100 * The label might be modified according to the types of errors.
101 * Labels with severe errors will be left in (or turned into) their Unicode form.
102 *
103 * The UErrorCode indicates an error only in exceptional cases,
104 * such as a U_MEMORY_ALLOCATION_ERROR.
105 *
106 * @param label Input domain name label
107 * @param dest Destination string object
108 * @param info Output container of IDNA processing details.
109 * @param errorCode Standard ICU error code. Its input value must
110 * pass the U_SUCCESS() test, or else the function returns
111 * immediately. Check for U_FAILURE() on output or use with
112 * function chaining. (See User Guide for details.)
113 * @return dest
114 * @stable ICU 4.6
115 */
116 virtual UnicodeString &
117 labelToASCII(const UnicodeString &label, UnicodeString &dest,
118 IDNAInfo &info, UErrorCode &errorCode) const = 0;
119
120 /**
121 * Converts a single domain name label into its Unicode form for human-readable display.
122 * If any processing step fails, then info.hasErrors() will be TRUE.
123 * The label might be modified according to the types of errors.
124 *
125 * The UErrorCode indicates an error only in exceptional cases,
126 * such as a U_MEMORY_ALLOCATION_ERROR.
127 *
128 * @param label Input domain name label
129 * @param dest Destination string object
130 * @param info Output container of IDNA processing details.
131 * @param errorCode Standard ICU error code. Its input value must
132 * pass the U_SUCCESS() test, or else the function returns
133 * immediately. Check for U_FAILURE() on output or use with
134 * function chaining. (See User Guide for details.)
135 * @return dest
136 * @stable ICU 4.6
137 */
138 virtual UnicodeString &
139 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
140 IDNAInfo &info, UErrorCode &errorCode) const = 0;
141
142 /**
143 * Converts a whole domain name into its ASCII form for DNS lookup.
144 * If any processing step fails, then info.hasErrors() will be TRUE and
145 * the result might not be an ASCII string.
146 * The domain name might be modified according to the types of errors.
147 * Labels with severe errors will be left in (or turned into) their Unicode form.
148 *
149 * The UErrorCode indicates an error only in exceptional cases,
150 * such as a U_MEMORY_ALLOCATION_ERROR.
151 *
152 * @param name Input domain name
153 * @param dest Destination string object
154 * @param info Output container of IDNA processing details.
155 * @param errorCode Standard ICU error code. Its input value must
156 * pass the U_SUCCESS() test, or else the function returns
157 * immediately. Check for U_FAILURE() on output or use with
158 * function chaining. (See User Guide for details.)
159 * @return dest
160 * @stable ICU 4.6
161 */
162 virtual UnicodeString &
163 nameToASCII(const UnicodeString &name, UnicodeString &dest,
164 IDNAInfo &info, UErrorCode &errorCode) const = 0;
165
166 /**
167 * Converts a whole domain name into its Unicode form for human-readable display.
168 * If any processing step fails, then info.hasErrors() will be TRUE.
169 * The domain name might be modified according to the types of errors.
170 *
171 * The UErrorCode indicates an error only in exceptional cases,
172 * such as a U_MEMORY_ALLOCATION_ERROR.
173 *
174 * @param name Input domain name
175 * @param dest Destination string object
176 * @param info Output container of IDNA processing details.
177 * @param errorCode Standard ICU error code. Its input value must
178 * pass the U_SUCCESS() test, or else the function returns
179 * immediately. Check for U_FAILURE() on output or use with
180 * function chaining. (See User Guide for details.)
181 * @return dest
182 * @stable ICU 4.6
183 */
184 virtual UnicodeString &
185 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
186 IDNAInfo &info, UErrorCode &errorCode) const = 0;
187
188 // UTF-8 versions of the processing methods ---------------------------- ***
189
190 /**
191 * Converts a single domain name label into its ASCII form for DNS lookup.
192 * UTF-8 version of labelToASCII(), same behavior.
193 *
194 * @param label Input domain name label
195 * @param dest Destination byte sink; Flush()ed if successful
196 * @param info Output container of IDNA processing details.
197 * @param errorCode Standard ICU error code. Its input value must
198 * pass the U_SUCCESS() test, or else the function returns
199 * immediately. Check for U_FAILURE() on output or use with
200 * function chaining. (See User Guide for details.)
201 * @return dest
202 * @stable ICU 4.6
203 */
204 virtual void
205 labelToASCII_UTF8(StringPiece label, ByteSink &dest,
206 IDNAInfo &info, UErrorCode &errorCode) const;
207
208 /**
209 * Converts a single domain name label into its Unicode form for human-readable display.
210 * UTF-8 version of labelToUnicode(), same behavior.
211 *
212 * @param label Input domain name label
213 * @param dest Destination byte sink; Flush()ed if successful
214 * @param info Output container of IDNA processing details.
215 * @param errorCode Standard ICU error code. Its input value must
216 * pass the U_SUCCESS() test, or else the function returns
217 * immediately. Check for U_FAILURE() on output or use with
218 * function chaining. (See User Guide for details.)
219 * @return dest
220 * @stable ICU 4.6
221 */
222 virtual void
223 labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
224 IDNAInfo &info, UErrorCode &errorCode) const;
225
226 /**
227 * Converts a whole domain name into its ASCII form for DNS lookup.
228 * UTF-8 version of nameToASCII(), same behavior.
229 *
230 * @param name Input domain name
231 * @param dest Destination byte sink; Flush()ed if successful
232 * @param info Output container of IDNA processing details.
233 * @param errorCode Standard ICU error code. Its input value must
234 * pass the U_SUCCESS() test, or else the function returns
235 * immediately. Check for U_FAILURE() on output or use with
236 * function chaining. (See User Guide for details.)
237 * @return dest
238 * @stable ICU 4.6
239 */
240 virtual void
241 nameToASCII_UTF8(StringPiece name, ByteSink &dest,
242 IDNAInfo &info, UErrorCode &errorCode) const;
243
244 /**
245 * Converts a whole domain name into its Unicode form for human-readable display.
246 * UTF-8 version of nameToUnicode(), same behavior.
247 *
248 * @param name Input domain name
249 * @param dest Destination byte sink; Flush()ed if successful
250 * @param info Output container of IDNA processing details.
251 * @param errorCode Standard ICU error code. Its input value must
252 * pass the U_SUCCESS() test, or else the function returns
253 * immediately. Check for U_FAILURE() on output or use with
254 * function chaining. (See User Guide for details.)
255 * @return dest
256 * @stable ICU 4.6
257 */
258 virtual void
259 nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
260 IDNAInfo &info, UErrorCode &errorCode) const;
261};
262
263class UTS46;
264
265/**
266 * Output container for IDNA processing errors.
267 * The IDNAInfo class is not suitable for subclassing.
268 * @stable ICU 4.6
269 */
270class U_COMMON_API IDNAInfo : public UMemory {
271public:
272 /**
273 * Constructor for stack allocation.
274 * @stable ICU 4.6
275 */
276 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
277 /**
278 * Were there IDNA processing errors?
279 * @return TRUE if there were processing errors
280 * @stable ICU 4.6
281 */
282 UBool hasErrors() const { return errors!=0; }
283 /**
284 * Returns a bit set indicating IDNA processing errors.
285 * See UIDNA_ERROR_... constants in uidna.h.
286 * @return bit set of processing errors
287 * @stable ICU 4.6
288 */
289 uint32_t getErrors() const { return errors; }
290 /**
291 * Returns TRUE if transitional and nontransitional processing produce different results.
292 * This is the case when the input label or domain name contains
293 * one or more deviation characters outside a Punycode label (see UTS #46).
294 * <ul>
295 * <li>With nontransitional processing, such characters are
296 * copied to the destination string.
297 * <li>With transitional processing, such characters are
298 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
299 * </ul>
300 * @return TRUE if transitional and nontransitional processing produce different results
301 * @stable ICU 4.6
302 */
303 UBool isTransitionalDifferent() const { return isTransDiff; }
304
305private:
306 friend class UTS46;
307
308 IDNAInfo(const IDNAInfo &other); // no copying
309 IDNAInfo &operator=(const IDNAInfo &other); // no copying
310
311 void reset() {
312 errors=labelErrors=0;
313 isTransDiff=FALSE;
314 isBiDi=FALSE;
315 isOkBiDi=TRUE;
316 }
317
318 uint32_t errors, labelErrors;
319 UBool isTransDiff;
320 UBool isBiDi;
321 UBool isOkBiDi;
322};
323
324U_NAMESPACE_END
325
326#endif // UCONFIG_NO_IDNA
327
328#endif /* U_SHOW_CPLUSPLUS_API */
329
330#endif // __IDNA_H__
331