// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/

| 17 | #ifndef __IDNA_H__ | 
|---|
| 18 | #define __IDNA_H__ | 
|---|
| 19 |  | 
|---|
| 20 | /** | 
|---|
| 21 | * \file | 
|---|
| 22 | * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) | 
|---|
| 23 | */ | 
|---|
| 24 |  | 
|---|
| 25 | #include "unicode/utypes.h" | 
|---|
| 26 |  | 
|---|
| 27 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 28 |  | 
|---|
| 29 | #if !UCONFIG_NO_IDNA | 
|---|
| 30 |  | 
|---|
| 31 | #include "unicode/bytestream.h" | 
|---|
| 32 | #include "unicode/stringpiece.h" | 
|---|
| 33 | #include "unicode/uidna.h" | 
|---|
| 34 | #include "unicode/unistr.h" | 
|---|
| 35 |  | 
|---|
| 36 | U_NAMESPACE_BEGIN | 
|---|
| 37 |  | 
|---|
| 38 | class IDNAInfo; | 
|---|
| 39 |  | 
|---|
| 40 | /** | 
|---|
| 41 | * Abstract base class for IDNA processing. | 
|---|
| 42 | * See http://www.unicode.org/reports/tr46/ | 
|---|
| 43 | * and http://www.ietf.org/rfc/rfc3490.txt | 
|---|
| 44 | * | 
|---|
| 45 | * The IDNA class is not intended for public subclassing. | 
|---|
| 46 | * | 
|---|
| 47 | * This C++ API currently only implements UTS #46. | 
|---|
| 48 | * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) | 
|---|
| 49 | * and IDNA2003 (functions that do not use a service object). | 
|---|
| 50 | * @stable ICU 4.6 | 
|---|
| 51 | */ | 
|---|
| 52 | class U_COMMON_API IDNA : public UObject { | 
|---|
| 53 | public: | 
|---|
| 54 | /** | 
|---|
| 55 | * Destructor. | 
|---|
| 56 | * @stable ICU 4.6 | 
|---|
| 57 | */ | 
|---|
| 58 | ~IDNA(); | 
|---|
| 59 |  | 
|---|
| 60 | /** | 
|---|
| 61 | * Returns an IDNA instance which implements UTS #46. | 
|---|
| 62 | * Returns an unmodifiable instance, owned by the caller. | 
|---|
| 63 | * Cache it for multiple operations, and delete it when done. | 
|---|
| 64 | * The instance is thread-safe, that is, it can be used concurrently. | 
|---|
| 65 | * | 
|---|
| 66 | * UTS #46 defines Unicode IDNA Compatibility Processing, | 
|---|
| 67 | * updated to the latest version of Unicode and compatible with both | 
|---|
| 68 | * IDNA2003 and IDNA2008. | 
|---|
| 69 | * | 
|---|
| 70 | * The worker functions use transitional processing, including deviation mappings, | 
|---|
| 71 | * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE | 
|---|
| 72 | * is used in which case the deviation characters are passed through without change. | 
|---|
| 73 | * | 
|---|
| 74 | * Disallowed characters are mapped to U+FFFD. | 
|---|
| 75 | * | 
|---|
| 76 | * For available options see the uidna.h header. | 
|---|
| 77 | * Operations with the UTS #46 instance do not support the | 
|---|
| 78 | * UIDNA_ALLOW_UNASSIGNED option. | 
|---|
| 79 | * | 
|---|
| 80 | * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). | 
|---|
| 81 | * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than | 
|---|
| 82 | * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. | 
|---|
| 83 | * | 
|---|
| 84 | * @param options Bit set to modify the processing and error checking. | 
|---|
| 85 | *                See option bit set values in uidna.h. | 
|---|
| 86 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 87 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 88 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 89 | *                  function chaining. (See User Guide for details.) | 
|---|
| 90 | * @return the UTS #46 IDNA instance, if successful | 
|---|
| 91 | * @stable ICU 4.6 | 
|---|
| 92 | */ | 
|---|
| 93 | static IDNA * | 
|---|
| 94 | createUTS46Instance(uint32_t options, UErrorCode &errorCode); | 
|---|
| 95 |  | 
|---|
| 96 | /** | 
|---|
| 97 | * Converts a single domain name label into its ASCII form for DNS lookup. | 
|---|
| 98 | * If any processing step fails, then info.hasErrors() will be TRUE and | 
|---|
| 99 | * the result might not be an ASCII string. | 
|---|
| 100 | * The label might be modified according to the types of errors. | 
|---|
| 101 | * Labels with severe errors will be left in (or turned into) their Unicode form. | 
|---|
| 102 | * | 
|---|
| 103 | * The UErrorCode indicates an error only in exceptional cases, | 
|---|
| 104 | * such as a U_MEMORY_ALLOCATION_ERROR. | 
|---|
| 105 | * | 
|---|
| 106 | * @param label Input domain name label | 
|---|
| 107 | * @param dest Destination string object | 
|---|
| 108 | * @param info Output container of IDNA processing details. | 
|---|
| 109 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 110 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 111 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 112 | *                  function chaining. (See User Guide for details.) | 
|---|
| 113 | * @return dest | 
|---|
| 114 | * @stable ICU 4.6 | 
|---|
| 115 | */ | 
|---|
| 116 | virtual UnicodeString & | 
|---|
| 117 | labelToASCII(const UnicodeString &label, UnicodeString &dest, | 
|---|
| 118 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | 
|---|
| 119 |  | 
|---|
| 120 | /** | 
|---|
| 121 | * Converts a single domain name label into its Unicode form for human-readable display. | 
|---|
| 122 | * If any processing step fails, then info.hasErrors() will be TRUE. | 
|---|
| 123 | * The label might be modified according to the types of errors. | 
|---|
| 124 | * | 
|---|
| 125 | * The UErrorCode indicates an error only in exceptional cases, | 
|---|
| 126 | * such as a U_MEMORY_ALLOCATION_ERROR. | 
|---|
| 127 | * | 
|---|
| 128 | * @param label Input domain name label | 
|---|
| 129 | * @param dest Destination string object | 
|---|
| 130 | * @param info Output container of IDNA processing details. | 
|---|
| 131 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 132 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 133 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 134 | *                  function chaining. (See User Guide for details.) | 
|---|
| 135 | * @return dest | 
|---|
| 136 | * @stable ICU 4.6 | 
|---|
| 137 | */ | 
|---|
| 138 | virtual UnicodeString & | 
|---|
| 139 | labelToUnicode(const UnicodeString &label, UnicodeString &dest, | 
|---|
| 140 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | 
|---|
| 141 |  | 
|---|
| 142 | /** | 
|---|
| 143 | * Converts a whole domain name into its ASCII form for DNS lookup. | 
|---|
| 144 | * If any processing step fails, then info.hasErrors() will be TRUE and | 
|---|
| 145 | * the result might not be an ASCII string. | 
|---|
| 146 | * The domain name might be modified according to the types of errors. | 
|---|
| 147 | * Labels with severe errors will be left in (or turned into) their Unicode form. | 
|---|
| 148 | * | 
|---|
| 149 | * The UErrorCode indicates an error only in exceptional cases, | 
|---|
| 150 | * such as a U_MEMORY_ALLOCATION_ERROR. | 
|---|
| 151 | * | 
|---|
| 152 | * @param name Input domain name | 
|---|
| 153 | * @param dest Destination string object | 
|---|
| 154 | * @param info Output container of IDNA processing details. | 
|---|
| 155 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 156 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 157 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 158 | *                  function chaining. (See User Guide for details.) | 
|---|
| 159 | * @return dest | 
|---|
| 160 | * @stable ICU 4.6 | 
|---|
| 161 | */ | 
|---|
| 162 | virtual UnicodeString & | 
|---|
| 163 | nameToASCII(const UnicodeString &name, UnicodeString &dest, | 
|---|
| 164 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | 
|---|
| 165 |  | 
|---|
| 166 | /** | 
|---|
| 167 | * Converts a whole domain name into its Unicode form for human-readable display. | 
|---|
| 168 | * If any processing step fails, then info.hasErrors() will be TRUE. | 
|---|
| 169 | * The domain name might be modified according to the types of errors. | 
|---|
| 170 | * | 
|---|
| 171 | * The UErrorCode indicates an error only in exceptional cases, | 
|---|
| 172 | * such as a U_MEMORY_ALLOCATION_ERROR. | 
|---|
| 173 | * | 
|---|
| 174 | * @param name Input domain name | 
|---|
| 175 | * @param dest Destination string object | 
|---|
| 176 | * @param info Output container of IDNA processing details. | 
|---|
| 177 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 178 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 179 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 180 | *                  function chaining. (See User Guide for details.) | 
|---|
| 181 | * @return dest | 
|---|
| 182 | * @stable ICU 4.6 | 
|---|
| 183 | */ | 
|---|
| 184 | virtual UnicodeString & | 
|---|
| 185 | nameToUnicode(const UnicodeString &name, UnicodeString &dest, | 
|---|
| 186 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | 
|---|
| 187 |  | 
|---|
| 188 | // UTF-8 versions of the processing methods ---------------------------- *** | 
|---|
| 189 |  | 
|---|
| 190 | /** | 
|---|
| 191 | * Converts a single domain name label into its ASCII form for DNS lookup. | 
|---|
| 192 | * UTF-8 version of labelToASCII(), same behavior. | 
|---|
| 193 | * | 
|---|
| 194 | * @param label Input domain name label | 
|---|
| 195 | * @param dest Destination byte sink; Flush()ed if successful | 
|---|
| 196 | * @param info Output container of IDNA processing details. | 
|---|
| 197 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 198 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 199 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 200 | *                  function chaining. (See User Guide for details.) | 
|---|
| 201 | * @return dest | 
|---|
| 202 | * @stable ICU 4.6 | 
|---|
| 203 | */ | 
|---|
| 204 | virtual void | 
|---|
| 205 | labelToASCII_UTF8(StringPiece label, ByteSink &dest, | 
|---|
| 206 | IDNAInfo &info, UErrorCode &errorCode) const; | 
|---|
| 207 |  | 
|---|
| 208 | /** | 
|---|
| 209 | * Converts a single domain name label into its Unicode form for human-readable display. | 
|---|
| 210 | * UTF-8 version of labelToUnicode(), same behavior. | 
|---|
| 211 | * | 
|---|
| 212 | * @param label Input domain name label | 
|---|
| 213 | * @param dest Destination byte sink; Flush()ed if successful | 
|---|
| 214 | * @param info Output container of IDNA processing details. | 
|---|
| 215 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 216 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 217 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 218 | *                  function chaining. (See User Guide for details.) | 
|---|
| 219 | * @return dest | 
|---|
| 220 | * @stable ICU 4.6 | 
|---|
| 221 | */ | 
|---|
| 222 | virtual void | 
|---|
| 223 | labelToUnicodeUTF8(StringPiece label, ByteSink &dest, | 
|---|
| 224 | IDNAInfo &info, UErrorCode &errorCode) const; | 
|---|
| 225 |  | 
|---|
| 226 | /** | 
|---|
| 227 | * Converts a whole domain name into its ASCII form for DNS lookup. | 
|---|
| 228 | * UTF-8 version of nameToASCII(), same behavior. | 
|---|
| 229 | * | 
|---|
| 230 | * @param name Input domain name | 
|---|
| 231 | * @param dest Destination byte sink; Flush()ed if successful | 
|---|
| 232 | * @param info Output container of IDNA processing details. | 
|---|
| 233 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 234 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 235 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 236 | *                  function chaining. (See User Guide for details.) | 
|---|
| 237 | * @return dest | 
|---|
| 238 | * @stable ICU 4.6 | 
|---|
| 239 | */ | 
|---|
| 240 | virtual void | 
|---|
| 241 | nameToASCII_UTF8(StringPiece name, ByteSink &dest, | 
|---|
| 242 | IDNAInfo &info, UErrorCode &errorCode) const; | 
|---|
| 243 |  | 
|---|
| 244 | /** | 
|---|
| 245 | * Converts a whole domain name into its Unicode form for human-readable display. | 
|---|
| 246 | * UTF-8 version of nameToUnicode(), same behavior. | 
|---|
| 247 | * | 
|---|
| 248 | * @param name Input domain name | 
|---|
| 249 | * @param dest Destination byte sink; Flush()ed if successful | 
|---|
| 250 | * @param info Output container of IDNA processing details. | 
|---|
| 251 | * @param errorCode Standard ICU error code. Its input value must | 
|---|
| 252 | *                  pass the U_SUCCESS() test, or else the function returns | 
|---|
| 253 | *                  immediately. Check for U_FAILURE() on output or use with | 
|---|
| 254 | *                  function chaining. (See User Guide for details.) | 
|---|
| 255 | * @return dest | 
|---|
| 256 | * @stable ICU 4.6 | 
|---|
| 257 | */ | 
|---|
| 258 | virtual void | 
|---|
| 259 | nameToUnicodeUTF8(StringPiece name, ByteSink &dest, | 
|---|
| 260 | IDNAInfo &info, UErrorCode &errorCode) const; | 
|---|
| 261 | }; | 
|---|
| 262 |  | 
|---|
| 263 | class UTS46; | 
|---|
| 264 |  | 
|---|
| 265 | /** | 
|---|
| 266 | * Output container for IDNA processing errors. | 
|---|
| 267 | * The IDNAInfo class is not suitable for subclassing. | 
|---|
| 268 | * @stable ICU 4.6 | 
|---|
| 269 | */ | 
|---|
| 270 | class U_COMMON_API IDNAInfo : public UMemory { | 
|---|
| 271 | public: | 
|---|
| 272 | /** | 
|---|
| 273 | * Constructor for stack allocation. | 
|---|
| 274 | * @stable ICU 4.6 | 
|---|
| 275 | */ | 
|---|
| 276 | IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} | 
|---|
| 277 | /** | 
|---|
| 278 | * Were there IDNA processing errors? | 
|---|
| 279 | * @return TRUE if there were processing errors | 
|---|
| 280 | * @stable ICU 4.6 | 
|---|
| 281 | */ | 
|---|
| 282 | UBool hasErrors() const { return errors!=0; } | 
|---|
| 283 | /** | 
|---|
| 284 | * Returns a bit set indicating IDNA processing errors. | 
|---|
| 285 | * See UIDNA_ERROR_... constants in uidna.h. | 
|---|
| 286 | * @return bit set of processing errors | 
|---|
| 287 | * @stable ICU 4.6 | 
|---|
| 288 | */ | 
|---|
| 289 | uint32_t getErrors() const { return errors; } | 
|---|
| 290 | /** | 
|---|
| 291 | * Returns TRUE if transitional and nontransitional processing produce different results. | 
|---|
| 292 | * This is the case when the input label or domain name contains | 
|---|
| 293 | * one or more deviation characters outside a Punycode label (see UTS #46). | 
|---|
| 294 | * <ul> | 
|---|
| 295 | * <li>With nontransitional processing, such characters are | 
|---|
| 296 | * copied to the destination string. | 
|---|
| 297 | * <li>With transitional processing, such characters are | 
|---|
| 298 | * mapped (sharp s/sigma) or removed (joiner/nonjoiner). | 
|---|
| 299 | * </ul> | 
|---|
| 300 | * @return TRUE if transitional and nontransitional processing produce different results | 
|---|
| 301 | * @stable ICU 4.6 | 
|---|
| 302 | */ | 
|---|
| 303 | UBool isTransitionalDifferent() const { return isTransDiff; } | 
|---|
| 304 |  | 
|---|
| 305 | private: | 
|---|
| 306 | friend class UTS46; | 
|---|
| 307 |  | 
|---|
| 308 | IDNAInfo(const IDNAInfo &other);  // no copying | 
|---|
| 309 | IDNAInfo &operator=(const IDNAInfo &other);  // no copying | 
|---|
| 310 |  | 
|---|
| 311 | void reset() { | 
|---|
| 312 | errors=labelErrors=0; | 
|---|
| 313 | isTransDiff=FALSE; | 
|---|
| 314 | isBiDi=FALSE; | 
|---|
| 315 | isOkBiDi=TRUE; | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | uint32_t errors, labelErrors; | 
|---|
| 319 | UBool isTransDiff; | 
|---|
| 320 | UBool isBiDi; | 
|---|
| 321 | UBool isOkBiDi; | 
|---|
| 322 | }; | 
|---|
| 323 |  | 
|---|
| 324 | U_NAMESPACE_END | 
|---|
| 325 |  | 
|---|
| 326 | #endif  // UCONFIG_NO_IDNA | 
|---|
| 327 |  | 
|---|
| 328 | #endif /* U_SHOW_CPLUSPLUS_API */ | 
|---|
| 329 |  | 
|---|
| 330 | #endif  // __IDNA_H__ | 
|---|
| 331 |  | 
|---|