| 1 | /*************************************************************************** |
| 2 | * _ _ ____ _ |
| 3 | * Project ___| | | | _ \| | |
| 4 | * / __| | | | |_) | | |
| 5 | * | (__| |_| | _ <| |___ |
| 6 | * \___|\___/|_| \_\_____| |
| 7 | * |
| 8 | * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. |
| 9 | * |
| 10 | * This software is licensed as described in the file COPYING, which |
| 11 | * you should have received as part of this distribution. The terms |
| 12 | * are also available at https://curl.se/docs/copyright.html. |
| 13 | * |
| 14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
| 15 | * copies of the Software, and permit persons to whom the Software is |
| 16 | * furnished to do so, under the terms of the COPYING file. |
| 17 | * |
| 18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| 19 | * KIND, either express or implied. |
| 20 | * |
| 21 | * SPDX-License-Identifier: curl |
| 22 | * |
| 23 | ***************************************************************************/ |
| 24 | |
| 25 | /* |
| 26 | * IDN conversions |
| 27 | */ |
| 28 | |
| 29 | #include "curl_setup.h" |
| 30 | #include "urldata.h" |
| 31 | #include "idn.h" |
| 32 | #include "sendf.h" |
| 33 | #include "curl_multibyte.h" |
| 34 | #include "warnless.h" |
| 35 | |
| 36 | #ifdef USE_LIBIDN2 |
| 37 | #include <idn2.h> |
| 38 | |
/*
 * IDN2_LOOKUP() wraps the libidn2 lookup call used for host name conversion.
 * Unicode Windows builds call idn2_lookup_u8(), all other builds call
 * idn2_lookup_ul().
 *
 * Every macro argument is parenthesized so that the casts apply to the whole
 * argument expression and not just to its first operand.
 */
#if defined(WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
| 46 | #endif /* USE_LIBIDN2 */ |
| 47 | |
| 48 | /* The last 3 #include files should be in this order */ |
| 49 | #include "curl_printf.h" |
| 50 | #include "curl_memory.h" |
| 51 | #include "memdebug.h" |
| 52 | |
| 53 | #ifdef USE_WIN32_IDN |
| 54 | /* using Windows kernel32 and normaliz libraries. */ |
| 55 | |
| 56 | #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600 |
| 57 | WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags, |
| 58 | const WCHAR *lpUnicodeCharStr, |
| 59 | int cchUnicodeChar, |
| 60 | WCHAR *lpASCIICharStr, |
| 61 | int cchASCIIChar); |
| 62 | WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags, |
| 63 | const WCHAR *lpASCIICharStr, |
| 64 | int cchASCIIChar, |
| 65 | WCHAR *lpUnicodeCharStr, |
| 66 | int cchUnicodeChar); |
| 67 | #endif |
| 68 | |
| 69 | #define IDN_MAX_LENGTH 255 |
| 70 | |
| 71 | static CURLcode win32_idn_to_ascii(const char *in, char **out) |
| 72 | { |
| 73 | wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); |
| 74 | *out = NULL; |
| 75 | if(in_w) { |
| 76 | wchar_t punycode[IDN_MAX_LENGTH]; |
| 77 | int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode, |
| 78 | IDN_MAX_LENGTH); |
| 79 | curlx_unicodefree(in_w); |
| 80 | if(chars) { |
| 81 | char *mstr = curlx_convert_wchar_to_UTF8(punycode); |
| 82 | if(mstr) { |
| 83 | *out = strdup(mstr); |
| 84 | curlx_unicodefree(mstr); |
| 85 | if(!*out) |
| 86 | return CURLE_OUT_OF_MEMORY; |
| 87 | } |
| 88 | else |
| 89 | return CURLE_OUT_OF_MEMORY; |
| 90 | } |
| 91 | else |
| 92 | return CURLE_URL_MALFORMAT; |
| 93 | } |
| 94 | else |
| 95 | return CURLE_URL_MALFORMAT; |
| 96 | |
| 97 | return CURLE_OK; |
| 98 | } |
| 99 | |
| 100 | static CURLcode win32_ascii_to_idn(const char *in, char **output) |
| 101 | { |
| 102 | char *out = NULL; |
| 103 | |
| 104 | wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); |
| 105 | if(in_w) { |
| 106 | WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */ |
| 107 | int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn, |
| 108 | IDN_MAX_LENGTH); |
| 109 | if(chars) { |
| 110 | /* 'chars' is "the number of characters retrieved" */ |
| 111 | char *mstr = curlx_convert_wchar_to_UTF8(idn); |
| 112 | if(mstr) { |
| 113 | out = strdup(mstr); |
| 114 | curlx_unicodefree(mstr); |
| 115 | if(!out) |
| 116 | return CURLE_OUT_OF_MEMORY; |
| 117 | } |
| 118 | } |
| 119 | else |
| 120 | return CURLE_URL_MALFORMAT; |
| 121 | } |
| 122 | else |
| 123 | return CURLE_URL_MALFORMAT; |
| 124 | *output = out; |
| 125 | return CURLE_OK; |
| 126 | } |
| 127 | |
| 128 | #endif /* USE_WIN32_IDN */ |
| 129 | |
/*
 * Helpers for IDNA conversions.
 */

/*
 * Curl_is_ASCII_name() returns TRUE when no byte of 'hostname' has its high
 * bit (0x80) set, FALSE otherwise. A NULL pointer is treated as ASCII.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  const unsigned char *p;

  if(!hostname)
    /* bad input, consider it ASCII! */
    return TRUE;

  /* walk the name through an UNSIGNED pointer */
  for(p = (const unsigned char *)hostname; *p; p++) {
    if(*p & 0x80)
      return FALSE;
  }
  return TRUE;
}
| 147 | |
| 148 | #ifdef USE_IDN |
| 149 | /* |
| 150 | * Curl_idn_decode() returns an allocated IDN decoded string if it was |
| 151 | * possible. NULL on error. |
| 152 | * |
| 153 | * CURLE_URL_MALFORMAT - the host name could not be converted |
| 154 | * CURLE_OUT_OF_MEMORY - memory problem |
| 155 | * |
| 156 | */ |
| 157 | static CURLcode idn_decode(const char *input, char **output) |
| 158 | { |
| 159 | char *decoded = NULL; |
| 160 | CURLcode result = CURLE_OK; |
| 161 | #ifdef USE_LIBIDN2 |
| 162 | if(idn2_check_version(IDN2_VERSION)) { |
| 163 | int flags = IDN2_NFC_INPUT |
| 164 | #if IDN2_VERSION_NUMBER >= 0x00140000 |
| 165 | /* IDN2_NFC_INPUT: Normalize input string using normalization form C. |
| 166 | IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional |
| 167 | processing. */ |
| 168 | | IDN2_NONTRANSITIONAL |
| 169 | #endif |
| 170 | ; |
| 171 | int rc = IDN2_LOOKUP(input, &decoded, flags); |
| 172 | if(rc != IDN2_OK) |
| 173 | /* fallback to TR46 Transitional mode for better IDNA2003 |
| 174 | compatibility */ |
| 175 | rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL); |
| 176 | if(rc != IDN2_OK) |
| 177 | result = CURLE_URL_MALFORMAT; |
| 178 | } |
| 179 | else |
| 180 | /* a too old libidn2 version */ |
| 181 | result = CURLE_NOT_BUILT_IN; |
| 182 | #elif defined(USE_WIN32_IDN) |
| 183 | result = win32_idn_to_ascii(input, &decoded); |
| 184 | #endif |
| 185 | if(!result) |
| 186 | *output = decoded; |
| 187 | return result; |
| 188 | } |
| 189 | |
| 190 | static CURLcode idn_encode(const char *puny, char **output) |
| 191 | { |
| 192 | char *enc = NULL; |
| 193 | #ifdef USE_LIBIDN2 |
| 194 | int rc = idn2_to_unicode_8z8z(puny, &enc, 0); |
| 195 | if(rc != IDNA_SUCCESS) |
| 196 | return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; |
| 197 | #elif defined(USE_WIN32_IDN) |
| 198 | CURLcode result = win32_ascii_to_idn(puny, &enc); |
| 199 | if(result) |
| 200 | return result; |
| 201 | #endif |
| 202 | *output = enc; |
| 203 | return CURLE_OK; |
| 204 | } |
| 205 | |
| 206 | CURLcode Curl_idn_decode(const char *input, char **output) |
| 207 | { |
| 208 | char *d = NULL; |
| 209 | CURLcode result = idn_decode(input, &d); |
| 210 | #ifdef USE_LIBIDN2 |
| 211 | if(!result) { |
| 212 | char *c = strdup(d); |
| 213 | idn2_free(d); |
| 214 | if(c) |
| 215 | d = c; |
| 216 | else |
| 217 | result = CURLE_OUT_OF_MEMORY; |
| 218 | } |
| 219 | #endif |
| 220 | if(!result) |
| 221 | *output = d; |
| 222 | return result; |
| 223 | } |
| 224 | |
| 225 | CURLcode Curl_idn_encode(const char *puny, char **output) |
| 226 | { |
| 227 | char *d = NULL; |
| 228 | CURLcode result = idn_encode(puny, &d); |
| 229 | #ifdef USE_LIBIDN2 |
| 230 | if(!result) { |
| 231 | char *c = strdup(d); |
| 232 | idn2_free(d); |
| 233 | if(c) |
| 234 | d = c; |
| 235 | else |
| 236 | result = CURLE_OUT_OF_MEMORY; |
| 237 | } |
| 238 | #endif |
| 239 | if(!result) |
| 240 | *output = d; |
| 241 | return result; |
| 242 | } |
| 243 | |
| 244 | /* |
| 245 | * Frees data allocated by idnconvert_hostname() |
| 246 | */ |
| 247 | void Curl_free_idnconverted_hostname(struct hostname *host) |
| 248 | { |
| 249 | if(host->encalloc) { |
| 250 | /* must be freed with idn2_free() if allocated by libidn */ |
| 251 | Curl_idn_free(host->encalloc); |
| 252 | host->encalloc = NULL; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | #endif /* USE_IDN */ |
| 257 | |
| 258 | /* |
| 259 | * Perform any necessary IDN conversion of hostname |
| 260 | */ |
| 261 | CURLcode Curl_idnconvert_hostname(struct hostname *host) |
| 262 | { |
| 263 | /* set the name we use to display the host name */ |
| 264 | host->dispname = host->name; |
| 265 | |
| 266 | #ifdef USE_IDN |
| 267 | /* Check name for non-ASCII and convert hostname if we can */ |
| 268 | if(!Curl_is_ASCII_name(host->name)) { |
| 269 | char *decoded; |
| 270 | CURLcode result = idn_decode(host->name, &decoded); |
| 271 | if(!result) { |
| 272 | if(!*decoded) { |
| 273 | /* zero length is a bad host name */ |
| 274 | Curl_idn_free(decoded); |
| 275 | return CURLE_URL_MALFORMAT; |
| 276 | } |
| 277 | /* successful */ |
| 278 | host->encalloc = decoded; |
| 279 | /* change the name pointer to point to the encoded hostname */ |
| 280 | host->name = host->encalloc; |
| 281 | } |
| 282 | else |
| 283 | return result; |
| 284 | } |
| 285 | #endif |
| 286 | return CURLE_OK; |
| 287 | } |
| 288 | |