| 1 | // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
| 2 | // for details. All rights reserved. Use of this source code is governed by a |
| 3 | // BSD-style license that can be found in the LICENSE file. |
| 4 | |
| 5 | #include "vm/uri.h" |
| 6 | |
| 7 | #include "vm/zone.h" |
| 8 | |
| 9 | namespace dart { |
| 10 | |
// Returns true if |value| is an ASCII letter, an ASCII digit, or one of
// '-', '.', '_', '~'. These are the characters that never need to be
// percent-escaped (the "unreserved" set of RFC 3986).
static bool IsUnreservedChar(intptr_t value) {
  if (value >= '0' && value <= '9') {
    return true;
  }
  if (value >= 'A' && value <= 'Z') {
    return true;
  }
  if (value >= 'a' && value <= 'z') {
    return true;
  }
  switch (value) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
| 16 | |
// Returns true if |value| is one of the URI delimiter characters
//   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
// which are copied through unescaped when they appear in a component.
static bool IsDelimiter(intptr_t value) {
  static const char kDelimiters[] = {':', '/', '?', '#',  '[', ']',
                                     '@', '!', '$', '&',  '\'', '(',
                                     ')', '*', '+', ',',  ';', '='};
  const size_t count = sizeof(kDelimiters) / sizeof(kDelimiters[0]);
  for (size_t i = 0; i < count; i++) {
    if (value == kDelimiters[i]) {
      return true;
    }
  }
  return false;
}
| 42 | |
// Returns true if |value| is an ASCII hexadecimal digit (0-9, a-f, A-F).
static bool IsHexDigit(char value) {
  if (value >= '0' && value <= '9') {
    return true;
  }
  const bool is_lower_hex = (value >= 'a' && value <= 'f');
  const bool is_upper_hex = (value >= 'A' && value <= 'F');
  return is_lower_hex || is_upper_hex;
}
| 47 | |
| 48 | static int HexValue(char digit) { |
| 49 | if ((digit >= '0' && digit <= '9')) { |
| 50 | return digit - '0'; |
| 51 | } |
| 52 | if ((digit >= 'A' && digit <= 'F')) { |
| 53 | return digit - 'A' + 10; |
| 54 | } |
| 55 | if ((digit >= 'a' && digit <= 'f')) { |
| 56 | return digit - 'a' + 10; |
| 57 | } |
| 58 | UNREACHABLE(); |
| 59 | return 0; |
| 60 | } |
| 61 | |
| 62 | static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) { |
| 63 | if (pos + 2 >= len) { |
| 64 | // Not enough room for a valid escape sequence. |
| 65 | return -1; |
| 66 | } |
| 67 | if (str[pos] != '%') { |
| 68 | // Escape sequences start with '%'. |
| 69 | return -1; |
| 70 | } |
| 71 | |
| 72 | char digit1 = str[pos + 1]; |
| 73 | char digit2 = str[pos + 2]; |
| 74 | if (!IsHexDigit(digit1) || !IsHexDigit(digit2)) { |
| 75 | // Invalid escape sequence. Ignore it. |
| 76 | return -1; |
| 77 | } |
| 78 | return HexValue(digit1) * 16 + HexValue(digit2); |
| 79 | } |
| 80 | |
| 81 | static char* NormalizeEscapes(const char* str, intptr_t len) { |
| 82 | // Allocate the buffer. |
| 83 | Zone* zone = ThreadState::Current()->zone(); |
| 84 | // We multiply len by three because a percent-escape sequence is |
| 85 | // three characters long (e.g. ' ' -> '%20). +1 for '\0'. We could |
| 86 | // take two passes through the string and avoid the excess |
| 87 | // allocation, but it's zone-memory so it doesn't seem necessary. |
| 88 | char* buffer = zone->Alloc<char>(len * 3 + 1); |
| 89 | |
| 90 | // Copy the string, normalizing as we go. |
| 91 | intptr_t buffer_pos = 0; |
| 92 | intptr_t pos = 0; |
| 93 | while (pos < len) { |
| 94 | int escaped_value = GetEscapedValue(str, pos, len); |
| 95 | if (escaped_value >= 0) { |
| 96 | // If one of the special "unreserved" characters has been |
| 97 | // escaped, revert the escaping. Otherwise preserve the |
| 98 | // escaping. |
| 99 | if (IsUnreservedChar(escaped_value)) { |
| 100 | buffer[buffer_pos] = escaped_value; |
| 101 | buffer_pos++; |
| 102 | } else { |
| 103 | Utils::SNPrint(buffer + buffer_pos, 4, "%%%02X" , escaped_value); |
| 104 | buffer_pos += 3; |
| 105 | } |
| 106 | pos += 3; |
| 107 | } else { |
| 108 | char c = str[pos]; |
| 109 | // If a delimiter or unreserved character is currently not |
| 110 | // escaped, preserve that. If there is a busted %-sequence in |
| 111 | // the input, preserve that too. |
| 112 | if (c == '%' || IsDelimiter(c) || IsUnreservedChar(c)) { |
| 113 | buffer[buffer_pos] = c; |
| 114 | buffer_pos++; |
| 115 | } else { |
| 116 | // Escape funky characters. |
| 117 | Utils::SNPrint(buffer + buffer_pos, 4, "%%%02X" , c); |
| 118 | buffer_pos += 3; |
| 119 | } |
| 120 | pos++; |
| 121 | } |
| 122 | } |
| 123 | buffer[buffer_pos] = '\0'; |
| 124 | return buffer; |
| 125 | } |
| 126 | |
| 127 | // Lower-case a string in place. |
| 128 | static void StringLower(char* str) { |
| 129 | const intptr_t len = strlen(str); |
| 130 | intptr_t i = 0; |
| 131 | while (i < len) { |
| 132 | int escaped_value = GetEscapedValue(str, i, len); |
| 133 | if (escaped_value >= 0) { |
| 134 | // Don't lowercase escape sequences. |
| 135 | i += 3; |
| 136 | } else { |
| 137 | // I don't use tolower() because I don't want the locale |
| 138 | // transforming any non-acii characters. |
| 139 | char c = str[i]; |
| 140 | if (c >= 'A' && c <= 'Z') { |
| 141 | str[i] = c + ('a' - 'A'); |
| 142 | } |
| 143 | i++; |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | static void ClearParsedUri(ParsedUri* parsed_uri) { |
| 149 | parsed_uri->scheme = NULL; |
| 150 | parsed_uri->userinfo = NULL; |
| 151 | parsed_uri->host = NULL; |
| 152 | parsed_uri->port = NULL; |
| 153 | parsed_uri->path = NULL; |
| 154 | parsed_uri->query = NULL; |
| 155 | parsed_uri->fragment = NULL; |
| 156 | } |
| 157 | |
| 158 | static intptr_t ParseAuthority(const char* authority, ParsedUri* parsed_uri) { |
| 159 | Zone* zone = ThreadState::Current()->zone(); |
| 160 | const char* current = authority; |
| 161 | intptr_t len = 0; |
| 162 | |
| 163 | size_t userinfo_len = strcspn(current, "@/" ); |
| 164 | if (current[userinfo_len] == '@') { |
| 165 | // The '@' character follows the optional userinfo string. |
| 166 | parsed_uri->userinfo = NormalizeEscapes(current, userinfo_len); |
| 167 | current += userinfo_len + 1; |
| 168 | len += userinfo_len + 1; |
| 169 | } else { |
| 170 | parsed_uri->userinfo = NULL; |
| 171 | } |
| 172 | |
| 173 | size_t host_len = strcspn(current, ":/" ); |
| 174 | char* host = NormalizeEscapes(current, host_len); |
| 175 | StringLower(host); |
| 176 | parsed_uri->host = host; |
| 177 | len += host_len; |
| 178 | |
| 179 | if (current[host_len] == ':') { |
| 180 | // The ':' character precedes the optional port string. |
| 181 | const char* port_start = current + host_len + 1; // +1 for ':' |
| 182 | size_t port_len = strcspn(port_start, "/" ); |
| 183 | parsed_uri->port = zone->MakeCopyOfStringN(port_start, port_len); |
| 184 | len += 1 + port_len; // +1 for ':' |
| 185 | } else { |
| 186 | parsed_uri->port = NULL; |
| 187 | } |
| 188 | return len; |
| 189 | } |
| 190 | |
| 191 | // Performs a simple parse of a uri into its components. |
| 192 | // See RFC 3986 Section 3: Syntax. |
| 193 | bool ParseUri(const char* uri, ParsedUri* parsed_uri) { |
| 194 | Zone* zone = ThreadState::Current()->zone(); |
| 195 | |
| 196 | // The first ':' separates the scheme from the rest of the uri. If |
| 197 | // a ':' occurs after the first '/' it doesn't count. |
| 198 | size_t scheme_len = strcspn(uri, ":/" ); |
| 199 | const char* rest = uri; |
| 200 | if (uri[scheme_len] == ':') { |
| 201 | char* scheme = zone->MakeCopyOfStringN(uri, scheme_len); |
| 202 | StringLower(scheme); |
| 203 | parsed_uri->scheme = scheme; |
| 204 | rest = uri + scheme_len + 1; |
| 205 | } else { |
| 206 | parsed_uri->scheme = NULL; |
| 207 | } |
| 208 | |
| 209 | // The first '#' separates the optional fragment |
| 210 | const char* hash_pos = rest + strcspn(rest, "#" ); |
| 211 | if (*hash_pos == '#') { |
| 212 | // There is a fragment part. |
| 213 | const char* fragment_start = hash_pos + 1; |
| 214 | parsed_uri->fragment = |
| 215 | NormalizeEscapes(fragment_start, strlen(fragment_start)); |
| 216 | } else { |
| 217 | parsed_uri->fragment = NULL; |
| 218 | } |
| 219 | |
| 220 | // The first '?' or '#' separates the hierarchical part from the |
| 221 | // optional query. |
| 222 | const char* question_pos = rest + strcspn(rest, "?#" ); |
| 223 | if (*question_pos == '?') { |
| 224 | // There is a query part. |
| 225 | const char* query_start = question_pos + 1; |
| 226 | parsed_uri->query = NormalizeEscapes(query_start, (hash_pos - query_start)); |
| 227 | } else { |
| 228 | parsed_uri->query = NULL; |
| 229 | } |
| 230 | |
| 231 | const char* path_start = rest; |
| 232 | if (rest[0] == '/' && rest[1] == '/') { |
| 233 | // There is an authority part. |
| 234 | const char* authority_start = rest + 2; // 2 for '//'. |
| 235 | |
| 236 | intptr_t authority_len = ParseAuthority(authority_start, parsed_uri); |
| 237 | if (authority_len < 0) { |
| 238 | ClearParsedUri(parsed_uri); |
| 239 | return false; |
| 240 | } |
| 241 | path_start = authority_start + authority_len; |
| 242 | } else { |
| 243 | parsed_uri->userinfo = NULL; |
| 244 | parsed_uri->host = NULL; |
| 245 | parsed_uri->port = NULL; |
| 246 | } |
| 247 | |
| 248 | // The path is the substring between the authority and the query. |
| 249 | parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start)); |
| 250 | return true; |
| 251 | } |
| 252 | |
| 253 | static char* RemoveLastSegment(char* current, char* base) { |
| 254 | if (current == base) { |
| 255 | return current; |
| 256 | } |
| 257 | ASSERT(current > base); |
| 258 | for (current--; current > base; current--) { |
| 259 | if (*current == '/') { |
| 260 | // We have found the beginning of the last segment. |
| 261 | return current; |
| 262 | } |
| 263 | } |
| 264 | ASSERT(current == base); |
| 265 | return current; |
| 266 | } |
| 267 | |
// Returns the length of the first path segment of |input|. A leading '/'
// counts as part of the segment; the '/' that ends it does not.
static intptr_t SegmentLength(const char* input) {
  const intptr_t leading_slash = (input[0] == '/') ? 1 : 0;
  const size_t body_len = strcspn(input + leading_slash, "/");
  return leading_slash + static_cast<intptr_t>(body_len);
}
| 280 | |
| 281 | // See RFC 3986 Section 5.2.4: Remove Dot Segments. |
| 282 | static const char* RemoveDotSegments(const char* path) { |
| 283 | const char* input = path; |
| 284 | |
| 285 | // The output path will always be less than or equal to the size of |
| 286 | // the input path. |
| 287 | Zone* zone = ThreadState::Current()->zone(); |
| 288 | char* buffer = zone->Alloc<char>(strlen(path) + 1); // +1 for '\0' |
| 289 | char* output = buffer; |
| 290 | |
| 291 | while (*input != '\0') { |
| 292 | if (strncmp("../" , input, 3) == 0) { |
| 293 | // Discard initial "../" from the input. It's junk. |
| 294 | input += 3; |
| 295 | |
| 296 | } else if (strncmp("./" , input, 3) == 0) { |
| 297 | // Discard initial "./" from the input. It's junk. |
| 298 | input += 2; |
| 299 | |
| 300 | } else if (strncmp("/./" , input, 3) == 0) { |
| 301 | // Advance past the "/." part of the input. |
| 302 | input += 2; |
| 303 | |
| 304 | } else if (strcmp("/." , input) == 0) { |
| 305 | // Pretend the input just contains a "/". |
| 306 | input = "/" ; |
| 307 | |
| 308 | } else if (strncmp("/../" , input, 4) == 0) { |
| 309 | // Advance past the "/.." part of the input and remove one |
| 310 | // segment from the output. |
| 311 | input += 3; |
| 312 | output = RemoveLastSegment(output, buffer); |
| 313 | |
| 314 | } else if (strcmp("/.." , input) == 0) { |
| 315 | // Pretend the input contains a "/" and remove one segment from |
| 316 | // the output. |
| 317 | input = "/" ; |
| 318 | output = RemoveLastSegment(output, buffer); |
| 319 | |
| 320 | } else if (strcmp(".." , input) == 0) { |
| 321 | // The input has been reduced to nothing useful. |
| 322 | input += 2; |
| 323 | |
| 324 | } else if (strcmp("." , input) == 0) { |
| 325 | // The input has been reduced to nothing useful. |
| 326 | input += 1; |
| 327 | |
| 328 | } else { |
| 329 | intptr_t segment_len = SegmentLength(input); |
| 330 | if (input[0] != '/' && output != buffer) { |
| 331 | *output = '/'; |
| 332 | output++; |
| 333 | } |
| 334 | strncpy(output, input, segment_len); |
| 335 | output += segment_len; |
| 336 | input += segment_len; |
| 337 | } |
| 338 | } |
| 339 | *output = '\0'; |
| 340 | return buffer; |
| 341 | } |
| 342 | |
| 343 | // See RFC 3986 Section 5.2.3: Merge Paths. |
| 344 | static const char* MergePaths(const char* base_path, const char* ref_path) { |
| 345 | Zone* zone = ThreadState::Current()->zone(); |
| 346 | if (base_path[0] == '\0') { |
| 347 | // If the base_path is empty, we prepend '/'. |
| 348 | return zone->PrintToString("/%s" , ref_path); |
| 349 | } |
| 350 | |
| 351 | // We need to find the last '/' in base_path. |
| 352 | const char* last_slash = strrchr(base_path, '/'); |
| 353 | if (last_slash == NULL) { |
| 354 | // There is no slash in the base_path. Return the ref_path unchanged. |
| 355 | return ref_path; |
| 356 | } |
| 357 | |
| 358 | // We found a '/' in the base_path. Cut off everything after it and |
| 359 | // add the ref_path. |
| 360 | intptr_t truncated_base_len = last_slash - base_path; |
| 361 | intptr_t ref_path_len = strlen(ref_path); |
| 362 | intptr_t len = truncated_base_len + ref_path_len + 1; // +1 for '/' |
| 363 | char* buffer = zone->Alloc<char>(len + 1); // +1 for '\0' |
| 364 | |
| 365 | // Copy truncated base. |
| 366 | strncpy(buffer, base_path, truncated_base_len); |
| 367 | |
| 368 | // Add a slash. |
| 369 | buffer[truncated_base_len] = '/'; |
| 370 | |
| 371 | // Copy the ref_path. |
| 372 | strncpy((buffer + truncated_base_len + 1), ref_path, ref_path_len + 1); |
| 373 | |
| 374 | return buffer; |
| 375 | } |
| 376 | |
| 377 | static char* BuildUri(const ParsedUri& uri) { |
| 378 | Zone* zone = ThreadState::Current()->zone(); |
| 379 | ASSERT(uri.path != NULL); |
| 380 | |
| 381 | const char* fragment = uri.fragment == NULL ? "" : uri.fragment; |
| 382 | const char* fragment_separator = uri.fragment == NULL ? "" : "#" ; |
| 383 | const char* query = uri.query == NULL ? "" : uri.query; |
| 384 | const char* query_separator = uri.query == NULL ? "" : "?" ; |
| 385 | |
| 386 | // If there is no scheme for this uri, just build a relative uri of |
| 387 | // the form: "path[?query][#fragment]". This occurs when we resolve |
| 388 | // relative urls inside a "dart:" library. |
| 389 | if (uri.scheme == NULL) { |
| 390 | ASSERT(uri.userinfo == NULL && uri.host == NULL && uri.port == NULL); |
| 391 | return zone->PrintToString("%s%s%s%s%s" , uri.path, query_separator, query, |
| 392 | fragment_separator, fragment); |
| 393 | } |
| 394 | |
| 395 | // Uri with no authority: "scheme:path[?query][#fragment]" |
| 396 | if (uri.host == NULL) { |
| 397 | ASSERT(uri.userinfo == NULL && uri.port == NULL); |
| 398 | return zone->PrintToString("%s:%s%s%s%s%s" , uri.scheme, uri.path, |
| 399 | query_separator, query, fragment_separator, |
| 400 | fragment); |
| 401 | } |
| 402 | |
| 403 | const char* user = uri.userinfo == NULL ? "" : uri.userinfo; |
| 404 | const char* user_separator = uri.userinfo == NULL ? "" : "@" ; |
| 405 | const char* port = uri.port == NULL ? "" : uri.port; |
| 406 | const char* port_separator = uri.port == NULL ? "" : ":" ; |
| 407 | |
| 408 | // If the path doesn't start with a '/', add one. We need it to |
| 409 | // separate the path from the authority. |
| 410 | const char* path_separator = |
| 411 | ((uri.path[0] == '\0' || uri.path[0] == '/') ? "" : "/" ); |
| 412 | |
| 413 | // Uri with authority: |
| 414 | // "scheme://[userinfo@]host[:port][/]path[?query][#fragment]" |
| 415 | return zone->PrintToString( |
| 416 | "%s://%s%s%s%s%s%s%s%s%s%s%s" , // There is *nothing* wrong with this. |
| 417 | uri.scheme, user, user_separator, uri.host, port_separator, port, |
| 418 | path_separator, uri.path, query_separator, query, fragment_separator, |
| 419 | fragment); |
| 420 | } |
| 421 | |
| 422 | // See RFC 3986 Section 5: Reference Resolution |
| 423 | bool ResolveUri(const char* ref_uri, |
| 424 | const char* base_uri, |
| 425 | const char** target_uri) { |
| 426 | // Parse the reference uri. |
| 427 | ParsedUri ref; |
| 428 | if (!ParseUri(ref_uri, &ref)) { |
| 429 | *target_uri = NULL; |
| 430 | return false; |
| 431 | } |
| 432 | |
| 433 | ParsedUri target; |
| 434 | if (ref.scheme != NULL) { |
| 435 | if (strcmp(ref.scheme, "dart" ) == 0) { |
| 436 | Zone* zone = ThreadState::Current()->zone(); |
| 437 | *target_uri = zone->MakeCopyOfString(ref_uri); |
| 438 | return true; |
| 439 | } |
| 440 | |
| 441 | // When the ref_uri specifies a scheme, the base_uri is ignored. |
| 442 | target.scheme = ref.scheme; |
| 443 | target.userinfo = ref.userinfo; |
| 444 | target.host = ref.host; |
| 445 | target.port = ref.port; |
| 446 | target.path = RemoveDotSegments(ref.path); |
| 447 | target.query = ref.query; |
| 448 | target.fragment = ref.fragment; |
| 449 | *target_uri = BuildUri(target); |
| 450 | return true; |
| 451 | } |
| 452 | |
| 453 | // Parse the base uri. |
| 454 | ParsedUri base; |
| 455 | if (!ParseUri(base_uri, &base)) { |
| 456 | *target_uri = NULL; |
| 457 | return false; |
| 458 | } |
| 459 | |
| 460 | if ((base.scheme != NULL) && strcmp(base.scheme, "dart" ) == 0) { |
| 461 | Zone* zone = ThreadState::Current()->zone(); |
| 462 | *target_uri = zone->MakeCopyOfString(ref_uri); |
| 463 | return true; |
| 464 | } |
| 465 | |
| 466 | if (ref.host != NULL) { |
| 467 | // When the ref_uri specifies an authority, we only use the base scheme. |
| 468 | target.scheme = base.scheme; |
| 469 | target.userinfo = ref.userinfo; |
| 470 | target.host = ref.host; |
| 471 | target.port = ref.port; |
| 472 | target.path = RemoveDotSegments(ref.path); |
| 473 | target.query = ref.query; |
| 474 | target.fragment = ref.fragment; |
| 475 | *target_uri = BuildUri(target); |
| 476 | return true; |
| 477 | } |
| 478 | |
| 479 | if (ref.path[0] == '\0') { |
| 480 | // Empty path. Use most parts of base_uri. |
| 481 | target.scheme = base.scheme; |
| 482 | target.userinfo = base.userinfo; |
| 483 | target.host = base.host; |
| 484 | target.port = base.port; |
| 485 | target.path = base.path; |
| 486 | target.query = ((ref.query == NULL) ? base.query : ref.query); |
| 487 | target.fragment = ref.fragment; |
| 488 | *target_uri = BuildUri(target); |
| 489 | return true; |
| 490 | |
| 491 | } else if (ref.path[0] == '/') { |
| 492 | // Absolute path. ref_path wins. |
| 493 | target.scheme = base.scheme; |
| 494 | target.userinfo = base.userinfo; |
| 495 | target.host = base.host; |
| 496 | target.port = base.port; |
| 497 | target.path = RemoveDotSegments(ref.path); |
| 498 | target.query = ref.query; |
| 499 | target.fragment = ref.fragment; |
| 500 | *target_uri = BuildUri(target); |
| 501 | return true; |
| 502 | |
| 503 | } else { |
| 504 | // Relative path. We need to merge the base path and the ref path. |
| 505 | |
| 506 | if (base.scheme == NULL && base.host == NULL && base.path[0] != '/') { |
| 507 | // The dart:core Uri class handles resolving a relative uri |
| 508 | // against a second relative uri specially, in a way not |
| 509 | // described in the RFC. We do not need to support this for |
| 510 | // library resolution. If we need to implement this later, we |
| 511 | // can. |
| 512 | *target_uri = NULL; |
| 513 | return false; |
| 514 | } |
| 515 | |
| 516 | target.scheme = base.scheme; |
| 517 | target.userinfo = base.userinfo; |
| 518 | target.host = base.host; |
| 519 | target.port = base.port; |
| 520 | target.path = RemoveDotSegments(MergePaths(base.path, ref.path)); |
| 521 | target.query = ref.query; |
| 522 | target.fragment = ref.fragment; |
| 523 | *target_uri = BuildUri(target); |
| 524 | return true; |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | } // namespace dart |
| 529 | |