uri.cc source code [engine/third_party/dart/runtime/vm/uri.cc]

1	// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2	// for details. All rights reserved. Use of this source code is governed by a
3	// BSD-style license that can be found in the LICENSE file.
4
5	#include "vm/uri.h"
6
7	#include "vm/zone.h"
8
9	namespace dart {
10
// Returns true when |value| is one of the characters this file treats as
// "unreserved": an ASCII letter, an ASCII digit, or one of '-', '.', '_',
// '~'. Such characters never need to be percent-escaped.
static bool IsUnreservedChar(intptr_t value) {
  const bool is_lower = ('a' <= value) && (value <= 'z');
  const bool is_upper = ('A' <= value) && (value <= 'Z');
  const bool is_digit = ('0' <= value) && (value <= '9');
  if (is_lower || is_upper || is_digit) {
    return true;
  }
  switch (value) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
16
// Returns true when |value| is one of the delimiter characters that are
// copied through unescaped when they appear literally in a uri:
//   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
static bool IsDelimiter(intptr_t value) {
  static const char kDelimiters[] = ":/?#[]@!$&'()*+,;=";
  // Reject NUL (which strchr would "find" at the terminator) and
  // anything outside of the 7-bit ASCII range before the table lookup.
  if (value <= 0 || value > 0x7F) {
    return false;
  }
  return strchr(kDelimiters, static_cast<int>(value)) != NULL;
}
42
// Returns true when |value| is an ASCII hexadecimal digit (0-9, a-f, A-F).
// Deliberately locale-independent.
static bool IsHexDigit(char value) {
  if ('0' <= value && value <= '9') {
    return true;
  }
  if ('a' <= value && value <= 'f') {
    return true;
  }
  return ('A' <= value) && (value <= 'F');
}
47
48	static int HexValue(char digit) {
49	if ((digit >= `'0'` && digit <= `'9'`)) {
50	return digit - `'0'`;
51	}
52	if ((digit >= `'A'` && digit <= `'F'`)) {
53	return digit - `'A'` + `10`;
54	}
55	if ((digit >= `'a'` && digit <= `'f'`)) {
56	return digit - `'a'` + `10`;
57	}
58	UNREACHABLE();
59	return `0`;
60	}
61
62	static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) {
63	if (pos + `2` >= len) {
64	// Not enough room for a valid escape sequence.
65	return -`1`;
66	}
67	if (str[pos] != `'%'`) {
68	// Escape sequences start with '%'.
69	return -`1`;
70	}
71
72	char digit1 = str[pos + `1`];
73	char digit2 = str[pos + `2`];
74	if (!IsHexDigit(digit1) \|\| !IsHexDigit(digit2)) {
75	// Invalid escape sequence. Ignore it.
76	return -`1`;
77	}
78	return HexValue(digit1) * `16` + HexValue(digit2);
79	}
80
81	static char* NormalizeEscapes(const char* str, intptr_t len) {
82	// Allocate the buffer.
83	Zone* zone = ThreadState::Current()->zone();
84	// We multiply len by three because a percent-escape sequence is
85	// three characters long (e.g. ' ' -> '%20). +1 for '\0'. We could
86	// take two passes through the string and avoid the excess
87	// allocation, but it's zone-memory so it doesn't seem necessary.
88	char* buffer = zone->Alloc<char>(len * `3` + `1`);
89
90	// Copy the string, normalizing as we go.
91	intptr_t buffer_pos = `0`;
92	intptr_t pos = `0`;
93	while (pos < len) {
94	int escaped_value = GetEscapedValue(str, pos, len);
95	if (escaped_value >= `0`) {
96	// If one of the special "unreserved" characters has been
97	// escaped, revert the escaping. Otherwise preserve the
98	// escaping.
99	if (IsUnreservedChar(escaped_value)) {
100	buffer[buffer_pos] = escaped_value;
101	buffer_pos++;
102	} else {
103	Utils::SNPrint(buffer + buffer_pos, `4`, "%%%02X", escaped_value);
104	buffer_pos += `3`;
105	}
106	pos += `3`;
107	} else {
108	char c = str[pos];
109	// If a delimiter or unreserved character is currently not
110	// escaped, preserve that. If there is a busted %-sequence in
111	// the input, preserve that too.
112	if (c == `'%'` \|\| IsDelimiter(c) \|\| IsUnreservedChar(c)) {
113	buffer[buffer_pos] = c;
114	buffer_pos++;
115	} else {
116	// Escape funky characters.
117	Utils::SNPrint(buffer + buffer_pos, `4`, "%%%02X", c);
118	buffer_pos += `3`;
119	}
120	pos++;
121	}
122	}
123	buffer[buffer_pos] = `'\0'`;
124	return buffer;
125	}
126
127	// Lower-case a string in place.
128	static void StringLower(char* str) {
129	const intptr_t len = strlen(str);
130	intptr_t i = `0`;
131	while (i < len) {
132	int escaped_value = GetEscapedValue(str, i, len);
133	if (escaped_value >= `0`) {
134	// Don't lowercase escape sequences.
135	i += `3`;
136	} else {
137	// I don't use tolower() because I don't want the locale
138	// transforming any non-acii characters.
139	char c = str[i];
140	if (c >= `'A'` && c <= `'Z'`) {
141	str[i] = c + (`'a'` - `'A'`);
142	}
143	i++;
144	}
145	}
146	}
147
148	static void ClearParsedUri(ParsedUri* parsed_uri) {
149	parsed_uri->scheme = NULL;
150	parsed_uri->userinfo = NULL;
151	parsed_uri->host = NULL;
152	parsed_uri->port = NULL;
153	parsed_uri->path = NULL;
154	parsed_uri->query = NULL;
155	parsed_uri->fragment = NULL;
156	}
157
158	static intptr_t ParseAuthority(const char* authority, ParsedUri* parsed_uri) {
159	Zone* zone = ThreadState::Current()->zone();
160	const char* current = authority;
161	intptr_t len = `0`;
162
163	size_t userinfo_len = strcspn(current, "@/");
164	if (current[userinfo_len] == `'@'`) {
165	// The '@' character follows the optional userinfo string.
166	parsed_uri->userinfo = NormalizeEscapes(current, userinfo_len);
167	current += userinfo_len + `1`;
168	len += userinfo_len + `1`;
169	} else {
170	parsed_uri->userinfo = NULL;
171	}
172
173	size_t host_len = strcspn(current, ":/");
174	char* host = NormalizeEscapes(current, host_len);
175	StringLower(host);
176	parsed_uri->host = host;
177	len += host_len;
178
179	if (current[host_len] == `':'`) {
180	// The ':' character precedes the optional port string.
181	const char* port_start = current + host_len + `1`; // +1 for ':'
182	size_t port_len = strcspn(port_start, "/");
183	parsed_uri->port = zone->MakeCopyOfStringN(port_start, port_len);
184	len += `1` + port_len; // +1 for ':'
185	} else {
186	parsed_uri->port = NULL;
187	}
188	return len;
189	}
190
191	// Performs a simple parse of a uri into its components.
192	// See RFC 3986 Section 3: Syntax.
193	bool ParseUri(const char* uri, ParsedUri* parsed_uri) {
194	Zone* zone = ThreadState::Current()->zone();
195
196	// The first ':' separates the scheme from the rest of the uri. If
197	// a ':' occurs after the first '/' it doesn't count.
198	size_t scheme_len = strcspn(uri, ":/");
199	const char* rest = uri;
200	if (uri[scheme_len] == `':'`) {
201	char* scheme = zone->MakeCopyOfStringN(uri, scheme_len);
202	StringLower(scheme);
203	parsed_uri->scheme = scheme;
204	rest = uri + scheme_len + `1`;
205	} else {
206	parsed_uri->scheme = NULL;
207	}
208
209	// The first '#' separates the optional fragment
210	const char* hash_pos = rest + strcspn(rest, "#");
211	if (*hash_pos == `'#'`) {
212	// There is a fragment part.
213	const char* fragment_start = hash_pos + `1`;
214	parsed_uri->fragment =
215	NormalizeEscapes(fragment_start, strlen(fragment_start));
216	} else {
217	parsed_uri->fragment = NULL;
218	}
219
220	// The first '?' or '#' separates the hierarchical part from the
221	// optional query.
222	const char* question_pos = rest + strcspn(rest, "?#");
223	if (*question_pos == `'?'`) {
224	// There is a query part.
225	const char* query_start = question_pos + `1`;
226	parsed_uri->query = NormalizeEscapes(query_start, (hash_pos - query_start));
227	} else {
228	parsed_uri->query = NULL;
229	}
230
231	const char* path_start = rest;
232	if (rest[`0`] == `'/'` && rest[`1`] == `'/'`) {
233	// There is an authority part.
234	const char* authority_start = rest + `2`; // 2 for '//'.
235
236	intptr_t authority_len = ParseAuthority(authority_start, parsed_uri);
237	if (authority_len < `0`) {
238	ClearParsedUri(parsed_uri);
239	return false;
240	}
241	path_start = authority_start + authority_len;
242	} else {
243	parsed_uri->userinfo = NULL;
244	parsed_uri->host = NULL;
245	parsed_uri->port = NULL;
246	}
247
248	// The path is the substring between the authority and the query.
249	parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start));
250	return true;
251	}
252
253	static char* RemoveLastSegment(char* current, char* base) {
254	if (current == base) {
255	return current;
256	}
257	ASSERT(current > base);
258	for (current--; current > base; current--) {
259	if (*current == `'/'`) {
260	// We have found the beginning of the last segment.
261	return current;
262	}
263	}
264	ASSERT(current == base);
265	return current;
266	}
267
// Returns the length of the first path segment of |input|. A leading '/'
// counts as part of the segment; the '/' that ends the segment does not.
static intptr_t SegmentLength(const char* input) {
  const intptr_t leading_slash = (input[0] == '/') ? 1 : 0;
  const intptr_t body_len =
      static_cast<intptr_t>(strcspn(input + leading_slash, "/"));
  return leading_slash + body_len;
}
280
281	// See RFC 3986 Section 5.2.4: Remove Dot Segments.
282	static const char* RemoveDotSegments(const char* path) {
283	const char* input = path;
284
285	// The output path will always be less than or equal to the size of
286	// the input path.
287	Zone* zone = ThreadState::Current()->zone();
288	char* buffer = zone->Alloc<char>(strlen(path) + `1`); // +1 for '\0'
289	char* output = buffer;
290
291	while (*input != `'\0'`) {
292	if (strncmp("../", input, `3`) == `0`) {
293	// Discard initial "../" from the input. It's junk.
294	input += `3`;
295
296	} else if (strncmp("./", input, `3`) == `0`) {
297	// Discard initial "./" from the input. It's junk.
298	input += `2`;
299
300	} else if (strncmp("/./", input, `3`) == `0`) {
301	// Advance past the "/." part of the input.
302	input += `2`;
303
304	} else if (strcmp("/.", input) == `0`) {
305	// Pretend the input just contains a "/".
306	input = "/";
307
308	} else if (strncmp("/../", input, `4`) == `0`) {
309	// Advance past the "/.." part of the input and remove one
310	// segment from the output.
311	input += `3`;
312	output = RemoveLastSegment(output, buffer);
313
314	} else if (strcmp("/..", input) == `0`) {
315	// Pretend the input contains a "/" and remove one segment from
316	// the output.
317	input = "/";
318	output = RemoveLastSegment(output, buffer);
319
320	} else if (strcmp("..", input) == `0`) {
321	// The input has been reduced to nothing useful.
322	input += `2`;
323
324	} else if (strcmp(".", input) == `0`) {
325	// The input has been reduced to nothing useful.
326	input += `1`;
327
328	} else {
329	intptr_t segment_len = SegmentLength(input);
330	if (input[`0`] != `'/'` && output != buffer) {
331	*output = `'/'`;
332	output++;
333	}
334	strncpy(output, input, segment_len);
335	output += segment_len;
336	input += segment_len;
337	}
338	}
339	*output = `'\0'`;
340	return buffer;
341	}
342
343	// See RFC 3986 Section 5.2.3: Merge Paths.
344	static const char* MergePaths(const char* base_path, const char* ref_path) {
345	Zone* zone = ThreadState::Current()->zone();
346	if (base_path[`0`] == `'\0'`) {
347	// If the base_path is empty, we prepend '/'.
348	return zone->PrintToString("/%s", ref_path);
349	}
350
351	// We need to find the last '/' in base_path.
352	const char* last_slash = strrchr(base_path, `'/'`);
353	if (last_slash == NULL) {
354	// There is no slash in the base_path. Return the ref_path unchanged.
355	return ref_path;
356	}
357
358	// We found a '/' in the base_path. Cut off everything after it and
359	// add the ref_path.
360	intptr_t truncated_base_len = last_slash - base_path;
361	intptr_t ref_path_len = strlen(ref_path);
362	intptr_t len = truncated_base_len + ref_path_len + `1`; // +1 for '/'
363	char* buffer = zone->Alloc<char>(len + `1`); // +1 for '\0'
364
365	// Copy truncated base.
366	strncpy(buffer, base_path, truncated_base_len);
367
368	// Add a slash.
369	buffer[truncated_base_len] = `'/'`;
370
371	// Copy the ref_path.
372	strncpy((buffer + truncated_base_len + `1`), ref_path, ref_path_len + `1`);
373
374	return buffer;
375	}
376
377	static char* BuildUri(const ParsedUri& uri) {
378	Zone* zone = ThreadState::Current()->zone();
379	ASSERT(uri.path != NULL);
380
381	const char* fragment = uri.fragment == NULL ? "" : uri.fragment;
382	const char* fragment_separator = uri.fragment == NULL ? "" : "#";
383	const char* query = uri.query == NULL ? "" : uri.query;
384	const char* query_separator = uri.query == NULL ? "" : "?";
385
386	// If there is no scheme for this uri, just build a relative uri of
387	// the form: "path[?query][#fragment]". This occurs when we resolve
388	// relative urls inside a "dart:" library.
389	if (uri.scheme == NULL) {
390	ASSERT(uri.userinfo == NULL && uri.host == NULL && uri.port == NULL);
391	return zone->PrintToString("%s%s%s%s%s", uri.path, query_separator, query,
392	fragment_separator, fragment);
393	}
394
395	// Uri with no authority: "scheme:path[?query][#fragment]"
396	if (uri.host == NULL) {
397	ASSERT(uri.userinfo == NULL && uri.port == NULL);
398	return zone->PrintToString("%s:%s%s%s%s%s", uri.scheme, uri.path,
399	query_separator, query, fragment_separator,
400	fragment);
401	}
402
403	const char* user = uri.userinfo == NULL ? "" : uri.userinfo;
404	const char* user_separator = uri.userinfo == NULL ? "" : "@";
405	const char* port = uri.port == NULL ? "" : uri.port;
406	const char* port_separator = uri.port == NULL ? "" : ":";
407
408	// If the path doesn't start with a '/', add one. We need it to
409	// separate the path from the authority.
410	const char* path_separator =
411	((uri.path[`0`] == `'\0'` \|\| uri.path[`0`] == `'/'`) ? "" : "/");
412
413	// Uri with authority:
414	// "scheme://[userinfo@]host[:port][/]path[?query][#fragment]"
415	return zone->PrintToString(
416	"%s://%s%s%s%s%s%s%s%s%s%s%s", // There is nothing* wrong with this.*
417	uri.scheme, user, user_separator, uri.host, port_separator, port,
418	path_separator, uri.path, query_separator, query, fragment_separator,
419	fragment);
420	}
421
422	// See RFC 3986 Section 5: Reference Resolution
423	bool ResolveUri(const char* ref_uri,
424	const char* base_uri,
425	const char** target_uri) {
426	// Parse the reference uri.
427	ParsedUri ref;
428	if (!ParseUri(ref_uri, &ref)) {
429	*target_uri = NULL;
430	return false;
431	}
432
433	ParsedUri target;
434	if (ref.scheme != NULL) {
435	if (strcmp(ref.scheme, "dart") == `0`) {
436	Zone* zone = ThreadState::Current()->zone();
437	*target_uri = zone->MakeCopyOfString(ref_uri);
438	return true;
439	}
440
441	// When the ref_uri specifies a scheme, the base_uri is ignored.
442	target.scheme = ref.scheme;
443	target.userinfo = ref.userinfo;
444	target.host = ref.host;
445	target.port = ref.port;
446	target.path = RemoveDotSegments(ref.path);
447	target.query = ref.query;
448	target.fragment = ref.fragment;
449	*target_uri = BuildUri(target);
450	return true;
451	}
452
453	// Parse the base uri.
454	ParsedUri base;
455	if (!ParseUri(base_uri, &base)) {
456	*target_uri = NULL;
457	return false;
458	}
459
460	if ((base.scheme != NULL) && strcmp(base.scheme, "dart") == `0`) {
461	Zone* zone = ThreadState::Current()->zone();
462	*target_uri = zone->MakeCopyOfString(ref_uri);
463	return true;
464	}
465
466	if (ref.host != NULL) {
467	// When the ref_uri specifies an authority, we only use the base scheme.
468	target.scheme = base.scheme;
469	target.userinfo = ref.userinfo;
470	target.host = ref.host;
471	target.port = ref.port;
472	target.path = RemoveDotSegments(ref.path);
473	target.query = ref.query;
474	target.fragment = ref.fragment;
475	*target_uri = BuildUri(target);
476	return true;
477	}
478
479	if (ref.path[`0`] == `'\0'`) {
480	// Empty path. Use most parts of base_uri.
481	target.scheme = base.scheme;
482	target.userinfo = base.userinfo;
483	target.host = base.host;
484	target.port = base.port;
485	target.path = base.path;
486	target.query = ((ref.query == NULL) ? base.query : ref.query);
487	target.fragment = ref.fragment;
488	*target_uri = BuildUri(target);
489	return true;
490
491	} else if (ref.path[`0`] == `'/'`) {
492	// Absolute path. ref_path wins.
493	target.scheme = base.scheme;
494	target.userinfo = base.userinfo;
495	target.host = base.host;
496	target.port = base.port;
497	target.path = RemoveDotSegments(ref.path);
498	target.query = ref.query;
499	target.fragment = ref.fragment;
500	*target_uri = BuildUri(target);
501	return true;
502
503	} else {
504	// Relative path. We need to merge the base path and the ref path.
505
506	if (base.scheme == NULL && base.host == NULL && base.path[`0`] != `'/'`) {
507	// The dart:core Uri class handles resolving a relative uri
508	// against a second relative uri specially, in a way not
509	// described in the RFC. We do not need to support this for
510	// library resolution. If we need to implement this later, we
511	// can.
512	*target_uri = NULL;
513	return false;
514	}
515
516	target.scheme = base.scheme;
517	target.userinfo = base.userinfo;
518	target.host = base.host;
519	target.port = base.port;
520	target.path = RemoveDotSegments(MergePaths(base.path, ref.path));
521	target.query = ref.query;
522	target.fragment = ref.fragment;
523	*target_uri = BuildUri(target);
524	return true;
525	}
526	}
527
528	} // namespace dart
529

Browse the source code of engine/third_party/dart/runtime/vm/uri.cc