uri.c source code [ClickHouse/contrib/libxml2/uri.c]

1	/**
2	* uri.c: set of generic URI related routines
3	*
4	* Reference: RFCs 3986, 2732 and 2373
5	*
6	* See Copyright for the status of this software.
7	*
8	* daniel@veillard.com
9	*/
10
11	#define IN_LIBXML
12	#include "libxml.h"
13
14	#include <string.h>
15
16	#include <libxml/xmlmemory.h>
17	#include <libxml/uri.h>
18	#include <libxml/globals.h>
19	#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so the macro acts as a single value
 * whatever operators surround it at the point of use.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35	static void
36	xmlURIErrMemory(const char *extra)
37	{
38	if (extra)
39	__xmlRaiseError(NULL, NULL, NULL,
40	NULL, NULL, XML_FROM_URI,
41	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`,
42	extra, NULL, NULL, `0`, `0`,
43	"Memory allocation failed : %s\n", extra);
44	else
45	__xmlRaiseError(NULL, NULL, NULL,
46	NULL, NULL, XML_FROM_URI,
47	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`,
48	NULL, NULL, NULL, `0`, `0`,
49	"Memory allocation failed\n");
50	}
51
52	static void xmlCleanURI(xmlURIPtr uri);
53
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 *
 * Unless stated otherwise the IS_xxx() classifiers below take a
 * character value (not a pointer).
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * Unlike the classifiers above, IS_UNWISE() takes a POINTER to the
 * character to test. The macro additionally accepts "[" and "]".
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences:
 * advance the pointer lvalue @p by 3 when it points at '%', by 1 otherwise.
 * The argument is parenthesized so that an lvalue expression such as
 * *pp advances the pointed-to cursor rather than pp itself.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup(), as a char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139
140	/************************************************************************
141	* *
142	* RFC 3986 parser *
143	* *
144	************************************************************************/
145
/*
 * All ISA_xxx() classifiers take a POINTER to the character(s) to test.
 * Arguments are fully parenthesized so any pointer expression may be
 * passed.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two bytes after '%' are only examined when *p is '%'.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
195
196	/**
197	* xmlParse3986Scheme:
198	* @uri: pointer to an URI structure
199	* @str: pointer to the string to analyze
200	*
201	* Parse an URI scheme
202	*
203	* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204	*
205	* Returns 0 or the error code
206	*/
207	static int
208	xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209	const char *cur;
210
211	if (str == NULL)
212	return(-`1`);
213
214	cur = *str;
215	if (!ISA_ALPHA(cur))
216	return(`2`);
217	cur++;
218	while (ISA_ALPHA(cur) \|\| ISA_DIGIT(cur) \|\|
219	(cur == `'+'`) \|\| (cur == `'-'`) \|\| (*cur == `'.'`)) cur++;
220	if (uri != NULL) {
221	if (uri->scheme != NULL) xmlFree(uri->scheme);
222	uri->scheme = STRNDUP(str, cur - str);
223	}
224	*str = cur;
225	return(`0`);
226	}
227
228	/**
229	* xmlParse3986Fragment:
230	* @uri: pointer to an URI structure
231	* @str: pointer to the string to analyze
232	*
233	* Parse the query part of an URI
234	*
235	* fragment = *( pchar / "/" / "?" )
236	* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237	* in the fragment identifier but this is used very broadly for
238	* xpointer scheme selection, so we are allowing it here to not break
239	* for example all the DocBook processing chains.
240	*
241	* Returns 0 or the error code
242	*/
243	static int
244	xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245	{
246	const char *cur;
247
248	if (str == NULL)
249	return (-`1`);
250
251	cur = *str;
252
253	while ((ISA_PCHAR(cur)) \|\| (cur == `'/'`) \|\| (cur == `'?'`) \|\|
254	(cur == `'['`) \|\| (cur == `']'`) \|\|
255	((uri != NULL) && (uri->cleanup & `1`) && (IS_UNWISE(cur))))
256	NEXT(cur);
257	if (uri != NULL) {
258	if (uri->fragment != NULL)
259	xmlFree(uri->fragment);
260	if (uri->cleanup & `2`)
261	uri->fragment = STRNDUP(str, cur - str);
262	else
263	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
264	}
265	*str = cur;
266	return (`0`);
267	}
268
269	/**
270	* xmlParse3986Query:
271	* @uri: pointer to an URI structure
272	* @str: pointer to the string to analyze
273	*
274	* Parse the query part of an URI
275	*
276	* query = *uric
277	*
278	* Returns 0 or the error code
279	*/
280	static int
281	xmlParse3986Query(xmlURIPtr uri, const char **str)
282	{
283	const char *cur;
284
285	if (str == NULL)
286	return (-`1`);
287
288	cur = *str;
289
290	while ((ISA_PCHAR(cur)) \|\| (cur == `'/'`) \|\| (cur == `'?'`) \|\|
291	((uri != NULL) && (uri->cleanup & `1`) && (IS_UNWISE(cur))))
292	NEXT(cur);
293	if (uri != NULL) {
294	if (uri->query != NULL)
295	xmlFree(uri->query);
296	if (uri->cleanup & `2`)
297	uri->query = STRNDUP(str, cur - str);
298	else
299	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
300
301	/ Save the raw bytes of the query as well.*
302	* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303	*/
304	if (uri->query_raw != NULL)
305	xmlFree (uri->query_raw);
306	uri->query_raw = STRNDUP (str, cur - str);
307	}
308	*str = cur;
309	return (`0`);
310	}
311
312	/**
313	* xmlParse3986Port:
314	* @uri: pointer to an URI structure
315	* @str: the string to analyze
316	*
317	* Parse a port part and fills in the appropriate fields
318	* of the @uri structure
319	*
320	* port = *DIGIT
321	*
322	* Returns 0 or the error code
323	*/
324	static int
325	xmlParse3986Port(xmlURIPtr uri, const char **str)
326	{
327	const char cur = str;
328	unsigned port = `0`; / unsigned for defined overflow behavior /
329
330	if (ISA_DIGIT(cur)) {
331	while (ISA_DIGIT(cur)) {
332	port = port * `10` + (*cur - `'0'`);
333
334	cur++;
335	}
336	if (uri != NULL)
337	uri->port = port & INT_MAX; / port value modulo INT_MAX+1 /
338	*str = cur;
339	return(`0`);
340	}
341	return(`1`);
342	}
343
344	/**
345	* xmlParse3986Userinfo:
346	* @uri: pointer to an URI structure
347	* @str: the string to analyze
348	*
349	* Parse an user informations part and fills in the appropriate fields
350	* of the @uri structure
351	*
352	* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353	*
354	* Returns 0 or the error code
355	*/
356	static int
357	xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358	{
359	const char *cur;
360
361	cur = *str;
362	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\|
363	ISA_SUB_DELIM(cur) \|\| (*cur == `':'`))
364	NEXT(cur);
365	if (*cur == `'@'`) {
366	if (uri != NULL) {
367	if (uri->user != NULL) xmlFree(uri->user);
368	if (uri->cleanup & `2`)
369	uri->user = STRNDUP(str, cur - str);
370	else
371	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
372	}
373	*str = cur;
374	return(`0`);
375	}
376	return(`1`);
377	}
378
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are examined: one digit is
 * always valid, two digits must not start with '0', and three digits
 * must spell a value in the range 100-255 (so "256".."259" are refused).
 *
 * Returns 0 if found and skipped, 1 otherwise (@str is then untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* Collect up to three leading digits; the NUL terminator stops us. */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    switch (ndigits) {
        case 0:
            return(1);
        case 1:
            break;
        case 2:
            /* 10-99: no leading zero allowed */
            if (cur[0] == '0')
                return(1);
            break;
        default:
            /* 100-255: also rules out a leading zero */
            if ((value < 100) || (value > 255))
                return(1);
            break;
    }

    *str = cur + ndigits;
    return(0);
}
416	/**
417	* xmlParse3986Host:
418	* @uri: pointer to an URI structure
419	* @str: the string to analyze
420	*
421	* Parse an host part and fills in the appropriate fields
422	* of the @uri structure
423	*
424	* host = IP-literal / IPv4address / reg-name
425	* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
426	* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
427	* reg-name = *( unreserved / pct-encoded / sub-delims )
428	*
429	* Returns 0 or the error code
430	*/
431	static int
432	xmlParse3986Host(xmlURIPtr uri, const char **str)
433	{
434	const char cur = str;
435	const char *host;
436
437	host = cur;
438	/*
439	* IPv6 and future adressing scheme are enclosed between brackets
440	*/
441	if (*cur == `'['`) {
442	cur++;
443	while ((cur != `']'`) && (cur != `0`))
444	cur++;
445	if (*cur != `']'`)
446	return(`1`);
447	cur++;
448	goto found;
449	}
450	/*
451	* try to parse an IPv4
452	*/
453	if (ISA_DIGIT(cur)) {
454	if (xmlParse3986DecOctet(&cur) != `0`)
455	goto not_ipv4;
456	if (*cur != `'.'`)
457	goto not_ipv4;
458	cur++;
459	if (xmlParse3986DecOctet(&cur) != `0`)
460	goto not_ipv4;
461	if (*cur != `'.'`)
462	goto not_ipv4;
463	if (xmlParse3986DecOctet(&cur) != `0`)
464	goto not_ipv4;
465	if (*cur != `'.'`)
466	goto not_ipv4;
467	if (xmlParse3986DecOctet(&cur) != `0`)
468	goto not_ipv4;
469	goto found;
470	not_ipv4:
471	cur = *str;
472	}
473	/*
474	* then this should be a hostname which can be empty
475	*/
476	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\| ISA_SUB_DELIM(cur))
477	NEXT(cur);
478	found:
479	if (uri != NULL) {
480	if (uri->authority != NULL) xmlFree(uri->authority);
481	uri->authority = NULL;
482	if (uri->server != NULL) xmlFree(uri->server);
483	if (cur != host) {
484	if (uri->cleanup & `2`)
485	uri->server = STRNDUP(host, cur - host);
486	else
487	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488	} else
489	uri->server = NULL;
490	}
491	*str = cur;
492	return(`0`);
493	}
494
495	/**
496	* xmlParse3986Authority:
497	* @uri: pointer to an URI structure
498	* @str: the string to analyze
499	*
500	* Parse an authority part and fills in the appropriate fields
501	* of the @uri structure
502	*
503	* authority = [ userinfo "@" ] host [ ":" port ]
504	*
505	* Returns 0 or the error code
506	*/
507	static int
508	xmlParse3986Authority(xmlURIPtr uri, const char **str)
509	{
510	const char *cur;
511	int ret;
512
513	cur = *str;
514	/*
515	* try to parse an userinfo and check for the trailing @
516	*/
517	ret = xmlParse3986Userinfo(uri, &cur);
518	if ((ret != `0`) \|\| (*cur != `'@'`))
519	cur = *str;
520	else
521	cur++;
522	ret = xmlParse3986Host(uri, &cur);
523	if (ret != `0`) return(ret);
524	if (*cur == `':'`) {
525	cur++;
526	ret = xmlParse3986Port(uri, &cur);
527	if (ret != `0`) return(ret);
528	}
529	*str = cur;
530	return(`0`);
531	}
532
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, advanced past the segment on success
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip over one path segment; nothing is stored.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * A segment starting with the @forbid character is zero-length, so it
 * is only accepted when @empty is set.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
565
566	/**
567	* xmlParse3986PathAbEmpty:
568	* @uri: pointer to an URI structure
569	* @str: the string to analyze
570	*
571	* Parse an path absolute or empty and fills in the appropriate fields
572	* of the @uri structure
573	*
574	* path-abempty = *( "/" segment )
575	*
576	* Returns 0 or the error code
577	*/
578	static int
579	xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580	{
581	const char *cur;
582	int ret;
583
584	cur = *str;
585
586	while (*cur == `'/'`) {
587	cur++;
588	ret = xmlParse3986Segment(&cur, `0`, `1`);
589	if (ret != `0`) return(ret);
590	}
591	if (uri != NULL) {
592	if (uri->path != NULL) xmlFree(uri->path);
593	if (*str != cur) {
594	if (uri->cleanup & `2`)
595	uri->path = STRNDUP(str, cur - str);
596	else
597	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
598	} else {
599	uri->path = NULL;
600	}
601	}
602	*str = cur;
603	return (`0`);
604	}
605
606	/**
607	* xmlParse3986PathAbsolute:
608	* @uri: pointer to an URI structure
609	* @str: the string to analyze
610	*
611	* Parse an path absolute and fills in the appropriate fields
612	* of the @uri structure
613	*
614	* path-absolute = "/" [ segment-nz *( "/" segment ) ]
615	*
616	* Returns 0 or the error code
617	*/
618	static int
619	xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620	{
621	const char *cur;
622	int ret;
623
624	cur = *str;
625
626	if (*cur != `'/'`)
627	return(`1`);
628	cur++;
629	ret = xmlParse3986Segment(&cur, `0`, `0`);
630	if (ret == `0`) {
631	while (*cur == `'/'`) {
632	cur++;
633	ret = xmlParse3986Segment(&cur, `0`, `1`);
634	if (ret != `0`) return(ret);
635	}
636	}
637	if (uri != NULL) {
638	if (uri->path != NULL) xmlFree(uri->path);
639	if (cur != *str) {
640	if (uri->cleanup & `2`)
641	uri->path = STRNDUP(str, cur - str);
642	else
643	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
644	} else {
645	uri->path = NULL;
646	}
647	}
648	*str = cur;
649	return (`0`);
650	}
651
652	/**
653	* xmlParse3986PathRootless:
654	* @uri: pointer to an URI structure
655	* @str: the string to analyze
656	*
657	* Parse an path without root and fills in the appropriate fields
658	* of the @uri structure
659	*
660	* path-rootless = segment-nz *( "/" segment )
661	*
662	* Returns 0 or the error code
663	*/
664	static int
665	xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666	{
667	const char *cur;
668	int ret;
669
670	cur = *str;
671
672	ret = xmlParse3986Segment(&cur, `0`, `0`);
673	if (ret != `0`) return(ret);
674	while (*cur == `'/'`) {
675	cur++;
676	ret = xmlParse3986Segment(&cur, `0`, `1`);
677	if (ret != `0`) return(ret);
678	}
679	if (uri != NULL) {
680	if (uri->path != NULL) xmlFree(uri->path);
681	if (cur != *str) {
682	if (uri->cleanup & `2`)
683	uri->path = STRNDUP(str, cur - str);
684	else
685	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
686	} else {
687	uri->path = NULL;
688	}
689	}
690	*str = cur;
691	return (`0`);
692	}
693
694	/**
695	* xmlParse3986PathNoScheme:
696	* @uri: pointer to an URI structure
697	* @str: the string to analyze
698	*
699	* Parse an path which is not a scheme and fills in the appropriate fields
700	* of the @uri structure
701	*
702	* path-noscheme = segment-nz-nc *( "/" segment )
703	*
704	* Returns 0 or the error code
705	*/
706	static int
707	xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708	{
709	const char *cur;
710	int ret;
711
712	cur = *str;
713
714	ret = xmlParse3986Segment(&cur, `':'`, `0`);
715	if (ret != `0`) return(ret);
716	while (*cur == `'/'`) {
717	cur++;
718	ret = xmlParse3986Segment(&cur, `0`, `1`);
719	if (ret != `0`) return(ret);
720	}
721	if (uri != NULL) {
722	if (uri->path != NULL) xmlFree(uri->path);
723	if (cur != *str) {
724	if (uri->cleanup & `2`)
725	uri->path = STRNDUP(str, cur - str);
726	else
727	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
728	} else {
729	uri->path = NULL;
730	}
731	}
732	*str = cur;
733	return (`0`);
734	}
735
736	/**
737	* xmlParse3986HierPart:
738	* @uri: pointer to an URI structure
739	* @str: the string to analyze
740	*
741	* Parse an hierarchical part and fills in the appropriate fields
742	* of the @uri structure
743	*
744	* hier-part = "//" authority path-abempty
745	* / path-absolute
746	* / path-rootless
747	* / path-empty
748	*
749	* Returns 0 or the error code
750	*/
751	static int
752	xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753	{
754	const char *cur;
755	int ret;
756
757	cur = *str;
758
759	if ((cur == `'/'`) && ((cur + `1`) == `'/'`)) {
760	cur += `2`;
761	ret = xmlParse3986Authority(uri, &cur);
762	if (ret != `0`) return(ret);
763	if (uri->server == NULL)
764	uri->port = -`1`;
765	ret = xmlParse3986PathAbEmpty(uri, &cur);
766	if (ret != `0`) return(ret);
767	*str = cur;
768	return(`0`);
769	} else if (*cur == `'/'`) {
770	ret = xmlParse3986PathAbsolute(uri, &cur);
771	if (ret != `0`) return(ret);
772	} else if (ISA_PCHAR(cur)) {
773	ret = xmlParse3986PathRootless(uri, &cur);
774	if (ret != `0`) return(ret);
775	} else {
776	/ path-empty is effectively empty /
777	if (uri != NULL) {
778	if (uri->path != NULL) xmlFree(uri->path);
779	uri->path = NULL;
780	}
781	}
782	*str = cur;
783	return (`0`);
784	}
785
786	/**
787	* xmlParse3986RelativeRef:
788	* @uri: pointer to an URI structure
789	* @str: the string to analyze
790	*
791	* Parse an URI string and fills in the appropriate fields
792	* of the @uri structure
793	*
794	* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
795	* relative-part = "//" authority path-abempty
796	* / path-absolute
797	* / path-noscheme
798	* / path-empty
799	*
800	* Returns 0 or the error code
801	*/
802	static int
803	xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804	int ret;
805
806	if ((str == `'/'`) && ((str + `1`) == `'/'`)) {
807	str += `2`;
808	ret = xmlParse3986Authority(uri, &str);
809	if (ret != `0`) return(ret);
810	ret = xmlParse3986PathAbEmpty(uri, &str);
811	if (ret != `0`) return(ret);
812	} else if (*str == `'/'`) {
813	ret = xmlParse3986PathAbsolute(uri, &str);
814	if (ret != `0`) return(ret);
815	} else if (ISA_PCHAR(str)) {
816	ret = xmlParse3986PathNoScheme(uri, &str);
817	if (ret != `0`) return(ret);
818	} else {
819	/ path-empty is effectively empty /
820	if (uri != NULL) {
821	if (uri->path != NULL) xmlFree(uri->path);
822	uri->path = NULL;
823	}
824	}
825
826	if (*str == `'?'`) {
827	str++;
828	ret = xmlParse3986Query(uri, &str);
829	if (ret != `0`) return(ret);
830	}
831	if (*str == `'#'`) {
832	str++;
833	ret = xmlParse3986Fragment(uri, &str);
834	if (ret != `0`) return(ret);
835	}
836	if (*str != `0`) {
837	xmlCleanURI(uri);
838	return(`1`);
839	}
840	return(`0`);
841	}
842
843
844	/**
845	* xmlParse3986URI:
846	* @uri: pointer to an URI structure
847	* @str: the string to analyze
848	*
849	* Parse an URI string and fills in the appropriate fields
850	* of the @uri structure
851	*
852	* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853	*
854	* Returns 0 or the error code
855	*/
856	static int
857	xmlParse3986URI(xmlURIPtr uri, const char *str) {
858	int ret;
859
860	ret = xmlParse3986Scheme(uri, &str);
861	if (ret != `0`) return(ret);
862	if (*str != `':'`) {
863	return(`1`);
864	}
865	str++;
866	ret = xmlParse3986HierPart(uri, &str);
867	if (ret != `0`) return(ret);
868	if (*str == `'?'`) {
869	str++;
870	ret = xmlParse3986Query(uri, &str);
871	if (ret != `0`) return(ret);
872	}
873	if (*str == `'#'`) {
874	str++;
875	ret = xmlParse3986Fragment(uri, &str);
876	if (ret != `0`) return(ret);
877	}
878	if (*str != `0`) {
879	xmlCleanURI(uri);
880	return(`1`);
881	}
882	return(`0`);
883	}
884
885	/**
886	* xmlParse3986URIReference:
887	* @uri: pointer to an URI structure
888	* @str: the string to analyze
889	*
890	* Parse an URI reference string and fills in the appropriate fields
891	* of the @uri structure
892	*
893	* URI-reference = URI / relative-ref
894	*
895	* Returns 0 or the error code
896	*/
897	static int
898	xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899	int ret;
900
901	if (str == NULL)
902	return(-`1`);
903	xmlCleanURI(uri);
904
905	/*
906	* Try first to parse absolute refs, then fallback to relative if
907	* it fails.
908	*/
909	ret = xmlParse3986URI(uri, str);
910	if (ret != `0`) {
911	xmlCleanURI(uri);
912	ret = xmlParse3986RelativeRef(uri, str);
913	if (ret != `0`) {
914	xmlCleanURI(uri);
915	return(ret);
916	}
917	}
918	return(`0`);
919	}
920
921	/**
922	* xmlParseURI:
923	* @str: the URI string to analyze
924	*
925	* Parse an URI based on RFC 3986
926	*
927	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
928	*
929	* Returns a newly built xmlURIPtr or NULL in case of error
930	*/
931	xmlURIPtr
932	xmlParseURI(const char *str) {
933	xmlURIPtr uri;
934	int ret;
935
936	if (str == NULL)
937	return(NULL);
938	uri = xmlCreateURI();
939	if (uri != NULL) {
940	ret = xmlParse3986URIReference(uri, str);
941	if (ret) {
942	xmlFreeURI(uri);
943	return(NULL);
944	}
945	}
946	return(uri);
947	}
948
949	/**
950	* xmlParseURIReference:
951	* @uri: pointer to an URI structure
952	* @str: the string to analyze
953	*
954	* Parse an URI reference string based on RFC 3986 and fills in the
955	* appropriate fields of the @uri structure
956	*
957	* URI-reference = URI / relative-ref
958	*
959	* Returns 0 or the error code
960	*/
961	int
962	xmlParseURIReference(xmlURIPtr uri, const char *str) {
963	return(xmlParse3986URIReference(uri, str));
964	}
965
966	/**
967	* xmlParseURIRaw:
968	* @str: the URI string to analyze
969	* @raw: if 1 unescaping of URI pieces are disabled
970	*
971	* Parse an URI but allows to keep intact the original fragments.
972	*
973	* URI-reference = URI / relative-ref
974	*
975	* Returns a newly built xmlURIPtr or NULL in case of error
976	*/
977	xmlURIPtr
978	xmlParseURIRaw(const char str, int* raw) {
979	xmlURIPtr uri;
980	int ret;
981
982	if (str == NULL)
983	return(NULL);
984	uri = xmlCreateURI();
985	if (uri != NULL) {
986	if (raw) {
987	uri->cleanup \|= `2`;
988	}
989	ret = xmlParseURIReference(uri, str);
990	if (ret) {
991	xmlFreeURI(uri);
992	return(NULL);
993	}
994	}
995	return(uri);
996	}
997
998	/************************************************************************
999	* *
1000	* Generic URI structure functions *
1001	* *
1002	************************************************************************/
1003
1004	/**
1005	* xmlCreateURI:
1006	*
1007	* Simply creates an empty xmlURI
1008	*
1009	* Returns the new structure or NULL in case of error
1010	*/
1011	xmlURIPtr
1012	xmlCreateURI(void) {
1013	xmlURIPtr ret;
1014
1015	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016	if (ret == NULL) {
1017	xmlURIErrMemory("creating URI structure\n");
1018	return(NULL);
1019	}
1020	memset(ret, `0`, sizeof(xmlURI));
1021	return(ret);
1022	}
1023
1024	/**
1025	* xmlSaveUriRealloc:
1026	*
1027	* Function to handle properly a reallocation when saving an URI
1028	* Also imposes some limit on the length of an URI string output
1029	*/
1030	static xmlChar *
1031	xmlSaveUriRealloc(xmlChar ret, int* *max) {
1032	xmlChar *temp;
1033	int tmp;
1034
1035	if (*max > MAX_URI_LENGTH) {
1036	xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037	return(NULL);
1038	}
1039	tmp = max `2`;
1040	temp = (xmlChar *) xmlRealloc(ret, (tmp + `1`));
1041	if (temp == NULL) {
1042	xmlURIErrMemory("saving URI\n");
1043	return(NULL);
1044	}
1045	*max = tmp;
1046	return(temp);
1047	}
1048
1049	/**
1050	* xmlSaveUri:
1051	* @uri: pointer to an xmlURI
1052	*
1053	* Save the URI as an escaped string
1054	*
1055	* Returns a new string (to be deallocated by caller)
1056	*/
1057	xmlChar *
1058	xmlSaveUri(xmlURIPtr uri) {
1059	xmlChar *ret = NULL;
1060	xmlChar *temp;
1061	const char *p;
1062	int len;
1063	int max;
1064
1065	if (uri == NULL) return(NULL);
1066
1067
1068	max = `80`;
1069	ret = (xmlChar ) xmlMallocAtomic((max + `1`) sizeof(xmlChar));
1070	if (ret == NULL) {
1071	xmlURIErrMemory("saving URI\n");
1072	return(NULL);
1073	}
1074	len = `0`;
1075
1076	if (uri->scheme != NULL) {
1077	p = uri->scheme;
1078	while (*p != `0`) {
1079	if (len >= max) {
1080	temp = xmlSaveUriRealloc(ret, &max);
1081	if (temp == NULL) goto mem_error;
1082	ret = temp;
1083	}
1084	ret[len++] = *p++;
1085	}
1086	if (len >= max) {
1087	temp = xmlSaveUriRealloc(ret, &max);
1088	if (temp == NULL) goto mem_error;
1089	ret = temp;
1090	}
1091	ret[len++] = `':'`;
1092	}
1093	if (uri->opaque != NULL) {
1094	p = uri->opaque;
1095	while (*p != `0`) {
1096	if (len + `3` >= max) {
1097	temp = xmlSaveUriRealloc(ret, &max);
1098	if (temp == NULL) goto mem_error;
1099	ret = temp;
1100	}
1101	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
1102	ret[len++] = *p++;
1103	else {
1104	int val = (unsigned* char *)p++;
1105	int hi = val / `0x10`, lo = val % `0x10`;
1106	ret[len++] = `'%'`;
1107	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1108	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1109	}
1110	}
1111	} else {
1112	if ((uri->server != NULL) \|\| (uri->port == -`1`)) {
1113	if (len + `3` >= max) {
1114	temp = xmlSaveUriRealloc(ret, &max);
1115	if (temp == NULL) goto mem_error;
1116	ret = temp;
1117	}
1118	ret[len++] = `'/'`;
1119	ret[len++] = `'/'`;
1120	if (uri->user != NULL) {
1121	p = uri->user;
1122	while (*p != `0`) {
1123	if (len + `3` >= max) {
1124	temp = xmlSaveUriRealloc(ret, &max);
1125	if (temp == NULL) goto mem_error;
1126	ret = temp;
1127	}
1128	if ((IS_UNRESERVED(*(p))) \|\|
1129	(((p) == `';'`)) \|\| (((p) == `':'`)) \|\|
1130	(((p) == `'&'`)) \|\| (((p) == `'='`)) \|\|
1131	(((p) == `'+'`)) \|\| (((p) == `'$'`)) \|\|
1132	((*(p) == `','`)))
1133	ret[len++] = *p++;
1134	else {
1135	int val = (unsigned* char *)p++;
1136	int hi = val / `0x10`, lo = val % `0x10`;
1137	ret[len++] = `'%'`;
1138	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1139	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1140	}
1141	}
1142	if (len + `3` >= max) {
1143	temp = xmlSaveUriRealloc(ret, &max);
1144	if (temp == NULL) goto mem_error;
1145	ret = temp;
1146	}
1147	ret[len++] = `'@'`;
1148	}
1149	if (uri->server != NULL) {
1150	p = uri->server;
1151	while (*p != `0`) {
1152	if (len >= max) {
1153	temp = xmlSaveUriRealloc(ret, &max);
1154	if (temp == NULL) goto mem_error;
1155	ret = temp;
1156	}
1157	ret[len++] = *p++;
1158	}
1159	if (uri->port > `0`) {
1160	if (len + `10` >= max) {
1161	temp = xmlSaveUriRealloc(ret, &max);
1162	if (temp == NULL) goto mem_error;
1163	ret = temp;
1164	}
1165	len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166	}
1167	}
1168	} else if (uri->authority != NULL) {
1169	if (len + `3` >= max) {
1170	temp = xmlSaveUriRealloc(ret, &max);
1171	if (temp == NULL) goto mem_error;
1172	ret = temp;
1173	}
1174	ret[len++] = `'/'`;
1175	ret[len++] = `'/'`;
1176	p = uri->authority;
1177	while (*p != `0`) {
1178	if (len + `3` >= max) {
1179	temp = xmlSaveUriRealloc(ret, &max);
1180	if (temp == NULL) goto mem_error;
1181	ret = temp;
1182	}
1183	if ((IS_UNRESERVED(*(p))) \|\|
1184	(((p) == `'$'`)) \|\| (((p) == `','`)) \|\| ((*(p) == `';'`)) \|\|
1185	(((p) == `':'`)) \|\| (((p) == `'@'`)) \|\| ((*(p) == `'&'`)) \|\|
1186	(((p) == `'='`)) \|\| (((p) == `'+'`)))
1187	ret[len++] = *p++;
1188	else {
1189	int val = (unsigned* char *)p++;
1190	int hi = val / `0x10`, lo = val % `0x10`;
1191	ret[len++] = `'%'`;
1192	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1193	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1194	}
1195	}
1196	} else if (uri->scheme != NULL) {
1197	if (len + `3` >= max) {
1198	temp = xmlSaveUriRealloc(ret, &max);
1199	if (temp == NULL) goto mem_error;
1200	ret = temp;
1201	}
1202	}
1203	if (uri->path != NULL) {
1204	p = uri->path;
1205	/*
1206	* the colon in file:///d: should not be escaped or
1207	* Windows accesses fail later.
1208	*/
1209	if ((uri->scheme != NULL) &&
1210	(p[`0`] == `'/'`) &&
1211	(((p[`1`] >= `'a'`) && (p[`1`] <= `'z'`)) \|\|
1212	((p[`1`] >= `'A'`) && (p[`1`] <= `'Z'`))) &&
1213	(p[`2`] == `':'`) &&
1214	(xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215	if (len + `3` >= max) {
1216	temp = xmlSaveUriRealloc(ret, &max);
1217	if (temp == NULL) goto mem_error;
1218	ret = temp;
1219	}
1220	ret[len++] = *p++;
1221	ret[len++] = *p++;
1222	ret[len++] = *p++;
1223	}
1224	while (*p != `0`) {
1225	if (len + `3` >= max) {
1226	temp = xmlSaveUriRealloc(ret, &max);
1227	if (temp == NULL) goto mem_error;
1228	ret = temp;
1229	}
1230	if ((IS_UNRESERVED((p))) \|\| (((p) == `'/'`)) \|\|
1231	(((p) == `';'`)) \|\| (((p) == `'@'`)) \|\| ((*(p) == `'&'`)) \|\|
1232	(((p) == `'='`)) \|\| (((p) == `'+'`)) \|\| ((*(p) == `'$'`)) \|\|
1233	((*(p) == `','`)))
1234	ret[len++] = *p++;
1235	else {
1236	int val = (unsigned* char *)p++;
1237	int hi = val / `0x10`, lo = val % `0x10`;
1238	ret[len++] = `'%'`;
1239	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1240	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1241	}
1242	}
1243	}
1244	if (uri->query_raw != NULL) {
1245	if (len + `1` >= max) {
1246	temp = xmlSaveUriRealloc(ret, &max);
1247	if (temp == NULL) goto mem_error;
1248	ret = temp;
1249	}
1250	ret[len++] = `'?'`;
1251	p = uri->query_raw;
1252	while (*p != `0`) {
1253	if (len + `1` >= max) {
1254	temp = xmlSaveUriRealloc(ret, &max);
1255	if (temp == NULL) goto mem_error;
1256	ret = temp;
1257	}
1258	ret[len++] = *p++;
1259	}
1260	} else if (uri->query != NULL) {
1261	if (len + `3` >= max) {
1262	temp = xmlSaveUriRealloc(ret, &max);
1263	if (temp == NULL) goto mem_error;
1264	ret = temp;
1265	}
1266	ret[len++] = `'?'`;
1267	p = uri->query;
1268	while (*p != `0`) {
1269	if (len + `3` >= max) {
1270	temp = xmlSaveUriRealloc(ret, &max);
1271	if (temp == NULL) goto mem_error;
1272	ret = temp;
1273	}
1274	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1275	ret[len++] = *p++;
1276	else {
1277	int val = (unsigned* char *)p++;
1278	int hi = val / `0x10`, lo = val % `0x10`;
1279	ret[len++] = `'%'`;
1280	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1281	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1282	}
1283	}
1284	}
1285	}
1286	if (uri->fragment != NULL) {
1287	if (len + `3` >= max) {
1288	temp = xmlSaveUriRealloc(ret, &max);
1289	if (temp == NULL) goto mem_error;
1290	ret = temp;
1291	}
1292	ret[len++] = `'#'`;
1293	p = uri->fragment;
1294	while (*p != `0`) {
1295	if (len + `3` >= max) {
1296	temp = xmlSaveUriRealloc(ret, &max);
1297	if (temp == NULL) goto mem_error;
1298	ret = temp;
1299	}
1300	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1301	ret[len++] = *p++;
1302	else {
1303	int val = (unsigned* char *)p++;
1304	int hi = val / `0x10`, lo = val % `0x10`;
1305	ret[len++] = `'%'`;
1306	ret[len++] = hi + (hi > `9`? `'A'`-`10` : `'0'`);
1307	ret[len++] = lo + (lo > `9`? `'A'`-`10` : `'0'`);
1308	}
1309	}
1310	}
1311	if (len >= max) {
1312	temp = xmlSaveUriRealloc(ret, &max);
1313	if (temp == NULL) goto mem_error;
1314	ret = temp;
1315	}
1316	ret[len] = `0`;
1317	return(ret);
1318
1319	mem_error:
1320	xmlFree(ret);
1321	return(NULL);
1322	}
1323
1324	/**
1325	* xmlPrintURI:
1326	* @stream: a FILE* for the output
1327	* @uri: pointer to an xmlURI
1328	*
1329	* Prints the URI in the stream @stream.
1330	*/
1331	void
1332	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333	xmlChar *out;
1334
1335	out = xmlSaveUri(uri);
1336	if (out != NULL) {
1337	fprintf(stream, "%s", (char *) out);
1338	xmlFree(out);
1339	}
1340	}
1341
1342	/**
1343	* xmlCleanURI:
1344	* @uri: pointer to an xmlURI
1345	*
1346	* Make sure the xmlURI struct is free of content
1347	*/
1348	static void
1349	xmlCleanURI(xmlURIPtr uri) {
1350	if (uri == NULL) return;
1351
1352	if (uri->scheme != NULL) xmlFree(uri->scheme);
1353	uri->scheme = NULL;
1354	if (uri->server != NULL) xmlFree(uri->server);
1355	uri->server = NULL;
1356	if (uri->user != NULL) xmlFree(uri->user);
1357	uri->user = NULL;
1358	if (uri->path != NULL) xmlFree(uri->path);
1359	uri->path = NULL;
1360	if (uri->fragment != NULL) xmlFree(uri->fragment);
1361	uri->fragment = NULL;
1362	if (uri->opaque != NULL) xmlFree(uri->opaque);
1363	uri->opaque = NULL;
1364	if (uri->authority != NULL) xmlFree(uri->authority);
1365	uri->authority = NULL;
1366	if (uri->query != NULL) xmlFree(uri->query);
1367	uri->query = NULL;
1368	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369	uri->query_raw = NULL;
1370	}
1371
1372	/**
1373	* xmlFreeURI:
1374	* @uri: pointer to an xmlURI
1375	*
1376	* Free up the xmlURI struct
1377	*/
1378	void
1379	xmlFreeURI(xmlURIPtr uri) {
1380	if (uri == NULL) return;
1381
1382	if (uri->scheme != NULL) xmlFree(uri->scheme);
1383	if (uri->server != NULL) xmlFree(uri->server);
1384	if (uri->user != NULL) xmlFree(uri->user);
1385	if (uri->path != NULL) xmlFree(uri->path);
1386	if (uri->fragment != NULL) xmlFree(uri->fragment);
1387	if (uri->opaque != NULL) xmlFree(uri->opaque);
1388	if (uri->authority != NULL) xmlFree(uri->authority);
1389	if (uri->query != NULL) xmlFree(uri->query);
1390	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391	xmlFree(uri);
1392	}
1393
1394	/************************************************************************
1395	* *
1396	* Helper functions *
1397	* *
1398	************************************************************************/
1399
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive '/' between
 * segments are collapsed to a single '/' as well.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Source and destination overlap, so strcpy()/memcpy() are not
         * allowed here; memmove() is. The terminator is moved as well.
         */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths starting with
     * a '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1588
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
1596
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits inside the @len
 * window is copied unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-supplied @target needs room for
 * @len + 1 bytes (the result is always NUL terminated).
 *
 * Returns @target if it was provided, otherwise a newly allocated copy of
 * the string, unescaped; returns NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    /* strlen() result did not fit in an int */
    if (len < 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            /* Accumulate in an int so the value never passes through a
             * possibly signed char before it is complete. */
            int c = 0;

            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = (*in - 'A') + 10;
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = c * 16 + (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = c * 16 + (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = c * 16 + (*in - 'A') + 10;
            in++;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659	/**
1660	* xmlURIEscapeStr:
1661	* @str: string to escape
1662	* @list: exception list string of chars not to escape
1663	*
1664	* This routine escapes a string to hex, ignoring reserved characters (a-z)
1665	* and the characters in the exception list.
1666	*
1667	* Returns a new escaped string or NULL in case of error.
1668	*/
1669	xmlChar *
1670	xmlURIEscapeStr(const xmlChar str, const* xmlChar *list) {
1671	xmlChar *ret, ch;
1672	xmlChar *temp;
1673	const xmlChar *in;
1674	int len, out;
1675
1676	if (str == NULL)
1677	return(NULL);
1678	if (str[`0`] == `0`)
1679	return(xmlStrdup(str));
1680	len = xmlStrlen(str);
1681	if (!(len > `0`)) return(NULL);
1682
1683	len += `20`;
1684	ret = (xmlChar *) xmlMallocAtomic(len);
1685	if (ret == NULL) {
1686	xmlURIErrMemory("escaping URI value\n");
1687	return(NULL);
1688	}
1689	in = (const xmlChar *) str;
1690	out = `0`;
1691	while(*in != `0`) {
1692	if (len - out <= `3`) {
1693	temp = xmlSaveUriRealloc(ret, &len);
1694	if (temp == NULL) {
1695	xmlURIErrMemory("escaping URI value\n");
1696	xmlFree(ret);
1697	return(NULL);
1698	}
1699	ret = temp;
1700	}
1701
1702	ch = *in;
1703
1704	if ((ch != `'@'`) && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705	unsigned char val;
1706	ret[out++] = `'%'`;
1707	val = ch >> `4`;
1708	if (val <= `9`)
1709	ret[out++] = `'0'` + val;
1710	else
1711	ret[out++] = `'A'` + val - `0xA`;
1712	val = ch & `0xF`;
1713	if (val <= `9`)
1714	ret[out++] = `'0'` + val;
1715	else
1716	ret[out++] = `'A'` + val - `0xA`;
1717	in++;
1718	} else {
1719	ret[out++] = *in++;
1720	}
1721
1722	}
1723	ret[out] = `0`;
1724	return(ret);
1725	}
1726
1727	/**
1728	* xmlURIEscape:
1729	* @str: the string of the URI to escape
1730	*
1731	* Escaping routine, does not do validity checks !
1732	* It will try to escape the chars needing this, but this is heuristic
1733	* based it's impossible to be sure.
1734	*
1735	* Returns an copy of the string, but escaped
1736	*
1737	* 25 May 2001
1738	* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739	* according to RFC2396.
1740	* - Carl Douglas
1741	*/
1742	xmlChar *
1743	xmlURIEscape(const xmlChar * str)
1744	{
1745	xmlChar ret, segment = NULL;
1746	xmlURIPtr uri;
1747	int ret2;
1748
1749	#define NULLCHK(p) if(!p) { \
1750	xmlURIErrMemory("escaping URI value\n"); \
1751	xmlFreeURI(uri); \
1752	return NULL; } \
1753
1754	if (str == NULL)
1755	return (NULL);
1756
1757	uri = xmlCreateURI();
1758	if (uri != NULL) {
1759	/*
1760	* Allow escaping errors in the unescaped form
1761	*/
1762	uri->cleanup = `1`;
1763	ret2 = xmlParseURIReference(uri, (const char *)str);
1764	if (ret2) {
1765	xmlFreeURI(uri);
1766	return (NULL);
1767	}
1768	}
1769
1770	if (!uri)
1771	return NULL;
1772
1773	ret = NULL;
1774
1775	if (uri->scheme) {
1776	segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777	NULLCHK(segment)
1778	ret = xmlStrcat(ret, segment);
1779	ret = xmlStrcat(ret, BAD_CAST ":");
1780	xmlFree(segment);
1781	}
1782
1783	if (uri->authority) {
1784	segment =
1785	xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786	NULLCHK(segment)
1787	ret = xmlStrcat(ret, BAD_CAST "//");
1788	ret = xmlStrcat(ret, segment);
1789	xmlFree(segment);
1790	}
1791
1792	if (uri->user) {
1793	segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794	NULLCHK(segment)
1795	ret = xmlStrcat(ret,BAD_CAST "//");
1796	ret = xmlStrcat(ret, segment);
1797	ret = xmlStrcat(ret, BAD_CAST "@");
1798	xmlFree(segment);
1799	}
1800
1801	if (uri->server) {
1802	segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803	NULLCHK(segment)
1804	if (uri->user == NULL)
1805	ret = xmlStrcat(ret, BAD_CAST "//");
1806	ret = xmlStrcat(ret, segment);
1807	xmlFree(segment);
1808	}
1809
1810	if (uri->port) {
1811	xmlChar port[`10`];
1812
1813	snprintf((char *) port, `10`, "%d", uri->port);
1814	ret = xmlStrcat(ret, BAD_CAST ":");
1815	ret = xmlStrcat(ret, port);
1816	}
1817
1818	if (uri->path) {
1819	segment =
1820	xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821	NULLCHK(segment)
1822	ret = xmlStrcat(ret, segment);
1823	xmlFree(segment);
1824	}
1825
1826	if (uri->query_raw) {
1827	ret = xmlStrcat(ret, BAD_CAST "?");
1828	ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829	}
1830	else if (uri->query) {
1831	segment =
1832	xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833	NULLCHK(segment)
1834	ret = xmlStrcat(ret, BAD_CAST "?");
1835	ret = xmlStrcat(ret, segment);
1836	xmlFree(segment);
1837	}
1838
1839	if (uri->opaque) {
1840	segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841	NULLCHK(segment)
1842	ret = xmlStrcat(ret, segment);
1843	xmlFree(segment);
1844	}
1845
1846	if (uri->fragment) {
1847	segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848	NULLCHK(segment)
1849	ret = xmlStrcat(ret, BAD_CAST "#");
1850	ret = xmlStrcat(ret, segment);
1851	xmlFree(segment);
1852	}
1853
1854	xmlFreeURI(uri);
1855	#undef NULLCHK
1856
1857	return (ret);
1858	}
1859
1860	/************************************************************************
1861	* *
1862	* Public functions *
1863	* *
1864	************************************************************************/
1865
1866	/**
1867	* xmlBuildURI:
1868	* @URI: the URI instance found in the document
1869	* @base: the base value
1870	*
1871	* Computes he final URI of the reference done by checking that
1872	* the given URI is valid, and building the final URI using the
1873	* base URI. This is processed according to section 5.2 of the
1874	* RFC 2396
1875	*
1876	* 5.2. Resolving Relative References to Absolute Form
1877	*
1878	* Returns a new URI string (to be freed by the caller) or NULL in case
1879	* of error.
1880	*/
1881	xmlChar *
1882	xmlBuildURI(const xmlChar URI, const* xmlChar *base) {
1883	xmlChar *val = NULL;
1884	int ret, len, indx, cur, out;
1885	xmlURIPtr ref = NULL;
1886	xmlURIPtr bas = NULL;
1887	xmlURIPtr res = NULL;
1888
1889	/*
1890	* 1) The URI reference is parsed into the potential four components and
1891	* fragment identifier, as described in Section 4.3.
1892	*
1893	* NOTE that a completely empty URI is treated by modern browsers
1894	* as a reference to "." rather than as a synonym for the current
1895	* URI. Should we do that here?
1896	*/
1897	if (URI == NULL)
1898	ret = -`1`;
1899	else {
1900	if (*URI) {
1901	ref = xmlCreateURI();
1902	if (ref == NULL)
1903	goto done;
1904	ret = xmlParseURIReference(ref, (const char *) URI);
1905	}
1906	else
1907	ret = `0`;
1908	}
1909	if (ret != `0`)
1910	goto done;
1911	if ((ref != NULL) && (ref->scheme != NULL)) {
1912	/*
1913	* The URI is absolute don't modify.
1914	*/
1915	val = xmlStrdup(URI);
1916	goto done;
1917	}
1918	if (base == NULL)
1919	ret = -`1`;
1920	else {
1921	bas = xmlCreateURI();
1922	if (bas == NULL)
1923	goto done;
1924	ret = xmlParseURIReference(bas, (const char *) base);
1925	}
1926	if (ret != `0`) {
1927	if (ref)
1928	val = xmlSaveUri(ref);
1929	goto done;
1930	}
1931	if (ref == NULL) {
1932	/*
1933	* the base fragment must be ignored
1934	*/
1935	if (bas->fragment != NULL) {
1936	xmlFree(bas->fragment);
1937	bas->fragment = NULL;
1938	}
1939	val = xmlSaveUri(bas);
1940	goto done;
1941	}
1942
1943	/*
1944	* 2) If the path component is empty and the scheme, authority, and
1945	* query components are undefined, then it is a reference to the
1946	* current document and we are done. Otherwise, the reference URI's
1947	* query and fragment components are defined as found (or not found)
1948	* within the URI reference and not inherited from the base URI.
1949	*
1950	* NOTE that in modern browsers, the parsing differs from the above
1951	* in the following aspect: the query component is allowed to be
1952	* defined while still treating this as a reference to the current
1953	* document.
1954	*/
1955	res = xmlCreateURI();
1956	if (res == NULL)
1957	goto done;
1958	if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959	((ref->authority == NULL) && (ref->server == NULL))) {
1960	if (bas->scheme != NULL)
1961	res->scheme = xmlMemStrdup(bas->scheme);
1962	if (bas->authority != NULL)
1963	res->authority = xmlMemStrdup(bas->authority);
1964	else if ((bas->server != NULL) \|\| (bas->port == -`1`)) {
1965	if (bas->server != NULL)
1966	res->server = xmlMemStrdup(bas->server);
1967	if (bas->user != NULL)
1968	res->user = xmlMemStrdup(bas->user);
1969	res->port = bas->port;
1970	}
1971	if (bas->path != NULL)
1972	res->path = xmlMemStrdup(bas->path);
1973	if (ref->query_raw != NULL)
1974	res->query_raw = xmlMemStrdup (ref->query_raw);
1975	else if (ref->query != NULL)
1976	res->query = xmlMemStrdup(ref->query);
1977	else if (bas->query_raw != NULL)
1978	res->query_raw = xmlMemStrdup(bas->query_raw);
1979	else if (bas->query != NULL)
1980	res->query = xmlMemStrdup(bas->query);
1981	if (ref->fragment != NULL)
1982	res->fragment = xmlMemStrdup(ref->fragment);
1983	goto step_7;
1984	}
1985
1986	/*
1987	* 3) If the scheme component is defined, indicating that the reference
1988	* starts with a scheme name, then the reference is interpreted as an
1989	* absolute URI and we are done. Otherwise, the reference URI's
1990	* scheme is inherited from the base URI's scheme component.
1991	*/
1992	if (ref->scheme != NULL) {
1993	val = xmlSaveUri(ref);
1994	goto done;
1995	}
1996	if (bas->scheme != NULL)
1997	res->scheme = xmlMemStrdup(bas->scheme);
1998
1999	if (ref->query_raw != NULL)
2000	res->query_raw = xmlMemStrdup(ref->query_raw);
2001	else if (ref->query != NULL)
2002	res->query = xmlMemStrdup(ref->query);
2003	if (ref->fragment != NULL)
2004	res->fragment = xmlMemStrdup(ref->fragment);
2005
2006	/*
2007	* 4) If the authority component is defined, then the reference is a
2008	* network-path and we skip to step 7. Otherwise, the reference
2009	* URI's authority is inherited from the base URI's authority
2010	* component, which will also be undefined if the URI scheme does not
2011	* use an authority component.
2012	*/
2013	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
2014	if (ref->authority != NULL)
2015	res->authority = xmlMemStrdup(ref->authority);
2016	else {
2017	res->server = xmlMemStrdup(ref->server);
2018	if (ref->user != NULL)
2019	res->user = xmlMemStrdup(ref->user);
2020	res->port = ref->port;
2021	}
2022	if (ref->path != NULL)
2023	res->path = xmlMemStrdup(ref->path);
2024	goto step_7;
2025	}
2026	if (bas->authority != NULL)
2027	res->authority = xmlMemStrdup(bas->authority);
2028	else if ((bas->server != NULL) \|\| (bas->port == -`1`)) {
2029	if (bas->server != NULL)
2030	res->server = xmlMemStrdup(bas->server);
2031	if (bas->user != NULL)
2032	res->user = xmlMemStrdup(bas->user);
2033	res->port = bas->port;
2034	}
2035
2036	/*
2037	* 5) If the path component begins with a slash character ("/"), then
2038	* the reference is an absolute-path and we skip to step 7.
2039	*/
2040	if ((ref->path != NULL) && (ref->path[`0`] == `'/'`)) {
2041	res->path = xmlMemStrdup(ref->path);
2042	goto step_7;
2043	}
2044
2045
2046	/*
2047	* 6) If this step is reached, then we are resolving a relative-path
2048	* reference. The relative path needs to be merged with the base
2049	* URI's path. Although there are many ways to do this, we will
2050	* describe a simple method using a separate string buffer.
2051	*
2052	* Allocate a buffer large enough for the result string.
2053	*/
2054	len = `2`; / extra / and 0 /
2055	if (ref->path != NULL)
2056	len += strlen(ref->path);
2057	if (bas->path != NULL)
2058	len += strlen(bas->path);
2059	res->path = (char *) xmlMallocAtomic(len);
2060	if (res->path == NULL) {
2061	xmlURIErrMemory("resolving URI against base\n");
2062	goto done;
2063	}
2064	res->path[`0`] = `0`;
2065
2066	/*
2067	* a) All but the last segment of the base URI's path component is
2068	* copied to the buffer. In other words, any characters after the
2069	* last (right-most) slash character, if any, are excluded.
2070	*/
2071	cur = `0`;
2072	out = `0`;
2073	if (bas->path != NULL) {
2074	while (bas->path[cur] != `0`) {
2075	while ((bas->path[cur] != `0`) && (bas->path[cur] != `'/'`))
2076	cur++;
2077	if (bas->path[cur] == `0`)
2078	break;
2079
2080	cur++;
2081	while (out < cur) {
2082	res->path[out] = bas->path[out];
2083	out++;
2084	}
2085	}
2086	}
2087	res->path[out] = `0`;
2088
2089	/*
2090	* b) The reference's path component is appended to the buffer
2091	* string.
2092	*/
2093	if (ref->path != NULL && ref->path[`0`] != `0`) {
2094	indx = `0`;
2095	/*
2096	* Ensure the path includes a '/'
2097	*/
2098	if ((out == `0`) && (bas->server != NULL))
2099	res->path[out++] = `'/'`;
2100	while (ref->path[indx] != `0`) {
2101	res->path[out++] = ref->path[indx++];
2102	}
2103	}
2104	res->path[out] = `0`;
2105
2106	/*
2107	* Steps c) to h) are really path normalization steps
2108	*/
2109	xmlNormalizeURIPath(res->path);
2110
2111	step_7:
2112
2113	/*
2114	* 7) The resulting URI components, including any inherited from the
2115	* base URI, are recombined to give the absolute form of the URI
2116	* reference.
2117	*/
2118	val = xmlSaveUri(res);
2119
2120	done:
2121	if (ref != NULL)
2122	xmlFreeURI(ref);
2123	if (bas != NULL)
2124	xmlFreeURI(bas);
2125	if (res != NULL)
2126	xmlFreeURI(res);
2127	return(val);
2128	}
2129
2130	/**
2131	* xmlBuildRelativeURI:
2132	* @URI: the URI reference under consideration
2133	* @base: the base value
2134	*
2135	* Expresses the URI of the reference in terms relative to the
2136	* base. Some examples of this operation include:
2137	* base = "http://site1.com/docs/book1.html"
2138	* URI input URI returned
2139	* docs/pic1.gif pic1.gif
2140	* docs/img/pic1.gif img/pic1.gif
2141	* img/pic1.gif ../img/pic1.gif
2142	* http://site1.com/docs/pic1.gif pic1.gif
2143	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2144	*
2145	* base = "docs/book1.html"
2146	* URI input URI returned
2147	* docs/pic1.gif pic1.gif
2148	* docs/img/pic1.gif img/pic1.gif
2149	* img/pic1.gif ../img/pic1.gif
2150	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2151	*
2152	*
2153	* Note: if the URI reference is really wierd or complicated, it may be
2154	* worthwhile to first convert it into a "nice" one by calling
2155	* xmlBuildURI (using 'base') before calling this routine,
2156	* since this routine (for reasonable efficiency) assumes URI has
2157	* already been through some validation.
2158	*
2159	* Returns a new URI string (to be freed by the caller) or NULL in case
2160	* error.
2161	*/
2162	xmlChar *
2163	xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2164	{
2165	xmlChar *val = NULL;
2166	int ret;
2167	int ix;
2168	int nbslash = `0`;
2169	int len;
2170	xmlURIPtr ref = NULL;
2171	xmlURIPtr bas = NULL;
2172	xmlChar bptr, uptr, *vptr;
2173	int remove_path = `0`;
2174
2175	if ((URI == NULL) \|\| (*URI == `0`))
2176	return NULL;
2177
2178	/*
2179	* First parse URI into a standard form
2180	*/
2181	ref = xmlCreateURI ();
2182	if (ref == NULL)
2183	return NULL;
2184	/ If URI not already in "relative" form /
2185	if (URI[`0`] != `'.'`) {
2186	ret = xmlParseURIReference (ref, (const char *) URI);
2187	if (ret != `0`)
2188	goto done; / Error in URI, return NULL /
2189	} else
2190	ref->path = (char *)xmlStrdup(URI);
2191
2192	/*
2193	* Next parse base into the same standard form
2194	*/
2195	if ((base == NULL) \|\| (*base == `0`)) {
2196	val = xmlStrdup (URI);
2197	goto done;
2198	}
2199	bas = xmlCreateURI ();
2200	if (bas == NULL)
2201	goto done;
2202	if (base[`0`] != `'.'`) {
2203	ret = xmlParseURIReference (bas, (const char *) base);
2204	if (ret != `0`)
2205	goto done; / Error in base, return NULL /
2206	} else
2207	bas->path = (char *)xmlStrdup(base);
2208
2209	/*
2210	* If the scheme / server on the URI differs from the base,
2211	* just return the URI
2212	*/
2213	if ((ref->scheme != NULL) &&
2214	((bas->scheme == NULL) \|\|
2215	(xmlStrcmp ((xmlChar )bas->scheme, (xmlChar )ref->scheme)) \|\|
2216	(xmlStrcmp ((xmlChar )bas->server, (xmlChar )ref->server)))) {
2217	val = xmlStrdup (URI);
2218	goto done;
2219	}
2220	if (xmlStrEqual((xmlChar )bas->path, (xmlChar )ref->path)) {
2221	val = xmlStrdup(BAD_CAST "");
2222	goto done;
2223	}
2224	if (bas->path == NULL) {
2225	val = xmlStrdup((xmlChar *)ref->path);
2226	goto done;
2227	}
2228	if (ref->path == NULL) {
2229	ref->path = (char *) "/";
2230	remove_path = `1`;
2231	}
2232
2233	/*
2234	* At this point (at last!) we can compare the two paths
2235	*
2236	* First we take care of the special case where either of the
2237	* two path components may be missing (bug 316224)
2238	*/
2239	if (bas->path == NULL) {
2240	if (ref->path != NULL) {
2241	uptr = (xmlChar *) ref->path;
2242	if (*uptr == `'/'`)
2243	uptr++;
2244	/ exception characters from xmlSaveUri /
2245	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2246	}
2247	goto done;
2248	}
2249	bptr = (xmlChar *)bas->path;
2250	if (ref->path == NULL) {
2251	for (ix = `0`; bptr[ix] != `0`; ix++) {
2252	if (bptr[ix] == `'/'`)
2253	nbslash++;
2254	}
2255	uptr = NULL;
2256	len = `1`; / this is for a string terminator only /
2257	} else {
2258	xmlChar rptr = (xmlChar ) ref->path;
2259	int pos = `0`;
2260
2261	/*
2262	* Next we compare the two strings and find where they first differ
2263	*/
2264	if ((*rptr == `'.'`) && (rptr[`1`] == `'/'`))
2265	rptr += `2`;
2266	if ((*bptr == `'.'`) && (bptr[`1`] == `'/'`))
2267	bptr += `2`;
2268	else if ((bptr == `'/'`) && (rptr != `'/'`))
2269	bptr++;
2270	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != `0`))
2271	pos++;
2272
2273	if (bptr[pos] == rptr[pos]) {
2274	val = xmlStrdup(BAD_CAST "");
2275	goto done; / (I can't imagine why anyone would do this) /
2276	}
2277
2278	/*
2279	* In URI, "back up" to the last '/' encountered. This will be the
2280	* beginning of the "unique" suffix of URI
2281	*/
2282	ix = pos;
2283	if ((rptr[ix] == `'/'`) && (ix > `0`))
2284	ix--;
2285	else if ((rptr[ix] == `0`) && (ix > `1`) && (rptr[ix - `1`] == `'/'`))
2286	ix -= `2`;
2287	for (; ix > `0`; ix--) {
2288	if (rptr[ix] == `'/'`)
2289	break;
2290	}
2291	if (ix == `0`) {
2292	uptr = (xmlChar *)rptr;
2293	} else {
2294	ix++;
2295	uptr = (xmlChar *)&rptr[ix];
2296	}
2297
2298	/*
2299	* In base, count the number of '/' from the differing point
2300	*/
2301	if (bptr[pos] != rptr[pos]) {/ check for trivial URI == base /
2302	for (; bptr[ix] != `0`; ix++) {
2303	if (bptr[ix] == `'/'`)
2304	nbslash++;
2305	}
2306	}
2307	len = xmlStrlen (uptr) + `1`;
2308	}
2309
2310	if (nbslash == `0`) {
2311	if (uptr != NULL)
2312	/ exception characters from xmlSaveUri /
2313	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2314	goto done;
2315	}
2316
2317	/*
2318	* Allocate just enough space for the returned string -
2319	* length of the remainder of the URI, plus enough space
2320	* for the "../" groups, plus one for the terminator
2321	*/
2322	val = (xmlChar ) xmlMalloc (len + `3` nbslash);
2323	if (val == NULL) {
2324	xmlURIErrMemory("building relative URI\n");
2325	goto done;
2326	}
2327	vptr = val;
2328	/*
2329	* Put in as many "../" as needed
2330	*/
2331	for (; nbslash>`0`; nbslash--) {
2332	*vptr++ = `'.'`;
2333	*vptr++ = `'.'`;
2334	*vptr++ = `'/'`;
2335	}
2336	/*
2337	* Finish up with the end of the URI
2338	*/
2339	if (uptr != NULL) {
2340	if ((vptr > val) && (len > `0`) &&
2341	(uptr[`0`] == `'/'`) && (vptr[-`1`] == `'/'`)) {
2342	memcpy (vptr, uptr + `1`, len - `1`);
2343	vptr[len - `2`] = `0`;
2344	} else {
2345	memcpy (vptr, uptr, len);
2346	vptr[len - `1`] = `0`;
2347	}
2348	} else {
2349	vptr[len - `1`] = `0`;
2350	}
2351
2352	/ escape the freshly-built path /
2353	vptr = val;
2354	/ exception characters from xmlSaveUri /
2355	val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2356	xmlFree(vptr);
2357
2358	done:
2359	/*
2360	* Free the working variables
2361	*/
2362	if (remove_path != `0`)
2363	ref->path = NULL;
2364	if (ref != NULL)
2365	xmlFreeURI (ref);
2366	if (bas != NULL)
2367	xmlFreeURI (bas);
2368
2369	return val;
2370	}
2371
2372	/**
2373	* xmlCanonicPath:
2374	* @path: the resource locator in a filesystem notation
2375	*
2376	* Constructs a canonic path from the specified path.
2377	*
2378	* Returns a new canonic path, or a duplicate of the path parameter if the
2379	* construction fails. The caller is responsible for freeing the memory occupied
2380	* by the returned string. If there is insufficient memory available, or the
2381	* argument is NULL, the function returns NULL.
2382	*/
/*
 * IS_WINDOWS_PATH:
 * Evaluates to non-zero when p is a non-NULL string that begins with an
 * ASCII drive letter, a ':' and then either '/' or a backslash.
 * Every use of the argument is parenthesized so that expressions such as
 * (str + 1) expand correctly. The argument is evaluated several times and
 * must therefore be free of side effects.
 */
#define IS_WINDOWS_PATH(p)                                      \
    (((p) != NULL) &&                                           \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||                   \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&                  \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2388	xmlChar *
2389	xmlCanonicPath(const xmlChar *path)
2390	{
2391	/*
2392	* For Windows implementations, additional work needs to be done to
2393	* replace backslashes in pathnames with "forward slashes"
2394	*/
2395	#if defined(_WIN32) && !defined(__CYGWIN__)
2396	int len = `0`;
2397	char *p = NULL;
2398	#endif
2399	xmlURIPtr uri;
2400	xmlChar *ret;
2401	const xmlChar *absuri;
2402
2403	if (path == NULL)
2404	return(NULL);
2405
2406	#if defined(_WIN32)
2407	/*
2408	* We must not change the backslashes to slashes if the the path
2409	* starts with \\?\
2410	* Those paths can be up to 32k characters long.
2411	* Was added specifically for OpenOffice, those paths can't be converted
2412	* to URIs anyway.
2413	*/
2414	if ((path[`0`] == `'\\'`) && (path[`1`] == `'\\'`) && (path[`2`] == `'?'`) &&
2415	(path[`3`] == `'\\'`) )
2416	return xmlStrdup((const xmlChar *) path);
2417	#endif
2418
2419	/ sanitize filename starting with // so it can be used as URI /
2420	if ((path[`0`] == `'/'`) && (path[`1`] == `'/'`) && (path[`2`] != `'/'`))
2421	path++;
2422
2423	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2424	xmlFreeURI(uri);
2425	return xmlStrdup(path);
2426	}
2427
2428	/ Check if this is an "absolute uri" /
2429	absuri = xmlStrstr(path, BAD_CAST "://");
2430	if (absuri != NULL) {
2431	int l, j;
2432	unsigned char c;
2433	xmlChar *escURI;
2434
2435	/*
2436	* this looks like an URI where some parts have not been
2437	* escaped leading to a parsing problem. Check that the first
2438	* part matches a protocol.
2439	*/
2440	l = absuri - path;
2441	/ Bypass if first part (part before the '://') is > 20 chars /
2442	if ((l <= `0`) \|\| (l > `20`))
2443	goto path_processing;
2444	/ Bypass if any non-alpha characters are present in first part /
2445	for (j = `0`;j < l;j++) {
2446	c = path[j];
2447	if (!(((c >= `'a'`) && (c <= `'z'`)) \|\| ((c >= `'A'`) && (c <= `'Z'`))))
2448	goto path_processing;
2449	}
2450
2451	/ Escape all except the characters specified in the supplied path /
2452	escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2453	if (escURI != NULL) {
2454	/ Try parsing the escaped path /
2455	uri = xmlParseURI((const char *) escURI);
2456	/ If successful, return the escaped string /
2457	if (uri != NULL) {
2458	xmlFreeURI(uri);
2459	return escURI;
2460	}
2461	xmlFree(escURI);
2462	}
2463	}
2464
2465	path_processing:
2466	/ For Windows implementations, replace backslashes with 'forward slashes' /
2467	#if defined(_WIN32) && !defined(__CYGWIN__)
2468	/*
2469	* Create a URI structure
2470	*/
2471	uri = xmlCreateURI();
2472	if (uri == NULL) { / Guard against 'out of memory' /
2473	return(NULL);
2474	}
2475
2476	len = xmlStrlen(path);
2477	if ((len > `2`) && IS_WINDOWS_PATH(path)) {
2478	/ make the scheme 'file' /
2479	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2480	/ allocate space for leading '/' + path + string terminator /
2481	uri->path = xmlMallocAtomic(len + `2`);
2482	if (uri->path == NULL) {
2483	xmlFreeURI(uri); / Guard agains 'out of memory' /
2484	return(NULL);
2485	}
2486	/ Put in leading '/' plus path /
2487	uri->path[`0`] = `'/'`;
2488	p = uri->path + `1`;
2489	strncpy(p, (char *) path, len + `1`);
2490	} else {
2491	uri->path = (char *) xmlStrdup(path);
2492	if (uri->path == NULL) {
2493	xmlFreeURI(uri);
2494	return(NULL);
2495	}
2496	p = uri->path;
2497	}
2498	/ Now change all occurences of '\' to '/' /
2499	while (*p != `'\0'`) {
2500	if (*p == `'\\'`)
2501	*p = `'/'`;
2502	p++;
2503	}
2504
2505	if (uri->scheme == NULL) {
2506	ret = xmlStrdup((const xmlChar *) uri->path);
2507	} else {
2508	ret = xmlSaveUri(uri);
2509	}
2510
2511	xmlFreeURI(uri);
2512	#else
2513	ret = xmlStrdup((const xmlChar *) path);
2514	#endif
2515	return(ret);
2516	}
2517
2518	/**
2519	* xmlPathToURI:
2520	* @path: the resource locator in a filesystem notation
2521	*
2522	* Constructs an URI expressing the existing path
2523	*
2524	* Returns a new URI, or a duplicate of the path parameter if the
2525	* construction fails. The caller is responsible for freeing the memory
2526	* occupied by the returned string. If there is insufficient memory available,
2527	* or the argument is NULL, the function returns NULL.
2528	*/
2529	xmlChar *
2530	xmlPathToURI(const xmlChar *path)
2531	{
2532	xmlURIPtr uri;
2533	xmlURI temp;
2534	xmlChar ret, cal;
2535
2536	if (path == NULL)
2537	return(NULL);
2538
2539	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2540	xmlFreeURI(uri);
2541	return xmlStrdup(path);
2542	}
2543	cal = xmlCanonicPath(path);
2544	if (cal == NULL)
2545	return(NULL);
2546	#if defined(_WIN32) && !defined(__CYGWIN__)
2547	/ xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)*
2548	If 'cal' is a valid URI allready then we are done here, as continuing would make
2549	it invalid. /*
2550	if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2551	xmlFreeURI(uri);
2552	return cal;
2553	}
2554	/ 'cal' can contain a relative path with backslashes. If that is processed*
2555	by xmlSaveURI, they will be escaped and the external entity loader machinery
2556	will fail. So convert them to slashes. Misuse 'ret' for walking. /*
2557	ret = cal;
2558	while (*ret != `'\0'`) {
2559	if (*ret == `'\\'`)
2560	*ret = `'/'`;
2561	ret++;
2562	}
2563	#endif
2564	memset(&temp, `0`, sizeof(temp));
2565	temp.path = (char *) cal;
2566	ret = xmlSaveUri(&temp);
2567	xmlFree(cal);
2568	return(ret);
2569	}
2570	#define bottom_uri
2571	#include "elfgcchack.h"
2572

Browse the source code of ClickHouse/contrib/libxml2/uri.c