1//
2// URI.cpp
3//
4// Library: Foundation
5// Package: URI
6// Module: URI
7//
8// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
9// and Contributors.
10//
11// SPDX-License-Identifier: BSL-1.0
12//
13
14
15#include "Poco/URI.h"
16#include "Poco/NumberFormatter.h"
17#include "Poco/Exception.h"
18#include "Poco/String.h"
19#include "Poco/NumberParser.h"
20#include "Poco/Path.h"
21
22
23namespace Poco {
24
25
26const std::string URI::RESERVED_PATH = "?#";
27const std::string URI::RESERVED_QUERY = "?#/:;+@";
28const std::string URI::RESERVED_QUERY_PARAM = "?#/:;+@&=";
29const std::string URI::RESERVED_FRAGMENT = "";
30const std::string URI::ILLEGAL = "%<>{}|\\\"^`!*'()$,[]";
31
32
33URI::URI():
34 _port(0)
35{
36}
37
38
39URI::URI(const std::string& uri):
40 _port(0)
41{
42 parse(uri);
43}
44
45
46URI::URI(const char* uri):
47 _port(0)
48{
49 parse(std::string(uri));
50}
51
52
53URI::URI(const std::string& scheme, const std::string& pathEtc):
54 _scheme(scheme),
55 _port(0)
56{
57 toLowerInPlace(_scheme);
58 _port = getWellKnownPort();
59 std::string::const_iterator beg = pathEtc.begin();
60 std::string::const_iterator end = pathEtc.end();
61 parsePathEtc(beg, end);
62}
63
64
65URI::URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc):
66 _scheme(scheme)
67{
68 toLowerInPlace(_scheme);
69 std::string::const_iterator beg = authority.begin();
70 std::string::const_iterator end = authority.end();
71 parseAuthority(beg, end);
72 beg = pathEtc.begin();
73 end = pathEtc.end();
74 parsePathEtc(beg, end);
75}
76
77
78URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query):
79 _scheme(scheme),
80 _path(path),
81 _query(query)
82{
83 toLowerInPlace(_scheme);
84 std::string::const_iterator beg = authority.begin();
85 std::string::const_iterator end = authority.end();
86 parseAuthority(beg, end);
87}
88
89
90URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment):
91 _scheme(scheme),
92 _path(path),
93 _query(query),
94 _fragment(fragment)
95{
96 toLowerInPlace(_scheme);
97 std::string::const_iterator beg = authority.begin();
98 std::string::const_iterator end = authority.end();
99 parseAuthority(beg, end);
100}
101
102
103URI::URI(const URI& uri):
104 _scheme(uri._scheme),
105 _userInfo(uri._userInfo),
106 _host(uri._host),
107 _port(uri._port),
108 _path(uri._path),
109 _query(uri._query),
110 _fragment(uri._fragment)
111{
112}
113
114
115URI::URI(const URI& baseURI, const std::string& relativeURI):
116 _scheme(baseURI._scheme),
117 _userInfo(baseURI._userInfo),
118 _host(baseURI._host),
119 _port(baseURI._port),
120 _path(baseURI._path),
121 _query(baseURI._query),
122 _fragment(baseURI._fragment)
123{
124 resolve(relativeURI);
125}
126
127
128URI::URI(const Path& path):
129 _scheme("file"),
130 _port(0)
131{
132 Path absolutePath(path);
133 absolutePath.makeAbsolute();
134 _path = absolutePath.toString(Path::PATH_UNIX);
135}
136
137
138URI::~URI()
139{
140}
141
142
143URI& URI::operator = (const URI& uri)
144{
145 if (&uri != this)
146 {
147 _scheme = uri._scheme;
148 _userInfo = uri._userInfo;
149 _host = uri._host;
150 _port = uri._port;
151 _path = uri._path;
152 _query = uri._query;
153 _fragment = uri._fragment;
154 }
155 return *this;
156}
157
158
159URI& URI::operator = (const std::string& uri)
160{
161 clear();
162 parse(uri);
163 return *this;
164}
165
166
167URI& URI::operator = (const char* uri)
168{
169 clear();
170 parse(std::string(uri));
171 return *this;
172}
173
174
175void URI::swap(URI& uri)
176{
177 std::swap(_scheme, uri._scheme);
178 std::swap(_userInfo, uri._userInfo);
179 std::swap(_host, uri._host);
180 std::swap(_port, uri._port);
181 std::swap(_path, uri._path);
182 std::swap(_query, uri._query);
183 std::swap(_fragment, uri._fragment);
184}
185
186
187void URI::clear()
188{
189 _scheme.clear();
190 _userInfo.clear();
191 _host.clear();
192 _port = 0;
193 _path.clear();
194 _query.clear();
195 _fragment.clear();
196}
197
198
199std::string URI::toString() const
200{
201 std::string uri;
202 if (isRelative())
203 {
204 encode(_path, RESERVED_PATH, uri);
205 }
206 else
207 {
208 uri = _scheme;
209 uri += ':';
210 std::string auth = getAuthority();
211 if (!auth.empty() || _scheme == "file")
212 {
213 uri.append("//");
214 uri.append(auth);
215 }
216 if (!_path.empty())
217 {
218 if (!auth.empty() && _path[0] != '/')
219 uri += '/';
220 encode(_path, RESERVED_PATH, uri);
221 }
222 else if (!_query.empty() || !_fragment.empty())
223 {
224 uri += '/';
225 }
226 }
227 if (!_query.empty())
228 {
229 uri += '?';
230 uri.append(_query);
231 }
232 if (!_fragment.empty())
233 {
234 uri += '#';
235 encode(_fragment, RESERVED_FRAGMENT, uri);
236 }
237 return uri;
238}
239
240
241void URI::setScheme(const std::string& scheme)
242{
243 _scheme = scheme;
244 toLowerInPlace(_scheme);
245 if (_port == 0)
246 _port = getWellKnownPort();
247}
248
249
250void URI::setUserInfo(const std::string& userInfo)
251{
252 _userInfo.clear();
253 decode(userInfo, _userInfo);
254}
255
256
257void URI::setHost(const std::string& host)
258{
259 _host = host;
260}
261
262
263unsigned short URI::getPort() const
264{
265 if (_port == 0)
266 return getWellKnownPort();
267 else
268 return _port;
269}
270
271
272void URI::setPort(unsigned short port)
273{
274 _port = port;
275}
276
277
278std::string URI::getAuthority() const
279{
280 std::string auth;
281 if (!_userInfo.empty())
282 {
283 auth.append(_userInfo);
284 auth += '@';
285 }
286 if (_host.find(':') != std::string::npos)
287 {
288 auth += '[';
289 auth += _host;
290 auth += ']';
291 }
292 else auth.append(_host);
293 if (_port && !isWellKnownPort())
294 {
295 auth += ':';
296 NumberFormatter::append(auth, _port);
297 }
298 return auth;
299}
300
301
302void URI::setAuthority(const std::string& authority)
303{
304 _userInfo.clear();
305 _host.clear();
306 _port = 0;
307 std::string::const_iterator beg = authority.begin();
308 std::string::const_iterator end = authority.end();
309 parseAuthority(beg, end);
310}
311
312
313void URI::setPath(const std::string& path)
314{
315 _path.clear();
316 decode(path, _path);
317}
318
319
320void URI::setRawQuery(const std::string& query)
321{
322 _query = query;
323}
324
325
326void URI::setQuery(const std::string& query)
327{
328 _query.clear();
329 encode(query, RESERVED_QUERY, _query);
330}
331
332
333void URI::addQueryParameter(const std::string& param, const std::string& val)
334{
335 if (!_query.empty()) _query += '&';
336 encode(param, RESERVED_QUERY_PARAM, _query);
337 _query += '=';
338 encode(val, RESERVED_QUERY_PARAM, _query);
339}
340
341
342std::string URI::getQuery() const
343{
344 std::string query;
345 decode(_query, query);
346 return query;
347}
348
349
350URI::QueryParameters URI::getQueryParameters() const
351{
352 QueryParameters result;
353 std::string::const_iterator it(_query.begin());
354 std::string::const_iterator end(_query.end());
355 while (it != end)
356 {
357 std::string name;
358 std::string value;
359 while (it != end && *it != '=' && *it != '&')
360 {
361 if (*it == '+')
362 name += ' ';
363 else
364 name += *it;
365 ++it;
366 }
367 if (it != end && *it == '=')
368 {
369 ++it;
370 while (it != end && *it != '&')
371 {
372 if (*it == '+')
373 value += ' ';
374 else
375 value += *it;
376 ++it;
377 }
378 }
379 std::string decodedName;
380 std::string decodedValue;
381 URI::decode(name, decodedName);
382 URI::decode(value, decodedValue);
383 result.push_back(std::make_pair(decodedName, decodedValue));
384 if (it != end && *it == '&') ++it;
385 }
386 return result;
387}
388
389
390void URI::setQueryParameters(const QueryParameters& params)
391{
392 _query.clear();
393 for (QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
394 {
395 addQueryParameter(it->first, it->second);
396 }
397}
398
399
400void URI::setFragment(const std::string& fragment)
401{
402 _fragment.clear();
403 decode(fragment, _fragment);
404}
405
406
407void URI::setPathEtc(const std::string& pathEtc)
408{
409 _path.clear();
410 _query.clear();
411 _fragment.clear();
412 std::string::const_iterator beg = pathEtc.begin();
413 std::string::const_iterator end = pathEtc.end();
414 parsePathEtc(beg, end);
415}
416
417
418std::string URI::getPathEtc() const
419{
420 std::string pathEtc;
421 encode(_path, RESERVED_PATH, pathEtc);
422 if (!_query.empty())
423 {
424 pathEtc += '?';
425 pathEtc += _query;
426 }
427 if (!_fragment.empty())
428 {
429 pathEtc += '#';
430 encode(_fragment, RESERVED_FRAGMENT, pathEtc);
431 }
432 return pathEtc;
433}
434
435
436std::string URI::getPathAndQuery() const
437{
438 std::string pathAndQuery;
439 encode(_path, RESERVED_PATH, pathAndQuery);
440 if (!_query.empty())
441 {
442 pathAndQuery += '?';
443 pathAndQuery += _query;
444 }
445 return pathAndQuery;
446}
447
448
449void URI::resolve(const std::string& relativeURI)
450{
451 URI parsedURI(relativeURI);
452 resolve(parsedURI);
453}
454
455
456void URI::resolve(const URI& relativeURI)
457{
458 if (!relativeURI._scheme.empty())
459 {
460 _scheme = relativeURI._scheme;
461 _userInfo = relativeURI._userInfo;
462 _host = relativeURI._host;
463 _port = relativeURI._port;
464 _path = relativeURI._path;
465 _query = relativeURI._query;
466 removeDotSegments();
467 }
468 else
469 {
470 if (!relativeURI._host.empty())
471 {
472 _userInfo = relativeURI._userInfo;
473 _host = relativeURI._host;
474 _port = relativeURI._port;
475 _path = relativeURI._path;
476 _query = relativeURI._query;
477 removeDotSegments();
478 }
479 else
480 {
481 if (relativeURI._path.empty())
482 {
483 if (!relativeURI._query.empty())
484 _query = relativeURI._query;
485 }
486 else
487 {
488 if (relativeURI._path[0] == '/')
489 {
490 _path = relativeURI._path;
491 removeDotSegments();
492 }
493 else
494 {
495 mergePath(relativeURI._path);
496 }
497 _query = relativeURI._query;
498 }
499 }
500 }
501 _fragment = relativeURI._fragment;
502}
503
504
505bool URI::isRelative() const
506{
507 return _scheme.empty();
508}
509
510
511bool URI::empty() const
512{
513 return _scheme.empty() && _host.empty() && _path.empty() && _query.empty() && _fragment.empty();
514}
515
516
517bool URI::operator == (const URI& uri) const
518{
519 return equals(uri);
520}
521
522
523bool URI::operator == (const std::string& uri) const
524{
525 URI parsedURI(uri);
526 return equals(parsedURI);
527}
528
529
530bool URI::operator != (const URI& uri) const
531{
532 return !equals(uri);
533}
534
535
536bool URI::operator != (const std::string& uri) const
537{
538 URI parsedURI(uri);
539 return !equals(parsedURI);
540}
541
542
543bool URI::equals(const URI& uri) const
544{
545 return _scheme == uri._scheme
546 && _userInfo == uri._userInfo
547 && _host == uri._host
548 && getPort() == uri.getPort()
549 && _path == uri._path
550 && _query == uri._query
551 && _fragment == uri._fragment;
552}
553
554
555void URI::normalize()
556{
557 removeDotSegments(!isRelative());
558}
559
560
561void URI::removeDotSegments(bool removeLeading)
562{
563 if (_path.empty()) return;
564
565 bool leadingSlash = *(_path.begin()) == '/';
566 bool trailingSlash = *(_path.rbegin()) == '/';
567 std::vector<std::string> segments;
568 std::vector<std::string> normalizedSegments;
569 getPathSegments(segments);
570 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
571 {
572 if (*it == "..")
573 {
574 if (!normalizedSegments.empty())
575 {
576 if (normalizedSegments.back() == "..")
577 normalizedSegments.push_back(*it);
578 else
579 normalizedSegments.pop_back();
580 }
581 else if (!removeLeading)
582 {
583 normalizedSegments.push_back(*it);
584 }
585 }
586 else if (*it != ".")
587 {
588 normalizedSegments.push_back(*it);
589 }
590 }
591 buildPath(normalizedSegments, leadingSlash, trailingSlash);
592}
593
594
595void URI::getPathSegments(std::vector<std::string>& segments)
596{
597 getPathSegments(_path, segments);
598}
599
600
601void URI::getPathSegments(const std::string& path, std::vector<std::string>& segments)
602{
603 std::string::const_iterator it = path.begin();
604 std::string::const_iterator end = path.end();
605 std::string seg;
606 while (it != end)
607 {
608 if (*it == '/')
609 {
610 if (!seg.empty())
611 {
612 segments.push_back(seg);
613 seg.clear();
614 }
615 }
616 else seg += *it;
617 ++it;
618 }
619 if (!seg.empty())
620 segments.push_back(seg);
621}
622
623
624void URI::encode(const std::string& str, const std::string& reserved, std::string& encodedStr)
625{
626 for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
627 {
628 char c = *it;
629 if ((c >= 'a' && c <= 'z') ||
630 (c >= 'A' && c <= 'Z') ||
631 (c >= '0' && c <= '9') ||
632 c == '-' || c == '_' ||
633 c == '.' || c == '~')
634 {
635 encodedStr += c;
636 }
637 else if (c <= 0x20 || c >= 0x7F || ILLEGAL.find(c) != std::string::npos || reserved.find(c) != std::string::npos)
638 {
639 encodedStr += '%';
640 encodedStr += NumberFormatter::formatHex((unsigned) (unsigned char) c, 2);
641 }
642 else encodedStr += c;
643 }
644}
645
646
647void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpace)
648{
649 bool inQuery = false;
650 std::string::const_iterator it = str.begin();
651 std::string::const_iterator end = str.end();
652 while (it != end)
653 {
654 char c = *it++;
655 if (c == '?') inQuery = true;
656 // spaces may be encoded as plus signs in the query
657 if (inQuery && plusAsSpace && c == '+') c = ' ';
658 else if (c == '%')
659 {
660 if (it == end) throw URISyntaxException("URI encoding: no hex digit following percent sign", str);
661 char hi = *it++;
662 if (it == end) throw URISyntaxException("URI encoding: two hex digits must follow percent sign", str);
663 char lo = *it++;
664 if (hi >= '0' && hi <= '9')
665 c = hi - '0';
666 else if (hi >= 'A' && hi <= 'F')
667 c = hi - 'A' + 10;
668 else if (hi >= 'a' && hi <= 'f')
669 c = hi - 'a' + 10;
670 else throw URISyntaxException("URI encoding: not a hex digit");
671 c *= 16;
672 if (lo >= '0' && lo <= '9')
673 c += lo - '0';
674 else if (lo >= 'A' && lo <= 'F')
675 c += lo - 'A' + 10;
676 else if (lo >= 'a' && lo <= 'f')
677 c += lo - 'a' + 10;
678 else throw URISyntaxException("URI encoding: not a hex digit");
679 }
680 decodedStr += c;
681 }
682}
683
684
685bool URI::isWellKnownPort() const
686{
687 return _port == getWellKnownPort();
688}
689
690
691unsigned short URI::getWellKnownPort() const
692{
693 if (_scheme == "ftp")
694 return 21;
695 else if (_scheme == "ssh")
696 return 22;
697 else if (_scheme == "telnet")
698 return 23;
699 else if (_scheme == "smtp")
700 return 25;
701 else if (_scheme == "dns")
702 return 53;
703 else if (_scheme == "http" || _scheme == "ws")
704 return 80;
705 else if (_scheme == "nntp")
706 return 119;
707 else if (_scheme == "imap")
708 return 143;
709 else if (_scheme == "ldap")
710 return 389;
711 else if (_scheme == "https" || _scheme == "wss")
712 return 443;
713 else if (_scheme == "smtps")
714 return 465;
715 else if (_scheme == "rtsp")
716 return 554;
717 else if (_scheme == "ldaps")
718 return 636;
719 else if (_scheme == "dnss")
720 return 853;
721 else if (_scheme == "imaps")
722 return 993;
723 else if (_scheme == "sip")
724 return 5060;
725 else if (_scheme == "sips")
726 return 5061;
727 else if (_scheme == "xmpp")
728 return 5222;
729 else
730 return 0;
731}
732
733
734void URI::parse(const std::string& uri)
735{
736 std::string::const_iterator it = uri.begin();
737 std::string::const_iterator end = uri.end();
738 if (it == end) return;
739 if (*it != '/' && *it != '.' && *it != '?' && *it != '#')
740 {
741 std::string scheme;
742 while (it != end && *it != ':' && *it != '?' && *it != '#' && *it != '/') scheme += *it++;
743 if (it != end && *it == ':')
744 {
745 ++it;
746 if (it == end) throw URISyntaxException("URI scheme must be followed by authority or path", uri);
747 setScheme(scheme);
748 if (*it == '/')
749 {
750 ++it;
751 if (it != end && *it == '/')
752 {
753 ++it;
754 parseAuthority(it, end);
755 }
756 else --it;
757 }
758 parsePathEtc(it, end);
759 }
760 else
761 {
762 it = uri.begin();
763 parsePathEtc(it, end);
764 }
765 }
766 else parsePathEtc(it, end);
767}
768
769
770void URI::parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end)
771{
772 std::string userInfo;
773 std::string part;
774 while (it != end && *it != '/' && *it != '?' && *it != '#')
775 {
776 if (*it == '@')
777 {
778 userInfo = part;
779 part.clear();
780 }
781 else part += *it;
782 ++it;
783 }
784 std::string::const_iterator pbeg = part.begin();
785 std::string::const_iterator pend = part.end();
786 parseHostAndPort(pbeg, pend);
787 _userInfo = userInfo;
788}
789
790
791void URI::parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end)
792{
793 if (it == end) return;
794 std::string host;
795 if (*it == '[')
796 {
797 // IPv6 address
798 ++it;
799 while (it != end && *it != ']') host += *it++;
800 if (it == end) throw URISyntaxException("unterminated IPv6 address");
801 ++it;
802 }
803 else
804 {
805 while (it != end && *it != ':') host += *it++;
806 }
807 if (it != end && *it == ':')
808 {
809 ++it;
810 std::string port;
811 while (it != end) port += *it++;
812 if (!port.empty())
813 {
814 int nport = 0;
815 if (NumberParser::tryParse(port, nport) && nport > 0 && nport < 65536)
816 _port = (unsigned short) nport;
817 else
818 throw URISyntaxException("bad or invalid port number", port);
819 }
820 else _port = getWellKnownPort();
821 }
822 else _port = getWellKnownPort();
823 _host = host;
824 toLowerInPlace(_host);
825}
826
827
828void URI::parsePath(std::string::const_iterator& it, const std::string::const_iterator& end)
829{
830 std::string path;
831 while (it != end && *it != '?' && *it != '#') path += *it++;
832 decode(path, _path);
833}
834
835
836void URI::parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end)
837{
838 if (it == end) return;
839 if (*it != '?' && *it != '#')
840 parsePath(it, end);
841 if (it != end && *it == '?')
842 {
843 ++it;
844 parseQuery(it, end);
845 }
846 if (it != end && *it == '#')
847 {
848 ++it;
849 parseFragment(it, end);
850 }
851}
852
853
854void URI::parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end)
855{
856 _query.clear();
857 while (it != end && *it != '#') _query += *it++;
858}
859
860
861void URI::parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end)
862{
863 std::string fragment;
864 while (it != end) fragment += *it++;
865 decode(fragment, _fragment);
866}
867
868
869void URI::mergePath(const std::string& path)
870{
871 std::vector<std::string> segments;
872 std::vector<std::string> normalizedSegments;
873 bool addLeadingSlash = false;
874 if (!_path.empty())
875 {
876 getPathSegments(segments);
877 bool endsWithSlash = *(_path.rbegin()) == '/';
878 if (!endsWithSlash && !segments.empty())
879 segments.pop_back();
880 addLeadingSlash = _path[0] == '/';
881 }
882 getPathSegments(path, segments);
883 addLeadingSlash = addLeadingSlash || (!path.empty() && path[0] == '/');
884 bool hasTrailingSlash = (!path.empty() && *(path.rbegin()) == '/');
885 bool addTrailingSlash = false;
886 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
887 {
888 if (*it == "..")
889 {
890 addTrailingSlash = true;
891 if (!normalizedSegments.empty())
892 normalizedSegments.pop_back();
893 }
894 else if (*it != ".")
895 {
896 addTrailingSlash = false;
897 normalizedSegments.push_back(*it);
898 }
899 else addTrailingSlash = true;
900 }
901 buildPath(normalizedSegments, addLeadingSlash, hasTrailingSlash || addTrailingSlash);
902}
903
904
905void URI::buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash)
906{
907 _path.clear();
908 bool first = true;
909 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
910 {
911 if (first)
912 {
913 first = false;
914 if (leadingSlash)
915 _path += '/';
916 else if (_scheme.empty() && (*it).find(':') != std::string::npos)
917 _path.append("./");
918 }
919 else _path += '/';
920 _path.append(*it);
921 }
922 if (trailingSlash)
923 _path += '/';
924}
925
926
927} // namespace Poco
928