1//
2// URI.cpp
3//
4// Library: Foundation
5// Package: URI
6// Module: URI
7//
8// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
9// and Contributors.
10//
11// SPDX-License-Identifier: BSL-1.0
12//
13
14
15#include "Poco/URI.h"
16#include "Poco/NumberFormatter.h"
17#include "Poco/Exception.h"
18#include "Poco/String.h"
19#include "Poco/NumberParser.h"
20#include "Poco/Path.h"
21
22
23namespace Poco {
24
25
26const std::string URI::RESERVED_PATH = "?#";
27const std::string URI::RESERVED_QUERY = "?#/:;+@";
28const std::string URI::RESERVED_QUERY_PARAM = "?#/:;+@&=";
29const std::string URI::RESERVED_FRAGMENT = "";
30const std::string URI::ILLEGAL = "%<>{}|\\\"^`!*'()$,[]";
31
32
33URI::URI():
34 _port(0)
35{
36}
37
38
39URI::URI(const std::string& uri):
40 _port(0)
41{
42 parse(uri);
43}
44
45
46URI::URI(const char* uri):
47 _port(0)
48{
49 parse(std::string(uri));
50}
51
52
53URI::URI(const std::string& scheme, const std::string& pathEtc):
54 _scheme(scheme),
55 _port(0)
56{
57 toLowerInPlace(_scheme);
58 _port = getWellKnownPort();
59 std::string::const_iterator beg = pathEtc.begin();
60 std::string::const_iterator end = pathEtc.end();
61 parsePathEtc(beg, end);
62}
63
64
65URI::URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc):
66 _scheme(scheme)
67{
68 toLowerInPlace(_scheme);
69 std::string::const_iterator beg = authority.begin();
70 std::string::const_iterator end = authority.end();
71 parseAuthority(beg, end);
72 beg = pathEtc.begin();
73 end = pathEtc.end();
74 parsePathEtc(beg, end);
75}
76
77
78URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query):
79 _scheme(scheme),
80 _path(path),
81 _query(query)
82{
83 toLowerInPlace(_scheme);
84 std::string::const_iterator beg = authority.begin();
85 std::string::const_iterator end = authority.end();
86 parseAuthority(beg, end);
87}
88
89
90URI::URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment):
91 _scheme(scheme),
92 _path(path),
93 _query(query),
94 _fragment(fragment)
95{
96 toLowerInPlace(_scheme);
97 std::string::const_iterator beg = authority.begin();
98 std::string::const_iterator end = authority.end();
99 parseAuthority(beg, end);
100}
101
102
103URI::URI(const URI& uri):
104 _scheme(uri._scheme),
105 _userInfo(uri._userInfo),
106 _host(uri._host),
107 _port(uri._port),
108 _path(uri._path),
109 _query(uri._query),
110 _fragment(uri._fragment)
111{
112}
113
114
115URI::URI(const URI& baseURI, const std::string& relativeURI):
116 _scheme(baseURI._scheme),
117 _userInfo(baseURI._userInfo),
118 _host(baseURI._host),
119 _port(baseURI._port),
120 _path(baseURI._path),
121 _query(baseURI._query),
122 _fragment(baseURI._fragment)
123{
124 resolve(relativeURI);
125}
126
127
128URI::URI(const Path& path):
129 _scheme("file"),
130 _port(0)
131{
132 Path absolutePath(path);
133 absolutePath.makeAbsolute();
134 _path = absolutePath.toString(Path::PATH_UNIX);
135}
136
137
138URI::~URI()
139{
140}
141
142
143URI& URI::operator = (const URI& uri)
144{
145 if (&uri != this)
146 {
147 _scheme = uri._scheme;
148 _userInfo = uri._userInfo;
149 _host = uri._host;
150 _port = uri._port;
151 _path = uri._path;
152 _query = uri._query;
153 _fragment = uri._fragment;
154 }
155 return *this;
156}
157
158
159URI& URI::operator = (const std::string& uri)
160{
161 clear();
162 parse(uri);
163 return *this;
164}
165
166
167URI& URI::operator = (const char* uri)
168{
169 clear();
170 parse(std::string(uri));
171 return *this;
172}
173
174
175void URI::swap(URI& uri)
176{
177 std::swap(_scheme, uri._scheme);
178 std::swap(_userInfo, uri._userInfo);
179 std::swap(_host, uri._host);
180 std::swap(_port, uri._port);
181 std::swap(_path, uri._path);
182 std::swap(_query, uri._query);
183 std::swap(_fragment, uri._fragment);
184}
185
186
187void URI::clear()
188{
189 _scheme.clear();
190 _userInfo.clear();
191 _host.clear();
192 _port = 0;
193 _path.clear();
194 _query.clear();
195 _fragment.clear();
196}
197
198
199std::string URI::toString() const
200{
201 std::string uri;
202 if (isRelative())
203 {
204 encode(_path, RESERVED_PATH, uri);
205 }
206 else
207 {
208 uri = _scheme;
209 uri += ':';
210 std::string auth = getAuthority();
211 if (!auth.empty() || _scheme == "file")
212 {
213 uri.append("//");
214 uri.append(auth);
215 }
216 if (!_path.empty())
217 {
218 if (!auth.empty() && _path[0] != '/')
219 uri += '/';
220 encode(_path, RESERVED_PATH, uri);
221 }
222 else if (!_query.empty() || !_fragment.empty())
223 {
224 uri += '/';
225 }
226 }
227 if (!_query.empty())
228 {
229 uri += '?';
230 uri.append(_query);
231 }
232 if (!_fragment.empty())
233 {
234 uri += '#';
235 encode(_fragment, RESERVED_FRAGMENT, uri);
236 }
237 return uri;
238}
239
240
241void URI::setScheme(const std::string& scheme)
242{
243 _scheme = scheme;
244 toLowerInPlace(_scheme);
245 if (_port == 0)
246 _port = getWellKnownPort();
247}
248
249
250void URI::setUserInfo(const std::string& userInfo)
251{
252 _userInfo.clear();
253 decode(userInfo, _userInfo);
254}
255
256
257void URI::setHost(const std::string& host)
258{
259 _host = host;
260}
261
262
263unsigned short URI::getPort() const
264{
265 if (_port == 0)
266 return getWellKnownPort();
267 else
268 return _port;
269}
270
271
272void URI::setPort(unsigned short port)
273{
274 _port = port;
275}
276
277
278std::string URI::getAuthority() const
279{
280 std::string auth;
281 if (!_userInfo.empty())
282 {
283 auth.append(_userInfo);
284 auth += '@';
285 }
286 if (_host.find(':') != std::string::npos)
287 {
288 auth += '[';
289 auth += _host;
290 auth += ']';
291 }
292 else auth.append(_host);
293 if (_port && !isWellKnownPort())
294 {
295 auth += ':';
296 NumberFormatter::append(auth, _port);
297 }
298 return auth;
299}
300
301
302void URI::setAuthority(const std::string& authority)
303{
304 _userInfo.clear();
305 _host.clear();
306 _port = 0;
307 std::string::const_iterator beg = authority.begin();
308 std::string::const_iterator end = authority.end();
309 parseAuthority(beg, end);
310}
311
312
313void URI::setPath(const std::string& path)
314{
315 _path.clear();
316 decode(path, _path);
317}
318
319
320void URI::setRawQuery(const std::string& query)
321{
322 _query = query;
323}
324
325
326void URI::setQuery(const std::string& query)
327{
328 _query.clear();
329 encode(query, RESERVED_QUERY, _query);
330}
331
332
333void URI::addQueryParameter(const std::string& param, const std::string& val)
334{
335 if (!_query.empty()) _query += '&';
336 encode(param, RESERVED_QUERY_PARAM, _query);
337 _query += '=';
338 encode(val, RESERVED_QUERY_PARAM, _query);
339}
340
341
342std::string URI::getQuery() const
343{
344 std::string query;
345 decode(_query, query);
346 return query;
347}
348
349
350URI::QueryParameters URI::getQueryParameters() const
351{
352 QueryParameters result;
353 std::string::const_iterator it(_query.begin());
354 std::string::const_iterator end(_query.end());
355 while (it != end)
356 {
357 std::string name;
358 std::string value;
359 while (it != end && *it != '=' && *it != '&')
360 {
361 if (*it == '+')
362 name += ' ';
363 else
364 name += *it;
365 ++it;
366 }
367 if (it != end && *it == '=')
368 {
369 ++it;
370 while (it != end && *it != '&')
371 {
372 if (*it == '+')
373 value += ' ';
374 else
375 value += *it;
376 ++it;
377 }
378 }
379 std::string decodedName;
380 std::string decodedValue;
381 URI::decode(name, decodedName);
382 URI::decode(value, decodedValue);
383 result.push_back(std::make_pair(decodedName, decodedValue));
384 if (it != end && *it == '&') ++it;
385 }
386 return result;
387}
388
389
390void URI::setQueryParameters(const QueryParameters& params)
391{
392 _query.clear();
393 for (QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
394 {
395 addQueryParameter(it->first, it->second);
396 }
397}
398
399
400void URI::setFragment(const std::string& fragment)
401{
402 _fragment.clear();
403 decode(fragment, _fragment);
404}
405
406
407void URI::setPathEtc(const std::string& pathEtc)
408{
409 _path.clear();
410 _query.clear();
411 _fragment.clear();
412 std::string::const_iterator beg = pathEtc.begin();
413 std::string::const_iterator end = pathEtc.end();
414 parsePathEtc(beg, end);
415}
416
417
418std::string URI::getPathEtc() const
419{
420 std::string pathEtc;
421 encode(_path, RESERVED_PATH, pathEtc);
422 if (!_query.empty())
423 {
424 pathEtc += '?';
425 pathEtc += _query;
426 }
427 if (!_fragment.empty())
428 {
429 pathEtc += '#';
430 encode(_fragment, RESERVED_FRAGMENT, pathEtc);
431 }
432 return pathEtc;
433}
434
435
436std::string URI::getPathAndQuery() const
437{
438 std::string pathAndQuery;
439 encode(_path, RESERVED_PATH, pathAndQuery);
440 if (!_query.empty())
441 {
442 pathAndQuery += '?';
443 pathAndQuery += _query;
444 }
445 return pathAndQuery;
446}
447
448
449void URI::resolve(const std::string& relativeURI)
450{
451 URI parsedURI(relativeURI);
452 resolve(parsedURI);
453}
454
455
456void URI::resolve(const URI& relativeURI)
457{
458 if (!relativeURI._scheme.empty())
459 {
460 _scheme = relativeURI._scheme;
461 _userInfo = relativeURI._userInfo;
462 _host = relativeURI._host;
463 _port = relativeURI._port;
464 _path = relativeURI._path;
465 _query = relativeURI._query;
466 removeDotSegments();
467 }
468 else
469 {
470 if (!relativeURI._host.empty())
471 {
472 _userInfo = relativeURI._userInfo;
473 _host = relativeURI._host;
474 _port = relativeURI._port;
475 _path = relativeURI._path;
476 _query = relativeURI._query;
477 removeDotSegments();
478 }
479 else
480 {
481 if (relativeURI._path.empty())
482 {
483 if (!relativeURI._query.empty())
484 _query = relativeURI._query;
485 }
486 else
487 {
488 if (relativeURI._path[0] == '/')
489 {
490 _path = relativeURI._path;
491 removeDotSegments();
492 }
493 else
494 {
495 mergePath(relativeURI._path);
496 }
497 _query = relativeURI._query;
498 }
499 }
500 }
501 _fragment = relativeURI._fragment;
502}
503
504
505bool URI::isRelative() const
506{
507 return _scheme.empty();
508}
509
510
511bool URI::empty() const
512{
513 return _scheme.empty() && _host.empty() && _path.empty() && _query.empty() && _fragment.empty();
514}
515
516
517bool URI::operator == (const URI& uri) const
518{
519 return equals(uri);
520}
521
522
523bool URI::operator == (const std::string& uri) const
524{
525 URI parsedURI(uri);
526 return equals(parsedURI);
527}
528
529
530bool URI::operator != (const URI& uri) const
531{
532 return !equals(uri);
533}
534
535
536bool URI::operator != (const std::string& uri) const
537{
538 URI parsedURI(uri);
539 return !equals(parsedURI);
540}
541
542
543bool URI::equals(const URI& uri) const
544{
545 return _scheme == uri._scheme
546 && _userInfo == uri._userInfo
547 && _host == uri._host
548 && getPort() == uri.getPort()
549 && _path == uri._path
550 && _query == uri._query
551 && _fragment == uri._fragment;
552}
553
554
555void URI::normalize()
556{
557 removeDotSegments(!isRelative());
558}
559
560
561void URI::removeDotSegments(bool removeLeading)
562{
563 if (_path.empty()) return;
564
565 bool leadingSlash = *(_path.begin()) == '/';
566 bool trailingSlash = *(_path.rbegin()) == '/';
567 std::vector<std::string> segments;
568 std::vector<std::string> normalizedSegments;
569 getPathSegments(segments);
570 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
571 {
572 if (*it == "..")
573 {
574 if (!normalizedSegments.empty())
575 {
576 if (normalizedSegments.back() == "..")
577 normalizedSegments.push_back(*it);
578 else
579 normalizedSegments.pop_back();
580 }
581 else if (!removeLeading)
582 {
583 normalizedSegments.push_back(*it);
584 }
585 }
586 else if (*it != ".")
587 {
588 normalizedSegments.push_back(*it);
589 }
590 }
591 buildPath(normalizedSegments, leadingSlash, trailingSlash);
592}
593
594
595void URI::getPathSegments(std::vector<std::string>& segments)
596{
597 getPathSegments(_path, segments);
598}
599
600
601void URI::getPathSegments(const std::string& path, std::vector<std::string>& segments)
602{
603 std::string::const_iterator it = path.begin();
604 std::string::const_iterator end = path.end();
605 std::string seg;
606 while (it != end)
607 {
608 if (*it == '/')
609 {
610 if (!seg.empty())
611 {
612 segments.push_back(seg);
613 seg.clear();
614 }
615 }
616 else seg += *it;
617 ++it;
618 }
619 if (!seg.empty())
620 segments.push_back(seg);
621}
622
623
624void URI::encode(const std::string& str, const std::string& reserved, std::string& encodedStr)
625{
626 for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
627 {
628 char c = *it;
629 if ((c >= 'a' && c <= 'z') ||
630 (c >= 'A' && c <= 'Z') ||
631 (c >= '0' && c <= '9') ||
632 c == '-' || c == '_' ||
633 c == '.' || c == '~')
634 {
635 encodedStr += c;
636 }
637 else if (c <= 0x20 || c >= 0x7F || ILLEGAL.find(c) != std::string::npos || reserved.find(c) != std::string::npos)
638 {
639 encodedStr += '%';
640 encodedStr += NumberFormatter::formatHex((unsigned) (unsigned char) c, 2);
641 }
642 else encodedStr += c;
643 }
644}
645
646
647void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpace)
648{
649 bool inQuery = false;
650 std::string::const_iterator it = str.begin();
651 std::string::const_iterator end = str.end();
652 while (it != end)
653 {
654 char c = *it++;
655 if (c == '?') inQuery = true;
656 // spaces may be encoded as plus signs in the query
657 if (inQuery && plusAsSpace && c == '+') c = ' ';
658 else if (c == '%')
659 {
660 if (it == end) throw URISyntaxException("URI encoding: no hex digit following percent sign", str);
661 char hi = *it++;
662 if (it == end) throw URISyntaxException("URI encoding: two hex digits must follow percent sign", str);
663 char lo = *it++;
664 if (hi >= '0' && hi <= '9')
665 c = hi - '0';
666 else if (hi >= 'A' && hi <= 'F')
667 c = hi - 'A' + 10;
668 else if (hi >= 'a' && hi <= 'f')
669 c = hi - 'a' + 10;
670 else throw URISyntaxException("URI encoding: not a hex digit");
671 c *= 16;
672 if (lo >= '0' && lo <= '9')
673 c += lo - '0';
674 else if (lo >= 'A' && lo <= 'F')
675 c += lo - 'A' + 10;
676 else if (lo >= 'a' && lo <= 'f')
677 c += lo - 'a' + 10;
678 else throw URISyntaxException("URI encoding: not a hex digit");
679 }
680 decodedStr += c;
681 }
682}
683
684
685bool URI::isWellKnownPort() const
686{
687 return _port == getWellKnownPort();
688}
689
690
691unsigned short URI::getWellKnownPort() const
692{
693 if (_scheme == "ftp")
694 return 21;
695 else if (_scheme == "ssh")
696 return 22;
697 else if (_scheme == "telnet")
698 return 23;
699 else if (_scheme == "http" || _scheme == "ws")
700 return 80;
701 else if (_scheme == "nntp")
702 return 119;
703 else if (_scheme == "ldap")
704 return 389;
705 else if (_scheme == "https" || _scheme == "wss")
706 return 443;
707 else if (_scheme == "rtsp")
708 return 554;
709 else if (_scheme == "sip")
710 return 5060;
711 else if (_scheme == "sips")
712 return 5061;
713 else if (_scheme == "xmpp")
714 return 5222;
715 else
716 return 0;
717}
718
719
720void URI::parse(const std::string& uri)
721{
722 std::for_each(uri.begin(), uri.end(), [] (char ch) {
723 if (static_cast<signed char>(ch) <= 32 || ch == '\x7F')
724 throw URISyntaxException("URI contains invalid characters");
725 });
726
727 std::string::const_iterator it = uri.begin();
728 std::string::const_iterator end = uri.end();
729 if (it == end) return;
730 if (*it != '/' && *it != '.' && *it != '?' && *it != '#')
731 {
732 std::string scheme;
733 while (it != end && *it != ':' && *it != '?' && *it != '#' && *it != '/') scheme += *it++;
734 if (it != end && *it == ':')
735 {
736 ++it;
737 if (it == end) throw URISyntaxException("URI scheme must be followed by authority or path", uri);
738 setScheme(scheme);
739 if (*it == '/')
740 {
741 ++it;
742 if (it != end && *it == '/')
743 {
744 ++it;
745 parseAuthority(it, end);
746 }
747 else --it;
748 }
749 parsePathEtc(it, end);
750 }
751 else
752 {
753 it = uri.begin();
754 parsePathEtc(it, end);
755 }
756 }
757 else parsePathEtc(it, end);
758}
759
760
761void URI::parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end)
762{
763 std::string userInfo;
764 std::string part;
765 while (it != end && *it != '/' && *it != '?' && *it != '#')
766 {
767 if (*it == '@')
768 {
769 userInfo = part;
770 part.clear();
771 }
772 else part += *it;
773 ++it;
774 }
775 std::string::const_iterator pbeg = part.begin();
776 std::string::const_iterator pend = part.end();
777 parseHostAndPort(pbeg, pend);
778 _userInfo = userInfo;
779}
780
781
782void URI::parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end)
783{
784 if (it == end) return;
785 std::string host;
786 if (*it == '[')
787 {
788 // IPv6 address
789 ++it;
790 while (it != end && *it != ']') host += *it++;
791 if (it == end) throw URISyntaxException("unterminated IPv6 address");
792 ++it;
793 }
794 else
795 {
796 while (it != end && *it != ':') host += *it++;
797 }
798 if (it != end && *it == ':')
799 {
800 ++it;
801 std::string port;
802 while (it != end) port += *it++;
803 if (!port.empty())
804 {
805 int nport = 0;
806 if (NumberParser::tryParse(port, nport) && nport > 0 && nport < 65536)
807 _port = (unsigned short) nport;
808 else
809 throw URISyntaxException("bad or invalid port number", port);
810 }
811 else _port = getWellKnownPort();
812 }
813 else _port = getWellKnownPort();
814 _host = host;
815 toLowerInPlace(_host);
816}
817
818
819void URI::parsePath(std::string::const_iterator& it, const std::string::const_iterator& end)
820{
821 std::string path;
822 while (it != end && *it != '?' && *it != '#') path += *it++;
823 decode(path, _path);
824}
825
826
827void URI::parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end)
828{
829 if (it == end) return;
830 if (*it != '?' && *it != '#')
831 parsePath(it, end);
832 if (it != end && *it == '?')
833 {
834 ++it;
835 parseQuery(it, end);
836 }
837 if (it != end && *it == '#')
838 {
839 ++it;
840 parseFragment(it, end);
841 }
842}
843
844
845void URI::parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end)
846{
847 _query.clear();
848 while (it != end && *it != '#') _query += *it++;
849}
850
851
852void URI::parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end)
853{
854 std::string fragment;
855 while (it != end) fragment += *it++;
856 decode(fragment, _fragment);
857}
858
859
860void URI::mergePath(const std::string& path)
861{
862 std::vector<std::string> segments;
863 std::vector<std::string> normalizedSegments;
864 bool addLeadingSlash = false;
865 if (!_path.empty())
866 {
867 getPathSegments(segments);
868 bool endsWithSlash = *(_path.rbegin()) == '/';
869 if (!endsWithSlash && !segments.empty())
870 segments.pop_back();
871 addLeadingSlash = _path[0] == '/';
872 }
873 getPathSegments(path, segments);
874 addLeadingSlash = addLeadingSlash || (!path.empty() && path[0] == '/');
875 bool hasTrailingSlash = (!path.empty() && *(path.rbegin()) == '/');
876 bool addTrailingSlash = false;
877 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
878 {
879 if (*it == "..")
880 {
881 addTrailingSlash = true;
882 if (!normalizedSegments.empty())
883 normalizedSegments.pop_back();
884 }
885 else if (*it != ".")
886 {
887 addTrailingSlash = false;
888 normalizedSegments.push_back(*it);
889 }
890 else addTrailingSlash = true;
891 }
892 buildPath(normalizedSegments, addLeadingSlash, hasTrailingSlash || addTrailingSlash);
893}
894
895
896void URI::buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash)
897{
898 _path.clear();
899 bool first = true;
900 for (std::vector<std::string>::const_iterator it = segments.begin(); it != segments.end(); ++it)
901 {
902 if (first)
903 {
904 first = false;
905 if (leadingSlash)
906 _path += '/';
907 else if (_scheme.empty() && (*it).find(':') != std::string::npos)
908 _path.append("./");
909 }
910 else _path += '/';
911 _path.append(*it);
912 }
913 if (trailingSlash)
914 _path += '/';
915}
916
917
918} // namespace Poco
919