1//
2// URI.h
3//
4// Library: Foundation
5// Package: URI
6// Module: URI
7//
8// Definition of the URI class.
9//
10// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
11// and Contributors.
12//
13// SPDX-License-Identifier: BSL-1.0
14//
15
16
17#ifndef Foundation_URI_INCLUDED
18#define Foundation_URI_INCLUDED
19
20
21#include "Poco/Foundation.h"
22#include <vector>
23#include <utility>
24
25
26namespace Poco {
27
28
29class Path;
30
31
32class Foundation_API URI
33 /// A Uniform Resource Identifier, as specified in RFC 3986.
34 ///
35 /// The URI class provides methods for building URIs from their
36 /// parts, as well as for splitting URIs into their parts.
37 /// Furthermore, the class provides methods for resolving
38 /// relative URIs against base URIs.
39 ///
40 /// The class automatically performs a few normalizations on
41 /// all URIs and URI parts passed to it:
42 /// * scheme identifiers are converted to lower case
43 /// * percent-encoded characters are decoded (except for the query string)
44 /// * optionally, dot segments are removed from paths (see normalize())
45 ///
46 /// Note that dealing with query strings requires some precautions, as, internally,
47 /// query strings are stored in percent-encoded form, while all other parts of the URI
48 /// are stored in decoded form. While parsing query strings from properly encoded URLs
49 /// generally works, explicitly setting query strings with setQuery() or extracting
50 /// query strings with getQuery() may lead to ambiguities. See the descriptions of
51 /// setQuery(), setRawQuery(), getQuery() and getRawQuery() for more information.
52{
53public:
54 typedef std::vector<std::pair<std::string, std::string> > QueryParameters;
55
56 URI();
57 /// Creates an empty URI.
58
59 explicit URI(const std::string& uri);
60 /// Parses a URI from the given string. Throws a
61 /// SyntaxException if the uri is not valid.
62
63 explicit URI(const char* uri);
64 /// Parses a URI from the given string. Throws a
65 /// SyntaxException if the uri is not valid.
66
67 URI(const std::string& scheme, const std::string& pathEtc);
68 /// Creates a URI from its parts.
69
70 URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc);
71 /// Creates a URI from its parts.
72
73 URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query);
74 /// Creates a URI from its parts.
75
76 URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment);
77 /// Creates a URI from its parts.
78
79 URI(const URI& uri);
80 /// Copy constructor. Creates a URI from another one.
81
82 URI(const URI& baseURI, const std::string& relativeURI);
83 /// Creates a URI from a base URI and a relative URI, according to
84 /// the algorithm in section 5.2 of RFC 3986.
85
86 explicit URI(const Path& path);
87 /// Creates a URI from a path.
88 ///
89 /// The path will be made absolute, and a file:// URI
90 /// will be built from it.
91
92 ~URI();
93 /// Destroys the URI.
94
95 URI& operator = (const URI& uri);
96 /// Assignment operator.
97
98 URI& operator = (const std::string& uri);
99 /// Parses and assigns a URI from the given string. Throws a
100 /// SyntaxException if the uri is not valid.
101
102 URI& operator = (const char* uri);
103 /// Parses and assigns a URI from the given string. Throws a
104 /// SyntaxException if the uri is not valid.
105
106 void swap(URI& uri);
107 /// Swaps the URI with another one.
108
109 void clear();
110 /// Clears all parts of the URI.
111
112 std::string toString() const;
113 /// Returns a string representation of the URI.
114 ///
115 /// Characters in the path, query and fragment parts will be
116 /// percent-encoded as necessary.
117
118 const std::string& getScheme() const;
119 /// Returns the scheme part of the URI.
120
121 void setScheme(const std::string& scheme);
122 /// Sets the scheme part of the URI. The given scheme
123 /// is converted to lower-case.
124 ///
125 /// A list of registered URI schemes can be found
126 /// at <http://www.iana.org/assignments/uri-schemes>.
127
128 const std::string& getUserInfo() const;
129 /// Returns the user-info part of the URI.
130
131 void setUserInfo(const std::string& userInfo);
132 /// Sets the user-info part of the URI.
133
134 const std::string& getHost() const;
135 /// Returns the host part of the URI.
136
137 void setHost(const std::string& host);
138 /// Sets the host part of the URI.
139
140 unsigned short getPort() const;
141 /// Returns the port number part of the URI.
142 ///
143 /// If no port number (0) has been specified, the
144 /// well-known port number (e.g., 80 for http) for
145 /// the given scheme is returned if it is known.
146 /// Otherwise, 0 is returned.
147
148 void setPort(unsigned short port);
149 /// Sets the port number part of the URI.
150
151 std::string getAuthority() const;
152 /// Returns the authority part (userInfo, host and port)
153 /// of the URI.
154 ///
155 /// If the port number is a well-known port
156 /// number for the given scheme (e.g., 80 for http), it
157 /// is not included in the authority.
158
159 void setAuthority(const std::string& authority);
160 /// Parses the given authority part for the URI and sets
161 /// the user-info, host, port components accordingly.
162
163 const std::string& getPath() const;
164 /// Returns the decoded path part of the URI.
165
166 void setPath(const std::string& path);
167 /// Sets the path part of the URI.
168
169 std::string getQuery() const;
170 /// Returns the decoded query part of the URI.
171 ///
172 /// Note that encoded ampersand characters ('&', "%26")
173 /// will be decoded, which could cause ambiguities if the query
174 /// string contains multiple parameters and a parameter name
175 /// or value contains an ampersand as well.
176 /// In such a case it's better to use getRawQuery() or
177 /// getQueryParameters().
178
179 void setQuery(const std::string& query);
180 /// Sets the query part of the URI.
181 ///
182 /// The query string will be percent-encoded. If the query
183 /// already contains percent-encoded characters, these
184 /// will be double-encoded, which is probably not what's
185 /// intended by the caller. Furthermore, ampersand ('&')
186 /// characters in the query will not be encoded. This could
187 /// lead to ambiguity issues if the query string contains multiple
188 /// name-value parameters separated by ampersand, and if any
189 /// name or value also contains an ampersand. In such a
190 /// case, it's better to use setRawQuery() with a properly
191 /// percent-encoded query string, or use addQueryParameter()
192 /// or setQueryParameters(), which take care of appropriate
193 /// percent encoding of parameter names and values.
194
195 void addQueryParameter(const std::string& param, const std::string& val = "");
196 /// Adds "param=val" to the query; "param" may not be empty.
197 /// If val is empty, only '=' is appended to the parameter.
198 ///
199 /// In addition to regular encoding, this function also encodes '&' and '=',
200 /// if found in param or val.
201
202 const std::string& getRawQuery() const;
203 /// Returns the query string in raw form, which usually
204 /// means percent encoded.
205
206 void setRawQuery(const std::string& query);
207 /// Sets the query part of the URI.
208 ///
209 /// The given query string must be properly percent-encoded.
210
211 QueryParameters getQueryParameters() const;
212 /// Returns the decoded query string parameters as a vector
213 /// of name-value pairs.
214
215 void setQueryParameters(const QueryParameters& params);
216 /// Sets the query part of the URI from a vector
217 /// of query parameters.
218 ///
219 /// Calls addQueryParameter() for each parameter name and value.
220
221 const std::string& getFragment() const;
222 /// Returns the fragment part of the URI.
223
224 void setFragment(const std::string& fragment);
225 /// Sets the fragment part of the URI.
226
227 void setPathEtc(const std::string& pathEtc);
228 /// Sets the path, query and fragment parts of the URI.
229
230 std::string getPathEtc() const;
231 /// Returns the encoded path, query and fragment parts of the URI.
232
233 std::string getPathAndQuery() const;
234 /// Returns the encoded path and query parts of the URI.
235
236 void resolve(const std::string& relativeURI);
237 /// Resolves the given relative URI against the base URI.
238 /// See section 5.2 of RFC 3986 for the algorithm used.
239
240 void resolve(const URI& relativeURI);
241 /// Resolves the given relative URI against the base URI.
242 /// See section 5.2 of RFC 3986 for the algorithm used.
243
244 bool isRelative() const;
245 /// Returns true if the URI is a relative reference, false otherwise.
246 ///
247 /// A relative reference does not contain a scheme identifier.
248 /// Relative references are usually resolved against an absolute
249 /// base reference.
250
251 bool empty() const;
252 /// Returns true if the URI is empty, false otherwise.
253
254 bool operator == (const URI& uri) const;
255 /// Returns true if both URIs are identical, false otherwise.
256 ///
257 /// Two URIs are identical if their scheme, authority,
258 /// path, query and fragment part are identical.
259
260 bool operator == (const std::string& uri) const;
261 /// Parses the given URI and returns true if both URIs are identical,
262 /// false otherwise.
263
264 bool operator != (const URI& uri) const;
265 /// Returns true if both URIs are not identical, false otherwise.
266
267 bool operator != (const std::string& uri) const;
268 /// Parses the given URI and returns true if both URIs are not identical,
269 /// false otherwise.
270
271 void normalize();
272 /// Normalizes the URI by removing all but leading . and .. segments from the path.
273 ///
274 /// If the first path segment in a relative path contains a colon (:),
275 /// such as in a Windows path containing a drive letter, a dot segment (./)
276 /// is prepended in accordance with section 3.3 of RFC 3986.
277
278 void getPathSegments(std::vector<std::string>& segments);
279 /// Places the single path segments (delimited by slashes) into the
280 /// given vector.
281
282 static void encode(const std::string& str, const std::string& reserved, std::string& encodedStr);
283 /// URI-encodes the given string by escaping reserved and non-ASCII
284 /// characters. The encoded string is appended to encodedStr.
285
286 static void decode(const std::string& str, std::string& decodedStr, bool plusAsSpace = false);
287 /// URI-decodes the given string by replacing percent-encoded
288 /// characters with the actual character. The decoded string
289 /// is appended to decodedStr.
290 ///
291 /// When plusAsSpace is true, non-encoded plus signs in the query are decoded as spaces.
292 /// (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1)
293
294protected:
295 bool equals(const URI& uri) const;
296 /// Returns true if both URIs are equivalent.
297
298 bool isWellKnownPort() const;
299 /// Returns true if the URI's port number is a well-known one
300 /// (for example, 80, if the scheme is http).
301
302 unsigned short getWellKnownPort() const;
303 /// Returns the well-known port number for the URI's scheme,
304 /// or 0 if the port number is not known.
305
306 void parse(const std::string& uri);
307 /// Parses and assigns a URI from the given string. Throws a
308 /// SyntaxException if the uri is not valid.
309
310 void parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end);
311 /// Parses and sets the user-info, host and port from the given data.
312
313 void parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end);
314 /// Parses and sets the host and port from the given data.
315
316 void parsePath(std::string::const_iterator& it, const std::string::const_iterator& end);
317 /// Parses and sets the path from the given data.
318
319 void parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end);
320 /// Parses and sets the path, query and fragment from the given data.
321
322 void parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end);
323 /// Parses and sets the query from the given data.
324
325 void parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end);
326 /// Parses and sets the fragment from the given data.
327
328 void mergePath(const std::string& path);
329 /// Appends a path to the URI's path.
330
331 void removeDotSegments(bool removeLeading = true);
332 /// Removes all dot segments from the path.
333
334 static void getPathSegments(const std::string& path, std::vector<std::string>& segments);
335 /// Places the single path segments (delimited by slashes) into the
336 /// given vector.
337
338 void buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash);
339 /// Builds the path from the given segments.
340
341 static const std::string RESERVED_PATH;
342 static const std::string RESERVED_QUERY;
343 static const std::string RESERVED_QUERY_PARAM;
344 static const std::string RESERVED_FRAGMENT;
345 static const std::string ILLEGAL;
346
347private:
348 std::string _scheme;
349 std::string _userInfo;
350 std::string _host;
351 unsigned short _port; // 0 means that no port number has been specified (see getPort())
352 std::string _path;
353 std::string _query; // kept in percent-encoded (raw) form, unlike the other parts (see getRawQuery())
354 std::string _fragment;
355};
356
357
358//
359// inlines
360//
361inline const std::string& URI::getScheme() const // accessor for the scheme part
362{
363 return this->_scheme; // hands out the member by reference; no copy is made
364}
365
366
367inline const std::string& URI::getUserInfo() const // accessor for the user-info part
368{
369 return this->_userInfo; // hands out the member by reference; no copy is made
370}
371
372
373inline const std::string& URI::getHost() const // accessor for the host part
374{
375 return this->_host; // hands out the member by reference; no copy is made
376}
377
378
379inline const std::string& URI::getPath() const // accessor for the path part
380{
381 return this->_path; // hands out the member by reference; no copy is made
382}
383
384
385inline const std::string& URI::getRawQuery() const // accessor for the query part, as stored
386{
387 return this->_query; // returned untouched: no decoding is applied here
388}
389
390
391inline const std::string& URI::getFragment() const // accessor for the fragment part
392{
393 return this->_fragment; // hands out the member by reference; no copy is made
394}
395
396
397inline void swap(URI& u1, URI& u2) // free-function form of URI::swap()
398{
399 u1.URI::swap(u2); // forwards to the member function, which does the actual exchange
400}
401
402
403} // namespace Poco
404
405
406#endif // Foundation_URI_INCLUDED
407