1 | // |
2 | // URI.h |
3 | // |
4 | // Library: Foundation |
5 | // Package: URI |
6 | // Module: URI |
7 | // |
8 | // Definition of the URI class. |
9 | // |
10 | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Foundation_URI_INCLUDED |
18 | #define Foundation_URI_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Foundation.h" |
22 | #include <vector> |
23 | #include <utility> |
24 | |
25 | |
26 | namespace Poco { |
27 | |
28 | |
29 | class Path; |
30 | |
31 | |
32 | class Foundation_API URI |
33 | /// A Uniform Resource Identifier, as specified in RFC 3986. |
34 | /// |
35 | /// The URI class provides methods for building URIs from their |
36 | /// parts, as well as for splitting URIs into their parts. |
37 | /// Furthermore, the class provides methods for resolving |
38 | /// relative URIs against base URIs. |
39 | /// |
40 | /// The class automatically performs a few normalizations on |
41 | /// all URIs and URI parts passed to it: |
42 | /// * scheme identifiers are converted to lower case |
43 | /// * percent-encoded characters are decoded (except for the query string) |
44 | /// * optionally, dot segments are removed from paths (see normalize()) |
45 | /// |
46 | /// Note that dealing with query strings requires some precautions, as, internally, |
47 | /// query strings are stored in percent-encoded form, while all other parts of the URI |
48 | /// are stored in decoded form. While parsing query strings from properly encoded URLs |
49 | /// generally works, explicitly setting query strings with setQuery() or extracting |
50 | /// query strings with getQuery() may lead to ambiguities. See the descriptions of |
51 | /// setQuery(), setRawQuery(), getQuery() and getRawQuery() for more information. |
52 | { |
53 | public: |
54 | typedef std::vector<std::pair<std::string, std::string> > QueryParameters; |
55 | |
56 | URI(); |
57 | /// Creates an empty URI. |
58 | |
59 | explicit URI(const std::string& uri); |
60 | /// Parses an URI from the given string. Throws a |
61 | /// SyntaxException if the uri is not valid. |
62 | |
63 | explicit URI(const char* uri); |
64 | /// Parses an URI from the given string. Throws a |
65 | /// SyntaxException if the uri is not valid. |
66 | |
67 | URI(const std::string& scheme, const std::string& pathEtc); |
68 | /// Creates an URI from its parts. |
69 | |
70 | URI(const std::string& scheme, const std::string& authority, const std::string& pathEtc); |
71 | /// Creates an URI from its parts. |
72 | |
73 | URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query); |
74 | /// Creates an URI from its parts. |
75 | |
76 | URI(const std::string& scheme, const std::string& authority, const std::string& path, const std::string& query, const std::string& fragment); |
77 | /// Creates an URI from its parts. |
78 | |
79 | URI(const URI& uri); |
80 | /// Copy constructor. Creates an URI from another one. |
81 | |
82 | URI(const URI& baseURI, const std::string& relativeURI); |
83 | /// Creates an URI from a base URI and a relative URI, according to |
84 | /// the algorithm in section 5.2 of RFC 3986. |
85 | |
86 | explicit URI(const Path& path); |
87 | /// Creates a URI from a path. |
88 | /// |
89 | /// The path will be made absolute, and a file:// URI |
90 | /// will be built from it. |
91 | |
92 | ~URI(); |
93 | /// Destroys the URI. |
94 | |
95 | URI& operator = (const URI& uri); |
96 | /// Assignment operator. |
97 | |
98 | URI& operator = (const std::string& uri); |
99 | /// Parses and assigns an URI from the given string. Throws a |
100 | /// SyntaxException if the uri is not valid. |
101 | |
102 | URI& operator = (const char* uri); |
103 | /// Parses and assigns an URI from the given string. Throws a |
104 | /// SyntaxException if the uri is not valid. |
105 | |
106 | void swap(URI& uri); |
107 | /// Swaps the URI with another one. |
108 | |
109 | void clear(); |
110 | /// Clears all parts of the URI. |
111 | |
112 | std::string toString() const; |
113 | /// Returns a string representation of the URI. |
114 | /// |
115 | /// Characters in the path, query and fragment parts will be |
116 | /// percent-encoded as necessary. |
117 | |
118 | const std::string& getScheme() const; |
119 | /// Returns the scheme part of the URI. |
120 | |
121 | void setScheme(const std::string& scheme); |
122 | /// Sets the scheme part of the URI. The given scheme |
123 | /// is converted to lower-case. |
124 | /// |
125 | /// A list of registered URI schemes can be found |
126 | /// at <http://www.iana.org/assignments/uri-schemes>. |
127 | |
128 | const std::string& getUserInfo() const; |
129 | /// Returns the user-info part of the URI. |
130 | |
131 | void setUserInfo(const std::string& userInfo); |
132 | /// Sets the user-info part of the URI. |
133 | |
134 | const std::string& getHost() const; |
135 | /// Returns the host part of the URI. |
136 | |
137 | void setHost(const std::string& host); |
138 | /// Sets the host part of the URI. |
139 | |
140 | unsigned short getPort() const; |
141 | /// Returns the port number part of the URI. |
142 | /// |
143 | /// If no port number (0) has been specified, the |
144 | /// well-known port number (e.g., 80 for http) for |
145 | /// the given scheme is returned if it is known. |
146 | /// Otherwise, 0 is returned. |
147 | |
148 | void setPort(unsigned short port); |
149 | /// Sets the port number part of the URI. |
150 | |
151 | std::string getAuthority() const; |
152 | /// Returns the authority part (userInfo, host and port) |
153 | /// of the URI. |
154 | /// |
155 | /// If the port number is a well-known port |
156 | /// number for the given scheme (e.g., 80 for http), it |
157 | /// is not included in the authority. |
158 | |
159 | void setAuthority(const std::string& authority); |
160 | /// Parses the given authority part for the URI and sets |
161 | /// the user-info, host, port components accordingly. |
162 | |
163 | const std::string& getPath() const; |
164 | /// Returns the decoded path part of the URI. |
165 | |
166 | void setPath(const std::string& path); |
167 | /// Sets the path part of the URI. |
168 | |
169 | std::string getQuery() const; |
170 | /// Returns the decoded query part of the URI. |
171 | /// |
172 | /// Note that encoded ampersand characters ('&', "%26") |
173 | /// will be decoded, which could cause ambiguities if the query |
174 | /// string contains multiple parameters and a parameter name |
175 | /// or value contains an ampersand as well. |
176 | /// In such a case it's better to use getRawQuery() or |
177 | /// getQueryParameters(). |
178 | |
179 | void setQuery(const std::string& query); |
180 | /// Sets the query part of the URI. |
181 | /// |
182 | /// The query string will be percent-encoded. If the query |
183 | /// already contains percent-encoded characters, these |
184 | /// will be double-encoded, which is probably not what's |
185 | /// intended by the caller. Furthermore, ampersand ('&') |
186 | /// characters in the query will not be encoded. This could |
187 | /// lead to ambiguity issues if the query string contains multiple |
188 | /// name-value parameters separated by ampersand, and if any |
189 | /// name or value also contains an ampersand. In such a |
190 | /// case, it's better to use setRawQuery() with a properly |
191 | /// percent-encoded query string, or use addQueryParameter() |
192 | /// or setQueryParameters(), which take care of appropriate |
193 | /// percent encoding of parameter names and values. |
194 | |
195 | void addQueryParameter(const std::string& param, const std::string& val = "" ); |
196 | /// Adds "param=val" to the query; "param" may not be empty. |
197 | /// If val is empty, only '=' is appended to the parameter. |
198 | /// |
199 | /// In addition to regular encoding, function also encodes '&' and '=', |
200 | /// if found in param or val. |
201 | |
202 | const std::string& getRawQuery() const; |
203 | /// Returns the query string in raw form, which usually |
204 | /// means percent encoded. |
205 | |
206 | void setRawQuery(const std::string& query); |
207 | /// Sets the query part of the URI. |
208 | /// |
209 | /// The given query string must be properly percent-encoded. |
210 | |
211 | QueryParameters getQueryParameters() const; |
212 | /// Returns the decoded query string parameters as a vector |
213 | /// of name-value pairs. |
214 | |
215 | void setQueryParameters(const QueryParameters& params); |
216 | /// Sets the query part of the URI from a vector |
217 | /// of query parameters. |
218 | /// |
219 | /// Calls addQueryParameter() for each parameter name and value. |
220 | |
221 | const std::string& getFragment() const; |
222 | /// Returns the fragment part of the URI. |
223 | |
224 | void setFragment(const std::string& fragment); |
225 | /// Sets the fragment part of the URI. |
226 | |
227 | void setPathEtc(const std::string& pathEtc); |
228 | /// Sets the path, query and fragment parts of the URI. |
229 | |
230 | std::string getPathEtc() const; |
231 | /// Returns the encoded path, query and fragment parts of the URI. |
232 | |
233 | std::string getPathAndQuery() const; |
234 | /// Returns the encoded path and query parts of the URI. |
235 | |
236 | void resolve(const std::string& relativeURI); |
237 | /// Resolves the given relative URI against the base URI. |
238 | /// See section 5.2 of RFC 3986 for the algorithm used. |
239 | |
240 | void resolve(const URI& relativeURI); |
241 | /// Resolves the given relative URI against the base URI. |
242 | /// See section 5.2 of RFC 3986 for the algorithm used. |
243 | |
244 | bool isRelative() const; |
245 | /// Returns true if the URI is a relative reference, false otherwise. |
246 | /// |
247 | /// A relative reference does not contain a scheme identifier. |
248 | /// Relative references are usually resolved against an absolute |
249 | /// base reference. |
250 | |
251 | bool empty() const; |
252 | /// Returns true if the URI is empty, false otherwise. |
253 | |
254 | bool operator == (const URI& uri) const; |
255 | /// Returns true if both URIs are identical, false otherwise. |
256 | /// |
257 | /// Two URIs are identical if their scheme, authority, |
258 | /// path, query and fragment part are identical. |
259 | |
260 | bool operator == (const std::string& uri) const; |
261 | /// Parses the given URI and returns true if both URIs are identical, |
262 | /// false otherwise. |
263 | |
264 | bool operator != (const URI& uri) const; |
265 | /// Returns true if both URIs are identical, false otherwise. |
266 | |
267 | bool operator != (const std::string& uri) const; |
268 | /// Parses the given URI and returns true if both URIs are identical, |
269 | /// false otherwise. |
270 | |
271 | void normalize(); |
272 | /// Normalizes the URI by removing all but leading . and .. segments from the path. |
273 | /// |
274 | /// If the first path segment in a relative path contains a colon (:), |
275 | /// such as in a Windows path containing a drive letter, a dot segment (./) |
276 | /// is prepended in accordance with section 3.3 of RFC 3986. |
277 | |
278 | void getPathSegments(std::vector<std::string>& segments); |
279 | /// Places the single path segments (delimited by slashes) into the |
280 | /// given vector. |
281 | |
282 | static void (const std::string& str, const std::string& reserved, std::string& encodedStr); |
283 | /// URI-encodes the given string by escaping reserved and non-ASCII |
284 | /// characters. The encoded string is appended to encodedStr. |
285 | |
286 | static void (const std::string& str, std::string& decodedStr, bool plusAsSpace = false); |
287 | /// URI-decodes the given string by replacing percent-encoded |
288 | /// characters with the actual character. The decoded string |
289 | /// is appended to decodedStr. |
290 | /// |
291 | /// When plusAsSpace is true, non-encoded plus signs in the query are decoded as spaces. |
292 | /// (http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1) |
293 | |
294 | protected: |
295 | bool equals(const URI& uri) const; |
296 | /// Returns true if both uri's are equivalent. |
297 | |
298 | bool isWellKnownPort() const; |
299 | /// Returns true if the URI's port number is a well-known one |
300 | /// (for example, 80, if the scheme is http). |
301 | |
302 | unsigned short getWellKnownPort() const; |
303 | /// Returns the well-known port number for the URI's scheme, |
304 | /// or 0 if the port number is not known. |
305 | |
306 | void parse(const std::string& uri); |
307 | /// Parses and assigns an URI from the given string. Throws a |
308 | /// SyntaxException if the uri is not valid. |
309 | |
310 | void parseAuthority(std::string::const_iterator& it, const std::string::const_iterator& end); |
311 | /// Parses and sets the user-info, host and port from the given data. |
312 | |
313 | void parseHostAndPort(std::string::const_iterator& it, const std::string::const_iterator& end); |
314 | /// Parses and sets the host and port from the given data. |
315 | |
316 | void parsePath(std::string::const_iterator& it, const std::string::const_iterator& end); |
317 | /// Parses and sets the path from the given data. |
318 | |
319 | void parsePathEtc(std::string::const_iterator& it, const std::string::const_iterator& end); |
320 | /// Parses and sets the path, query and fragment from the given data. |
321 | |
322 | void parseQuery(std::string::const_iterator& it, const std::string::const_iterator& end); |
323 | /// Parses and sets the query from the given data. |
324 | |
325 | void parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end); |
326 | /// Parses and sets the fragment from the given data. |
327 | |
328 | void mergePath(const std::string& path); |
329 | /// Appends a path to the URI's path. |
330 | |
331 | void removeDotSegments(bool removeLeading = true); |
332 | /// Removes all dot segments from the path. |
333 | |
334 | static void getPathSegments(const std::string& path, std::vector<std::string>& segments); |
335 | /// Places the single path segments (delimited by slashes) into the |
336 | /// given vector. |
337 | |
338 | void buildPath(const std::vector<std::string>& segments, bool leadingSlash, bool trailingSlash); |
339 | /// Builds the path from the given segments. |
340 | |
341 | static const std::string RESERVED_PATH; |
342 | static const std::string RESERVED_QUERY; |
343 | static const std::string RESERVED_QUERY_PARAM; |
344 | static const std::string RESERVED_FRAGMENT; |
345 | static const std::string ILLEGAL; |
346 | |
347 | private: |
348 | std::string _scheme; |
349 | std::string _userInfo; |
350 | std::string _host; |
351 | unsigned short _port; |
352 | std::string _path; |
353 | std::string _query; |
354 | std::string _fragment; |
355 | }; |
356 | |
357 | |
358 | // |
359 | // inlines |
360 | // |
361 | inline const std::string& URI::getScheme() const |
362 | { |
363 | return _scheme; |
364 | } |
365 | |
366 | |
367 | inline const std::string& URI::getUserInfo() const |
368 | { |
369 | return _userInfo; |
370 | } |
371 | |
372 | |
373 | inline const std::string& URI::getHost() const |
374 | { |
375 | return _host; |
376 | } |
377 | |
378 | |
379 | inline const std::string& URI::getPath() const |
380 | { |
381 | return _path; |
382 | } |
383 | |
384 | |
385 | inline const std::string& URI::getRawQuery() const |
386 | { |
387 | return _query; |
388 | } |
389 | |
390 | |
391 | inline const std::string& URI::getFragment() const |
392 | { |
393 | return _fragment; |
394 | } |
395 | |
396 | |
397 | inline void swap(URI& u1, URI& u2) |
398 | { |
399 | u1.swap(u2); |
400 | } |
401 | |
402 | |
403 | } // namespace Poco |
404 | |
405 | |
406 | #endif // Foundation_URI_INCLUDED |
407 | |