IXUrlParser.cpp source code [Aseprite/third_party/IXWebSocket/ixwebsocket/IXUrlParser.cpp]

1	/*
2	* Lightweight URL & URI parser (RFC 1738, RFC 3986)
3	* https://github.com/corporateshark/LUrlParser
4	*
5	* The MIT License (MIT)
6	*
7	* Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
8	*
9	* Permission is hereby granted, free of charge, to any person obtaining a copy
10	* of this software and associated documentation files (the "Software"), to deal
11	* in the Software without restriction, including without limitation the rights
12	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13	* copies of the Software, and to permit persons to whom the Software is
14	* furnished to do so, subject to the following conditions:
15	*
16	* The above copyright notice and this permission notice shall be included in all
17	* copies or substantial portions of the Software.
18	*
19	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25	* SOFTWARE.
26	*
27	* IXUrlParser.cpp
28	* Author: Benjamin Sergeant
29	* Copyright (c) 2019 Machine Zone, Inc. All rights reserved.
30	*/
31
32	#include "IXUrlParser.h"
33
34	#include <algorithm>
35	#include <cstdlib>
36	#include <cstring>
37
namespace
{
    /// Result codes reported by clParseURL through m_ErrorCode.
    enum LUrlParserError
    {
        LUrlParserError_Ok = 0,                  ///< the URL was parsed successfully
        LUrlParserError_Uninitialized = 1,       ///< default-constructed, nothing parsed yet
        LUrlParserError_NoUrlCharacter = 2,      ///< no ':' found, so there is no scheme
        LUrlParserError_InvalidSchemeName = 3,   ///< scheme contains a forbidden character
        LUrlParserError_NoDoubleSlash = 4,       ///< scheme is not followed by "//"
        LUrlParserError_NoAtSign = 5,            ///< user info is not terminated by '@'
        LUrlParserError_UnexpectedEndOfLine = 6, ///< not produced by the parser in this file
        LUrlParserError_NoSlash = 7,             ///< unexpected character after <host>:<port>
    };

    /// Holds the components of a parsed URL together with the parsing status.
    class clParseURL
    {
    public:
        LUrlParserError m_ErrorCode;
        std::string m_Scheme;   ///< lower-cased scheme, without the trailing ':'
        std::string m_Host;     ///< host name; an IPv6 literal keeps its brackets
        std::string m_Port;     ///< port exactly as written in the URL (may be empty)
        std::string m_Path;     ///< path WITHOUT its leading '/'
        std::string m_Query;    ///< query, without the leading '?'
        std::string m_Fragment; ///< fragment, without the leading '#'
        std::string m_UserName;
        std::string m_Password;

        clParseURL()
            : m_ErrorCode(LUrlParserError_Uninitialized)
        {
        }

        /// return 'true' if the parsing was successful
        bool IsValid() const
        {
            return m_ErrorCode == LUrlParserError_Ok;
        }

        /// helper to convert the port number to int, return 'true' if the port is valid (within
        /// the 1..65535 range); OutPort may be null when only the validity check is needed
        bool GetPort(int* OutPort) const;

        /// parse the URL
        static clParseURL ParseURL(const std::string& URL);

    private:
        /// build a failed result carrying only an error code
        explicit clParseURL(LUrlParserError ErrorCode)
            : m_ErrorCode(ErrorCode)
        {
        }
    };

    /// return 'true' if every character of SchemeName is a letter, '+', '-' or '.'
    /// (an empty name is accepted, as there is no character to reject)
    bool IsSchemeValid(const std::string& SchemeName)
    {
        for (const char c : SchemeName)
        {
            // <cctype> functions require a value representable as unsigned char;
            // passing a negative plain char is undefined behaviour.
            const bool bIsLetter = isalpha(static_cast<unsigned char>(c)) != 0;

            if (!bIsLetter && c != '+' && c != '-' && c != '.') return false;
        }

        return true;
    }

    /// return 'true' if c terminates the authority part of a URL:
    /// end of string, '/', '?' or '#' (RFC 3986, section 3.2)
    bool IsAuthorityEnd(char c)
    {
        return c == '\0' || c == '/' || c == '?' || c == '#';
    }

    bool clParseURL::GetPort(int* OutPort) const
    {
        if (!IsValid())
        {
            return false;
        }

        // strtol (unlike atoi) has defined behaviour on overflow: it saturates to
        // LONG_MAX / LONG_MIN, which the range check below rejects.
        const long Port = strtol(m_Port.c_str(), nullptr, 10);

        if (Port <= 0 || Port > 65535)
        {
            return false;
        }

        if (OutPort)
        {
            *OutPort = static_cast<int>(Port);
        }

        return true;
    }

    // based on RFC 1738 and RFC 3986
    //
    // Accepted form: <scheme>://[<user>[:<password>]@]<host>[:<port>][/<path>][?<query>][#<fragment>]
    // On failure the returned object carries only the error code; on success
    // m_ErrorCode is LUrlParserError_Ok and the components found are filled in.
    clParseURL clParseURL::ParseURL(const std::string& URL)
    {
        clParseURL Result;

        const char* CurrentString = URL.c_str();

        /*
         * <scheme>:<scheme-specific-part>
         * <scheme> := [a-z\+\-\.]+
         * For resiliency, programs interpreting URLs should treat upper case letters as
         * equivalent to lower case in scheme names
         */

        // try to read scheme
        {
            const char* LocalString = strchr(CurrentString, ':');

            if (!LocalString)
            {
                return clParseURL(LUrlParserError_NoUrlCharacter);
            }

            // save the scheme name
            Result.m_Scheme.assign(CurrentString, LocalString);

            if (!IsSchemeValid(Result.m_Scheme))
            {
                return clParseURL(LUrlParserError_InvalidSchemeName);
            }

            // scheme should be lowercase
            std::transform(Result.m_Scheme.begin(),
                           Result.m_Scheme.end(),
                           Result.m_Scheme.begin(),
                           [](unsigned char c) { return static_cast<char>(::tolower(c)); });

            // skip ':'
            CurrentString = LocalString + 1;
        }

        /*
         * //<user>:<password>@<host>:<port>/<url-path>
         * any ":", "@" and "/" must be normalized
         */

        // skip "//" (the terminating NUL fails the comparison, so this never reads past the end)
        if (*CurrentString++ != '/') return clParseURL(LUrlParserError_NoDoubleSlash);
        if (*CurrentString++ != '/') return clParseURL(LUrlParserError_NoDoubleSlash);

        // check if the user name and password are specified: an '@' only counts when it
        // appears inside the authority, i.e. before any '/', '?' or '#'
        bool bHasUserName = false;

        for (const char* Scan = CurrentString; !IsAuthorityEnd(*Scan); ++Scan)
        {
            if (*Scan == '@')
            {
                bHasUserName = true;
                break;
            }
        }

        const char* LocalString = CurrentString;

        if (bHasUserName)
        {
            // read user name
            while (*LocalString && *LocalString != ':' && *LocalString != '@')
                LocalString++;

            Result.m_UserName.assign(CurrentString, LocalString);

            // proceed with the current pointer
            CurrentString = LocalString;

            if (*CurrentString == ':')
            {
                // skip ':'
                CurrentString++;

                // read password
                LocalString = CurrentString;

                while (*LocalString && *LocalString != '@')
                    LocalString++;

                Result.m_Password.assign(CurrentString, LocalString);

                CurrentString = LocalString;
            }

            // skip '@'
            if (*CurrentString != '@')
            {
                return clParseURL(LUrlParserError_NoAtSign);
            }

            CurrentString++;
        }

        // an IPv6 literal is enclosed in brackets and may itself contain ':'
        const bool bHasBracket = (*CurrentString == '[');

        // go ahead, read the host name
        LocalString = CurrentString;

        while (*LocalString)
        {
            if (bHasBracket)
            {
                if (*LocalString == ']')
                {
                    // end of IPv6 address, keep the closing bracket in the host
                    LocalString++;
                    break;
                }
            }
            else if (*LocalString == ':' || IsAuthorityEnd(*LocalString))
            {
                // port number, path, query or fragment follows
                break;
            }

            LocalString++;
        }

        Result.m_Host.assign(CurrentString, LocalString);

        CurrentString = LocalString;

        // is port number specified?
        if (*CurrentString == ':')
        {
            CurrentString++;

            // read port number
            LocalString = CurrentString;

            while (!IsAuthorityEnd(*LocalString))
                LocalString++;

            Result.m_Port.assign(CurrentString, LocalString);

            CurrentString = LocalString;
        }

        // end of string
        if (!*CurrentString)
        {
            Result.m_ErrorCode = LUrlParserError_Ok;

            return Result;
        }

        if (*CurrentString == '/')
        {
            // skip '/'
            CurrentString++;

            // parse the path
            LocalString = CurrentString;

            while (*LocalString && *LocalString != '#' && *LocalString != '?')
                LocalString++;

            Result.m_Path.assign(CurrentString, LocalString);

            CurrentString = LocalString;
        }
        else if (*CurrentString != '?' && *CurrentString != '#')
        {
            // only a path, a query or a fragment may follow the authority
            return clParseURL(LUrlParserError_NoSlash);
        }

        // check for query
        if (*CurrentString == '?')
        {
            // skip '?'
            CurrentString++;

            // read query
            LocalString = CurrentString;

            while (*LocalString && *LocalString != '#')
                LocalString++;

            Result.m_Query.assign(CurrentString, LocalString);

            CurrentString = LocalString;
        }

        // check for fragment
        if (*CurrentString == '#')
        {
            // skip '#'
            CurrentString++;

            // the fragment runs to the end of the string
            Result.m_Fragment.assign(CurrentString);
        }

        Result.m_ErrorCode = LUrlParserError_Ok;

        return Result;
    }
} // namespace
337
338	namespace ix
339	{
340	bool UrlParser::parse(const std::string& url,
341	std::string& protocol,
342	std::string& host,
343	std::string& path,
344	std::string& query,
345	int& port)
346	{
347	clParseURL res = clParseURL::ParseURL(url);
348
349	if (!res.IsValid())
350	{
351	return false;
352	}
353
354	protocol = res.m_Scheme;
355	host = res.m_Host;
356	path = res.m_Path;
357	query = res.m_Query;
358
359	if (!res.GetPort(&port))
360	{
361	if (protocol == "ws" \|\| protocol == "http")
362	{
363	port = `80`;
364	}
365	else if (protocol == "wss" \|\| protocol == "https")
366	{
367	port = `443`;
368	}
369	else
370	{
371	// Invalid protocol. Should be caught by regex check
372	// but this missing branch trigger cpplint linter.
373	return false;
374	}
375	}
376
377	if (path.empty())
378	{
379	path = "/";
380	}
381	else if (path[`0`] != `'/'`)
382	{
383	path = `'/'` + path;
384	}
385
386	if (!query.empty())
387	{
388	path += "?";
389	path += query;
390	}
391
392	return true;
393	}
394
395	} // namespace ix
396

Browse the source code of Aseprite/third_party/IXWebSocket/ixwebsocket/IXUrlParser.cpp