1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/Uri.h> |
18 | |
19 | #include <algorithm> |
20 | #include <cctype> |
21 | |
22 | #include <boost/regex.hpp> |
23 | |
24 | namespace folly { |
25 | |
26 | namespace { |
27 | |
28 | std::string submatch(const boost::cmatch& m, int idx) { |
29 | const auto& sub = m[idx]; |
30 | return std::string(sub.first, sub.second); |
31 | } |
32 | |
33 | } // namespace |
34 | |
35 | Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) { |
36 | static const boost::regex uriRegex( |
37 | "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme: |
38 | "([^?#]*)" // authority and path |
39 | "(?:\\?([^#]*))?" // ?query |
40 | "(?:#(.*))?" ); // #fragment |
41 | static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?" ); |
42 | |
43 | boost::cmatch match; |
44 | if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) { |
45 | throw std::invalid_argument(to<std::string>("invalid URI " , str)); |
46 | } |
47 | |
48 | scheme_ = submatch(match, 1); |
49 | std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower); |
50 | |
51 | StringPiece authorityAndPath(match[2].first, match[2].second); |
52 | boost::cmatch authorityAndPathMatch; |
53 | if (!boost::regex_match( |
54 | authorityAndPath.begin(), |
55 | authorityAndPath.end(), |
56 | authorityAndPathMatch, |
57 | authorityAndPathRegex)) { |
58 | // Does not start with //, doesn't have authority |
59 | hasAuthority_ = false; |
60 | path_ = authorityAndPath.str(); |
61 | } else { |
62 | static const boost::regex authorityRegex( |
63 | "(?:([^@:]*)(?::([^@]*))?@)?" // username, password |
64 | "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']', |
65 | // dotted-IPv4, or named host) |
66 | "(?::(\\d*))?" ); // port |
67 | |
68 | const auto authority = authorityAndPathMatch[1]; |
69 | boost::cmatch authorityMatch; |
70 | if (!boost::regex_match( |
71 | authority.first, |
72 | authority.second, |
73 | authorityMatch, |
74 | authorityRegex)) { |
75 | throw std::invalid_argument(to<std::string>( |
76 | "invalid URI authority " , |
77 | StringPiece(authority.first, authority.second))); |
78 | } |
79 | |
80 | StringPiece port(authorityMatch[4].first, authorityMatch[4].second); |
81 | if (!port.empty()) { |
82 | port_ = to<uint16_t>(port); |
83 | } |
84 | |
85 | hasAuthority_ = true; |
86 | username_ = submatch(authorityMatch, 1); |
87 | password_ = submatch(authorityMatch, 2); |
88 | host_ = submatch(authorityMatch, 3); |
89 | path_ = submatch(authorityAndPathMatch, 2); |
90 | } |
91 | |
92 | query_ = submatch(match, 3); |
93 | fragment_ = submatch(match, 4); |
94 | } |
95 | |
96 | std::string Uri::authority() const { |
97 | std::string result; |
98 | |
99 | // Port is 5 characters max and we have up to 3 delimiters. |
100 | result.reserve(host().size() + username().size() + password().size() + 8); |
101 | |
102 | if (!username().empty() || !password().empty()) { |
103 | result.append(username()); |
104 | |
105 | if (!password().empty()) { |
106 | result.push_back(':'); |
107 | result.append(password()); |
108 | } |
109 | |
110 | result.push_back('@'); |
111 | } |
112 | |
113 | result.append(host()); |
114 | |
115 | if (port() != 0) { |
116 | result.push_back(':'); |
117 | toAppend(port(), &result); |
118 | } |
119 | |
120 | return result; |
121 | } |
122 | |
123 | std::string Uri::hostname() const { |
124 | if (host_.size() > 0 && host_[0] == '[') { |
125 | // If it starts with '[', then it should end with ']', this is ensured by |
126 | // regex |
127 | return host_.substr(1, host_.size() - 2); |
128 | } |
129 | return host_; |
130 | } |
131 | |
132 | const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() { |
133 | if (!query_.empty() && queryParams_.empty()) { |
134 | // Parse query string |
135 | static const boost::regex queryParamRegex( |
136 | "(^|&)" /*start of query or start of parameter "&"*/ |
137 | "([^=&]*)=?" /*parameter name and "=" if value is expected*/ |
138 | "([^=&]*)" /*parameter value*/ |
139 | "(?=(&|$))" /*forward reference, next should be end of query or |
140 | start of next parameter*/); |
141 | const boost::cregex_iterator paramBeginItr( |
142 | query_.data(), query_.data() + query_.size(), queryParamRegex); |
143 | boost::cregex_iterator paramEndItr; |
144 | for (auto itr = paramBeginItr; itr != paramEndItr; ++itr) { |
145 | if (itr->length(2) == 0) { |
146 | // key is empty, ignore it |
147 | continue; |
148 | } |
149 | queryParams_.emplace_back( |
150 | std::string((*itr)[2].first, (*itr)[2].second), // parameter name |
151 | std::string((*itr)[3].first, (*itr)[3].second) // parameter value |
152 | ); |
153 | } |
154 | } |
155 | return queryParams_; |
156 | } |
157 | |
158 | } // namespace folly |
159 | |