1/*
2 * Copyright 2013-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <folly/Uri.h>

#include <algorithm>
#include <cctype>
#include <stdexcept>

#include <boost/regex.hpp>
23
24namespace folly {
25
26namespace {
27
28std::string submatch(const boost::cmatch& m, int idx) {
29 const auto& sub = m[idx];
30 return std::string(sub.first, sub.second);
31}
32
33} // namespace
34
35Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) {
36 static const boost::regex uriRegex(
37 "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
38 "([^?#]*)" // authority and path
39 "(?:\\?([^#]*))?" // ?query
40 "(?:#(.*))?"); // #fragment
41 static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");
42
43 boost::cmatch match;
44 if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) {
45 throw std::invalid_argument(to<std::string>("invalid URI ", str));
46 }
47
48 scheme_ = submatch(match, 1);
49 std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower);
50
51 StringPiece authorityAndPath(match[2].first, match[2].second);
52 boost::cmatch authorityAndPathMatch;
53 if (!boost::regex_match(
54 authorityAndPath.begin(),
55 authorityAndPath.end(),
56 authorityAndPathMatch,
57 authorityAndPathRegex)) {
58 // Does not start with //, doesn't have authority
59 hasAuthority_ = false;
60 path_ = authorityAndPath.str();
61 } else {
62 static const boost::regex authorityRegex(
63 "(?:([^@:]*)(?::([^@]*))?@)?" // username, password
64 "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
65 // dotted-IPv4, or named host)
66 "(?::(\\d*))?"); // port
67
68 const auto authority = authorityAndPathMatch[1];
69 boost::cmatch authorityMatch;
70 if (!boost::regex_match(
71 authority.first,
72 authority.second,
73 authorityMatch,
74 authorityRegex)) {
75 throw std::invalid_argument(to<std::string>(
76 "invalid URI authority ",
77 StringPiece(authority.first, authority.second)));
78 }
79
80 StringPiece port(authorityMatch[4].first, authorityMatch[4].second);
81 if (!port.empty()) {
82 port_ = to<uint16_t>(port);
83 }
84
85 hasAuthority_ = true;
86 username_ = submatch(authorityMatch, 1);
87 password_ = submatch(authorityMatch, 2);
88 host_ = submatch(authorityMatch, 3);
89 path_ = submatch(authorityAndPathMatch, 2);
90 }
91
92 query_ = submatch(match, 3);
93 fragment_ = submatch(match, 4);
94}
95
96std::string Uri::authority() const {
97 std::string result;
98
99 // Port is 5 characters max and we have up to 3 delimiters.
100 result.reserve(host().size() + username().size() + password().size() + 8);
101
102 if (!username().empty() || !password().empty()) {
103 result.append(username());
104
105 if (!password().empty()) {
106 result.push_back(':');
107 result.append(password());
108 }
109
110 result.push_back('@');
111 }
112
113 result.append(host());
114
115 if (port() != 0) {
116 result.push_back(':');
117 toAppend(port(), &result);
118 }
119
120 return result;
121}
122
123std::string Uri::hostname() const {
124 if (host_.size() > 0 && host_[0] == '[') {
125 // If it starts with '[', then it should end with ']', this is ensured by
126 // regex
127 return host_.substr(1, host_.size() - 2);
128 }
129 return host_;
130}
131
132const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() {
133 if (!query_.empty() && queryParams_.empty()) {
134 // Parse query string
135 static const boost::regex queryParamRegex(
136 "(^|&)" /*start of query or start of parameter "&"*/
137 "([^=&]*)=?" /*parameter name and "=" if value is expected*/
138 "([^=&]*)" /*parameter value*/
139 "(?=(&|$))" /*forward reference, next should be end of query or
140 start of next parameter*/);
141 const boost::cregex_iterator paramBeginItr(
142 query_.data(), query_.data() + query_.size(), queryParamRegex);
143 boost::cregex_iterator paramEndItr;
144 for (auto itr = paramBeginItr; itr != paramEndItr; ++itr) {
145 if (itr->length(2) == 0) {
146 // key is empty, ignore it
147 continue;
148 }
149 queryParams_.emplace_back(
150 std::string((*itr)[2].first, (*itr)[2].second), // parameter name
151 std::string((*itr)[3].first, (*itr)[3].second) // parameter value
152 );
153 }
154 }
155 return queryParams_;
156}
157
158} // namespace folly
159