| 1 | /*************************************************************************** |
| 2 | * _ _ ____ _ |
| 3 | * Project ___| | | | _ \| | |
| 4 | * / __| | | | |_) | | |
| 5 | * | (__| |_| | _ <| |___ |
| 6 | * \___|\___/|_| \_\_____| |
| 7 | * |
| 8 | * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. |
| 9 | * |
| 10 | * This software is licensed as described in the file COPYING, which |
| 11 | * you should have received as part of this distribution. The terms |
| 12 | * are also available at https://curl.se/docs/copyright.html. |
| 13 | * |
| 14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
| 15 | * copies of the Software, and permit persons to whom the Software is |
| 16 | * furnished to do so, under the terms of the COPYING file. |
| 17 | * |
| 18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| 19 | * KIND, either express or implied. |
| 20 | * |
| 21 | * SPDX-License-Identifier: curl |
| 22 | * |
| 23 | ***************************************************************************/ |
| 24 | |
| 25 | #include "curl_setup.h" |
| 26 | |
| 27 | #ifndef CURL_DISABLE_HTTP |
| 28 | |
| 29 | #include "urldata.h" |
| 30 | #include <curl/curl.h> |
| 31 | #include "http.h" |
| 32 | #include "http1.h" |
| 33 | #include "urlapi-int.h" |
| 34 | |
| 35 | /* The last 3 #include files should be in this order */ |
| 36 | #include "curl_printf.h" |
| 37 | #include "curl_memory.h" |
| 38 | #include "memdebug.h" |
| 39 | |
| 40 | |
/* Size in bytes of the stack buffer that start_req() copies a request
 * target into before handing it to the URL parser; targets that do not
 * fit, including the terminating NUL, are rejected there. */
#define H1_MAX_URL_LEN (8 * 1024)
| 42 | |
| 43 | void Curl_h1_req_parse_init(struct h1_req_parser *parser, size_t max_line_len) |
| 44 | { |
| 45 | memset(s: parser, c: 0, n: sizeof(*parser)); |
| 46 | parser->max_line_len = max_line_len; |
| 47 | Curl_dyn_init(s: &parser->scratch, toobig: max_line_len); |
| 48 | } |
| 49 | |
| 50 | void Curl_h1_req_parse_free(struct h1_req_parser *parser) |
| 51 | { |
| 52 | if(parser) { |
| 53 | Curl_http_req_free(req: parser->req); |
| 54 | Curl_dyn_free(s: &parser->scratch); |
| 55 | parser->req = NULL; |
| 56 | parser->done = FALSE; |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | static CURLcode trim_line(struct h1_req_parser *parser, int options) |
| 61 | { |
| 62 | DEBUGASSERT(parser->line); |
| 63 | if(parser->line_len) { |
| 64 | if(parser->line[parser->line_len - 1] == '\n') |
| 65 | --parser->line_len; |
| 66 | if(parser->line_len) { |
| 67 | if(parser->line[parser->line_len - 1] == '\r') |
| 68 | --parser->line_len; |
| 69 | else if(options & H1_PARSE_OPT_STRICT) |
| 70 | return CURLE_URL_MALFORMAT; |
| 71 | } |
| 72 | else if(options & H1_PARSE_OPT_STRICT) |
| 73 | return CURLE_URL_MALFORMAT; |
| 74 | } |
| 75 | else if(options & H1_PARSE_OPT_STRICT) |
| 76 | return CURLE_URL_MALFORMAT; |
| 77 | |
| 78 | if(parser->line_len > parser->max_line_len) { |
| 79 | return CURLE_URL_MALFORMAT; |
| 80 | } |
| 81 | return CURLE_OK; |
| 82 | } |
| 83 | |
| 84 | static ssize_t detect_line(struct h1_req_parser *parser, |
| 85 | const char *buf, const size_t buflen, |
| 86 | CURLcode *err) |
| 87 | { |
| 88 | const char *line_end; |
| 89 | |
| 90 | DEBUGASSERT(!parser->line); |
| 91 | line_end = memchr(s: buf, c: '\n', n: buflen); |
| 92 | if(!line_end) { |
| 93 | *err = CURLE_AGAIN; |
| 94 | return -1; |
| 95 | } |
| 96 | parser->line = buf; |
| 97 | parser->line_len = line_end - buf + 1; |
| 98 | *err = CURLE_OK; |
| 99 | return (ssize_t)parser->line_len; |
| 100 | } |
| 101 | |
| 102 | static ssize_t next_line(struct h1_req_parser *parser, |
| 103 | const char *buf, const size_t buflen, int options, |
| 104 | CURLcode *err) |
| 105 | { |
| 106 | ssize_t nread = 0; |
| 107 | |
| 108 | if(parser->line) { |
| 109 | parser->line = NULL; |
| 110 | parser->line_len = 0; |
| 111 | Curl_dyn_reset(s: &parser->scratch); |
| 112 | } |
| 113 | |
| 114 | nread = detect_line(parser, buf, buflen, err); |
| 115 | if(nread >= 0) { |
| 116 | if(Curl_dyn_len(s: &parser->scratch)) { |
| 117 | /* append detected line to scratch to have the complete line */ |
| 118 | *err = Curl_dyn_addn(s: &parser->scratch, mem: parser->line, len: parser->line_len); |
| 119 | if(*err) |
| 120 | return -1; |
| 121 | parser->line = Curl_dyn_ptr(s: &parser->scratch); |
| 122 | parser->line_len = Curl_dyn_len(s: &parser->scratch); |
| 123 | } |
| 124 | *err = trim_line(parser, options); |
| 125 | if(*err) |
| 126 | return -1; |
| 127 | } |
| 128 | else if(*err == CURLE_AGAIN) { |
| 129 | /* no line end in `buf`, add it to our scratch */ |
| 130 | *err = Curl_dyn_addn(s: &parser->scratch, mem: (const unsigned char *)buf, len: buflen); |
| 131 | nread = (*err)? -1 : (ssize_t)buflen; |
| 132 | } |
| 133 | return nread; |
| 134 | } |
| 135 | |
| 136 | static CURLcode start_req(struct h1_req_parser *parser, |
| 137 | const char *scheme_default, int options) |
| 138 | { |
| 139 | const char *p, *m, *target, *hv, *scheme, *authority, *path; |
| 140 | size_t m_len, target_len, hv_len, scheme_len, authority_len, path_len; |
| 141 | size_t i; |
| 142 | CURLU *url = NULL; |
| 143 | CURLcode result = CURLE_URL_MALFORMAT; /* Use this as default fail */ |
| 144 | |
| 145 | DEBUGASSERT(!parser->req); |
| 146 | /* line must match: "METHOD TARGET HTTP_VERSION" */ |
| 147 | p = memchr(s: parser->line, c: ' ', n: parser->line_len); |
| 148 | if(!p || p == parser->line) |
| 149 | goto out; |
| 150 | |
| 151 | m = parser->line; |
| 152 | m_len = p - parser->line; |
| 153 | target = p + 1; |
| 154 | target_len = hv_len = 0; |
| 155 | hv = NULL; |
| 156 | |
| 157 | /* URL may contain spaces so scan backwards */ |
| 158 | for(i = parser->line_len; i > m_len; --i) { |
| 159 | if(parser->line[i] == ' ') { |
| 160 | hv = &parser->line[i + 1]; |
| 161 | hv_len = parser->line_len - i; |
| 162 | target_len = (hv - target) - 1; |
| 163 | break; |
| 164 | } |
| 165 | } |
| 166 | /* no SPACE found or empty TARGET or empty HTTP_VERSION */ |
| 167 | if(!target_len || !hv_len) |
| 168 | goto out; |
| 169 | |
| 170 | /* TODO: we do not check HTTP_VERSION for conformity, should |
| 171 | + do that when STRICT option is supplied. */ |
| 172 | (void)hv; |
| 173 | |
| 174 | /* The TARGET can be (rfc 9112, ch. 3.2): |
| 175 | * origin-form: path + optional query |
| 176 | * absolute-form: absolute URI |
| 177 | * authority-form: host+port for CONNECT |
| 178 | * asterisk-form: '*' for OPTIONS |
| 179 | * |
| 180 | * from TARGET, we derive `scheme` `authority` `path` |
| 181 | * origin-form -- -- TARGET |
| 182 | * absolute-form URL* URL* URL* |
| 183 | * authority-form -- TARGET -- |
| 184 | * asterisk-form -- -- TARGET |
| 185 | */ |
| 186 | scheme = authority = path = NULL; |
| 187 | scheme_len = authority_len = path_len = 0; |
| 188 | |
| 189 | if(target_len == 1 && target[0] == '*') { |
| 190 | /* asterisk-form */ |
| 191 | path = target; |
| 192 | path_len = target_len; |
| 193 | } |
| 194 | else if(!strncmp(s1: "CONNECT" , s2: m, n: m_len)) { |
| 195 | /* authority-form */ |
| 196 | authority = target; |
| 197 | authority_len = target_len; |
| 198 | } |
| 199 | else if(target[0] == '/') { |
| 200 | /* origin-form */ |
| 201 | path = target; |
| 202 | path_len = target_len; |
| 203 | } |
| 204 | else { |
| 205 | /* origin-form OR absolute-form */ |
| 206 | CURLUcode uc; |
| 207 | char tmp[H1_MAX_URL_LEN]; |
| 208 | |
| 209 | /* default, unless we see an absolute URL */ |
| 210 | path = target; |
| 211 | path_len = target_len; |
| 212 | |
| 213 | /* URL parser wants 0-termination */ |
| 214 | if(target_len >= sizeof(tmp)) |
| 215 | goto out; |
| 216 | memcpy(dest: tmp, src: target, n: target_len); |
| 217 | tmp[target_len] = '\0'; |
| 218 | /* See if treating TARGET as an absolute URL makes sense */ |
| 219 | if(Curl_is_absolute_url(url: tmp, NULL, buflen: 0, FALSE)) { |
| 220 | int url_options; |
| 221 | |
| 222 | url = curl_url(); |
| 223 | if(!url) { |
| 224 | result = CURLE_OUT_OF_MEMORY; |
| 225 | goto out; |
| 226 | } |
| 227 | url_options = (CURLU_NON_SUPPORT_SCHEME| |
| 228 | CURLU_PATH_AS_IS| |
| 229 | CURLU_NO_DEFAULT_PORT); |
| 230 | if(!(options & H1_PARSE_OPT_STRICT)) |
| 231 | url_options |= CURLU_ALLOW_SPACE; |
| 232 | uc = curl_url_set(handle: url, what: CURLUPART_URL, part: tmp, flags: url_options); |
| 233 | if(uc) { |
| 234 | goto out; |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | if(!url && (options & H1_PARSE_OPT_STRICT)) { |
| 239 | /* we should have an absolute URL or have seen `/` earlier */ |
| 240 | goto out; |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | if(url) { |
| 245 | result = Curl_http_req_make2(preq: &parser->req, method: m, m_len, url, scheme_default); |
| 246 | } |
| 247 | else { |
| 248 | if(!scheme && scheme_default) { |
| 249 | scheme = scheme_default; |
| 250 | scheme_len = strlen(s: scheme_default); |
| 251 | } |
| 252 | result = Curl_http_req_make(preq: &parser->req, method: m, m_len, scheme, s_len: scheme_len, |
| 253 | authority, a_len: authority_len, path, p_len: path_len); |
| 254 | } |
| 255 | |
| 256 | out: |
| 257 | curl_url_cleanup(handle: url); |
| 258 | return result; |
| 259 | } |
| 260 | |
| 261 | ssize_t Curl_h1_req_parse_read(struct h1_req_parser *parser, |
| 262 | const char *buf, size_t buflen, |
| 263 | const char *scheme_default, int options, |
| 264 | CURLcode *err) |
| 265 | { |
| 266 | ssize_t nread = 0, n; |
| 267 | |
| 268 | *err = CURLE_OK; |
| 269 | while(!parser->done) { |
| 270 | n = next_line(parser, buf, buflen, options, err); |
| 271 | if(n < 0) { |
| 272 | if(*err != CURLE_AGAIN) { |
| 273 | nread = -1; |
| 274 | } |
| 275 | *err = CURLE_OK; |
| 276 | goto out; |
| 277 | } |
| 278 | |
| 279 | /* Consume this line */ |
| 280 | nread += (size_t)n; |
| 281 | buf += (size_t)n; |
| 282 | buflen -= (size_t)n; |
| 283 | |
| 284 | if(!parser->line) { |
| 285 | /* consumed bytes, but line not complete */ |
| 286 | if(!buflen) |
| 287 | goto out; |
| 288 | } |
| 289 | else if(!parser->req) { |
| 290 | *err = start_req(parser, scheme_default, options); |
| 291 | if(*err) { |
| 292 | nread = -1; |
| 293 | goto out; |
| 294 | } |
| 295 | } |
| 296 | else if(parser->line_len == 0) { |
| 297 | /* last, empty line, we are finished */ |
| 298 | if(!parser->req) { |
| 299 | *err = CURLE_URL_MALFORMAT; |
| 300 | nread = -1; |
| 301 | goto out; |
| 302 | } |
| 303 | parser->done = TRUE; |
| 304 | Curl_dyn_reset(s: &parser->scratch); |
| 305 | /* last chance adjustments */ |
| 306 | } |
| 307 | else { |
| 308 | *err = Curl_dynhds_h1_add_line(dynhds: &parser->req->headers, |
| 309 | line: parser->line, line_len: parser->line_len); |
| 310 | if(*err) { |
| 311 | nread = -1; |
| 312 | goto out; |
| 313 | } |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | out: |
| 318 | return nread; |
| 319 | } |
| 320 | |
| 321 | CURLcode Curl_h1_req_write_head(struct httpreq *req, int http_minor, |
| 322 | struct dynbuf *dbuf) |
| 323 | { |
| 324 | CURLcode result; |
| 325 | |
| 326 | result = Curl_dyn_addf(s: dbuf, fmt: "%s %s%s%s%s HTTP/1.%d\r\n" , |
| 327 | req->method, |
| 328 | req->scheme? req->scheme : "" , |
| 329 | req->scheme? "://" : "" , |
| 330 | req->authority? req->authority : "" , |
| 331 | req->path? req->path : "" , |
| 332 | http_minor); |
| 333 | if(result) |
| 334 | goto out; |
| 335 | |
| 336 | result = Curl_dynhds_h1_dprint(dynhds: &req->headers, dbuf); |
| 337 | if(result) |
| 338 | goto out; |
| 339 | |
| 340 | result = Curl_dyn_addn(s: dbuf, STRCONST("\r\n" )); |
| 341 | |
| 342 | out: |
| 343 | return result; |
| 344 | } |
| 345 | |
| 346 | #endif /* !CURL_DISABLE_HTTP */ |
| 347 | |