1 | /*************************************************************************** |
2 | * _ _ ____ _ |
3 | * Project ___| | | | _ \| | |
4 | * / __| | | | |_) | | |
5 | * | (__| |_| | _ <| |___ |
6 | * \___|\___/|_| \_\_____| |
7 | * |
8 | * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. |
9 | * |
10 | * This software is licensed as described in the file COPYING, which |
11 | * you should have received as part of this distribution. The terms |
12 | * are also available at https://curl.se/docs/copyright.html. |
13 | * |
14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
15 | * copies of the Software, and permit persons to whom the Software is |
16 | * furnished to do so, under the terms of the COPYING file. |
17 | * |
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
19 | * KIND, either express or implied. |
20 | * |
21 | * SPDX-License-Identifier: curl |
22 | * |
23 | ***************************************************************************/ |
24 | |
25 | #include "curl_setup.h" |
26 | |
27 | #ifndef CURL_DISABLE_HTTP |
28 | |
29 | #include "urldata.h" /* it includes http_chunks.h */ |
30 | #include "sendf.h" /* for the client write stuff */ |
31 | #include "dynbuf.h" |
32 | #include "content_encoding.h" |
33 | #include "http.h" |
34 | #include "strtoofft.h" |
35 | #include "warnless.h" |
36 | |
37 | /* The last #include files should be: */ |
38 | #include "curl_memory.h" |
39 | #include "memdebug.h" |
40 | |
41 | /* |
42 | * Chunk format (simplified): |
43 | * |
44 | * <HEX SIZE>[ chunk extension ] CRLF |
45 | * <DATA> CRLF |
46 | * |
47 | * Highlights from RFC2616 section 3.6 say: |
48 | |
49 | The chunked encoding modifies the body of a message in order to |
50 | transfer it as a series of chunks, each with its own size indicator, |
51 | followed by an OPTIONAL trailer containing entity-header fields. This |
52 | allows dynamically produced content to be transferred along with the |
53 | information necessary for the recipient to verify that it has |
54 | received the full message. |
55 | |
56 | Chunked-Body = *chunk |
57 | last-chunk |
58 | trailer |
59 | CRLF |
60 | |
61 | chunk = chunk-size [ chunk-extension ] CRLF |
62 | chunk-data CRLF |
63 | chunk-size = 1*HEX |
64 | last-chunk = 1*("0") [ chunk-extension ] CRLF |
65 | |
66 | chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) |
67 | chunk-ext-name = token |
68 | chunk-ext-val = token | quoted-string |
69 | chunk-data = chunk-size(OCTET) |
70 | trailer = *(entity-header CRLF) |
71 | |
72 | The chunk-size field is a string of hex digits indicating the size of |
73 | the chunk. The chunked encoding is ended by any chunk whose size is |
74 | zero, followed by the trailer, which is terminated by an empty line. |
75 | |
76 | */ |
77 | |
/* Local alias: isxdigit_ascii(x) expands to Curl_isxdigit(x).
 * NOTE(review): no use of this macro is visible in this file's chunk parser,
 * which tests hex digits with ISXDIGIT() instead - confirm whether the alias
 * is still needed before removing it. */
#define isxdigit_ascii(x) Curl_isxdigit(x)
79 | |
80 | void Curl_httpchunk_init(struct Curl_easy *data) |
81 | { |
82 | struct connectdata *conn = data->conn; |
83 | struct Curl_chunker *chunk = &conn->chunk; |
84 | chunk->hexindex = 0; /* start at 0 */ |
85 | chunk->state = CHUNK_HEX; /* we get hex first! */ |
86 | Curl_dyn_init(s: &conn->trailer, DYN_H1_TRAILER); |
87 | } |
88 | |
89 | /* |
90 | * chunk_read() returns a OK for normal operations, or a positive return code |
91 | * for errors. STOP means this sequence of chunks is complete. The 'wrote' |
92 | * argument is set to tell the caller how many bytes we actually passed to the |
93 | * client (for byte-counting and whatever). |
94 | * |
95 | * The states and the state-machine is further explained in the header file. |
96 | * |
97 | * This function always uses ASCII hex values to accommodate non-ASCII hosts. |
98 | * For example, 0x0d and 0x0a are used instead of '\r' and '\n'. |
99 | */ |
100 | CHUNKcode Curl_httpchunk_read(struct Curl_easy *data, |
101 | char *datap, |
102 | ssize_t datalen, |
103 | ssize_t *wrote, |
104 | CURLcode *) |
105 | { |
106 | CURLcode result = CURLE_OK; |
107 | struct connectdata *conn = data->conn; |
108 | struct Curl_chunker *ch = &conn->chunk; |
109 | struct SingleRequest *k = &data->req; |
110 | size_t piece; |
111 | curl_off_t length = (curl_off_t)datalen; |
112 | |
113 | *wrote = 0; /* nothing's written yet */ |
114 | |
115 | /* the original data is written to the client, but we go on with the |
116 | chunk read process, to properly calculate the content length */ |
117 | if(data->set.http_te_skip && !k->ignorebody) { |
118 | result = Curl_client_write(data, CLIENTWRITE_BODY, ptr: datap, len: datalen); |
119 | if(result) { |
120 | *extrap = result; |
121 | return CHUNKE_PASSTHRU_ERROR; |
122 | } |
123 | } |
124 | |
125 | while(length) { |
126 | switch(ch->state) { |
127 | case CHUNK_HEX: |
128 | if(ISXDIGIT(*datap)) { |
129 | if(ch->hexindex < CHUNK_MAXNUM_LEN) { |
130 | ch->hexbuffer[ch->hexindex] = *datap; |
131 | datap++; |
132 | length--; |
133 | ch->hexindex++; |
134 | } |
135 | else { |
136 | return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */ |
137 | } |
138 | } |
139 | else { |
140 | char *endptr; |
141 | if(0 == ch->hexindex) |
142 | /* This is illegal data, we received junk where we expected |
143 | a hexadecimal digit. */ |
144 | return CHUNKE_ILLEGAL_HEX; |
145 | |
146 | /* length and datap are unmodified */ |
147 | ch->hexbuffer[ch->hexindex] = 0; |
148 | |
149 | if(curlx_strtoofft(str: ch->hexbuffer, endp: &endptr, base: 16, num: &ch->datasize)) |
150 | return CHUNKE_ILLEGAL_HEX; |
151 | ch->state = CHUNK_LF; /* now wait for the CRLF */ |
152 | } |
153 | break; |
154 | |
155 | case CHUNK_LF: |
156 | /* waiting for the LF after a chunk size */ |
157 | if(*datap == 0x0a) { |
158 | /* we're now expecting data to come, unless size was zero! */ |
159 | if(0 == ch->datasize) { |
160 | ch->state = CHUNK_TRAILER; /* now check for trailers */ |
161 | } |
162 | else |
163 | ch->state = CHUNK_DATA; |
164 | } |
165 | |
166 | datap++; |
167 | length--; |
168 | break; |
169 | |
170 | case CHUNK_DATA: |
171 | /* We expect 'datasize' of data. We have 'length' right now, it can be |
172 | more or less than 'datasize'. Get the smallest piece. |
173 | */ |
174 | piece = curlx_sotouz(sonum: (ch->datasize >= length)?length:ch->datasize); |
175 | |
176 | /* Write the data portion available */ |
177 | if(!data->set.http_te_skip && !k->ignorebody) { |
178 | result = Curl_client_write(data, CLIENTWRITE_BODY, ptr: datap, len: piece); |
179 | |
180 | if(result) { |
181 | *extrap = result; |
182 | return CHUNKE_PASSTHRU_ERROR; |
183 | } |
184 | } |
185 | |
186 | *wrote += piece; |
187 | ch->datasize -= piece; /* decrease amount left to expect */ |
188 | datap += piece; /* move read pointer forward */ |
189 | length -= piece; /* decrease space left in this round */ |
190 | |
191 | if(0 == ch->datasize) |
192 | /* end of data this round, we now expect a trailing CRLF */ |
193 | ch->state = CHUNK_POSTLF; |
194 | break; |
195 | |
196 | case CHUNK_POSTLF: |
197 | if(*datap == 0x0a) { |
198 | /* The last one before we go back to hex state and start all over. */ |
199 | Curl_httpchunk_init(data); /* sets state back to CHUNK_HEX */ |
200 | } |
201 | else if(*datap != 0x0d) |
202 | return CHUNKE_BAD_CHUNK; |
203 | datap++; |
204 | length--; |
205 | break; |
206 | |
207 | case CHUNK_TRAILER: |
208 | if((*datap == 0x0d) || (*datap == 0x0a)) { |
209 | char *tr = Curl_dyn_ptr(s: &conn->trailer); |
210 | /* this is the end of a trailer, but if the trailer was zero bytes |
211 | there was no trailer and we move on */ |
212 | |
213 | if(tr) { |
214 | size_t trlen; |
215 | result = Curl_dyn_addn(s: &conn->trailer, mem: (char *)STRCONST("\x0d\x0a" )); |
216 | if(result) |
217 | return CHUNKE_OUT_OF_MEMORY; |
218 | |
219 | tr = Curl_dyn_ptr(s: &conn->trailer); |
220 | trlen = Curl_dyn_len(s: &conn->trailer); |
221 | if(!data->set.http_te_skip) { |
222 | result = Curl_client_write(data, |
223 | CLIENTWRITE_HEADER|CLIENTWRITE_TRAILER, |
224 | ptr: tr, len: trlen); |
225 | if(result) { |
226 | *extrap = result; |
227 | return CHUNKE_PASSTHRU_ERROR; |
228 | } |
229 | } |
230 | Curl_dyn_reset(s: &conn->trailer); |
231 | ch->state = CHUNK_TRAILER_CR; |
232 | if(*datap == 0x0a) |
233 | /* already on the LF */ |
234 | break; |
235 | } |
236 | else { |
237 | /* no trailer, we're on the final CRLF pair */ |
238 | ch->state = CHUNK_TRAILER_POSTCR; |
239 | break; /* don't advance the pointer */ |
240 | } |
241 | } |
242 | else { |
243 | result = Curl_dyn_addn(s: &conn->trailer, mem: datap, len: 1); |
244 | if(result) |
245 | return CHUNKE_OUT_OF_MEMORY; |
246 | } |
247 | datap++; |
248 | length--; |
249 | break; |
250 | |
251 | case CHUNK_TRAILER_CR: |
252 | if(*datap == 0x0a) { |
253 | ch->state = CHUNK_TRAILER_POSTCR; |
254 | datap++; |
255 | length--; |
256 | } |
257 | else |
258 | return CHUNKE_BAD_CHUNK; |
259 | break; |
260 | |
261 | case CHUNK_TRAILER_POSTCR: |
262 | /* We enter this state when a CR should arrive so we expect to |
263 | have to first pass a CR before we wait for LF */ |
264 | if((*datap != 0x0d) && (*datap != 0x0a)) { |
265 | /* not a CR then it must be another header in the trailer */ |
266 | ch->state = CHUNK_TRAILER; |
267 | break; |
268 | } |
269 | if(*datap == 0x0d) { |
270 | /* skip if CR */ |
271 | datap++; |
272 | length--; |
273 | } |
274 | /* now wait for the final LF */ |
275 | ch->state = CHUNK_STOP; |
276 | break; |
277 | |
278 | case CHUNK_STOP: |
279 | if(*datap == 0x0a) { |
280 | length--; |
281 | |
282 | /* Record the length of any data left in the end of the buffer |
283 | even if there's no more chunks to read */ |
284 | ch->datasize = curlx_sotouz(sonum: length); |
285 | |
286 | return CHUNKE_STOP; /* return stop */ |
287 | } |
288 | else |
289 | return CHUNKE_BAD_CHUNK; |
290 | } |
291 | } |
292 | return CHUNKE_OK; |
293 | } |
294 | |
295 | const char *Curl_chunked_strerror(CHUNKcode code) |
296 | { |
297 | switch(code) { |
298 | default: |
299 | return "OK" ; |
300 | case CHUNKE_TOO_LONG_HEX: |
301 | return "Too long hexadecimal number" ; |
302 | case CHUNKE_ILLEGAL_HEX: |
303 | return "Illegal or missing hexadecimal sequence" ; |
304 | case CHUNKE_BAD_CHUNK: |
305 | return "Malformed encoding found" ; |
306 | case CHUNKE_PASSTHRU_ERROR: |
307 | DEBUGASSERT(0); /* never used */ |
308 | return "" ; |
309 | case CHUNKE_BAD_ENCODING: |
310 | return "Bad content-encoding found" ; |
311 | case CHUNKE_OUT_OF_MEMORY: |
312 | return "Out of memory" ; |
313 | } |
314 | } |
315 | |
316 | #endif /* CURL_DISABLE_HTTP */ |
317 | |