1 | /*************************************************************************** |
2 | * _ _ ____ _ |
3 | * Project ___| | | | _ \| | |
4 | * / __| | | | |_) | | |
5 | * | (__| |_| | _ <| |___ |
6 | * \___|\___/|_| \_\_____| |
7 | * |
8 | * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. |
9 | * |
10 | * This software is licensed as described in the file COPYING, which |
11 | * you should have received as part of this distribution. The terms |
12 | * are also available at https://curl.haxx.se/docs/copyright.html. |
13 | * |
14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
15 | * copies of the Software, and permit persons to whom the Software is |
16 | * furnished to do so, under the terms of the COPYING file. |
17 | * |
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
19 | * KIND, either express or implied. |
20 | * |
21 | ***************************************************************************/ |
22 | |
23 | #include "curl_setup.h" |
24 | |
25 | #ifndef CURL_DISABLE_HTTP |
26 | |
27 | #include "urldata.h" /* it includes http_chunks.h */ |
28 | #include "sendf.h" /* for the client write stuff */ |
29 | |
30 | #include "content_encoding.h" |
31 | #include "http.h" |
32 | #include "non-ascii.h" /* for Curl_convert_to_network prototype */ |
33 | #include "strtoofft.h" |
34 | #include "warnless.h" |
35 | |
36 | /* The last #include files should be: */ |
37 | #include "curl_memory.h" |
38 | #include "memdebug.h" |
39 | |
40 | /* |
41 | * Chunk format (simplified): |
42 | * |
43 | * <HEX SIZE>[ chunk extension ] CRLF |
44 | * <DATA> CRLF |
45 | * |
46 | * Highlights from RFC2616 section 3.6 say: |
47 | |
48 | The chunked encoding modifies the body of a message in order to |
49 | transfer it as a series of chunks, each with its own size indicator, |
50 | followed by an OPTIONAL trailer containing entity-header fields. This |
51 | allows dynamically produced content to be transferred along with the |
52 | information necessary for the recipient to verify that it has |
53 | received the full message. |
54 | |
55 | Chunked-Body = *chunk |
56 | last-chunk |
57 | trailer |
58 | CRLF |
59 | |
60 | chunk = chunk-size [ chunk-extension ] CRLF |
61 | chunk-data CRLF |
62 | chunk-size = 1*HEX |
63 | last-chunk = 1*("0") [ chunk-extension ] CRLF |
64 | |
65 | chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) |
66 | chunk-ext-name = token |
67 | chunk-ext-val = token | quoted-string |
68 | chunk-data = chunk-size(OCTET) |
69 | trailer = *(entity-header CRLF) |
70 | |
71 | The chunk-size field is a string of hex digits indicating the size of |
72 | the chunk. The chunked encoding is ended by any chunk whose size is |
73 | zero, followed by the trailer, which is terminated by an empty line. |
74 | |
75 | */ |
76 | |
77 | #ifdef CURL_DOES_CONVERSIONS |
/*
 * Tell whether 'digit' is a hexadecimal digit when interpreted as an ASCII
 * code point. The ranges are spelled out as numeric ASCII values instead of
 * relying on ISXDIGIT, so the answer is the same on non-ASCII hosts.
 */
static bool Curl_isxdigit_ascii(char digit)
{
  const bool is_decimal = (digit >= 0x30) && (digit <= 0x39);   /* 0-9 */
  const bool is_upper_hex = (digit >= 0x41) && (digit <= 0x46); /* A-F */
  const bool is_lower_hex = (digit >= 0x61) && (digit <= 0x66); /* a-f */

  return is_decimal || is_upper_hex || is_lower_hex;
}
86 | #else |
87 | #define Curl_isxdigit_ascii(x) Curl_isxdigit(x) |
88 | #endif |
89 | |
90 | void Curl_httpchunk_init(struct connectdata *conn) |
91 | { |
92 | struct Curl_chunker *chunk = &conn->chunk; |
93 | chunk->hexindex = 0; /* start at 0 */ |
94 | chunk->dataleft = 0; /* no data left yet! */ |
95 | chunk->state = CHUNK_HEX; /* we get hex first! */ |
96 | } |
97 | |
98 | /* |
99 | * chunk_read() returns a OK for normal operations, or a positive return code |
100 | * for errors. STOP means this sequence of chunks is complete. The 'wrote' |
101 | * argument is set to tell the caller how many bytes we actually passed to the |
102 | * client (for byte-counting and whatever). |
103 | * |
104 | * The states and the state-machine is further explained in the header file. |
105 | * |
106 | * This function always uses ASCII hex values to accommodate non-ASCII hosts. |
107 | * For example, 0x0d and 0x0a are used instead of '\r' and '\n'. |
108 | */ |
109 | CHUNKcode Curl_httpchunk_read(struct connectdata *conn, |
110 | char *datap, |
111 | ssize_t datalen, |
112 | ssize_t *wrotep, |
113 | CURLcode *) |
114 | { |
115 | CURLcode result = CURLE_OK; |
116 | struct Curl_easy *data = conn->data; |
117 | struct Curl_chunker *ch = &conn->chunk; |
118 | struct SingleRequest *k = &data->req; |
119 | size_t piece; |
120 | curl_off_t length = (curl_off_t)datalen; |
121 | size_t *wrote = (size_t *)wrotep; |
122 | |
123 | *wrote = 0; /* nothing's written yet */ |
124 | |
125 | /* the original data is written to the client, but we go on with the |
126 | chunk read process, to properly calculate the content length*/ |
127 | if(data->set.http_te_skip && !k->ignorebody) { |
128 | result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen); |
129 | if(result) { |
130 | *extrap = result; |
131 | return CHUNKE_PASSTHRU_ERROR; |
132 | } |
133 | } |
134 | |
135 | while(length) { |
136 | switch(ch->state) { |
137 | case CHUNK_HEX: |
138 | if(Curl_isxdigit_ascii(*datap)) { |
139 | if(ch->hexindex < MAXNUM_SIZE) { |
140 | ch->hexbuffer[ch->hexindex] = *datap; |
141 | datap++; |
142 | length--; |
143 | ch->hexindex++; |
144 | } |
145 | else { |
146 | return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */ |
147 | } |
148 | } |
149 | else { |
150 | char *endptr; |
151 | if(0 == ch->hexindex) |
152 | /* This is illegal data, we received junk where we expected |
153 | a hexadecimal digit. */ |
154 | return CHUNKE_ILLEGAL_HEX; |
155 | |
156 | /* length and datap are unmodified */ |
157 | ch->hexbuffer[ch->hexindex] = 0; |
158 | |
159 | /* convert to host encoding before calling strtoul */ |
160 | result = Curl_convert_from_network(conn->data, ch->hexbuffer, |
161 | ch->hexindex); |
162 | if(result) { |
163 | /* Curl_convert_from_network calls failf if unsuccessful */ |
164 | /* Treat it as a bad hex character */ |
165 | return CHUNKE_ILLEGAL_HEX; |
166 | } |
167 | |
168 | if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize)) |
169 | return CHUNKE_ILLEGAL_HEX; |
170 | ch->state = CHUNK_LF; /* now wait for the CRLF */ |
171 | } |
172 | break; |
173 | |
174 | case CHUNK_LF: |
175 | /* waiting for the LF after a chunk size */ |
176 | if(*datap == 0x0a) { |
177 | /* we're now expecting data to come, unless size was zero! */ |
178 | if(0 == ch->datasize) { |
179 | ch->state = CHUNK_TRAILER; /* now check for trailers */ |
180 | conn->trlPos = 0; |
181 | } |
182 | else |
183 | ch->state = CHUNK_DATA; |
184 | } |
185 | |
186 | datap++; |
187 | length--; |
188 | break; |
189 | |
190 | case CHUNK_DATA: |
191 | /* We expect 'datasize' of data. We have 'length' right now, it can be |
192 | more or less than 'datasize'. Get the smallest piece. |
193 | */ |
194 | piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize); |
195 | |
196 | /* Write the data portion available */ |
197 | if(!conn->data->set.http_te_skip && !k->ignorebody) { |
198 | if(!conn->data->set.http_ce_skip && k->writer_stack) |
199 | result = Curl_unencode_write(conn, k->writer_stack, datap, piece); |
200 | else |
201 | result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece); |
202 | |
203 | if(result) { |
204 | *extrap = result; |
205 | return CHUNKE_PASSTHRU_ERROR; |
206 | } |
207 | } |
208 | |
209 | *wrote += piece; |
210 | ch->datasize -= piece; /* decrease amount left to expect */ |
211 | datap += piece; /* move read pointer forward */ |
212 | length -= piece; /* decrease space left in this round */ |
213 | |
214 | if(0 == ch->datasize) |
215 | /* end of data this round, we now expect a trailing CRLF */ |
216 | ch->state = CHUNK_POSTLF; |
217 | break; |
218 | |
219 | case CHUNK_POSTLF: |
220 | if(*datap == 0x0a) { |
221 | /* The last one before we go back to hex state and start all over. */ |
222 | Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */ |
223 | } |
224 | else if(*datap != 0x0d) |
225 | return CHUNKE_BAD_CHUNK; |
226 | datap++; |
227 | length--; |
228 | break; |
229 | |
230 | case CHUNK_TRAILER: |
231 | if((*datap == 0x0d) || (*datap == 0x0a)) { |
232 | /* this is the end of a trailer, but if the trailer was zero bytes |
233 | there was no trailer and we move on */ |
234 | |
235 | if(conn->trlPos) { |
236 | /* we allocate trailer with 3 bytes extra room to fit this */ |
237 | conn->trailer[conn->trlPos++] = 0x0d; |
238 | conn->trailer[conn->trlPos++] = 0x0a; |
239 | conn->trailer[conn->trlPos] = 0; |
240 | |
241 | /* Convert to host encoding before calling Curl_client_write */ |
242 | result = Curl_convert_from_network(conn->data, conn->trailer, |
243 | conn->trlPos); |
244 | if(result) |
245 | /* Curl_convert_from_network calls failf if unsuccessful */ |
246 | /* Treat it as a bad chunk */ |
247 | return CHUNKE_BAD_CHUNK; |
248 | |
249 | if(!data->set.http_te_skip) { |
250 | result = Curl_client_write(conn, CLIENTWRITE_HEADER, |
251 | conn->trailer, conn->trlPos); |
252 | if(result) { |
253 | *extrap = result; |
254 | return CHUNKE_PASSTHRU_ERROR; |
255 | } |
256 | } |
257 | conn->trlPos = 0; |
258 | ch->state = CHUNK_TRAILER_CR; |
259 | if(*datap == 0x0a) |
260 | /* already on the LF */ |
261 | break; |
262 | } |
263 | else { |
264 | /* no trailer, we're on the final CRLF pair */ |
265 | ch->state = CHUNK_TRAILER_POSTCR; |
266 | break; /* don't advance the pointer */ |
267 | } |
268 | } |
269 | else { |
270 | /* conn->trailer is assumed to be freed in url.c on a |
271 | connection basis */ |
272 | if(conn->trlPos >= conn->trlMax) { |
273 | /* we always allocate three extra bytes, just because when the full |
274 | header has been received we append CRLF\0 */ |
275 | char *ptr; |
276 | if(conn->trlMax) { |
277 | conn->trlMax *= 2; |
278 | ptr = realloc(conn->trailer, conn->trlMax + 3); |
279 | } |
280 | else { |
281 | conn->trlMax = 128; |
282 | ptr = malloc(conn->trlMax + 3); |
283 | } |
284 | if(!ptr) |
285 | return CHUNKE_OUT_OF_MEMORY; |
286 | conn->trailer = ptr; |
287 | } |
288 | conn->trailer[conn->trlPos++]=*datap; |
289 | } |
290 | datap++; |
291 | length--; |
292 | break; |
293 | |
294 | case CHUNK_TRAILER_CR: |
295 | if(*datap == 0x0a) { |
296 | ch->state = CHUNK_TRAILER_POSTCR; |
297 | datap++; |
298 | length--; |
299 | } |
300 | else |
301 | return CHUNKE_BAD_CHUNK; |
302 | break; |
303 | |
304 | case CHUNK_TRAILER_POSTCR: |
305 | /* We enter this state when a CR should arrive so we expect to |
306 | have to first pass a CR before we wait for LF */ |
307 | if((*datap != 0x0d) && (*datap != 0x0a)) { |
308 | /* not a CR then it must be another header in the trailer */ |
309 | ch->state = CHUNK_TRAILER; |
310 | break; |
311 | } |
312 | if(*datap == 0x0d) { |
313 | /* skip if CR */ |
314 | datap++; |
315 | length--; |
316 | } |
317 | /* now wait for the final LF */ |
318 | ch->state = CHUNK_STOP; |
319 | break; |
320 | |
321 | case CHUNK_STOP: |
322 | if(*datap == 0x0a) { |
323 | length--; |
324 | |
325 | /* Record the length of any data left in the end of the buffer |
326 | even if there's no more chunks to read */ |
327 | ch->dataleft = curlx_sotouz(length); |
328 | |
329 | return CHUNKE_STOP; /* return stop */ |
330 | } |
331 | else |
332 | return CHUNKE_BAD_CHUNK; |
333 | } |
334 | } |
335 | return CHUNKE_OK; |
336 | } |
337 | |
338 | const char *Curl_chunked_strerror(CHUNKcode code) |
339 | { |
340 | switch(code) { |
341 | default: |
342 | return "OK" ; |
343 | case CHUNKE_TOO_LONG_HEX: |
344 | return "Too long hexadecimal number" ; |
345 | case CHUNKE_ILLEGAL_HEX: |
346 | return "Illegal or missing hexadecimal sequence" ; |
347 | case CHUNKE_BAD_CHUNK: |
348 | return "Malformed encoding found" ; |
349 | case CHUNKE_PASSTHRU_ERROR: |
350 | DEBUGASSERT(0); /* never used */ |
351 | return "" ; |
352 | case CHUNKE_BAD_ENCODING: |
353 | return "Bad content-encoding found" ; |
354 | case CHUNKE_OUT_OF_MEMORY: |
355 | return "Out of memory" ; |
356 | } |
357 | } |
358 | |
359 | #endif /* CURL_DISABLE_HTTP */ |
360 | |