http_chunks.c source code [ClickHouse/contrib/curl/lib/http_chunks.c]

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
22
23	#include "curl_setup.h"
24
25	#ifndef CURL_DISABLE_HTTP
26
27	#include "urldata.h" /* it includes http_chunks.h */
28	#include "sendf.h" /* for the client write stuff */
29
30	#include "content_encoding.h"
31	#include "http.h"
32	#include "non-ascii.h" /* for Curl_convert_to_network prototype */
33	#include "strtoofft.h"
34	#include "warnless.h"
35
/* The last #include files should be: */
37	#include "curl_memory.h"
38	#include "memdebug.h"
39
/*
 * Chunk format (simplified):
 *
 * <HEX SIZE>[ chunk extension ] CRLF
 * <DATA> CRLF
 *
 * Highlights from RFC2616 section 3.6 say:

   The chunked encoding modifies the body of a message in order to
   transfer it as a series of chunks, each with its own size indicator,
   followed by an OPTIONAL trailer containing entity-header fields. This
   allows dynamically produced content to be transferred along with the
   information necessary for the recipient to verify that it has
   received the full message.

       Chunked-Body   = *chunk
                        last-chunk
                        trailer
                        CRLF

       chunk          = chunk-size [ chunk-extension ] CRLF
                        chunk-data CRLF
       chunk-size     = 1*HEX
       last-chunk     = 1*("0") [ chunk-extension ] CRLF

       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
       chunk-ext-name = token
       chunk-ext-val  = token | quoted-string
       chunk-data     = chunk-size(OCTET)
       trailer        = *(entity-header CRLF)

   The chunk-size field is a string of hex digits indicating the size of
   the chunk. The chunked encoding is ended by any chunk whose size is
   zero, followed by the trailer, which is terminated by an empty line.

 */
76
#ifdef CURL_DOES_CONVERSIONS
/*
 * Curl_isxdigit_ascii() checks for an ASCII hex digit.
 *
 * We avoid the use of ISXDIGIT to accommodate non-ASCII hosts: the byte is
 * compared against the ASCII code points directly, so the result does not
 * depend on the host character set.
 *
 * Returns TRUE if 'digit' is the ASCII encoding of 0-9, A-F or a-f.
 */
static bool Curl_isxdigit_ascii(char digit)
{
  return (digit >= 0x30 && digit <= 0x39) /* 0-9 */
        || (digit >= 0x41 && digit <= 0x46) /* A-F */
        || (digit >= 0x61 && digit <= 0x66); /* a-f */
}
#else
/* Without character set conversions, use the regular hex digit check. */
#define Curl_isxdigit_ascii(x) Curl_isxdigit(x)
#endif
89
90	void Curl_httpchunk_init(struct connectdata *conn)
91	{
92	struct Curl_chunker *chunk = &conn->chunk;
93	chunk->hexindex = `0`; / start at 0 /
94	chunk->dataleft = `0`; / no data left yet! /
95	chunk->state = CHUNK_HEX; / we get hex first! /
96	}
97
98	/*
99	* chunk_read() returns a OK for normal operations, or a positive return code
100	* for errors. STOP means this sequence of chunks is complete. The 'wrote'
101	* argument is set to tell the caller how many bytes we actually passed to the
102	* client (for byte-counting and whatever).
103	*
104	* The states and the state-machine is further explained in the header file.
105	*
106	* This function always uses ASCII hex values to accommodate non-ASCII hosts.
107	* For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
108	*/
109	CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
110	char *datap,
111	ssize_t datalen,
112	ssize_t *wrotep,
113	CURLcode *extrap)
114	{
115	CURLcode result = CURLE_OK;
116	struct Curl_easy *data = conn->data;
117	struct Curl_chunker *ch = &conn->chunk;
118	struct SingleRequest *k = &data->req;
119	size_t piece;
120	curl_off_t length = (curl_off_t)datalen;
121	size_t wrote = (size_t )wrotep;
122
123	wrote = `0`; /* nothing's written yet /
124
125	/ the original data is written to the client, but we go on with the*
126	chunk read process, to properly calculate the content length/*
127	if(data->set.http_te_skip && !k->ignorebody) {
128	result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
129	if(result) {
130	*extrap = result;
131	return CHUNKE_PASSTHRU_ERROR;
132	}
133	}
134
135	while(length) {
136	switch(ch->state) {
137	case CHUNK_HEX:
138	if(Curl_isxdigit_ascii(*datap)) {
139	if(ch->hexindex < MAXNUM_SIZE) {
140	ch->hexbuffer[ch->hexindex] = *datap;
141	datap++;
142	length--;
143	ch->hexindex++;
144	}
145	else {
146	return CHUNKE_TOO_LONG_HEX; / longer hex than we support /
147	}
148	}
149	else {
150	char *endptr;
151	if(`0` == ch->hexindex)
152	/ This is illegal data, we received junk where we expected*
153	a hexadecimal digit. /*
154	return CHUNKE_ILLEGAL_HEX;
155
156	/ length and datap are unmodified /
157	ch->hexbuffer[ch->hexindex] = `0`;
158
159	/ convert to host encoding before calling strtoul /
160	result = Curl_convert_from_network(conn->data, ch->hexbuffer,
161	ch->hexindex);
162	if(result) {
163	/ Curl_convert_from_network calls failf if unsuccessful /
164	/ Treat it as a bad hex character /
165	return CHUNKE_ILLEGAL_HEX;
166	}
167
168	if(curlx_strtoofft(ch->hexbuffer, &endptr, `16`, &ch->datasize))
169	return CHUNKE_ILLEGAL_HEX;
170	ch->state = CHUNK_LF; / now wait for the CRLF /
171	}
172	break;
173
174	case CHUNK_LF:
175	/ waiting for the LF after a chunk size /
176	if(*datap == `0x0a`) {
177	/ we're now expecting data to come, unless size was zero! /
178	if(`0` == ch->datasize) {
179	ch->state = CHUNK_TRAILER; / now check for trailers /
180	conn->trlPos = `0`;
181	}
182	else
183	ch->state = CHUNK_DATA;
184	}
185
186	datap++;
187	length--;
188	break;
189
190	case CHUNK_DATA:
191	/ We expect 'datasize' of data. We have 'length' right now, it can be*
192	more or less than 'datasize'. Get the smallest piece.
193	*/
194	piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
195
196	/ Write the data portion available /
197	if(!conn->data->set.http_te_skip && !k->ignorebody) {
198	if(!conn->data->set.http_ce_skip && k->writer_stack)
199	result = Curl_unencode_write(conn, k->writer_stack, datap, piece);
200	else
201	result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece);
202
203	if(result) {
204	*extrap = result;
205	return CHUNKE_PASSTHRU_ERROR;
206	}
207	}
208
209	*wrote += piece;
210	ch->datasize -= piece; / decrease amount left to expect /
211	datap += piece; / move read pointer forward /
212	length -= piece; / decrease space left in this round /
213
214	if(`0` == ch->datasize)
215	/ end of data this round, we now expect a trailing CRLF /
216	ch->state = CHUNK_POSTLF;
217	break;
218
219	case CHUNK_POSTLF:
220	if(*datap == `0x0a`) {
221	/ The last one before we go back to hex state and start all over. /
222	Curl_httpchunk_init(conn); / sets state back to CHUNK_HEX /
223	}
224	else if(*datap != `0x0d`)
225	return CHUNKE_BAD_CHUNK;
226	datap++;
227	length--;
228	break;
229
230	case CHUNK_TRAILER:
231	if((datap == `0x0d`) \|\| (datap == `0x0a`)) {
232	/ this is the end of a trailer, but if the trailer was zero bytes*
233	there was no trailer and we move on /*
234
235	if(conn->trlPos) {
236	/ we allocate trailer with 3 bytes extra room to fit this /
237	conn->trailer[conn->trlPos++] = `0x0d`;
238	conn->trailer[conn->trlPos++] = `0x0a`;
239	conn->trailer[conn->trlPos] = `0`;
240
241	/ Convert to host encoding before calling Curl_client_write /
242	result = Curl_convert_from_network(conn->data, conn->trailer,
243	conn->trlPos);
244	if(result)
245	/ Curl_convert_from_network calls failf if unsuccessful /
246	/ Treat it as a bad chunk /
247	return CHUNKE_BAD_CHUNK;
248
249	if(!data->set.http_te_skip) {
250	result = Curl_client_write(conn, CLIENTWRITE_HEADER,
251	conn->trailer, conn->trlPos);
252	if(result) {
253	*extrap = result;
254	return CHUNKE_PASSTHRU_ERROR;
255	}
256	}
257	conn->trlPos = `0`;
258	ch->state = CHUNK_TRAILER_CR;
259	if(*datap == `0x0a`)
260	/ already on the LF /
261	break;
262	}
263	else {
264	/ no trailer, we're on the final CRLF pair /
265	ch->state = CHUNK_TRAILER_POSTCR;
266	break; / don't advance the pointer /
267	}
268	}
269	else {
270	/ conn->trailer is assumed to be freed in url.c on a*
271	connection basis /*
272	if(conn->trlPos >= conn->trlMax) {
273	/ we always allocate three extra bytes, just because when the full*
274	header has been received we append CRLF\0 /*
275	char *ptr;
276	if(conn->trlMax) {
277	conn->trlMax *= `2`;
278	ptr = realloc(conn->trailer, conn->trlMax + `3`);
279	}
280	else {
281	conn->trlMax = `128`;
282	ptr = malloc(conn->trlMax + `3`);
283	}
284	if(!ptr)
285	return CHUNKE_OUT_OF_MEMORY;
286	conn->trailer = ptr;
287	}
288	conn->trailer[conn->trlPos++]=*datap;
289	}
290	datap++;
291	length--;
292	break;
293
294	case CHUNK_TRAILER_CR:
295	if(*datap == `0x0a`) {
296	ch->state = CHUNK_TRAILER_POSTCR;
297	datap++;
298	length--;
299	}
300	else
301	return CHUNKE_BAD_CHUNK;
302	break;
303
304	case CHUNK_TRAILER_POSTCR:
305	/ We enter this state when a CR should arrive so we expect to*
306	have to first pass a CR before we wait for LF /*
307	if((datap != `0x0d`) && (datap != `0x0a`)) {
308	/ not a CR then it must be another header in the trailer /
309	ch->state = CHUNK_TRAILER;
310	break;
311	}
312	if(*datap == `0x0d`) {
313	/ skip if CR /
314	datap++;
315	length--;
316	}
317	/ now wait for the final LF /
318	ch->state = CHUNK_STOP;
319	break;
320
321	case CHUNK_STOP:
322	if(*datap == `0x0a`) {
323	length--;
324
325	/ Record the length of any data left in the end of the buffer*
326	even if there's no more chunks to read /*
327	ch->dataleft = curlx_sotouz(length);
328
329	return CHUNKE_STOP; / return stop /
330	}
331	else
332	return CHUNKE_BAD_CHUNK;
333	}
334	}
335	return CHUNKE_OK;
336	}
337
338	const char *Curl_chunked_strerror(CHUNKcode code)
339	{
340	switch(code) {
341	default:
342	return "OK";
343	case CHUNKE_TOO_LONG_HEX:
344	return "Too long hexadecimal number";
345	case CHUNKE_ILLEGAL_HEX:
346	return "Illegal or missing hexadecimal sequence";
347	case CHUNKE_BAD_CHUNK:
348	return "Malformed encoding found";
349	case CHUNKE_PASSTHRU_ERROR:
350	DEBUGASSERT(`0`); / never used /
351	return "";
352	case CHUNKE_BAD_ENCODING:
353	return "Bad content-encoding found";
354	case CHUNKE_OUT_OF_MEMORY:
355	return "Out of memory";
356	}
357	}
358
359	#endif /* CURL_DISABLE_HTTP */
360

Browse the source code of ClickHouse/contrib/curl/lib/http_chunks.c