file.c source code [ClickHouse/contrib/curl/lib/file.c]

1	/***************************************************************************
2	*                                  _   _ ____  _
3	*  Project                     ___| | | |  _ \| |
4	*                             / __| | | | |_) | |
5	*                            | (__| |_| |  _ <| |___
6	*                             \___|\___/|_| \_\_____|
7	*
8	* Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.haxx.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	***************************************************************************/
22
23	#include "curl_setup.h"
24
25	#ifndef CURL_DISABLE_FILE
26
27	#ifdef HAVE_NETINET_IN_H
28	#include <netinet/in.h>
29	#endif
30	#ifdef HAVE_NETDB_H
31	#include <netdb.h>
32	#endif
33	#ifdef HAVE_ARPA_INET_H
34	#include <arpa/inet.h>
35	#endif
36	#ifdef HAVE_NET_IF_H
37	#include <net/if.h>
38	#endif
39	#ifdef HAVE_SYS_IOCTL_H
40	#include <sys/ioctl.h>
41	#endif
42
43	#ifdef HAVE_SYS_PARAM_H
44	#include <sys/param.h>
45	#endif
46
47	#ifdef HAVE_FCNTL_H
48	#include <fcntl.h>
49	#endif
50
51	#include "strtoofft.h"
52	#include "urldata.h"
53	#include <curl/curl.h>
54	#include "progress.h"
55	#include "sendf.h"
56	#include "escape.h"
57	#include "file.h"
58	#include "speedcheck.h"
59	#include "getinfo.h"
60	#include "transfer.h"
61	#include "url.h"
62	#include "parsedate.h" /* for the week day and month names */
63	#include "warnless.h"
64	#include "curl_range.h"
65	/* The last 3 #include files should be in this order */
66	#include "curl_printf.h"
67	#include "curl_memory.h"
68	#include "memdebug.h"
69
/* DOS_FILESYSTEM is defined on the platforms listed here; the rest of this
   file keys its drive-letter and backslash path handling on it. */
#if defined(WIN32) || defined(MSDOS) || defined(__EMX__) || \
  defined(__SYMBIAN32__)
#define DOS_FILESYSTEM 1
#endif

/* open_readonly(p,f) wraps open() so that callers need not care whether
   OPEN_NEEDS_ARG3 is set; when it is, 0 is passed as the third argument. */
#ifdef OPEN_NEEDS_ARG3
#  define open_readonly(p,f) open((p),(f),(0))
#else
#  define open_readonly(p,f) open((p),(f))
#endif
80
81	/*
82	* Forward declarations.
83	*/
84
85	static CURLcode file_do(struct connectdata , bool done);
86	static CURLcode file_done(struct connectdata *conn,
87	CURLcode status, bool premature);
88	static CURLcode file_connect(struct connectdata conn, bool done);
89	static CURLcode file_disconnect(struct connectdata *conn,
90	bool dead_connection);
91	static CURLcode file_setup_connection(struct connectdata *conn);
92
93	/*
94	* FILE scheme handler.
95	*/
96
97	const struct Curl_handler Curl_handler_file = {
98	"FILE", / scheme /
99	file_setup_connection, / setup_connection /
100	file_do, / do_it /
101	file_done, / done /
102	ZERO_NULL, / do_more /
103	file_connect, / connect_it /
104	ZERO_NULL, / connecting /
105	ZERO_NULL, / doing /
106	ZERO_NULL, / proto_getsock /
107	ZERO_NULL, / doing_getsock /
108	ZERO_NULL, / domore_getsock /
109	ZERO_NULL, / perform_getsock /
110	file_disconnect, / disconnect /
111	ZERO_NULL, / readwrite /
112	ZERO_NULL, / connection_check /
113	`0`, / defport /
114	CURLPROTO_FILE, / protocol /
115	PROTOPT_NONETWORK \| PROTOPT_NOURLQUERY / flags /
116	};
117
118
119	static CURLcode file_setup_connection(struct connectdata *conn)
120	{
121	/ allocate the FILE specific struct /
122	conn->data->req.protop = calloc(`1`, sizeof(struct FILEPROTO));
123	if(!conn->data->req.protop)
124	return CURLE_OUT_OF_MEMORY;
125
126	return CURLE_OK;
127	}
128
129	/*
130	* file_connect() gets called from Curl_protocol_connect() to allow us to
131	* do protocol-specific actions at connect-time. We emulate a
132	* connect-then-transfer protocol and "connect" to the file here
133	*/
134	static CURLcode file_connect(struct connectdata conn, bool done)
135	{
136	struct Curl_easy *data = conn->data;
137	char *real_path;
138	struct FILEPROTO *file = data->req.protop;
139	int fd;
140	#ifdef DOS_FILESYSTEM
141	size_t i;
142	char *actual_path;
143	#endif
144	size_t real_path_len;
145
146	CURLcode result = Curl_urldecode(data, data->state.up.path, `0`, &real_path,
147	&real_path_len, FALSE);
148	if(result)
149	return result;
150
151	#ifdef DOS_FILESYSTEM
152	/ If the first character is a slash, and there's*
153	something that looks like a drive at the beginning of
154	the path, skip the slash. If we remove the initial
155	slash in all cases, paths without drive letters end up
156	relative to the current directory which isn't how
157	browsers work.
158
159	Some browsers accept \| instead of : as the drive letter
160	separator, so we do too.
161
162	On other platforms, we need the slash to indicate an
163	absolute pathname. On Windows, absolute paths start
164	with a drive letter.
165	*/
166	actual_path = real_path;
167	if((actual_path[`0`] == `'/'`) &&
168	actual_path[`1`] &&
169	(actual_path[`2`] == `':'` \|\| actual_path[`2`] == `'\|'`)) {
170	actual_path[`2`] = `':'`;
171	actual_path++;
172	real_path_len--;
173	}
174
175	/ change path separators from '/' to '\\' for DOS, Windows and OS/2 /
176	for(i = `0`; i < real_path_len; ++i)
177	if(actual_path[i] == `'/'`)
178	actual_path[i] = `'\\'`;
179	else if(!actual_path[i]) { / binary zero /
180	Curl_safefree(real_path);
181	return CURLE_URL_MALFORMAT;
182	}
183
184	fd = open_readonly(actual_path, O_RDONLY\|O_BINARY);
185	file->path = actual_path;
186	#else
187	if(memchr(real_path, `0`, real_path_len)) {
188	/ binary zeroes indicate foul play /
189	Curl_safefree(real_path);
190	return CURLE_URL_MALFORMAT;
191	}
192
193	fd = open_readonly(real_path, O_RDONLY);
194	file->path = real_path;
195	#endif
196	file->freepath = real_path; / free this when done /
197
198	file->fd = fd;
199	if(!data->set.upload && (fd == -`1`)) {
200	failf(data, "Couldn't open file %s", data->state.up.path);
201	file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
202	return CURLE_FILE_COULDNT_READ_FILE;
203	}
204	*done = TRUE;
205
206	return CURLE_OK;
207	}
208
209	static CURLcode file_done(struct connectdata *conn,
210	CURLcode status, bool premature)
211	{
212	struct FILEPROTO *file = conn->data->req.protop;
213	(void)status; / not used /
214	(void)premature; / not used /
215
216	if(file) {
217	Curl_safefree(file->freepath);
218	file->path = NULL;
219	if(file->fd != -`1`)
220	close(file->fd);
221	file->fd = -`1`;
222	}
223
224	return CURLE_OK;
225	}
226
227	static CURLcode file_disconnect(struct connectdata *conn,
228	bool dead_connection)
229	{
230	struct FILEPROTO *file = conn->data->req.protop;
231	(void)dead_connection; / not used /
232
233	if(file) {
234	Curl_safefree(file->freepath);
235	file->path = NULL;
236	if(file->fd != -`1`)
237	close(file->fd);
238	file->fd = -`1`;
239	}
240
241	return CURLE_OK;
242	}
243
/* Directory separator that file_upload() looks for in the local path. */
#ifdef DOS_FILESYSTEM
#define DIRSEP '\\'
#else
#define DIRSEP '/'
#endif
249
250	static CURLcode file_upload(struct connectdata *conn)
251	{
252	struct FILEPROTO *file = conn->data->req.protop;
253	const char *dir = strchr(file->path, DIRSEP);
254	int fd;
255	int mode;
256	CURLcode result = CURLE_OK;
257	struct Curl_easy *data = conn->data;
258	char *buf = data->state.buffer;
259	curl_off_t bytecount = `0`;
260	struct_stat file_stat;
261	const char *buf2;
262
263	/*
264	* Since FILE: doesn't do the full init, we need to provide some extra
265	* assignments here.
266	*/
267	conn->data->req.upload_fromhere = buf;
268
269	if(!dir)
270	return CURLE_FILE_COULDNT_READ_FILE; / fix: better error code /
271
272	if(!dir[`1`])
273	return CURLE_FILE_COULDNT_READ_FILE; / fix: better error code /
274
275	#ifdef O_BINARY
276	#define MODE_DEFAULT O_WRONLY\|O_CREAT\|O_BINARY
277	#else
278	#define MODE_DEFAULT O_WRONLY\|O_CREAT
279	#endif
280
281	if(data->state.resume_from)
282	mode = MODE_DEFAULT\|O_APPEND;
283	else
284	mode = MODE_DEFAULT\|O_TRUNC;
285
286	fd = open(file->path, mode, conn->data->set.new_file_perms);
287	if(fd < `0`) {
288	failf(data, "Can't open %s for writing", file->path);
289	return CURLE_WRITE_ERROR;
290	}
291
292	if(-`1` != data->state.infilesize)
293	/ known size of data to "upload" /
294	Curl_pgrsSetUploadSize(data, data->state.infilesize);
295
296	/ treat the negative resume offset value as the case of "-" /
297	if(data->state.resume_from < `0`) {
298	if(fstat(fd, &file_stat)) {
299	close(fd);
300	failf(data, "Can't get the size of %s", file->path);
301	return CURLE_WRITE_ERROR;
302	}
303	data->state.resume_from = (curl_off_t)file_stat.st_size;
304	}
305
306	while(!result) {
307	size_t nread;
308	size_t nwrite;
309	size_t readcount;
310	result = Curl_fillreadbuffer(conn, data->set.buffer_size, &readcount);
311	if(result)
312	break;
313
314	if(!readcount)
315	break;
316
317	nread = readcount;
318
319	/skip bytes before resume point/
320	if(data->state.resume_from) {
321	if((curl_off_t)nread <= data->state.resume_from) {
322	data->state.resume_from -= nread;
323	nread = `0`;
324	buf2 = buf;
325	}
326	else {
327	buf2 = buf + data->state.resume_from;
328	nread -= (size_t)data->state.resume_from;
329	data->state.resume_from = `0`;
330	}
331	}
332	else
333	buf2 = buf;
334
335	/ write the data to the target /
336	nwrite = write(fd, buf2, nread);
337	if(nwrite != nread) {
338	result = CURLE_SEND_ERROR;
339	break;
340	}
341
342	bytecount += nread;
343
344	Curl_pgrsSetUploadCounter(data, bytecount);
345
346	if(Curl_pgrsUpdate(conn))
347	result = CURLE_ABORTED_BY_CALLBACK;
348	else
349	result = Curl_speedcheck(data, Curl_now());
350	}
351	if(!result && Curl_pgrsUpdate(conn))
352	result = CURLE_ABORTED_BY_CALLBACK;
353
354	close(fd);
355
356	return result;
357	}
358
359	/*
360	* file_do() is the protocol-specific function for the do-phase, separated
361	* from the connect-phase above. Other protocols merely setup the transfer in
362	* the do-phase, to have it done in the main transfer loop but since some
363	* platforms we support don't allow select()ing etc on file handles (as
364	* opposed to sockets) we instead perform the whole do-operation in this
365	* function.
366	*/
367	static CURLcode file_do(struct connectdata conn, bool done)
368	{
369	/ This implementation ignores the host name in conformance with*
370	RFC 1738. Only local files (reachable via the standard file system)
371	are supported. This means that files on remotely mounted directories
372	(via NFS, Samba, NT sharing) can be accessed through a file:// URL
373	*/
374	CURLcode result = CURLE_OK;
375	struct_stat statbuf; / struct_stat instead of struct stat just to allow the*
376	Windows version to have a different struct without
377	having to redefine the simple word 'stat' /*
378	curl_off_t expected_size = `0`;
379	bool size_known;
380	bool fstated = FALSE;
381	struct Curl_easy *data = conn->data;
382	char *buf = data->state.buffer;
383	curl_off_t bytecount = `0`;
384	int fd;
385	struct FILEPROTO *file;
386
387	done = TRUE; /* unconditionally /
388
389	Curl_pgrsStartNow(data);
390
391	if(data->set.upload)
392	return file_upload(conn);
393
394	file = conn->data->req.protop;
395
396	/ get the fd from the connection phase /
397	fd = file->fd;
398
399	/ VMS: This only works reliable for STREAMLF files /
400	if(-`1` != fstat(fd, &statbuf)) {
401	/ we could stat it, then read out the size /
402	expected_size = statbuf.st_size;
403	/ and store the modification time /
404	data->info.filetime = statbuf.st_mtime;
405	fstated = TRUE;
406	}
407
408	if(fstated && !data->state.range && data->set.timecondition) {
409	if(!Curl_meets_timecondition(data, data->info.filetime)) {
410	*done = TRUE;
411	return CURLE_OK;
412	}
413	}
414
415	if(fstated) {
416	time_t filetime;
417	struct tm buffer;
418	const struct tm *tm = &buffer;
419	char header[`80`];
420	msnprintf(header, sizeof(header),
421	"Content-Length: %" CURL_FORMAT_CURL_OFF_T "\r\n",
422	expected_size);
423	result = Curl_client_write(conn, CLIENTWRITE_HEADER, header, `0`);
424	if(result)
425	return result;
426
427	result = Curl_client_write(conn, CLIENTWRITE_HEADER,
428	(char *)"Accept-ranges: bytes\r\n", `0`);
429	if(result)
430	return result;
431
432	filetime = (time_t)statbuf.st_mtime;
433	result = Curl_gmtime(filetime, &buffer);
434	if(result)
435	return result;
436
437	/ format: "Tue, 15 Nov 1994 12:45:26 GMT" /
438	msnprintf(header, sizeof(header),
439	"Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n%s",
440	Curl_wkday[tm->tm_wday?tm->tm_wday-`1`:`6`],
441	tm->tm_mday,
442	Curl_month[tm->tm_mon],
443	tm->tm_year + `1900`,
444	tm->tm_hour,
445	tm->tm_min,
446	tm->tm_sec,
447	data->set.opt_no_body ? "": "\r\n");
448	result = Curl_client_write(conn, CLIENTWRITE_HEADER, header, `0`);
449	if(result)
450	return result;
451	/ set the file size to make it available post transfer /
452	Curl_pgrsSetDownloadSize(data, expected_size);
453	if(data->set.opt_no_body)
454	return result;
455	}
456
457	/ Check whether file range has been specified /
458	result = Curl_range(conn);
459	if(result)
460	return result;
461
462	/ Adjust the start offset in case we want to get the N last bytes*
463	* of the stream if the filesize could be determined */
464	if(data->state.resume_from < `0`) {
465	if(!fstated) {
466	failf(data, "Can't get the size of file.");
467	return CURLE_READ_ERROR;
468	}
469	data->state.resume_from += (curl_off_t)statbuf.st_size;
470	}
471
472	if(data->state.resume_from <= expected_size)
473	expected_size -= data->state.resume_from;
474	else {
475	failf(data, "failed to resume file:// transfer");
476	return CURLE_BAD_DOWNLOAD_RESUME;
477	}
478
479	/ A high water mark has been specified so we obey... /
480	if(data->req.maxdownload > `0`)
481	expected_size = data->req.maxdownload;
482
483	if(!fstated \|\| (expected_size == `0`))
484	size_known = FALSE;
485	else
486	size_known = TRUE;
487
488	/ The following is a shortcut implementation of file reading*
489	this is both more efficient than the former call to download() and
490	it avoids problems with select() and recv() on file descriptors
491	in Winsock /*
492	if(fstated)
493	Curl_pgrsSetDownloadSize(data, expected_size);
494
495	if(data->state.resume_from) {
496	if(data->state.resume_from !=
497	lseek(fd, data->state.resume_from, SEEK_SET))
498	return CURLE_BAD_DOWNLOAD_RESUME;
499	}
500
501	Curl_pgrsTime(data, TIMER_STARTTRANSFER);
502
503	while(!result) {
504	ssize_t nread;
505	/ Don't fill a whole buffer if we want less than all data /
506	size_t bytestoread;
507
508	if(size_known) {
509	bytestoread = (expected_size < data->set.buffer_size) ?
510	curlx_sotouz(expected_size) : (size_t)data->set.buffer_size;
511	}
512	else
513	bytestoread = data->set.buffer_size-`1`;
514
515	nread = read(fd, buf, bytestoread);
516
517	if(nread > `0`)
518	buf[nread] = `0`;
519
520	if(nread <= `0` \|\| (size_known && (expected_size == `0`)))
521	break;
522
523	bytecount += nread;
524	if(size_known)
525	expected_size -= nread;
526
527	result = Curl_client_write(conn, CLIENTWRITE_BODY, buf, nread);
528	if(result)
529	return result;
530
531	Curl_pgrsSetDownloadCounter(data, bytecount);
532
533	if(Curl_pgrsUpdate(conn))
534	result = CURLE_ABORTED_BY_CALLBACK;
535	else
536	result = Curl_speedcheck(data, Curl_now());
537	}
538	if(Curl_pgrsUpdate(conn))
539	result = CURLE_ABORTED_BY_CALLBACK;
540
541	return result;
542	}
543
544	#endif
545

Browse the source code of ClickHouse/contrib/curl/lib/file.c