1#include "arg.h"
2
3#include "common.h"
4#include "gguf.h" // for reading GGUF splits
5#include "log.h"
6#include "download.h"
7
8#define JSON_ASSERT GGML_ASSERT
9#include <nlohmann/json.hpp>
10
#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <future>
#include <regex>
#include <string>
#include <thread>
#include <vector>
19
20#if defined(LLAMA_USE_CURL)
21#include <curl/curl.h>
22#include <curl/easy.h>
23#else
24#include "http.h"
25#endif
26
27#ifdef __linux__
28#include <linux/limits.h>
29#elif defined(_WIN32)
30# if !defined(PATH_MAX)
31# define PATH_MAX MAX_PATH
32# endif
33#elif defined(_AIX)
34#include <sys/limits.h>
35#else
36#include <sys/syslimits.h>
37#endif
38#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
39
40// isatty
41#if defined(_WIN32)
42#include <io.h>
43#else
44#include <unistd.h>
45#endif
46
47using json = nlohmann::ordered_json;
48
49//
50// downloader
51//
52
53static std::string read_file(const std::string & fname) {
54 std::ifstream file(fname);
55 if (!file) {
56 throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n", fname.c_str()));
57 }
58 std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
59 file.close();
60 return content;
61}
62
63static void write_file(const std::string & fname, const std::string & content) {
64 const std::string fname_tmp = fname + ".tmp";
65 std::ofstream file(fname_tmp);
66 if (!file) {
67 throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n", fname.c_str()));
68 }
69
70 try {
71 file << content;
72 file.close();
73
74 // Makes write atomic
75 if (rename(old: fname_tmp.c_str(), new: fname.c_str()) != 0) {
76 LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
77 // If rename fails, try to delete the temporary file
78 if (remove(filename: fname_tmp.c_str()) != 0) {
79 LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
80 }
81 }
82 } catch (...) {
83 // If anything fails, try to delete the temporary file
84 if (remove(filename: fname_tmp.c_str()) != 0) {
85 LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
86 }
87
88 throw std::runtime_error(string_format(fmt: "error: failed to write file '%s'\n", fname.c_str()));
89 }
90}
91
92static void write_etag(const std::string & path, const std::string & etag) {
93 const std::string etag_path = path + ".etag";
94 write_file(fname: etag_path, content: etag);
95 LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str());
96}
97
98static std::string read_etag(const std::string & path) {
99 std::string none;
100 const std::string etag_path = path + ".etag";
101
102 if (std::filesystem::exists(p: etag_path)) {
103 std::ifstream etag_in(etag_path);
104 if (!etag_in) {
105 LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
106 return none;
107 }
108 std::string etag;
109 std::getline(is&: etag_in, str&: etag);
110 return etag;
111 }
112
113 // no etag file, but maybe there is an old .json
114 // remove this code later
115 const std::string metadata_path = path + ".json";
116
117 if (std::filesystem::exists(p: metadata_path)) {
118 std::ifstream metadata_in(metadata_path);
119 try {
120 nlohmann::json metadata_json;
121 metadata_in >> metadata_json;
122 LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
123 metadata_json.dump().c_str());
124 if (metadata_json.contains(key: "etag") && metadata_json.at(key: "etag").is_string()) {
125 std::string etag = metadata_json.at(key: "etag");
126 write_etag(path, etag);
127 if (!std::filesystem::remove(p: metadata_path)) {
128 LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
129 }
130 return etag;
131 }
132 } catch (const nlohmann::json::exception & e) {
133 LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
134 }
135 }
136 return none;
137}
138
139#ifdef LLAMA_USE_CURL
140
141//
142// CURL utils
143//
144
// RAII alias: a CURL easy handle released via curl_easy_cleanup.
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;

// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
// (curl_slist_append may return a new head pointer, so `ptr` is re-assigned in place
// and only freed once, here in the destructor)
struct curl_slist_ptr {
    struct curl_slist * ptr = nullptr;
    ~curl_slist_ptr() {
        if (ptr) {
            curl_slist_free_all(list: ptr);
        }
    }
};
156
157static CURLcode common_curl_perf(CURL * curl) {
158 CURLcode res = curl_easy_perform(curl);
159 if (res != CURLE_OK) {
160 LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
161 }
162
163 return res;
164}
165
// Response headers collected from a HEAD request (filled in by common_header_callback).
struct common_load_model_from_url_headers {
    std::string etag;          // entity tag, used to detect remote file changes
    std::string last_modified; // Last-Modified header (informational, logged only)
    std::string accept_ranges; // non-"none" value indicates resumable (Range) downloads
};

// Deleter so a FILE * can be managed by std::unique_ptr.
struct FILE_deleter {
    void operator()(FILE * f) const { fclose(f); }
};

// CURLOPT_HEADERFUNCTION callback: parses one "Key: value\r\n" header line and
// stores the headers we care about (ETag, Last-Modified, Accept-Ranges) into the
// common_load_model_from_url_headers passed via CURLOPT_HEADERDATA.
// Returns n_items to tell libcurl the full line was consumed.
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
    common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;

    // header field names are case-insensitive, so compare them ignoring case
    // (cheaper than the per-name icase regex match this used to do)
    auto iequals = [](const std::string & a, const char * b) {
        const size_t n = std::char_traits<char>::length(b);
        if (a.size() != n) {
            return false;
        }
        for (size_t i = 0; i < n; ++i) {
            if (std::tolower((unsigned char) a[i]) != std::tolower((unsigned char) b[i])) {
                return false;
            }
        }
        return true;
    };

    static const std::regex header_regex("([^:]+): (.*)\r\n");

    std::string header(buffer, n_items);
    std::smatch match;
    if (std::regex_match(header, match, header_regex)) {
        const std::string key   = match[1];
        const std::string value = match[2];
        if (iequals(key, "ETag")) {
            headers->etag = value;
        } else if (iequals(key, "Last-Modified")) {
            headers->last_modified = value;
        } else if (iequals(key, "Accept-Ranges")) {
            headers->accept_ranges = value;
        }
    }

    return n_items;
}
199
// CURLOPT_WRITEFUNCTION callback: forward the received bytes to the
// FILE * handed over via CURLOPT_WRITEDATA; returns the item count written.
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
    FILE * out = static_cast<FILE *>(fd);
    return std::fwrite(data, size, nmemb, out);
}
203
// helper function to hide password in URL
// e.g. "https://user:pass@host/path" -> "https://********@host/path"
// Returns the URL unchanged when it carries no credentials.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    // Pattern: scheme://user[:password]@rest
    // capture 1: scheme incl. "://", capture 2: credentials incl. '@', capture 3: rest.
    // The previous regex used only non-capturing groups (so match[1]/match[3] never
    // existed) and could not match a real URL; the groups must capture.
    static const std::regex url_regex(R"(^([A-Za-z][A-Za-z0-9+.-]*://)([^/@]+@)(.*)$)");
    std::smatch match;

    if (std::regex_match(url, match, url_regex)) {
        // match[1] = scheme (e.g., "https://")
        // match[2] = user[:password]@ part (replaced)
        // match[3] = rest of URL (host and path)
        return match[1].str() + "********@" + match[3].str();
    }

    return url; // No credentials found or malformed URL
}
220
// Configure `curl` for a HEAD request against `url`: follow redirects,
// suppress the body and progress meter, and install the header callback
// that extracts ETag / Last-Modified / Accept-Ranges.
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
    // Set the URL, allow to follow http redirection
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

# if defined(_WIN32)
    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
    // operating system. Currently implemented under MS-Windows.
    curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
# endif

    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
}
236
// Switch a handle previously prepared for HEAD back to a body-fetching GET
// and install the FILE-writing callback. CURLOPT_WRITEDATA is set by the caller.
static void common_curl_easy_setopt_get(CURL * curl) {
    curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);

    // display download progress
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
}
244
245static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
246 if (std::filesystem::exists(p: path_temporary)) {
247 const std::string partial_size = std::to_string(val: std::filesystem::file_size(p: path_temporary));
248 LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
249 const std::string range_str = partial_size + "-";
250 curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
251 }
252
253 // Always open file in append mode could be resuming
254 std::unique_ptr<FILE, FILE_deleter> outfile(fopen(filename: path_temporary.c_str(), modes: "ab"));
255 if (!outfile) {
256 LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
257 return false;
258 }
259
260 common_curl_easy_setopt_get(curl);
261 curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
262
263 return common_curl_perf(curl) == CURLE_OK;
264}
265
266static bool common_download_head(CURL * curl,
267 curl_slist_ptr & http_headers,
268 const std::string & url,
269 const std::string & bearer_token) {
270 if (!curl) {
271 LOG_ERR("%s: error initializing libcurl\n", __func__);
272 return false;
273 }
274
275 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp");
276 // Check if hf-token or bearer-token was specified
277 if (!bearer_token.empty()) {
278 std::string auth_header = "Authorization: Bearer " + bearer_token;
279 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: auth_header.c_str());
280 }
281
282 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
283 common_curl_easy_setopt_head(curl, url);
284 return common_curl_perf(curl) == CURLE_OK;
285}
286
// download one single file from remote URL to local path
// Strategy: HEAD first to get ETag / Accept-Ranges, then GET into a
// "<path>.downloadInProgress" temp file (resuming when possible), finally
// rename over `path`. Retries up to max_attempts with exponential backoff.
// NOTE(review): if all attempts fail the loop ends without a break and the
// function still returns true - confirm callers tolerate this (failures are
// only logged).
static bool common_download_file_single_online(const std::string & url,
                                               const std::string & path,
                                               const std::string & bearer_token) {
    static const int max_attempts = 3;
    static const int retry_delay_seconds = 2;
    for (int i = 0; i < max_attempts; ++i) {
        std::string etag;

        // Check if the file already exists locally
        const auto file_exists = std::filesystem::exists(p: path);
        if (file_exists) {
            etag = read_etag(path);
        } else {
            LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
        }

        bool head_request_ok = false;
        bool should_download = !file_exists; // by default, we should download if the file does not exist

        // Initialize libcurl
        curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
        common_load_model_from_url_headers headers;
        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
        curl_slist_ptr http_headers;
        const bool was_perform_successful = common_download_head(curl: curl.get(), http_headers, url, bearer_token);
        if (!was_perform_successful) {
            head_request_ok = false;
        }

        // only a plain 200 counts as a usable HEAD response
        long http_code = 0;
        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
        if (http_code == 200) {
            head_request_ok = true;
        } else {
            LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
            head_request_ok = false;
        }

        // if head_request_ok is false, we don't have the etag or last-modified headers
        // we leave should_download as-is, which is true if the file does not exist
        bool should_download_from_scratch = false;
        if (head_request_ok) {
            // check if ETag or Last-Modified headers are different
            // if it is, we need to download the file again
            if (!etag.empty() && etag != headers.etag) {
                LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
                        headers.etag.c_str());
                should_download = true;
                should_download_from_scratch = true;
            }
        }

        const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
        if (should_download) {
            if (file_exists &&
                !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
                LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
                if (remove(filename: path.c_str()) != 0) {
                    LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
                    return false;
                }
            }

            const std::string path_temporary = path + ".downloadInProgress";
            // remote changed: discard both the partial download and the stale local copy
            if (should_download_from_scratch) {
                if (std::filesystem::exists(p: path_temporary)) {
                    if (remove(filename: path_temporary.c_str()) != 0) {
                        LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
                        return false;
                    }
                }

                if (std::filesystem::exists(p: path)) {
                    if (remove(filename: path.c_str()) != 0) {
                        LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
                        return false;
                    }
                }
            }
            // NOTE(review): the etag is persisted *before* the pull completes; an
            // interrupted download leaves a matching etag next to a stale/partial
            // file - confirm this is intended (resume relies on the temp file).
            if (head_request_ok) {
                write_etag(path, etag: headers.etag);
            }

            // start the download
            LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
                    __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
                    headers.etag.c_str(), headers.last_modified.c_str());
            const bool was_pull_successful = common_pull_file(curl: curl.get(), path_temporary);
            if (!was_pull_successful) {
                if (i + 1 < max_attempts) {
                    // backoff: retry_delay_seconds^i seconds (2s, then 4s, ...)
                    const int exponential_backoff_delay = std::pow(x: retry_delay_seconds, y: i) * 1000;
                    LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
                    std::this_thread::sleep_for(rtime: std::chrono::milliseconds(exponential_backoff_delay));
                } else {
                    LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
                }

                continue;
            }

            // validate the GET status (2xx/3xx accepted; 3xx were followed by curl)
            long http_code = 0;
            curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
            if (http_code < 200 || http_code >= 400) {
                LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
                return false;
            }

            // atomically move the finished download into place
            if (rename(old: path_temporary.c_str(), new: path.c_str()) != 0) {
                LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
                return false;
            }
        } else {
            LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
        }

        break;
    }

    return true;
}
408
409std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
410 curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
411 curl_slist_ptr http_headers;
412 std::vector<char> res_buffer;
413
414 curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
415 curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
416 curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
417 curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
418 typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
419 auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
420 auto data_vec = static_cast<std::vector<char> *>(data);
421 data_vec->insert(position: data_vec->end(), first: (char *)ptr, last: (char *)ptr + size * nmemb);
422 return size * nmemb;
423 };
424 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
425 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
426#if defined(_WIN32)
427 curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
428#endif
429 if (params.timeout > 0) {
430 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
431 }
432 if (params.max_size > 0) {
433 curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
434 }
435 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp");
436 for (const auto & header : params.headers) {
437 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: header.c_str());
438 }
439 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
440
441 CURLcode res = curl_easy_perform(curl: curl.get());
442
443 if (res != CURLE_OK) {
444 std::string error_msg = curl_easy_strerror(res);
445 throw std::runtime_error("error: cannot make GET request: " + error_msg);
446 }
447
448 long res_code;
449 curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
450
451 return { res_code, std::move(res_buffer) };
452}
453
454#else
455
// Returns true when stdout is attached to an interactive terminal.
// Used to decide whether to render the live progress bar.
static bool is_output_a_tty() {
#if defined(_WIN32)
    return _isatty(_fileno(stdout));
#else
    return isatty(1); // fd 1 == stdout
#endif
}
463
464static void print_progress(size_t current, size_t total) {
465 if (!is_output_a_tty()) {
466 return;
467 }
468
469 if (!total) {
470 return;
471 }
472
473 size_t width = 50;
474 size_t pct = (100 * current) / total;
475 size_t pos = (width * current) / total;
476
477 std::cout << "["
478 << std::string(pos, '=')
479 << (pos < width ? ">" : "")
480 << std::string(width - pos, ' ')
481 << "] " << std::setw(3) << pct << "% ("
482 << current / (1024 * 1024) << " MB / "
483 << total / (1024 * 1024) << " MB)\r";
484 std::cout.flush();
485}
486
// Stream `resolve_path` from `cli` into `path_tmp` (opened in append mode),
// optionally resuming from `existing_size` bytes via an HTTP Range request.
// `total_size` is filled in from the Content-Length header when not yet known.
static bool common_pull_file(httplib::Client & cli,
                             const std::string & resolve_path,
                             const std::string & path_tmp,
                             bool supports_ranges,
                             size_t existing_size,
                             size_t & total_size) {
    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
    if (!ofs.is_open()) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    httplib::Headers headers;
    if (supports_ranges && existing_size > 0) {
        // request only the bytes we do not have yet
        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
    }

    std::atomic<size_t> downloaded{existing_size};

    auto res = cli.Get(resolve_path, headers,
        // response handler: validate the status before accepting any body data;
        // returning false aborts the transfer
        [&](const httplib::Response &response) {
            if (existing_size > 0 && response.status != 206) {
                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
                return false;
            }
            if (existing_size == 0 && response.status != 200) {
                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
                return false;
            }
            if (total_size == 0 && response.has_header("Content-Length")) {
                try {
                    // Content-Length covers only the remaining bytes on a 206 response
                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                    total_size = existing_size + content_length;
                } catch (const std::exception &e) {
                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
                }
            }
            return true;
        },
        // content receiver: append each chunk to the temp file and update progress
        [&](const char *data, size_t len) {
            ofs.write(data, len);
            if (!ofs) {
                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
                return false;
            }
            downloaded += len;
            print_progress(downloaded, total_size);
            return true;
        },
        nullptr
    );

    // end the progress-bar line
    std::cout << "\n";

    if (!res) {
        // NOTE(review): res is falsy in this branch, so the ternary always logs -1
        LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1);
        return false;
    }

    return true;
}
548
549// download one single file from remote URL to local path
550static bool common_download_file_single_online(const std::string & url,
551 const std::string & path,
552 const std::string & bearer_token) {
553 static const int max_attempts = 3;
554 static const int retry_delay_seconds = 2;
555
556 auto [cli, parts] = common_http_client(url);
557
558 httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
559 if (!bearer_token.empty()) {
560 default_headers.insert({"Authorization", "Bearer " + bearer_token});
561 }
562 cli.set_default_headers(default_headers);
563
564 const bool file_exists = std::filesystem::exists(path);
565
566 std::string last_etag;
567 if (file_exists) {
568 last_etag = read_etag(path);
569 } else {
570 LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
571 }
572
573 for (int i = 0; i < max_attempts; ++i) {
574 auto head = cli.Head(parts.path);
575 bool head_ok = head && head->status >= 200 && head->status < 300;
576 if (!head_ok) {
577 LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
578 if (file_exists) {
579 LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
580 return true;
581 }
582 }
583
584 std::string etag;
585 if (head_ok && head->has_header("ETag")) {
586 etag = head->get_header_value("ETag");
587 }
588
589 size_t total_size = 0;
590 if (head_ok && head->has_header("Content-Length")) {
591 try {
592 total_size = std::stoull(head->get_header_value("Content-Length"));
593 } catch (const std::exception& e) {
594 LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
595 }
596 }
597
598 bool supports_ranges = false;
599 if (head_ok && head->has_header("Accept-Ranges")) {
600 supports_ranges = head->get_header_value("Accept-Ranges") != "none";
601 }
602
603 bool should_download_from_scratch = false;
604 if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
605 LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
606 last_etag.c_str(), etag.c_str());
607 should_download_from_scratch = true;
608 }
609
610 if (file_exists) {
611 if (!should_download_from_scratch) {
612 LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
613 return true;
614 }
615 LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
616 if (remove(path.c_str()) != 0) {
617 LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
618 return false;
619 }
620 }
621
622 const std::string path_temporary = path + ".downloadInProgress";
623 size_t existing_size = 0;
624
625 if (std::filesystem::exists(path_temporary)) {
626 if (supports_ranges && !should_download_from_scratch) {
627 existing_size = std::filesystem::file_size(path_temporary);
628 } else if (remove(path_temporary.c_str()) != 0) {
629 LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
630 return false;
631 }
632 }
633
634 // start the download
635 LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
636 __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
637 const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
638 if (!was_pull_successful) {
639 if (i + 1 < max_attempts) {
640 const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
641 LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
642 std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
643 } else {
644 LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
645 }
646 continue;
647 }
648
649 if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
650 LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
651 return false;
652 }
653 if (!etag.empty()) {
654 write_etag(path, etag);
655 }
656 break;
657 }
658
659 return true;
660}
661
662std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
663 const common_remote_params & params) {
664 auto [cli, parts] = common_http_client(url);
665
666 httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
667 for (const auto & header : params.headers) {
668 size_t pos = header.find(':');
669 if (pos != std::string::npos) {
670 headers.emplace(header.substr(0, pos), header.substr(pos + 1));
671 } else {
672 headers.emplace(header, "");
673 }
674 }
675
676 if (params.timeout > 0) {
677 cli.set_read_timeout(params.timeout, 0);
678 cli.set_write_timeout(params.timeout, 0);
679 }
680
681 std::vector<char> buf;
682 auto res = cli.Get(parts.path, headers,
683 [&](const char *data, size_t len) {
684 buf.insert(buf.end(), data, data + len);
685 return params.max_size == 0 ||
686 buf.size() <= static_cast<size_t>(params.max_size);
687 },
688 nullptr
689 );
690
691 if (!res) {
692 throw std::runtime_error("error: cannot make GET request");
693 }
694
695 return { res->status, std::move(buf) };
696}
697
698#endif // LLAMA_USE_CURL
699
700static bool common_download_file_single(const std::string & url,
701 const std::string & path,
702 const std::string & bearer_token,
703 bool offline) {
704 if (!offline) {
705 return common_download_file_single_online(url, path, bearer_token);
706 }
707
708 if (!std::filesystem::exists(p: path)) {
709 LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
710 return false;
711 }
712
713 LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
714 return true;
715}
716
717// download multiple files from remote URLs to local paths
718// the input is a vector of pairs <url, path>
719static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
720 // Prepare download in parallel
721 std::vector<std::future<bool>> futures_download;
722 for (auto const & item : urls) {
723 futures_download.push_back(x: std::async(policy: std::launch::async, fn: [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
724 return common_download_file_single(url: it.first, path: it.second, bearer_token, offline);
725 }, args: item));
726 }
727
728 // Wait for all downloads to complete
729 for (auto & f : futures_download) {
730 if (!f.get()) {
731 return false;
732 }
733 }
734
735 return true;
736}
737
738bool common_download_model(
739 const common_params_model & model,
740 const std::string & bearer_token,
741 bool offline) {
742 // Basic validation of the model.url
743 if (model.url.empty()) {
744 LOG_ERR("%s: invalid model url\n", __func__);
745 return false;
746 }
747
748 if (!common_download_file_single(url: model.url, path: model.path, bearer_token, offline)) {
749 return false;
750 }
751
752 // check for additional GGUFs split to download
753 int n_split = 0;
754 {
755 struct gguf_init_params gguf_params = {
756 /*.no_alloc = */ true,
757 /*.ctx = */ NULL,
758 };
759 auto * ctx_gguf = gguf_init_from_file(fname: model.path.c_str(), params: gguf_params);
760 if (!ctx_gguf) {
761 LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
762 return false;
763 }
764
765 auto key_n_split = gguf_find_key(ctx: ctx_gguf, key: LLM_KV_SPLIT_COUNT);
766 if (key_n_split >= 0) {
767 n_split = gguf_get_val_u16(ctx: ctx_gguf, key_id: key_n_split);
768 }
769
770 gguf_free(ctx: ctx_gguf);
771 }
772
773 if (n_split > 1) {
774 char split_prefix[PATH_MAX] = {0};
775 char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
776
777 // Verify the first split file format
778 // and extract split URL and PATH prefixes
779 {
780 if (!llama_split_prefix(split_prefix, maxlen: sizeof(split_prefix), split_path: model.path.c_str(), split_no: 0, split_count: n_split)) {
781 LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split);
782 return false;
783 }
784
785 if (!llama_split_prefix(split_prefix: split_url_prefix, maxlen: sizeof(split_url_prefix), split_path: model.url.c_str(), split_no: 0, split_count: n_split)) {
786 LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split);
787 return false;
788 }
789 }
790
791 std::vector<std::pair<std::string, std::string>> urls;
792 for (int idx = 1; idx < n_split; idx++) {
793 char split_path[PATH_MAX] = {0};
794 llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: split_prefix, split_no: idx, split_count: n_split);
795
796 char split_url[LLAMA_MAX_URL_LENGTH] = {0};
797 llama_split_path(split_path: split_url, maxlen: sizeof(split_url), path_prefix: split_url_prefix, split_no: idx, split_count: n_split);
798
799 if (std::string(split_path) == model.path) {
800 continue; // skip the already downloaded file
801 }
802
803 urls.push_back(x: {split_url, split_path});
804 }
805
806 // Download in parallel
807 common_download_file_multiple(urls, bearer_token, offline);
808 }
809
810 return true;
811}
812
// Resolve a Hugging Face repo spec "<user>/<model>[:tag]" to the GGUF (and
// optional mmproj) file names by querying the HF manifests API. Successful
// responses are cached on disk so a later offline run can reuse them.
// Throws on invalid input, unreachable manifest, auth failure (401), or a
// manifest without a ggufFile entry.
common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
    auto parts = string_split<std::string>(input: hf_repo_with_tag, separator: ':');
    std::string tag = parts.size() > 1 ? parts.back() : "latest";
    std::string hf_repo = parts[0];
    if (string_split<std::string>(input: hf_repo, separator: '/').size() != 2) {
        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
    }

    std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;

    // headers
    std::vector<std::string> headers;
    headers.push_back(x: "Accept: application/json");
    if (!bearer_token.empty()) {
        headers.push_back(x: "Authorization: Bearer " + bearer_token);
    }
    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
    // User-Agent header is already set in common_remote_get_content, no need to set it here

    // we use "=" to avoid clashing with other component, while still being allowed on windows
    std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
    string_replace_all(s&: cached_response_fname, search: "/", replace: "_");
    std::string cached_response_path = fs_get_cache_file(filename: cached_response_fname);

    // make the request
    common_remote_params params;
    params.headers = headers;
    long res_code = 0;
    std::string res_str;
    bool use_cache = false;
    if (!offline) {
        try {
            auto res = common_remote_get_content(url, params);
            res_code = res.first;
            res_str = std::string(res.second.data(), res.second.size());
        } catch (const std::exception & e) {
            // network failure is not fatal yet - we may still have a cached manifest
            LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
        }
    }
    // res_code == 0 means no response was obtained (offline mode or request failed)
    if (res_code == 0) {
        if (std::filesystem::exists(p: cached_response_path)) {
            LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str());
            res_str = read_file(fname: cached_response_path);
            res_code = 200; // treat the cached manifest as a successful response
            use_cache = true;
        } else {
            throw std::runtime_error(
                offline ? "error: failed to get manifest (offline mode)"
                        : "error: failed to get manifest (check your internet connection)");
        }
    }
    std::string ggufFile;
    std::string mmprojFile;

    // 304 Not Modified also carries a usable (cached) manifest body
    if (res_code == 200 || res_code == 304) {
        try {
            auto j = json::parse(i&: res_str);

            if (j.contains(key: "ggufFile") && j["ggufFile"].contains(key: "rfilename")) {
                ggufFile = j["ggufFile"]["rfilename"].get<std::string>();
            }
            if (j.contains(key: "mmprojFile") && j["mmprojFile"].contains(key: "rfilename")) {
                mmprojFile = j["mmprojFile"]["rfilename"].get<std::string>();
            }
        } catch (const std::exception & e) {
            throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
        }
        if (!use_cache) {
            // if not using cached response, update the cache file
            write_file(fname: cached_response_path, content: res_str);
        }
    } else if (res_code == 401) {
        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
    } else {
        throw std::runtime_error(string_format(fmt: "error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
    }

    // check response
    if (ggufFile.empty()) {
        throw std::runtime_error("error: model does not have ggufFile");
    }

    return { .repo: hf_repo, .ggufFile: ggufFile, .mmprojFile: mmprojFile };
}
897
898//
899// Docker registry functions
900//
901
902static std::string common_docker_get_token(const std::string & repo) {
903 std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull";
904
905 common_remote_params params;
906 auto res = common_remote_get_content(url, params);
907
908 if (res.first != 200) {
909 throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(val: res.first));
910 }
911
912 std::string response_str(res.second.begin(), res.second.end());
913 nlohmann::ordered_json response = nlohmann::ordered_json::parse(i&: response_str);
914
915 if (!response.contains(key: "token")) {
916 throw std::runtime_error("Docker registry token response missing 'token' field");
917 }
918
919 return response["token"].get<std::string>();
920}
921
922std::string common_docker_resolve_model(const std::string & docker) {
923 // Parse ai/smollm2:135M-Q4_0
924 size_t colon_pos = docker.find(c: ':');
925 std::string repo, tag;
926 if (colon_pos != std::string::npos) {
927 repo = docker.substr(pos: 0, n: colon_pos);
928 tag = docker.substr(pos: colon_pos + 1);
929 } else {
930 repo = docker;
931 tag = "latest";
932 }
933
934 // ai/ is the default
935 size_t slash_pos = docker.find(c: '/');
936 if (slash_pos == std::string::npos) {
937 repo.insert(pos: 0, s: "ai/");
938 }
939
940 LOG_INF("%s: Downloading Docker Model: %s:%s\n", __func__, repo.c_str(), tag.c_str());
941 try {
942 // --- helper: digest validation ---
943 auto validate_oci_digest = [](const std::string & digest) -> std::string {
944 // Expected: algo:hex ; start with sha256 (64 hex chars)
945 // You can extend this map if supporting other algorithms in future.
946 static const std::regex re("^sha256:([a-fA-F0-9]{64})$");
947 std::smatch m;
948 if (!std::regex_match(s: digest, m&: m, re: re)) {
949 throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest);
950 }
951 // normalize hex to lowercase
952 std::string normalized = digest;
953 std::transform(first: normalized.begin()+7, last: normalized.end(), result: normalized.begin()+7, unary_op: [](unsigned char c){
954 return std::tolower(c: c);
955 });
956 return normalized;
957 };
958
959 std::string token = common_docker_get_token(repo); // Get authentication token
960
961 // Get manifest
962 const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
963 std::string manifest_url = url_prefix + "/manifests/" + tag;
964 common_remote_params manifest_params;
965 manifest_params.headers.push_back(x: "Authorization: Bearer " + token);
966 manifest_params.headers.push_back(
967 x: "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json");
968 auto manifest_res = common_remote_get_content(url: manifest_url, params: manifest_params);
969 if (manifest_res.first != 200) {
970 throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(val: manifest_res.first));
971 }
972
973 std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end());
974 nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(i&: manifest_str);
975 std::string gguf_digest; // Find the GGUF layer
976 if (manifest.contains(key: "layers")) {
977 for (const auto & layer : manifest["layers"]) {
978 if (layer.contains(key: "mediaType")) {
979 std::string media_type = layer["mediaType"].get<std::string>();
980 if (media_type == "application/vnd.docker.ai.gguf.v3" ||
981 media_type.find(s: "gguf") != std::string::npos) {
982 gguf_digest = layer["digest"].get<std::string>();
983 break;
984 }
985 }
986 }
987 }
988
989 if (gguf_digest.empty()) {
990 throw std::runtime_error("No GGUF layer found in Docker manifest");
991 }
992
993 // Validate & normalize digest
994 gguf_digest = validate_oci_digest(gguf_digest);
995 LOG_DBG("%s: Using validated digest: %s\n", __func__, gguf_digest.c_str());
996
997 // Prepare local filename
998 std::string model_filename = repo;
999 std::replace(first: model_filename.begin(), last: model_filename.end(), old_value: '/', new_value: '_');
1000 model_filename += "_" + tag + ".gguf";
1001 std::string local_path = fs_get_cache_file(filename: model_filename);
1002
1003 const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
1004 if (!common_download_file_single(url: blob_url, path: local_path, bearer_token: token, offline: false)) {
1005 throw std::runtime_error("Failed to download Docker Model");
1006 }
1007
1008 LOG_INF("%s: Downloaded Docker Model to: %s\n", __func__, local_path.c_str());
1009 return local_path;
1010 } catch (const std::exception & e) {
1011 LOG_ERR("%s: Docker Model download failed: %s\n", __func__, e.what());
1012 throw;
1013 }
1014}
1015