1#include "arg.h"
2
3#include "common.h"
4#include "gguf.h" // for reading GGUF splits
5#include "log.h"
6#include "download.h"
7
8#define JSON_ASSERT GGML_ASSERT
9#include <nlohmann/json.hpp>
10
#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fstream>
#include <future>
#include <regex>
#include <string>
#include <thread>
#include <vector>
19
20#if defined(LLAMA_USE_CURL)
21#include <curl/curl.h>
22#include <curl/easy.h>
23#else
24#include "http.h"
25#endif
26
27#ifdef __linux__
28#include <linux/limits.h>
29#elif defined(_WIN32)
30# if !defined(PATH_MAX)
31# define PATH_MAX MAX_PATH
32# endif
33#elif defined(_AIX)
34#include <sys/limits.h>
35#else
36#include <sys/syslimits.h>
37#endif
38#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
39
40// isatty
41#if defined(_WIN32)
42#include <io.h>
43#else
44#include <unistd.h>
45#endif
46
47using json = nlohmann::ordered_json;
48
49//
50// downloader
51//
52
53static std::string read_file(const std::string & fname) {
54 std::ifstream file(fname);
55 if (!file) {
56 throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n", fname.c_str()));
57 }
58 std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
59 file.close();
60 return content;
61}
62
63static void write_file(const std::string & fname, const std::string & content) {
64 const std::string fname_tmp = fname + ".tmp";
65 std::ofstream file(fname_tmp);
66 if (!file) {
67 throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n", fname.c_str()));
68 }
69
70 try {
71 file << content;
72 file.close();
73
74 // Makes write atomic
75 if (rename(old: fname_tmp.c_str(), new: fname.c_str()) != 0) {
76 LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
77 // If rename fails, try to delete the temporary file
78 if (remove(filename: fname_tmp.c_str()) != 0) {
79 LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
80 }
81 }
82 } catch (...) {
83 // If anything fails, try to delete the temporary file
84 if (remove(filename: fname_tmp.c_str()) != 0) {
85 LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
86 }
87
88 throw std::runtime_error(string_format(fmt: "error: failed to write file '%s'\n", fname.c_str()));
89 }
90}
91
92static void write_etag(const std::string & path, const std::string & etag) {
93 const std::string etag_path = path + ".etag";
94 write_file(fname: etag_path, content: etag);
95 LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str());
96}
97
98static std::string read_etag(const std::string & path) {
99 std::string none;
100 const std::string etag_path = path + ".etag";
101
102 if (std::filesystem::exists(p: etag_path)) {
103 std::ifstream etag_in(etag_path);
104 if (!etag_in) {
105 LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
106 return none;
107 }
108 std::string etag;
109 std::getline(is&: etag_in, str&: etag);
110 return etag;
111 }
112
113 // no etag file, but maybe there is an old .json
114 // remove this code later
115 const std::string metadata_path = path + ".json";
116
117 if (std::filesystem::exists(p: metadata_path)) {
118 std::ifstream metadata_in(metadata_path);
119 try {
120 nlohmann::json metadata_json;
121 metadata_in >> metadata_json;
122 LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
123 metadata_json.dump().c_str());
124 if (metadata_json.contains(key: "etag") && metadata_json.at(key: "etag").is_string()) {
125 std::string etag = metadata_json.at(key: "etag");
126 write_etag(path, etag);
127 if (!std::filesystem::remove(p: metadata_path)) {
128 LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
129 }
130 return etag;
131 }
132 } catch (const nlohmann::json::exception & e) {
133 LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
134 }
135 }
136 return none;
137}
138
139#ifdef LLAMA_USE_CURL
140
141//
142// CURL utils
143//
144
// RAII alias: a CURL easy handle released via curl_easy_cleanup.
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;

// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
// (curl_slist_append may return a new head pointer, so `ptr` is re-assigned in place
// and only freed once, here in the destructor)
struct curl_slist_ptr {
    struct curl_slist * ptr = nullptr;
    ~curl_slist_ptr() {
        if (ptr) {
            curl_slist_free_all(list: ptr);
        }
    }
};
156
157static CURLcode common_curl_perf(CURL * curl) {
158 CURLcode res = curl_easy_perform(curl);
159 if (res != CURLE_OK) {
160 LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
161 }
162
163 return res;
164}
165
// Response headers collected from a HEAD request (filled in by common_header_callback).
struct common_load_model_from_url_headers {
    std::string etag;          // entity tag, used to detect remote file changes
    std::string last_modified; // Last-Modified header (informational, logged only)
    std::string accept_ranges; // non-"none" value indicates resumable (Range) downloads
};

// Deleter so a FILE * can be managed by std::unique_ptr.
struct FILE_deleter {
    void operator()(FILE * f) const { fclose(f); }
};

// CURLOPT_HEADERFUNCTION callback: parses one "Key: value\r\n" header line and
// stores the headers we care about (ETag, Last-Modified, Accept-Ranges) into the
// common_load_model_from_url_headers passed via CURLOPT_HEADERDATA.
// Returns n_items to tell libcurl the full line was consumed.
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
    common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;

    // header field names are case-insensitive, so compare them ignoring case
    // (cheaper than the per-name icase regex match this used to do)
    auto iequals = [](const std::string & a, const char * b) {
        const size_t n = std::char_traits<char>::length(b);
        if (a.size() != n) {
            return false;
        }
        for (size_t i = 0; i < n; ++i) {
            if (std::tolower((unsigned char) a[i]) != std::tolower((unsigned char) b[i])) {
                return false;
            }
        }
        return true;
    };

    static const std::regex header_regex("([^:]+): (.*)\r\n");

    std::string header(buffer, n_items);
    std::smatch match;
    if (std::regex_match(header, match, header_regex)) {
        const std::string key   = match[1];
        const std::string value = match[2];
        if (iequals(key, "ETag")) {
            headers->etag = value;
        } else if (iequals(key, "Last-Modified")) {
            headers->last_modified = value;
        } else if (iequals(key, "Accept-Ranges")) {
            headers->accept_ranges = value;
        }
    }

    return n_items;
}
199
// CURLOPT_WRITEFUNCTION callback: forward the received bytes to the
// FILE * handed over via CURLOPT_WRITEDATA; returns the item count written.
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
    FILE * out = static_cast<FILE *>(fd);
    return std::fwrite(data, size, nmemb, out);
}
203
// helper function to hide password in URL
// e.g. "https://user:pass@host/path" -> "https://********@host/path"
// Returns the URL unchanged when it carries no credentials.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    // Pattern: scheme://user[:password]@rest
    // capture 1: scheme incl. "://", capture 2: credentials incl. '@', capture 3: rest.
    // The previous regex used only non-capturing groups (so match[1]/match[3] never
    // existed) and could not match a real URL; the groups must capture.
    static const std::regex url_regex(R"(^([A-Za-z][A-Za-z0-9+.-]*://)([^/@]+@)(.*)$)");
    std::smatch match;

    if (std::regex_match(url, match, url_regex)) {
        // match[1] = scheme (e.g., "https://")
        // match[2] = user[:password]@ part (replaced)
        // match[3] = rest of URL (host and path)
        return match[1].str() + "********@" + match[3].str();
    }

    return url; // No credentials found or malformed URL
}
220
// Configure `curl` for a HEAD request against `url`: follow redirects,
// suppress the body and progress meter, and install the header callback
// that extracts ETag / Last-Modified / Accept-Ranges.
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
    // Set the URL, allow to follow http redirection
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

# if defined(_WIN32)
    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
    // operating system. Currently implemented under MS-Windows.
    curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
# endif

    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
}
236
// Switch a handle previously prepared for HEAD back to a body-fetching GET
// and install the FILE-writing callback. CURLOPT_WRITEDATA is set by the caller.
static void common_curl_easy_setopt_get(CURL * curl) {
    curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);

    // display download progress
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
}
244
245static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
246 if (std::filesystem::exists(p: path_temporary)) {
247 const std::string partial_size = std::to_string(val: std::filesystem::file_size(p: path_temporary));
248 LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
249 const std::string range_str = partial_size + "-";
250 curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
251 }
252
253 // Always open file in append mode could be resuming
254 std::unique_ptr<FILE, FILE_deleter> outfile(fopen(filename: path_temporary.c_str(), modes: "ab"));
255 if (!outfile) {
256 LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
257 return false;
258 }
259
260 common_curl_easy_setopt_get(curl);
261 curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
262
263 return common_curl_perf(curl) == CURLE_OK;
264}
265
266static bool common_download_head(CURL * curl,
267 curl_slist_ptr & http_headers,
268 const std::string & url,
269 const std::string & bearer_token) {
270 if (!curl) {
271 LOG_ERR("%s: error initializing libcurl\n", __func__);
272 return false;
273 }
274
275 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp");
276 // Check if hf-token or bearer-token was specified
277 if (!bearer_token.empty()) {
278 std::string auth_header = "Authorization: Bearer " + bearer_token;
279 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: auth_header.c_str());
280 }
281
282 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
283 common_curl_easy_setopt_head(curl, url);
284 return common_curl_perf(curl) == CURLE_OK;
285}
286
// download one single file from remote URL to local path
// Strategy: HEAD first to get ETag / Accept-Ranges, then GET into a
// "<path>.downloadInProgress" temp file (resuming when possible), finally
// rename over `path`. Retries up to max_attempts with exponential backoff.
// NOTE(review): if all attempts fail the loop ends without a break and the
// function still returns true - confirm callers tolerate this (failures are
// only logged).
static bool common_download_file_single_online(const std::string & url,
                                               const std::string & path,
                                               const std::string & bearer_token) {
    static const int max_attempts = 3;
    static const int retry_delay_seconds = 2;
    for (int i = 0; i < max_attempts; ++i) {
        std::string etag;

        // Check if the file already exists locally
        const auto file_exists = std::filesystem::exists(p: path);
        if (file_exists) {
            etag = read_etag(path);
        } else {
            LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
        }

        bool head_request_ok = false;
        bool should_download = !file_exists; // by default, we should download if the file does not exist

        // Initialize libcurl
        curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
        common_load_model_from_url_headers headers;
        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
        curl_slist_ptr http_headers;
        const bool was_perform_successful = common_download_head(curl: curl.get(), http_headers, url, bearer_token);
        if (!was_perform_successful) {
            head_request_ok = false;
        }

        // only a plain 200 counts as a usable HEAD response
        long http_code = 0;
        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
        if (http_code == 200) {
            head_request_ok = true;
        } else {
            LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
            head_request_ok = false;
        }

        // if head_request_ok is false, we don't have the etag or last-modified headers
        // we leave should_download as-is, which is true if the file does not exist
        bool should_download_from_scratch = false;
        if (head_request_ok) {
            // check if ETag or Last-Modified headers are different
            // if it is, we need to download the file again
            if (!etag.empty() && etag != headers.etag) {
                LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
                        headers.etag.c_str());
                should_download = true;
                should_download_from_scratch = true;
            }
        }

        const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
        if (should_download) {
            if (file_exists &&
                !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
                LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
                if (remove(filename: path.c_str()) != 0) {
                    LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
                    return false;
                }
            }

            const std::string path_temporary = path + ".downloadInProgress";
            // remote changed: discard both the partial download and the stale local copy
            if (should_download_from_scratch) {
                if (std::filesystem::exists(p: path_temporary)) {
                    if (remove(filename: path_temporary.c_str()) != 0) {
                        LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
                        return false;
                    }
                }

                if (std::filesystem::exists(p: path)) {
                    if (remove(filename: path.c_str()) != 0) {
                        LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
                        return false;
                    }
                }
            }
            // NOTE(review): the etag is persisted *before* the pull completes; an
            // interrupted download leaves a matching etag next to a stale/partial
            // file - confirm this is intended (resume relies on the temp file).
            if (head_request_ok) {
                write_etag(path, etag: headers.etag);
            }

            // start the download
            LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
                    __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
                    headers.etag.c_str(), headers.last_modified.c_str());
            const bool was_pull_successful = common_pull_file(curl: curl.get(), path_temporary);
            if (!was_pull_successful) {
                if (i + 1 < max_attempts) {
                    // backoff: retry_delay_seconds^i seconds (2s, then 4s, ...)
                    const int exponential_backoff_delay = std::pow(x: retry_delay_seconds, y: i) * 1000;
                    LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
                    std::this_thread::sleep_for(rtime: std::chrono::milliseconds(exponential_backoff_delay));
                } else {
                    LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
                }

                continue;
            }

            // validate the GET status (2xx/3xx accepted; 3xx were followed by curl)
            long http_code = 0;
            curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
            if (http_code < 200 || http_code >= 400) {
                LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
                return false;
            }

            // atomically move the finished download into place
            if (rename(old: path_temporary.c_str(), new: path.c_str()) != 0) {
                LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
                return false;
            }
        } else {
            LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
        }

        break;
    }

    return true;
}
408
409std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
410 curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
411 curl_slist_ptr http_headers;
412 std::vector<char> res_buffer;
413
414 curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
415 curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
416 curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
417 curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
418 typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
419 auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
420 auto data_vec = static_cast<std::vector<char> *>(data);
421 data_vec->insert(position: data_vec->end(), first: (char *)ptr, last: (char *)ptr + size * nmemb);
422 return size * nmemb;
423 };
424 curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
425 curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
426#if defined(_WIN32)
427 curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
428#endif
429 if (params.timeout > 0) {
430 curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
431 }
432 if (params.max_size > 0) {
433 curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
434 }
435 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp");
436 for (const auto & header : params.headers) {
437 http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: header.c_str());
438 }
439 curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
440
441 CURLcode res = curl_easy_perform(curl: curl.get());
442
443 if (res != CURLE_OK) {
444 std::string error_msg = curl_easy_strerror(res);
445 throw std::runtime_error("error: cannot make GET request: " + error_msg);
446 }
447
448 long res_code;
449 curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
450
451 return { res_code, std::move(res_buffer) };
452}
453
454#else
455
// Returns true when stdout is attached to an interactive terminal.
// Used to decide whether to render the live progress bar.
static bool is_output_a_tty() {
#if defined(_WIN32)
    return _isatty(_fileno(stdout));
#else
    return isatty(1); // fd 1 == stdout
#endif
}
463
464static void print_progress(size_t current, size_t total) {
465 if (!is_output_a_tty()) {
466 return;
467 }
468
469 if (!total) {
470 return;
471 }
472
473 size_t width = 50;
474 size_t pct = (100 * current) / total;
475 size_t pos = (width * current) / total;
476
477 std::cout << "["
478 << std::string(pos, '=')
479 << (pos < width ? ">" : "")
480 << std::string(width - pos, ' ')
481 << "] " << std::setw(3) << pct << "% ("
482 << current / (1024 * 1024) << " MB / "
483 << total / (1024 * 1024) << " MB)\r";
484 std::cout.flush();
485}
486
// Stream `resolve_path` from `cli` into `path_tmp` (opened in append mode),
// optionally resuming from `existing_size` bytes via an HTTP Range request.
// `total_size` is filled in from the Content-Length header when not yet known.
static bool common_pull_file(httplib::Client & cli,
                             const std::string & resolve_path,
                             const std::string & path_tmp,
                             bool supports_ranges,
                             size_t existing_size,
                             size_t & total_size) {
    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
    if (!ofs.is_open()) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    httplib::Headers headers;
    if (supports_ranges && existing_size > 0) {
        // request only the bytes we do not have yet
        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
    }

    std::atomic<size_t> downloaded{existing_size};

    auto res = cli.Get(resolve_path, headers,
        // response handler: validate the status before accepting any body data;
        // returning false aborts the transfer
        [&](const httplib::Response &response) {
            if (existing_size > 0 && response.status != 206) {
                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
                return false;
            }
            if (existing_size == 0 && response.status != 200) {
                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
                return false;
            }
            if (total_size == 0 && response.has_header("Content-Length")) {
                try {
                    // Content-Length covers only the remaining bytes on a 206 response
                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                    total_size = existing_size + content_length;
                } catch (const std::exception &e) {
                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
                }
            }
            return true;
        },
        // content receiver: append each chunk to the temp file and update progress
        [&](const char *data, size_t len) {
            ofs.write(data, len);
            if (!ofs) {
                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
                return false;
            }
            downloaded += len;
            print_progress(downloaded, total_size);
            return true;
        },
        nullptr
    );

    // end the progress-bar line
    std::cout << "\n";

    if (!res) {
        // NOTE(review): res is falsy in this branch, so the ternary always logs -1
        LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1);
        return false;
    }

    return true;
}
548
549// download one single file from remote URL to local path
550static bool common_download_file_single_online(const std::string & url,
551 const std::string & path,
552 const std::string & bearer_token) {
553 static const int max_attempts = 3;
554 static const int retry_delay_seconds = 2;
555
556 auto [cli, parts] = common_http_client(url);
557
558 httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
559 if (!bearer_token.empty()) {
560 default_headers.insert({"Authorization", "Bearer " + bearer_token});
561 }
562 cli.set_default_headers(default_headers);
563
564 const bool file_exists = std::filesystem::exists(path);
565
566 std::string last_etag;
567 if (file_exists) {
568 last_etag = read_etag(path);
569 } else {
570 LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
571 }
572
573 for (int i = 0; i < max_attempts; ++i) {
574 auto head = cli.Head(parts.path);
575 bool head_ok = head && head->status >= 200 && head->status < 300;
576 if (!head_ok) {
577 LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
578 if (file_exists) {
579 LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
580 return true;
581 }
582 }
583
584 std::string etag;
585 if (head_ok && head->has_header("ETag")) {
586 etag = head->get_header_value("ETag");
587 }
588
589 size_t total_size = 0;
590 if (head_ok && head->has_header("Content-Length")) {
591 try {
592 total_size = std::stoull(head->get_header_value("Content-Length"));
593 } catch (const std::exception& e) {
594 LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
595 }
596 }
597
598 bool supports_ranges = false;
599 if (head_ok && head->has_header("Accept-Ranges")) {
600 supports_ranges = head->get_header_value("Accept-Ranges") != "none";
601 }
602
603 bool should_download_from_scratch = false;
604 if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
605 LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
606 last_etag.c_str(), etag.c_str());
607 should_download_from_scratch = true;
608 }
609
610 if (file_exists) {
611 if (!should_download_from_scratch) {
612 LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
613 return true;
614 }
615 LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
616 if (remove(path.c_str()) != 0) {
617 LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
618 return false;
619 }
620 }
621
622 const std::string path_temporary = path + ".downloadInProgress";
623 size_t existing_size = 0;
624
625 if (std::filesystem::exists(path_temporary)) {
626 if (supports_ranges && !should_download_from_scratch) {
627 existing_size = std::filesystem::file_size(path_temporary);
628 } else if (remove(path_temporary.c_str()) != 0) {
629 LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
630 return false;
631 }
632 }
633
634 // start the download
635 LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
636 __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
637 const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
638 if (!was_pull_successful) {
639 if (i + 1 < max_attempts) {
640 const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
641 LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
642 std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
643 } else {
644 LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
645 }
646 continue;
647 }
648
649 if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
650 LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
651 return false;
652 }
653 if (!etag.empty()) {
654 write_etag(path, etag);
655 }
656 break;
657 }
658
659 return true;
660}
661
662std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
663 const common_remote_params & params) {
664 auto [cli, parts] = common_http_client(url);
665
666 httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
667 for (const auto & header : params.headers) {
668 size_t pos = header.find(':');
669 if (pos != std::string::npos) {
670 headers.emplace(header.substr(0, pos), header.substr(pos + 1));
671 } else {
672 headers.emplace(header, "");
673 }
674 }
675
676 if (params.timeout > 0) {
677 cli.set_read_timeout(params.timeout, 0);
678 cli.set_write_timeout(params.timeout, 0);
679 }
680
681 std::vector<char> buf;
682 auto res = cli.Get(parts.path, headers,
683 [&](const char *data, size_t len) {
684 buf.insert(buf.end(), data, data + len);
685 return params.max_size == 0 ||
686 buf.size() <= static_cast<size_t>(params.max_size);
687 },
688 nullptr
689 );
690
691 if (!res) {
692 throw std::runtime_error("error: cannot make GET request");
693 }
694
695 return { res->status, std::move(buf) };
696}
697
698#endif // LLAMA_USE_CURL
699
700static bool common_download_file_single(const std::string & url,
701 const std::string & path,
702 const std::string & bearer_token,
703 bool offline) {
704 if (!offline) {
705 return common_download_file_single_online(url, path, bearer_token);
706 }
707
708 if (!std::filesystem::exists(p: path)) {
709 LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
710 return false;
711 }
712
713 LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
714 return true;
715}
716
717// download multiple files from remote URLs to local paths
718// the input is a vector of pairs <url, path>
719static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
720 // Prepare download in parallel
721 std::vector<std::future<bool>> futures_download;
722 for (auto const & item : urls) {
723 futures_download.push_back(x: std::async(policy: std::launch::async, fn: [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
724 return common_download_file_single(url: it.first, path: it.second, bearer_token, offline);
725 }, args: item));
726 }
727
728 // Wait for all downloads to complete
729 for (auto & f : futures_download) {
730 if (!f.get()) {
731 return false;
732 }
733 }
734
735 return true;
736}
737
738bool common_download_model(
739 const common_params_model & model,
740 const std::string & bearer_token,
741 bool offline) {
742 // Basic validation of the model.url
743 if (model.url.empty()) {
744 LOG_ERR("%s: invalid model url\n", __func__);
745 return false;
746 }
747
748 if (!common_download_file_single(url: model.url, path: model.path, bearer_token, offline)) {
749 return false;
750 }
751
752 // check for additional GGUFs split to download
753 int n_split = 0;
754 {
755 struct gguf_init_params gguf_params = {
756 /*.no_alloc = */ true,
757 /*.ctx = */ NULL,
758 };
759 auto * ctx_gguf = gguf_init_from_file(fname: model.path.c_str(), params: gguf_params);
760 if (!ctx_gguf) {
761 LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
762 return false;
763 }
764
765 auto key_n_split = gguf_find_key(ctx: ctx_gguf, key: LLM_KV_SPLIT_COUNT);
766 if (key_n_split >= 0) {
767 n_split = gguf_get_val_u16(ctx: ctx_gguf, key_id: key_n_split);
768 }
769
770 gguf_free(ctx: ctx_gguf);
771 }
772
773 if (n_split > 1) {
774 char split_prefix[PATH_MAX] = {0};
775 char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
776
777 // Verify the first split file format
778 // and extract split URL and PATH prefixes
779 {
780 if (!llama_split_prefix(split_prefix, maxlen: sizeof(split_prefix), split_path: model.path.c_str(), split_no: 0, split_count: n_split)) {
781 LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split);
782 return false;
783 }
784
785 if (!llama_split_prefix(split_prefix: split_url_prefix, maxlen: sizeof(split_url_prefix), split_path: model.url.c_str(), split_no: 0, split_count: n_split)) {
786 LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split);
787 return false;
788 }
789 }
790
791 std::vector<std::pair<std::string, std::string>> urls;
792 for (int idx = 1; idx < n_split; idx++) {
793 char split_path[PATH_MAX] = {0};
794 llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: split_prefix, split_no: idx, split_count: n_split);
795
796 char split_url[LLAMA_MAX_URL_LENGTH] = {0};
797 llama_split_path(split_path: split_url, maxlen: sizeof(split_url), path_prefix: split_url_prefix, split_no: idx, split_count: n_split);
798
799 if (std::string(split_path) == model.path) {
800 continue; // skip the already downloaded file
801 }
802
803 urls.push_back(x: {split_url, split_path});
804 }
805
806 // Download in parallel
807 common_download_file_multiple(urls, bearer_token, offline);
808 }
809
810 return true;
811}
812
// Resolve a Hugging Face repo spec "<user>/<model>[:tag]" to the GGUF (and
// optional mmproj) file names by querying the HF manifests API. Successful
// responses are cached on disk so a later offline run can reuse them.
// Throws on invalid input, unreachable manifest, auth failure (401), or a
// manifest without a ggufFile entry.
common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
    auto parts = string_split<std::string>(input: hf_repo_with_tag, separator: ':');
    std::string tag = parts.size() > 1 ? parts.back() : "latest";
    std::string hf_repo = parts[0];
    if (string_split<std::string>(input: hf_repo, separator: '/').size() != 2) {
        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
    }

    std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;

    // headers
    std::vector<std::string> headers;
    headers.push_back(x: "Accept: application/json");
    if (!bearer_token.empty()) {
        headers.push_back(x: "Authorization: Bearer " + bearer_token);
    }
    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
    // User-Agent header is already set in common_remote_get_content, no need to set it here

    // we use "=" to avoid clashing with other component, while still being allowed on windows
    std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
    string_replace_all(s&: cached_response_fname, search: "/", replace: "_");
    std::string cached_response_path = fs_get_cache_file(filename: cached_response_fname);

    // make the request
    common_remote_params params;
    params.headers = headers;
    long res_code = 0;
    std::string res_str;
    bool use_cache = false;
    if (!offline) {
        try {
            auto res = common_remote_get_content(url, params);
            res_code = res.first;
            res_str = std::string(res.second.data(), res.second.size());
        } catch (const std::exception & e) {
            // network failure is not fatal yet - we may still have a cached manifest
            LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
        }
    }
    // res_code == 0 means no response was obtained (offline mode or request failed)
    if (res_code == 0) {
        if (std::filesystem::exists(p: cached_response_path)) {
            LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str());
            res_str = read_file(fname: cached_response_path);
            res_code = 200; // treat the cached manifest as a successful response
            use_cache = true;
        } else {
            throw std::runtime_error(
                offline ? "error: failed to get manifest (offline mode)"
                        : "error: failed to get manifest (check your internet connection)");
        }
    }
    std::string ggufFile;
    std::string mmprojFile;

    // 304 Not Modified also carries a usable (cached) manifest body
    if (res_code == 200 || res_code == 304) {
        try {
            auto j = json::parse(i&: res_str);

            if (j.contains(key: "ggufFile") && j["ggufFile"].contains(key: "rfilename")) {
                ggufFile = j["ggufFile"]["rfilename"].get<std::string>();
            }
            if (j.contains(key: "mmprojFile") && j["mmprojFile"].contains(key: "rfilename")) {
                mmprojFile = j["mmprojFile"]["rfilename"].get<std::string>();
            }
        } catch (const std::exception & e) {
            throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
        }
        if (!use_cache) {
            // if not using cached response, update the cache file
            write_file(fname: cached_response_path, content: res_str);
        }
    } else if (res_code == 401) {
        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
    } else {
        throw std::runtime_error(string_format(fmt: "error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
    }

    // check response
    if (ggufFile.empty()) {
        throw std::runtime_error("error: model does not have ggufFile");
    }

    return { .repo: hf_repo, .ggufFile: ggufFile, .mmprojFile: mmprojFile };
}
897
898//
899// Docker registry functions
900//
901
902static std::string common_docker_get_token(const std::string & repo) {
903 std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull";
904
905 common_remote_params params;
906 auto res = common_remote_get_content(url, params);
907
908 if (res.first != 200) {
909 throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(val: res.first));
910 }
911
912 std::string response_str(res.second.begin(), res.second.end());
913 nlohmann::ordered_json response = nlohmann::ordered_json::parse(i&: response_str);
914
915 if (!response.contains(key: "token")) {
916 throw std::runtime_error("Docker registry token response missing 'token' field");
917 }
918
919 return response["token"].get<std::string>();
920}
921
922std::string common_docker_resolve_model(const std::string & docker) {
923 // Parse ai/smollm2:135M-Q4_0
924 size_t colon_pos = docker.find(c: ':');
925 std::string repo, tag;
926 if (colon_pos != std::string::npos) {
927 repo = docker.substr(pos: 0, n: colon_pos);
928 tag = docker.substr(pos: colon_pos + 1);
929 } else {
930 repo = docker;
931 tag = "latest";
932 }
933
934 // ai/ is the default
935 size_t slash_pos = docker.find(c: '/');
936 if (slash_pos == std::string::npos) {
937 repo.insert(pos: 0, s: "ai/");
938 }
939
940 LOG_INF("%s: Downloading Docker Model: %s:%s\n", __func__, repo.c_str(), tag.c_str());
941 try {
942 // --- helper: digest validation ---
943 auto validate_oci_digest = [](const std::string & digest) -> std::string {
944 // Expected: algo:hex ; start with sha256 (64 hex chars)
945 // You can extend this map if supporting other algorithms in future.
946 static const std::regex re("^sha256:([a-fA-F0-9]{64})$");
947 std::smatch m;
948 if (!std::regex_match(s: digest, m&: m, re: re)) {
949 throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest);
950 }
951 // normalize hex to lowercase
952 std::string normalized = digest;
953 std::transform(first: normalized.begin()+7, last: normalized.end(), result: normalized.begin()+7, unary_op: [](unsigned char c){
954 return std::tolower(c: c);
955 });
956 return normalized;
957 };
958
959 std::string token = common_docker_get_token(repo); // Get authentication token
960
961 // Get manifest
962 const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
963 std::string manifest_url = url_prefix + "/manifests/" + tag;
964 common_remote_params manifest_params;
965 manifest_params.headers.push_back(x: "Authorization: Bearer " + token);
966 manifest_params.headers.push_back(
967 x: "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json");
968 auto manifest_res = common_remote_get_content(url: manifest_url, params: manifest_params);
969 if (manifest_res.first != 200) {
970 throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(val: manifest_res.first));
971 }
972
973 std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end());
974 nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(i&: manifest_str);
975 std::string gguf_digest; // Find the GGUF layer
976 if (manifest.contains(key: "layers")) {
977 for (const auto & layer : manifest["layers"]) {
978 if (layer.contains(key: "mediaType")) {
979 std::string media_type = layer["mediaType"].get<std::string>();
980 if (media_type == "application/vnd.docker.ai.gguf.v3" ||
981 media_type.find(s: "gguf") != std::string::npos) {
982 gguf_digest = layer["digest"].get<std::string>();
983 break;
984 }
985 }
986 }
987 }
988
989 if (gguf_digest.empty()) {
990 throw std::runtime_error("No GGUF layer found in Docker manifest");
991 }
992
993 // Validate & normalize digest
994 gguf_digest = validate_oci_digest(gguf_digest);
995 LOG_DBG("%s: Using validated digest: %s\n", __func__, gguf_digest.c_str());
996
997 // Prepare local filename
998 std::string model_filename = repo;
999 std::replace(first: model_filename.begin(), last: model_filename.end(), old_value: '/', new_value: '_');
1000 model_filename += "_" + tag + ".gguf";
1001 std::string local_path = fs_get_cache_file(filename: model_filename);
1002
1003 const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
1004 if (!common_download_file_single(url: blob_url, path: local_path, bearer_token: token, offline: false)) {
1005 throw std::runtime_error("Failed to download Docker Model");
1006 }
1007
1008 LOG_INF("%s: Downloaded Docker Model to: %s\n", __func__, local_path.c_str());
1009 return local_path;
1010 } catch (const std::exception & e) {
1011 LOG_ERR("%s: Docker Model download failed: %s\n", __func__, e.what());
1012 throw;
1013 }
1014}
1015