| 1 | #include "arg.h" |
| 2 | |
| 3 | #include "common.h" |
| 4 | #include "gguf.h" // for reading GGUF splits |
| 5 | #include "log.h" |
| 6 | #include "download.h" |
| 7 | |
| 8 | #define JSON_ASSERT GGML_ASSERT |
| 9 | #include <nlohmann/json.hpp> |
| 10 | |
#include <algorithm>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <future>
#include <iomanip>
#include <iostream>
#include <memory>
#include <regex>
#include <stdexcept>
#include <string>
#include <thread>
#include <utility>
#include <vector>
| 19 | |
| 20 | #if defined(LLAMA_USE_CURL) |
| 21 | #include <curl/curl.h> |
| 22 | #include <curl/easy.h> |
| 23 | #else |
| 24 | #include "http.h" |
| 25 | #endif |
| 26 | |
| 27 | #ifdef __linux__ |
| 28 | #include <linux/limits.h> |
| 29 | #elif defined(_WIN32) |
| 30 | # if !defined(PATH_MAX) |
| 31 | # define PATH_MAX MAX_PATH |
| 32 | # endif |
| 33 | #elif defined(_AIX) |
| 34 | #include <sys/limits.h> |
| 35 | #else |
| 36 | #include <sys/syslimits.h> |
| 37 | #endif |
| 38 | #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 |
| 39 | |
| 40 | // isatty |
| 41 | #if defined(_WIN32) |
| 42 | #include <io.h> |
| 43 | #else |
| 44 | #include <unistd.h> |
| 45 | #endif |
| 46 | |
| 47 | using json = nlohmann::ordered_json; |
| 48 | |
| 49 | // |
| 50 | // downloader |
| 51 | // |
| 52 | |
| 53 | static std::string read_file(const std::string & fname) { |
| 54 | std::ifstream file(fname); |
| 55 | if (!file) { |
| 56 | throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n" , fname.c_str())); |
| 57 | } |
| 58 | std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); |
| 59 | file.close(); |
| 60 | return content; |
| 61 | } |
| 62 | |
| 63 | static void write_file(const std::string & fname, const std::string & content) { |
| 64 | const std::string fname_tmp = fname + ".tmp" ; |
| 65 | std::ofstream file(fname_tmp); |
| 66 | if (!file) { |
| 67 | throw std::runtime_error(string_format(fmt: "error: failed to open file '%s'\n" , fname.c_str())); |
| 68 | } |
| 69 | |
| 70 | try { |
| 71 | file << content; |
| 72 | file.close(); |
| 73 | |
| 74 | // Makes write atomic |
| 75 | if (rename(old: fname_tmp.c_str(), new: fname.c_str()) != 0) { |
| 76 | LOG_ERR("%s: unable to rename file: %s to %s\n" , __func__, fname_tmp.c_str(), fname.c_str()); |
| 77 | // If rename fails, try to delete the temporary file |
| 78 | if (remove(filename: fname_tmp.c_str()) != 0) { |
| 79 | LOG_ERR("%s: unable to delete temporary file: %s\n" , __func__, fname_tmp.c_str()); |
| 80 | } |
| 81 | } |
| 82 | } catch (...) { |
| 83 | // If anything fails, try to delete the temporary file |
| 84 | if (remove(filename: fname_tmp.c_str()) != 0) { |
| 85 | LOG_ERR("%s: unable to delete temporary file: %s\n" , __func__, fname_tmp.c_str()); |
| 86 | } |
| 87 | |
| 88 | throw std::runtime_error(string_format(fmt: "error: failed to write file '%s'\n" , fname.c_str())); |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | static void write_etag(const std::string & path, const std::string & etag) { |
| 93 | const std::string etag_path = path + ".etag" ; |
| 94 | write_file(fname: etag_path, content: etag); |
| 95 | LOG_DBG("%s: file etag saved: %s\n" , __func__, etag_path.c_str()); |
| 96 | } |
| 97 | |
| 98 | static std::string read_etag(const std::string & path) { |
| 99 | std::string none; |
| 100 | const std::string etag_path = path + ".etag" ; |
| 101 | |
| 102 | if (std::filesystem::exists(p: etag_path)) { |
| 103 | std::ifstream etag_in(etag_path); |
| 104 | if (!etag_in) { |
| 105 | LOG_ERR("%s: could not open .etag file for reading: %s\n" , __func__, etag_path.c_str()); |
| 106 | return none; |
| 107 | } |
| 108 | std::string etag; |
| 109 | std::getline(is&: etag_in, str&: etag); |
| 110 | return etag; |
| 111 | } |
| 112 | |
| 113 | // no etag file, but maybe there is an old .json |
| 114 | // remove this code later |
| 115 | const std::string metadata_path = path + ".json" ; |
| 116 | |
| 117 | if (std::filesystem::exists(p: metadata_path)) { |
| 118 | std::ifstream metadata_in(metadata_path); |
| 119 | try { |
| 120 | nlohmann::json metadata_json; |
| 121 | metadata_in >> metadata_json; |
| 122 | LOG_DBG("%s: previous metadata file found %s: %s\n" , __func__, metadata_path.c_str(), |
| 123 | metadata_json.dump().c_str()); |
| 124 | if (metadata_json.contains(key: "etag" ) && metadata_json.at(key: "etag" ).is_string()) { |
| 125 | std::string etag = metadata_json.at(key: "etag" ); |
| 126 | write_etag(path, etag); |
| 127 | if (!std::filesystem::remove(p: metadata_path)) { |
| 128 | LOG_WRN("%s: failed to delete old .json metadata file: %s\n" , __func__, metadata_path.c_str()); |
| 129 | } |
| 130 | return etag; |
| 131 | } |
| 132 | } catch (const nlohmann::json::exception & e) { |
| 133 | LOG_ERR("%s: error reading metadata file %s: %s\n" , __func__, metadata_path.c_str(), e.what()); |
| 134 | } |
| 135 | } |
| 136 | return none; |
| 137 | } |
| 138 | |
| 139 | #ifdef LLAMA_USE_CURL |
| 140 | |
| 141 | // |
| 142 | // CURL utils |
| 143 | // |
| 144 | |
| 145 | using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>; |
| 146 | |
| 147 | // cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one |
| 148 | struct curl_slist_ptr { |
| 149 | struct curl_slist * ptr = nullptr; |
| 150 | ~curl_slist_ptr() { |
| 151 | if (ptr) { |
| 152 | curl_slist_free_all(list: ptr); |
| 153 | } |
| 154 | } |
| 155 | }; |
| 156 | |
| 157 | static CURLcode common_curl_perf(CURL * curl) { |
| 158 | CURLcode res = curl_easy_perform(curl); |
| 159 | if (res != CURLE_OK) { |
| 160 | LOG_ERR("%s: curl_easy_perform() failed\n" , __func__); |
| 161 | } |
| 162 | |
| 163 | return res; |
| 164 | } |
| 165 | |
// Send a HEAD request to retrieve the etag and last-modified headers
// (filled in by common_header_callback via CURLOPT_HEADERDATA)
struct common_load_model_from_url_headers {
    std::string etag;
    std::string last_modified;
    std::string accept_ranges;
};
| 172 | |
// Custom deleter so std::unique_ptr can own a C FILE* handle.
struct FILE_deleter {
    void operator()(FILE * f) const { fclose(f); }
};
| 176 | |
| 177 | static size_t (char * buffer, size_t, size_t n_items, void * userdata) { |
| 178 | common_load_model_from_url_headers * = (common_load_model_from_url_headers *) userdata; |
| 179 | static std::regex ("([^:]+): (.*)\r\n" ); |
| 180 | static std::regex etag_regex("ETag" , std::regex_constants::icase); |
| 181 | static std::regex last_modified_regex("Last-Modified" , std::regex_constants::icase); |
| 182 | static std::regex accept_ranges_regex("Accept-Ranges" , std::regex_constants::icase); |
| 183 | std::string (buffer, n_items); |
| 184 | std::smatch match; |
| 185 | if (std::regex_match(s: header, m&: match, re: header_regex)) { |
| 186 | const std::string & key = match[1]; |
| 187 | const std::string & value = match[2]; |
| 188 | if (std::regex_match(s: key, m&: match, re: etag_regex)) { |
| 189 | headers->etag = value; |
| 190 | } else if (std::regex_match(s: key, m&: match, re: last_modified_regex)) { |
| 191 | headers->last_modified = value; |
| 192 | } else if (std::regex_match(s: key, m&: match, re: accept_ranges_regex)) { |
| 193 | headers->accept_ranges = value; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | return n_items; |
| 198 | } |
| 199 | |
// libcurl write callback: append the received body bytes to the FILE* passed
// as CURLOPT_WRITEDATA; returns the number of items actually written.
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
    return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
}
| 203 | |
// helper function to hide password in URL
// e.g. "https://user:pass@host/path" -> "https://********@host/path"
static std::string llama_download_hide_password_in_url(const std::string & url) {
    // Use regex to match and replace the user[:password]@ pattern in URLs
    // Pattern: scheme://user[:password]@host[...]
    // The credential part must be present for the regex to match, so URLs
    // without credentials are returned unchanged.
    static const std::regex url_regex(R"(^([A-Za-z][A-Za-z0-9+.\-]*://)([^/@]+@)(.*)$)");
    std::smatch match;

    if (std::regex_match(url, match, url_regex)) {
        // match[1] = scheme (e.g., "https://")
        // match[2] = user[:password]@ part
        // match[3] = rest of URL (host and path)
        return match[1].str() + "********@" + match[3].str();
    }

    return url; // No credentials found or malformed URL
}
| 220 | |
// Configure `curl` for a HEAD request on `url`: follows redirects, hides the
// progress meter, and installs common_header_callback so the response headers
// (ETag / Last-Modified / Accept-Ranges) are captured.
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
    // Set the URL, allow to follow http redirection
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

# if defined(_WIN32)
    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
    // operating system. Currently implemented under MS-Windows.
    curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
# endif

    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);   // will trigger the HEAD verb
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);  // hide head request progress
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
}
| 236 | |
// Switch a curl handle previously prepared for HEAD back to a body-fetching
// GET and install the FILE*-based write callback.
static void common_curl_easy_setopt_get(CURL * curl) {
    curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);

    // display download progress
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
}
| 244 | |
| 245 | static bool common_pull_file(CURL * curl, const std::string & path_temporary) { |
| 246 | if (std::filesystem::exists(p: path_temporary)) { |
| 247 | const std::string partial_size = std::to_string(val: std::filesystem::file_size(p: path_temporary)); |
| 248 | LOG_INF("%s: server supports range requests, resuming download from byte %s\n" , __func__, partial_size.c_str()); |
| 249 | const std::string range_str = partial_size + "-" ; |
| 250 | curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str()); |
| 251 | } |
| 252 | |
| 253 | // Always open file in append mode could be resuming |
| 254 | std::unique_ptr<FILE, FILE_deleter> outfile(fopen(filename: path_temporary.c_str(), modes: "ab" )); |
| 255 | if (!outfile) { |
| 256 | LOG_ERR("%s: error opening local file for writing: %s\n" , __func__, path_temporary.c_str()); |
| 257 | return false; |
| 258 | } |
| 259 | |
| 260 | common_curl_easy_setopt_get(curl); |
| 261 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get()); |
| 262 | |
| 263 | return common_curl_perf(curl) == CURLE_OK; |
| 264 | } |
| 265 | |
| 266 | static bool common_download_head(CURL * curl, |
| 267 | curl_slist_ptr & , |
| 268 | const std::string & url, |
| 269 | const std::string & bearer_token) { |
| 270 | if (!curl) { |
| 271 | LOG_ERR("%s: error initializing libcurl\n" , __func__); |
| 272 | return false; |
| 273 | } |
| 274 | |
| 275 | http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp" ); |
| 276 | // Check if hf-token or bearer-token was specified |
| 277 | if (!bearer_token.empty()) { |
| 278 | std::string = "Authorization: Bearer " + bearer_token; |
| 279 | http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: auth_header.c_str()); |
| 280 | } |
| 281 | |
| 282 | curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr); |
| 283 | common_curl_easy_setopt_head(curl, url); |
| 284 | return common_curl_perf(curl) == CURLE_OK; |
| 285 | } |
| 286 | |
| 287 | // download one single file from remote URL to local path |
| 288 | static bool common_download_file_single_online(const std::string & url, |
| 289 | const std::string & path, |
| 290 | const std::string & bearer_token) { |
| 291 | static const int max_attempts = 3; |
| 292 | static const int retry_delay_seconds = 2; |
| 293 | for (int i = 0; i < max_attempts; ++i) { |
| 294 | std::string etag; |
| 295 | |
| 296 | // Check if the file already exists locally |
| 297 | const auto file_exists = std::filesystem::exists(p: path); |
| 298 | if (file_exists) { |
| 299 | etag = read_etag(path); |
| 300 | } else { |
| 301 | LOG_INF("%s: no previous model file found %s\n" , __func__, path.c_str()); |
| 302 | } |
| 303 | |
| 304 | bool head_request_ok = false; |
| 305 | bool should_download = !file_exists; // by default, we should download if the file does not exist |
| 306 | |
| 307 | // Initialize libcurl |
| 308 | curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); |
| 309 | common_load_model_from_url_headers ; |
| 310 | curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); |
| 311 | curl_slist_ptr ; |
| 312 | const bool was_perform_successful = common_download_head(curl: curl.get(), http_headers, url, bearer_token); |
| 313 | if (!was_perform_successful) { |
| 314 | head_request_ok = false; |
| 315 | } |
| 316 | |
| 317 | long http_code = 0; |
| 318 | curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); |
| 319 | if (http_code == 200) { |
| 320 | head_request_ok = true; |
| 321 | } else { |
| 322 | LOG_WRN("%s: HEAD invalid http status code received: %ld\n" , __func__, http_code); |
| 323 | head_request_ok = false; |
| 324 | } |
| 325 | |
| 326 | // if head_request_ok is false, we don't have the etag or last-modified headers |
| 327 | // we leave should_download as-is, which is true if the file does not exist |
| 328 | bool should_download_from_scratch = false; |
| 329 | if (head_request_ok) { |
| 330 | // check if ETag or Last-Modified headers are different |
| 331 | // if it is, we need to download the file again |
| 332 | if (!etag.empty() && etag != headers.etag) { |
| 333 | LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n" , __func__, etag.c_str(), |
| 334 | headers.etag.c_str()); |
| 335 | should_download = true; |
| 336 | should_download_from_scratch = true; |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none" ; |
| 341 | if (should_download) { |
| 342 | if (file_exists && |
| 343 | !accept_ranges_supported) { // Resumable downloads not supported, delete and start again. |
| 344 | LOG_WRN("%s: deleting previous downloaded file: %s\n" , __func__, path.c_str()); |
| 345 | if (remove(filename: path.c_str()) != 0) { |
| 346 | LOG_ERR("%s: unable to delete file: %s\n" , __func__, path.c_str()); |
| 347 | return false; |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | const std::string path_temporary = path + ".downloadInProgress" ; |
| 352 | if (should_download_from_scratch) { |
| 353 | if (std::filesystem::exists(p: path_temporary)) { |
| 354 | if (remove(filename: path_temporary.c_str()) != 0) { |
| 355 | LOG_ERR("%s: unable to delete file: %s\n" , __func__, path_temporary.c_str()); |
| 356 | return false; |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | if (std::filesystem::exists(p: path)) { |
| 361 | if (remove(filename: path.c_str()) != 0) { |
| 362 | LOG_ERR("%s: unable to delete file: %s\n" , __func__, path.c_str()); |
| 363 | return false; |
| 364 | } |
| 365 | } |
| 366 | } |
| 367 | if (head_request_ok) { |
| 368 | write_etag(path, etag: headers.etag); |
| 369 | } |
| 370 | |
| 371 | // start the download |
| 372 | LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n" , |
| 373 | __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(), |
| 374 | headers.etag.c_str(), headers.last_modified.c_str()); |
| 375 | const bool was_pull_successful = common_pull_file(curl: curl.get(), path_temporary); |
| 376 | if (!was_pull_successful) { |
| 377 | if (i + 1 < max_attempts) { |
| 378 | const int exponential_backoff_delay = std::pow(x: retry_delay_seconds, y: i) * 1000; |
| 379 | LOG_WRN("%s: retrying after %d milliseconds...\n" , __func__, exponential_backoff_delay); |
| 380 | std::this_thread::sleep_for(rtime: std::chrono::milliseconds(exponential_backoff_delay)); |
| 381 | } else { |
| 382 | LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n" , __func__, max_attempts); |
| 383 | } |
| 384 | |
| 385 | continue; |
| 386 | } |
| 387 | |
| 388 | long http_code = 0; |
| 389 | curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); |
| 390 | if (http_code < 200 || http_code >= 400) { |
| 391 | LOG_ERR("%s: invalid http status code received: %ld\n" , __func__, http_code); |
| 392 | return false; |
| 393 | } |
| 394 | |
| 395 | if (rename(old: path_temporary.c_str(), new: path.c_str()) != 0) { |
| 396 | LOG_ERR("%s: unable to rename file: %s to %s\n" , __func__, path_temporary.c_str(), path.c_str()); |
| 397 | return false; |
| 398 | } |
| 399 | } else { |
| 400 | LOG_INF("%s: using cached file: %s\n" , __func__, path.c_str()); |
| 401 | } |
| 402 | |
| 403 | break; |
| 404 | } |
| 405 | |
| 406 | return true; |
| 407 | } |
| 408 | |
| 409 | std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) { |
| 410 | curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); |
| 411 | curl_slist_ptr ; |
| 412 | std::vector<char> res_buffer; |
| 413 | |
| 414 | curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); |
| 415 | curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); |
| 416 | curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); |
| 417 | curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L); |
| 418 | typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); |
| 419 | auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { |
| 420 | auto data_vec = static_cast<std::vector<char> *>(data); |
| 421 | data_vec->insert(position: data_vec->end(), first: (char *)ptr, last: (char *)ptr + size * nmemb); |
| 422 | return size * nmemb; |
| 423 | }; |
| 424 | curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback)); |
| 425 | curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer); |
| 426 | #if defined(_WIN32) |
| 427 | curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); |
| 428 | #endif |
| 429 | if (params.timeout > 0) { |
| 430 | curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout); |
| 431 | } |
| 432 | if (params.max_size > 0) { |
| 433 | curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size); |
| 434 | } |
| 435 | http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: "User-Agent: llama-cpp" ); |
| 436 | for (const auto & : params.headers) { |
| 437 | http_headers.ptr = curl_slist_append(list: http_headers.ptr, data: header.c_str()); |
| 438 | } |
| 439 | curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); |
| 440 | |
| 441 | CURLcode res = curl_easy_perform(curl: curl.get()); |
| 442 | |
| 443 | if (res != CURLE_OK) { |
| 444 | std::string error_msg = curl_easy_strerror(res); |
| 445 | throw std::runtime_error("error: cannot make GET request: " + error_msg); |
| 446 | } |
| 447 | |
| 448 | long res_code; |
| 449 | curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); |
| 450 | |
| 451 | return { res_code, std::move(res_buffer) }; |
| 452 | } |
| 453 | |
| 454 | #else |
| 455 | |
// True when stdout is attached to a terminal; used to decide whether the
// interactive progress bar should be drawn.
static bool is_output_a_tty() {
#if defined(_WIN32)
    return _isatty(_fileno(stdout)) != 0;
#else
    return isatty(STDOUT_FILENO);
#endif
}
| 463 | |
// Render a simple in-place progress bar on stdout, e.g.
//   [=========>          ]  42% (100 MB / 238 MB)
// No-op when stdout is not a terminal or when the total size is unknown.
static void print_progress(size_t current, size_t total) {
    if (!is_output_a_tty()) {
        return;
    }

    // unknown total size - cannot compute a percentage
    if (!total) {
        return;
    }

    size_t width = 50;
    size_t pct = (100 * current) / total;
    size_t pos = (width * current) / total;

    // '\r' (no newline) returns the cursor to the start of the line so the
    // next call overwrites this bar in place
    std::cout << "["
              << std::string(pos, '=')
              << (pos < width ? ">" : "")
              << std::string(width - pos, ' ')
              << "] " << std::setw(3) << pct << "% ("
              << current / (1024 * 1024) << " MB / "
              << total / (1024 * 1024) << " MB)\r";
    std::cout.flush();
}
| 486 | |
// Stream `resolve_path` from the server into `path_tmp` (opened in append
// mode), resuming from `existing_size` bytes via an HTTP Range request when
// `supports_ranges` is set. `total_size` is filled in from the Content-Length
// header when previously unknown. Returns false on any HTTP or I/O error.
static bool common_pull_file(httplib::Client & cli,
                             const std::string & resolve_path,
                             const std::string & path_tmp,
                             bool supports_ranges,
                             size_t existing_size,
                             size_t & total_size) {
    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
    if (!ofs.is_open()) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    httplib::Headers headers;
    if (supports_ranges && existing_size > 0) {
        // ask only for the bytes we do not have yet
        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
    }

    std::atomic<size_t> downloaded{existing_size};

    auto res = cli.Get(resolve_path, headers,
        // response handler: validate the status before accepting any body data
        [&](const httplib::Response &response) {
            // a resume request must be answered with 206 Partial Content,
            // otherwise appending to the existing file would corrupt it
            if (existing_size > 0 && response.status != 206) {
                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
                return false;
            }
            if (existing_size == 0 && response.status != 200) {
                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
                return false;
            }
            if (total_size == 0 && response.has_header("Content-Length")) {
                try {
                    // Content-Length covers only the requested range, so add
                    // the bytes that are already on disk
                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                    total_size = existing_size + content_length;
                } catch (const std::exception &e) {
                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
                }
            }
            return true;
        },
        // content receiver: append each chunk and update the progress bar
        [&](const char *data, size_t len) {
            ofs.write(data, len);
            if (!ofs) {
                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
                return false;
            }
            downloaded += len;
            print_progress(downloaded, total_size);
            return true;
        },
        nullptr
    );

    // terminate the progress-bar line
    std::cout << "\n";

    if (!res) {
        LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1);
        return false;
    }

    return true;
}
| 548 | |
// download one single file from remote URL to local path
// - the cached file is reused when the stored etag still matches the server's
// - a partial ".downloadInProgress" file is resumed when the server supports
//   range requests, otherwise it is restarted
// - failed transfers are retried up to max_attempts with exponential backoff
static bool common_download_file_single_online(const std::string & url,
                                               const std::string & path,
                                               const std::string & bearer_token) {
    static const int max_attempts = 3;
    static const int retry_delay_seconds = 2;

    auto [cli, parts] = common_http_client(url);

    httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
    if (!bearer_token.empty()) {
        default_headers.insert({"Authorization", "Bearer " + bearer_token});
    }
    cli.set_default_headers(default_headers);

    const bool file_exists = std::filesystem::exists(path);

    std::string last_etag;
    if (file_exists) {
        last_etag = read_etag(path);
    } else {
        LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
    }

    for (int i = 0; i < max_attempts; ++i) {
        // probe the server for etag, size and range support
        auto head = cli.Head(parts.path);
        bool head_ok = head && head->status >= 200 && head->status < 300;
        if (!head_ok) {
            LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
            if (file_exists) {
                // fall back to the cached copy rather than failing outright
                LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
                return true;
            }
        }

        std::string etag;
        if (head_ok && head->has_header("ETag")) {
            etag = head->get_header_value("ETag");
        }

        size_t total_size = 0;
        if (head_ok && head->has_header("Content-Length")) {
            try {
                total_size = std::stoull(head->get_header_value("Content-Length"));
            } catch (const std::exception& e) {
                LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
            }
        }

        bool supports_ranges = false;
        if (head_ok && head->has_header("Accept-Ranges")) {
            supports_ranges = head->get_header_value("Accept-Ranges") != "none";
        }

        // an etag change means the remote file was updated: discard local state
        bool should_download_from_scratch = false;
        if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
                    last_etag.c_str(), etag.c_str());
            should_download_from_scratch = true;
        }

        if (file_exists) {
            if (!should_download_from_scratch) {
                LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
                return true;
            }
            LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
            if (remove(path.c_str()) != 0) {
                LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
                return false;
            }
        }

        const std::string path_temporary = path + ".downloadInProgress";
        size_t existing_size = 0;

        if (std::filesystem::exists(path_temporary)) {
            // resume the partial file only if the server can serve ranges and
            // the remote content has not changed
            if (supports_ranges && !should_download_from_scratch) {
                existing_size = std::filesystem::file_size(path_temporary);
            } else if (remove(path_temporary.c_str()) != 0) {
                LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
                return false;
            }
        }

        // start the download
        LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
                __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
        const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
        if (!was_pull_successful) {
            if (i + 1 < max_attempts) {
                // backoff: retry_delay_seconds^i seconds (1s, 2s, 4s, ...)
                const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
                LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
                std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
            } else {
                LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
            }
            continue;
        }

        // move the finished download into place, then persist its etag
        if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
            LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
            return false;
        }
        if (!etag.empty()) {
            write_etag(path, etag);
        }
        break;
    }

    return true;
}
| 661 | |
// Perform a GET request on `url` and return {http status code, response body}.
// `params.headers` entries are "Key: Value" strings; when `params.max_size`
// is non-zero the transfer is aborted once the buffer grows past that size
// (which surfaces as a failed request). Throws std::runtime_error when the
// request cannot be performed.
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
                                                             const common_remote_params & params) {
    auto [cli, parts] = common_http_client(url);

    httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
    for (const auto & header : params.headers) {
        // split "Key: Value" strings into header pairs
        // (note: any whitespace after the ':' is kept in the value)
        size_t pos = header.find(':');
        if (pos != std::string::npos) {
            headers.emplace(header.substr(0, pos), header.substr(pos + 1));
        } else {
            headers.emplace(header, "");
        }
    }

    if (params.timeout > 0) {
        cli.set_read_timeout(params.timeout, 0);
        cli.set_write_timeout(params.timeout, 0);
    }

    std::vector<char> buf;
    auto res = cli.Get(parts.path, headers,
        // returning false from the receiver aborts the transfer (size cap)
        [&](const char *data, size_t len) {
            buf.insert(buf.end(), data, data + len);
            return params.max_size == 0 ||
                   buf.size() <= static_cast<size_t>(params.max_size);
        },
        nullptr
    );

    if (!res) {
        throw std::runtime_error("error: cannot make GET request");
    }

    return { res->status, std::move(buf) };
}
| 697 | |
| 698 | #endif // LLAMA_USE_CURL |
| 699 | |
| 700 | static bool common_download_file_single(const std::string & url, |
| 701 | const std::string & path, |
| 702 | const std::string & bearer_token, |
| 703 | bool offline) { |
| 704 | if (!offline) { |
| 705 | return common_download_file_single_online(url, path, bearer_token); |
| 706 | } |
| 707 | |
| 708 | if (!std::filesystem::exists(p: path)) { |
| 709 | LOG_ERR("%s: required file is not available in cache (offline mode): %s\n" , __func__, path.c_str()); |
| 710 | return false; |
| 711 | } |
| 712 | |
| 713 | LOG_INF("%s: using cached file (offline mode): %s\n" , __func__, path.c_str()); |
| 714 | return true; |
| 715 | } |
| 716 | |
| 717 | // download multiple files from remote URLs to local paths |
| 718 | // the input is a vector of pairs <url, path> |
| 719 | static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) { |
| 720 | // Prepare download in parallel |
| 721 | std::vector<std::future<bool>> futures_download; |
| 722 | for (auto const & item : urls) { |
| 723 | futures_download.push_back(x: std::async(policy: std::launch::async, fn: [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool { |
| 724 | return common_download_file_single(url: it.first, path: it.second, bearer_token, offline); |
| 725 | }, args: item)); |
| 726 | } |
| 727 | |
| 728 | // Wait for all downloads to complete |
| 729 | for (auto & f : futures_download) { |
| 730 | if (!f.get()) { |
| 731 | return false; |
| 732 | } |
| 733 | } |
| 734 | |
| 735 | return true; |
| 736 | } |
| 737 | |
| 738 | bool common_download_model( |
| 739 | const common_params_model & model, |
| 740 | const std::string & bearer_token, |
| 741 | bool offline) { |
| 742 | // Basic validation of the model.url |
| 743 | if (model.url.empty()) { |
| 744 | LOG_ERR("%s: invalid model url\n" , __func__); |
| 745 | return false; |
| 746 | } |
| 747 | |
| 748 | if (!common_download_file_single(url: model.url, path: model.path, bearer_token, offline)) { |
| 749 | return false; |
| 750 | } |
| 751 | |
| 752 | // check for additional GGUFs split to download |
| 753 | int n_split = 0; |
| 754 | { |
| 755 | struct gguf_init_params gguf_params = { |
| 756 | /*.no_alloc = */ true, |
| 757 | /*.ctx = */ NULL, |
| 758 | }; |
| 759 | auto * ctx_gguf = gguf_init_from_file(fname: model.path.c_str(), params: gguf_params); |
| 760 | if (!ctx_gguf) { |
| 761 | LOG_ERR("\n%s: failed to load input GGUF from %s\n" , __func__, model.path.c_str()); |
| 762 | return false; |
| 763 | } |
| 764 | |
| 765 | auto key_n_split = gguf_find_key(ctx: ctx_gguf, key: LLM_KV_SPLIT_COUNT); |
| 766 | if (key_n_split >= 0) { |
| 767 | n_split = gguf_get_val_u16(ctx: ctx_gguf, key_id: key_n_split); |
| 768 | } |
| 769 | |
| 770 | gguf_free(ctx: ctx_gguf); |
| 771 | } |
| 772 | |
| 773 | if (n_split > 1) { |
| 774 | char split_prefix[PATH_MAX] = {0}; |
| 775 | char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0}; |
| 776 | |
| 777 | // Verify the first split file format |
| 778 | // and extract split URL and PATH prefixes |
| 779 | { |
| 780 | if (!llama_split_prefix(split_prefix, maxlen: sizeof(split_prefix), split_path: model.path.c_str(), split_no: 0, split_count: n_split)) { |
| 781 | LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n" , __func__, model.path.c_str(), n_split); |
| 782 | return false; |
| 783 | } |
| 784 | |
| 785 | if (!llama_split_prefix(split_prefix: split_url_prefix, maxlen: sizeof(split_url_prefix), split_path: model.url.c_str(), split_no: 0, split_count: n_split)) { |
| 786 | LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n" , __func__, model.url.c_str(), n_split); |
| 787 | return false; |
| 788 | } |
| 789 | } |
| 790 | |
| 791 | std::vector<std::pair<std::string, std::string>> urls; |
| 792 | for (int idx = 1; idx < n_split; idx++) { |
| 793 | char split_path[PATH_MAX] = {0}; |
| 794 | llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: split_prefix, split_no: idx, split_count: n_split); |
| 795 | |
| 796 | char split_url[LLAMA_MAX_URL_LENGTH] = {0}; |
| 797 | llama_split_path(split_path: split_url, maxlen: sizeof(split_url), path_prefix: split_url_prefix, split_no: idx, split_count: n_split); |
| 798 | |
| 799 | if (std::string(split_path) == model.path) { |
| 800 | continue; // skip the already downloaded file |
| 801 | } |
| 802 | |
| 803 | urls.push_back(x: {split_url, split_path}); |
| 804 | } |
| 805 | |
| 806 | // Download in parallel |
| 807 | common_download_file_multiple(urls, bearer_token, offline); |
| 808 | } |
| 809 | |
| 810 | return true; |
| 811 | } |
| 812 | |
| 813 | common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) { |
| 814 | auto parts = string_split<std::string>(input: hf_repo_with_tag, separator: ':'); |
| 815 | std::string tag = parts.size() > 1 ? parts.back() : "latest" ; |
| 816 | std::string hf_repo = parts[0]; |
| 817 | if (string_split<std::string>(input: hf_repo, separator: '/').size() != 2) { |
| 818 | throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n" ); |
| 819 | } |
| 820 | |
| 821 | std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag; |
| 822 | |
| 823 | // headers |
| 824 | std::vector<std::string> ; |
| 825 | headers.push_back(x: "Accept: application/json" ); |
| 826 | if (!bearer_token.empty()) { |
| 827 | headers.push_back(x: "Authorization: Bearer " + bearer_token); |
| 828 | } |
| 829 | // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response |
| 830 | // User-Agent header is already set in common_remote_get_content, no need to set it here |
| 831 | |
| 832 | // we use "=" to avoid clashing with other component, while still being allowed on windows |
| 833 | std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json" ; |
| 834 | string_replace_all(s&: cached_response_fname, search: "/" , replace: "_" ); |
| 835 | std::string cached_response_path = fs_get_cache_file(filename: cached_response_fname); |
| 836 | |
| 837 | // make the request |
| 838 | common_remote_params params; |
| 839 | params.headers = headers; |
| 840 | long res_code = 0; |
| 841 | std::string res_str; |
| 842 | bool use_cache = false; |
| 843 | if (!offline) { |
| 844 | try { |
| 845 | auto res = common_remote_get_content(url, params); |
| 846 | res_code = res.first; |
| 847 | res_str = std::string(res.second.data(), res.second.size()); |
| 848 | } catch (const std::exception & e) { |
| 849 | LOG_WRN("error: failed to get manifest at %s: %s\n" , url.c_str(), e.what()); |
| 850 | } |
| 851 | } |
| 852 | if (res_code == 0) { |
| 853 | if (std::filesystem::exists(p: cached_response_path)) { |
| 854 | LOG_WRN("trying to read manifest from cache: %s\n" , cached_response_path.c_str()); |
| 855 | res_str = read_file(fname: cached_response_path); |
| 856 | res_code = 200; |
| 857 | use_cache = true; |
| 858 | } else { |
| 859 | throw std::runtime_error( |
| 860 | offline ? "error: failed to get manifest (offline mode)" |
| 861 | : "error: failed to get manifest (check your internet connection)" ); |
| 862 | } |
| 863 | } |
| 864 | std::string ggufFile; |
| 865 | std::string mmprojFile; |
| 866 | |
| 867 | if (res_code == 200 || res_code == 304) { |
| 868 | try { |
| 869 | auto j = json::parse(i&: res_str); |
| 870 | |
| 871 | if (j.contains(key: "ggufFile" ) && j["ggufFile" ].contains(key: "rfilename" )) { |
| 872 | ggufFile = j["ggufFile" ]["rfilename" ].get<std::string>(); |
| 873 | } |
| 874 | if (j.contains(key: "mmprojFile" ) && j["mmprojFile" ].contains(key: "rfilename" )) { |
| 875 | mmprojFile = j["mmprojFile" ]["rfilename" ].get<std::string>(); |
| 876 | } |
| 877 | } catch (const std::exception & e) { |
| 878 | throw std::runtime_error(std::string("error parsing manifest JSON: " ) + e.what()); |
| 879 | } |
| 880 | if (!use_cache) { |
| 881 | // if not using cached response, update the cache file |
| 882 | write_file(fname: cached_response_path, content: res_str); |
| 883 | } |
| 884 | } else if (res_code == 401) { |
| 885 | throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token" ); |
| 886 | } else { |
| 887 | throw std::runtime_error(string_format(fmt: "error from HF API, response code: %ld, data: %s" , res_code, res_str.c_str())); |
| 888 | } |
| 889 | |
| 890 | // check response |
| 891 | if (ggufFile.empty()) { |
| 892 | throw std::runtime_error("error: model does not have ggufFile" ); |
| 893 | } |
| 894 | |
| 895 | return { .repo: hf_repo, .ggufFile: ggufFile, .mmprojFile: mmprojFile }; |
| 896 | } |
| 897 | |
| 898 | // |
| 899 | // Docker registry functions |
| 900 | // |
| 901 | |
| 902 | static std::string common_docker_get_token(const std::string & repo) { |
| 903 | std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull" ; |
| 904 | |
| 905 | common_remote_params params; |
| 906 | auto res = common_remote_get_content(url, params); |
| 907 | |
| 908 | if (res.first != 200) { |
| 909 | throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(val: res.first)); |
| 910 | } |
| 911 | |
| 912 | std::string response_str(res.second.begin(), res.second.end()); |
| 913 | nlohmann::ordered_json response = nlohmann::ordered_json::parse(i&: response_str); |
| 914 | |
| 915 | if (!response.contains(key: "token" )) { |
| 916 | throw std::runtime_error("Docker registry token response missing 'token' field" ); |
| 917 | } |
| 918 | |
| 919 | return response["token" ].get<std::string>(); |
| 920 | } |
| 921 | |
// Resolve a Docker Model Runner reference ("[<org>/]<model>[:<tag>]", e.g.
// "ai/smollm2:135M-Q4_0") to a local GGUF file path: authenticate against
// Docker Hub, fetch the image manifest, locate the GGUF layer, validate its
// digest, and download the blob into the cache directory.
// Returns the local path on success; logs and rethrows on any failure.
std::string common_docker_resolve_model(const std::string & docker) {
    // Parse ai/smollm2:135M-Q4_0
    size_t colon_pos = docker.find(c: ':');
    std::string repo, tag;
    if (colon_pos != std::string::npos) {
        repo = docker.substr(pos: 0, n: colon_pos);
        tag = docker.substr(pos: colon_pos + 1);
    } else {
        // no explicit tag -> Docker convention
        repo = docker;
        tag = "latest" ;
    }

    // ai/ is the default
    // NOTE(review): the '/' check is done on the full `docker` string rather
    // than on `repo`; equivalent in practice since tags cannot contain '/'
    size_t slash_pos = docker.find(c: '/');
    if (slash_pos == std::string::npos) {
        repo.insert(pos: 0, s: "ai/" );
    }

    LOG_INF("%s: Downloading Docker Model: %s:%s\n" , __func__, repo.c_str(), tag.c_str());
    try {
        // --- helper: digest validation ---
        // Rejects anything that is not "sha256:<64 hex chars>" before the
        // digest is interpolated into the blob URL (defense against a
        // malicious/garbled manifest), and lower-cases the hex part.
        auto validate_oci_digest = [](const std::string & digest) -> std::string {
            // Expected: algo:hex ; start with sha256 (64 hex chars)
            // You can extend this map if supporting other algorithms in future.
            static const std::regex re("^sha256:([a-fA-F0-9]{64})$" );
            std::smatch m;
            if (!std::regex_match(s: digest, m&: m, re: re)) {
                throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest);
            }
            // normalize hex to lowercase
            // begin()+7 skips the "sha256:" prefix
            std::string normalized = digest;
            std::transform(first: normalized.begin()+7, last: normalized.end(), result: normalized.begin()+7, unary_op: [](unsigned char c){
                return std::tolower(c: c);
            });
            return normalized;
        };

        std::string token = common_docker_get_token(repo); // Get authentication token

        // Get manifest
        // Accept both Docker v2 and OCI v1 manifest media types
        const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
        std::string manifest_url = url_prefix + "/manifests/" + tag;
        common_remote_params manifest_params;
        manifest_params.headers.push_back(x: "Authorization: Bearer " + token);
        manifest_params.headers.push_back(
            x: "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json" );
        auto manifest_res = common_remote_get_content(url: manifest_url, params: manifest_params);
        if (manifest_res.first != 200) {
            throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(val: manifest_res.first));
        }

        std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end());
        nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(i&: manifest_str);
        std::string gguf_digest; // Find the GGUF layer
        // First layer whose mediaType is the known GGUF type, or merely
        // contains "gguf" (forward-compat with future media-type revisions)
        if (manifest.contains(key: "layers" )) {
            for (const auto & layer : manifest["layers" ]) {
                if (layer.contains(key: "mediaType" )) {
                    std::string media_type = layer["mediaType" ].get<std::string>();
                    if (media_type == "application/vnd.docker.ai.gguf.v3" ||
                        media_type.find(s: "gguf" ) != std::string::npos) {
                        gguf_digest = layer["digest" ].get<std::string>();
                        break;
                    }
                }
            }
        }

        if (gguf_digest.empty()) {
            throw std::runtime_error("No GGUF layer found in Docker manifest" );
        }

        // Validate & normalize digest
        gguf_digest = validate_oci_digest(gguf_digest);
        LOG_DBG("%s: Using validated digest: %s\n" , __func__, gguf_digest.c_str());

        // Prepare local filename
        // e.g. "ai/smollm2" + ":135M-Q4_0" -> "ai_smollm2_135M-Q4_0.gguf"
        std::string model_filename = repo;
        std::replace(first: model_filename.begin(), last: model_filename.end(), old_value: '/', new_value: '_');
        model_filename += "_" + tag + ".gguf" ;
        std::string local_path = fs_get_cache_file(filename: model_filename);

        // Fetch the GGUF blob by digest, reusing the registry token
        const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
        if (!common_download_file_single(url: blob_url, path: local_path, bearer_token: token, offline: false)) {
            throw std::runtime_error("Failed to download Docker Model" );
        }

        LOG_INF("%s: Downloaded Docker Model to: %s\n" , __func__, local_path.c_str());
        return local_path;
    } catch (const std::exception & e) {
        // log with context, then let the caller decide how to handle it
        LOG_ERR("%s: Docker Model download failed: %s\n" , __func__, e.what());
        throw;
    }
}
| 1015 | |