#pragma once

#include <string>

// Forward declaration only: this header uses common_params_model solely by
// const reference, so the full definition is not needed here.
struct common_params_model;
| 6 | |
//
// download functionality
//
| 10 | |
// Result of resolving a Hugging Face "repo[:tag]" reference to concrete files.
struct common_hf_file_res {
    std::string repo;       // repo name with ":tag" removed
    std::string ggufFile;   // value of the "ggufFile" field reported by the HF API (the GGUF model file)
    std::string mmprojFile; // NOTE(review): presumably the multimodal projector file, if any - confirm against the implementation
};
| 16 | |
// Resolve and download a model from a Docker registry.
//
// docker: the model reference to resolve
//         (NOTE(review): exact accepted format is not visible in this header - confirm against the implementation)
//
// Returns the local path to the downloaded model file.
std::string common_docker_resolve_model(const std::string & docker);
| 20 | |
| 21 | /** |
| 22 | * Allow getting the HF file from the HF repo with tag (like ollama), for example: |
| 23 | * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 |
| 24 | * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M |
| 25 | * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s |
| 26 | * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) |
| 27 | * |
| 28 | * Return pair of <repo, file> (with "repo" already having tag removed) |
| 29 | * |
| 30 | * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. |
| 31 | */ |
| 32 | common_hf_file_res common_get_hf_file( |
| 33 | const std::string & hf_repo_with_tag, |
| 34 | const std::string & bearer_token, |
| 35 | bool offline); |
| 36 | |
| 37 | // returns true if download succeeded |
| 38 | bool common_download_model( |
| 39 | const common_params_model & model, |
| 40 | const std::string & bearer_token, |
| 41 | bool offline); |
| 42 | |