| 1 | #include "ggml.h" |
| 2 | #include "gguf.h" |
| 3 | #include "llama.h" |
| 4 | #include "common.h" |
| 5 | |
| 6 | #include <algorithm> |
| 7 | #include <cinttypes> |
| 8 | #include <climits> |
| 9 | #include <cstdio> |
| 10 | #include <cstdlib> |
| 11 | #include <stdexcept> |
| 12 | #include <cstring> |
| 13 | #include <fstream> |
| 14 | #include <string> |
| 15 | #include <vector> |
| 16 | |
| 17 | #if defined(_WIN32) |
| 18 | #include <windows.h> |
| 19 | #ifndef PATH_MAX |
| 20 | #define PATH_MAX MAX_PATH |
| 21 | #endif |
| 22 | #include <io.h> |
| 23 | #endif |
| 24 | |
// top-level operation requested on the command line
enum split_operation : uint8_t {
    OP_NONE,  // not specified yet (defaults to OP_SPLIT after parsing)
    OP_SPLIT, // split one GGUF into multiple shards
    OP_MERGE, // merge multiple shards back into one GGUF
};
| 30 | |
// criterion used to decide where to cut a split
enum split_mode : uint8_t {
    MODE_NONE,   // not specified yet (defaults to MODE_TENSOR after parsing)
    MODE_TENSOR, // cap the number of tensors per shard (--split-max-tensors)
    MODE_SIZE,   // cap the tensor-data byte size per shard (--split-max-size)
};
| 36 | |
// parsed command-line options for the gguf-split tool
struct split_params {
    split_operation operation = OP_NONE; // --split / --merge
    split_mode mode = MODE_NONE;         // --split-max-tensors / --split-max-size
    size_t n_bytes_split = 0;            // max tensor bytes per shard (MODE_SIZE)
    int n_split_tensors = 128;           // max tensors per shard (MODE_TENSOR)
    std::string input;                   // input GGUF path (first shard for --merge)
    std::string output;                  // output path / shard prefix
    bool no_tensor_first_split = false;  // keep the first shard metadata-only
    bool dry_run = false;                // print the plan without writing files
};
| 47 | |
| 48 | static void split_print_usage(const char * executable) { |
| 49 | const split_params default_params; |
| 50 | printf(format: "\n" ); |
| 51 | printf(format: "usage: %s [options] GGUF_IN GGUF_OUT\n" , executable); |
| 52 | printf(format: "\n" ); |
| 53 | printf(format: "Apply a GGUF operation on IN to OUT." ); |
| 54 | printf(format: "\n" ); |
| 55 | printf(format: "options:\n" ); |
| 56 | printf(format: " -h, --help show this help message and exit\n" ); |
| 57 | printf(format: " --version show version and build info\n" ); |
| 58 | printf(format: " --split split GGUF to multiple GGUF (enabled by default)\n" ); |
| 59 | printf(format: " --merge merge multiple GGUF to a single GGUF\n" ); |
| 60 | printf(format: " --split-max-tensors max tensors in each split (default: %d)\n" , default_params.n_split_tensors); |
| 61 | printf(format: " --split-max-size N(M|G) max size per split\n" ); |
| 62 | printf(format: " --no-tensor-first-split do not add tensors to the first split (disabled by default)\n" ); |
| 63 | printf(format: " --dry-run only print out a split plan and exit, without writing any new files\n" ); |
| 64 | printf(format: "\n" ); |
| 65 | } |
| 66 | |
| 67 | // return convert string, for example "128M" or "4G" to number of bytes |
| 68 | static size_t split_str_to_n_bytes(std::string str) { |
| 69 | size_t n_bytes = 0; |
| 70 | int n; |
| 71 | if (str.back() == 'M') { |
| 72 | sscanf(s: str.c_str(), format: "%d" , &n); |
| 73 | n_bytes = (size_t)n * 1000 * 1000; // megabytes |
| 74 | } else if (str.back() == 'G') { |
| 75 | sscanf(s: str.c_str(), format: "%d" , &n); |
| 76 | n_bytes = (size_t)n * 1000 * 1000 * 1000; // gigabytes |
| 77 | } else { |
| 78 | throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back())); |
| 79 | } |
| 80 | if (n <= 0) { |
| 81 | throw std::invalid_argument("error: size must be a positive value" ); |
| 82 | } |
| 83 | return n_bytes; |
| 84 | } |
| 85 | |
| 86 | static void split_params_parse_ex(int argc, const char ** argv, split_params & params) { |
| 87 | std::string arg; |
| 88 | const std::string arg_prefix = "--" ; |
| 89 | bool invalid_param = false; |
| 90 | |
| 91 | int arg_idx = 1; |
| 92 | for (; arg_idx < argc && strncmp(s1: argv[arg_idx], s2: "--" , n: 2) == 0; arg_idx++) { |
| 93 | arg = argv[arg_idx]; |
| 94 | if (arg.compare(pos: 0, n: arg_prefix.size(), str: arg_prefix) == 0) { |
| 95 | std::replace(first: arg.begin(), last: arg.end(), old_value: '_', new_value: '-'); |
| 96 | } |
| 97 | |
| 98 | bool arg_found = false; |
| 99 | if (arg == "-h" || arg == "--help" ) { |
| 100 | split_print_usage(executable: argv[0]); |
| 101 | exit(status: 0); |
| 102 | } else if (arg == "--version" ) { |
| 103 | fprintf(stderr, format: "version: %d (%s)\n" , LLAMA_BUILD_NUMBER, LLAMA_COMMIT); |
| 104 | fprintf(stderr, format: "built with %s for %s\n" , LLAMA_COMPILER, LLAMA_BUILD_TARGET); |
| 105 | exit(status: 0); |
| 106 | } else if (arg == "--dry-run" ) { |
| 107 | arg_found = true; |
| 108 | params.dry_run = true; |
| 109 | } else if (arg == "--no-tensor-first-split" ) { |
| 110 | arg_found = true; |
| 111 | params.no_tensor_first_split = true; |
| 112 | } else if (arg == "--merge" ) { |
| 113 | arg_found = true; |
| 114 | if (params.operation != OP_NONE && params.operation != OP_MERGE) { |
| 115 | throw std::invalid_argument("error: either --split or --merge can be specified, but not both" ); |
| 116 | } |
| 117 | params.operation = OP_MERGE; |
| 118 | } else if (arg == "--split" ) { |
| 119 | arg_found = true; |
| 120 | if (params.operation != OP_NONE && params.operation != OP_SPLIT) { |
| 121 | throw std::invalid_argument("error: either --split or --merge can be specified, but not both" ); |
| 122 | } |
| 123 | params.operation = OP_SPLIT; |
| 124 | } else if (arg == "--split-max-tensors" ) { |
| 125 | if (++arg_idx >= argc) { |
| 126 | invalid_param = true; |
| 127 | break; |
| 128 | } |
| 129 | arg_found = true; |
| 130 | if (params.mode != MODE_NONE && params.mode != MODE_TENSOR) { |
| 131 | throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both" ); |
| 132 | } |
| 133 | params.mode = MODE_TENSOR; |
| 134 | params.n_split_tensors = atoi(nptr: argv[arg_idx]); |
| 135 | } else if (arg == "--split-max-size" ) { |
| 136 | if (++arg_idx >= argc) { |
| 137 | invalid_param = true; |
| 138 | break; |
| 139 | } |
| 140 | arg_found = true; |
| 141 | if (params.mode != MODE_NONE && params.mode != MODE_SIZE) { |
| 142 | throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both" ); |
| 143 | } |
| 144 | params.mode = MODE_SIZE; |
| 145 | params.n_bytes_split = split_str_to_n_bytes(str: argv[arg_idx]); |
| 146 | } |
| 147 | |
| 148 | if (!arg_found) { |
| 149 | throw std::invalid_argument("error: unknown argument: " + arg); |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | // the operation is split if not specified |
| 154 | if (params.operation == OP_NONE) { |
| 155 | params.operation = OP_SPLIT; |
| 156 | } |
| 157 | // the split mode is by tensor if not specified |
| 158 | if (params.mode == MODE_NONE) { |
| 159 | params.mode = MODE_TENSOR; |
| 160 | } |
| 161 | |
| 162 | if (invalid_param) { |
| 163 | throw std::invalid_argument("error: invalid parameter for argument: " + arg); |
| 164 | } |
| 165 | |
| 166 | if (argc - arg_idx != 2) { |
| 167 | throw std::invalid_argument("error: bad arguments" ); |
| 168 | } |
| 169 | |
| 170 | params.input = argv[arg_idx++]; |
| 171 | params.output = argv[arg_idx++]; |
| 172 | } |
| 173 | |
| 174 | static bool split_params_parse(int argc, const char ** argv, split_params & params) { |
| 175 | bool result = true; |
| 176 | try { |
| 177 | split_params_parse_ex(argc, argv, params); |
| 178 | } |
| 179 | catch (const std::invalid_argument & ex) { |
| 180 | fprintf(stderr, format: "%s\n" , ex.what()); |
| 181 | split_print_usage(executable: argv[0]); |
| 182 | exit(EXIT_FAILURE); |
| 183 | } |
| 184 | return result; |
| 185 | } |
| 186 | |
// Write n zero bytes to `file` (used for alignment padding and as a
// placeholder for metadata that is rewritten later).
static void zeros(std::ofstream & file, size_t n) {
    // write in chunks instead of one byte at a time to avoid n stream calls
    static const char buf[1024] = {0};
    while (n > 0) {
        const size_t n_write = n < sizeof(buf) ? n : sizeof(buf);
        file.write(buf, n_write);
        n -= n_write;
    }
}
| 193 | |
| 194 | struct split_strategy { |
| 195 | const split_params params; |
| 196 | std::ifstream & f_input; |
| 197 | struct gguf_context * ctx_gguf; |
| 198 | struct ggml_context * ctx_meta = NULL; |
| 199 | const int n_tensors; |
| 200 | |
| 201 | // one ctx_out per one output file |
| 202 | std::vector<struct gguf_context *> ctx_outs; |
| 203 | |
| 204 | // temporary buffer for reading in tensor data |
| 205 | std::vector<uint8_t> read_buf; |
| 206 | |
| 207 | split_strategy(const split_params & params, |
| 208 | std::ifstream & f_input, |
| 209 | struct gguf_context * ctx_gguf, |
| 210 | struct ggml_context * ctx_meta) : |
| 211 | params(params), |
| 212 | f_input(f_input), |
| 213 | ctx_gguf(ctx_gguf), |
| 214 | ctx_meta(ctx_meta), |
| 215 | n_tensors(gguf_get_n_tensors(ctx: ctx_gguf)) { |
| 216 | |
| 217 | // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits |
| 218 | int i_split = -1; |
| 219 | struct gguf_context * ctx_out = NULL; |
| 220 | auto new_ctx_out = [&](bool allow_no_tensors) { |
| 221 | i_split++; |
| 222 | if (ctx_out != NULL) { |
| 223 | if (gguf_get_n_tensors(ctx: ctx_out) == 0 && !allow_no_tensors) { |
| 224 | fprintf(stderr, format: "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n" ); |
| 225 | exit(EXIT_FAILURE); |
| 226 | } |
| 227 | ctx_outs.push_back(x: ctx_out); |
| 228 | } |
| 229 | ctx_out = gguf_init_empty(); |
| 230 | // Save all metadata in first split only |
| 231 | if (i_split == 0) { |
| 232 | gguf_set_kv(ctx: ctx_out, src: ctx_gguf); |
| 233 | } |
| 234 | gguf_set_val_u16(ctx: ctx_out, key: LLM_KV_SPLIT_NO, val: i_split); |
| 235 | gguf_set_val_u16(ctx: ctx_out, key: LLM_KV_SPLIT_COUNT, val: 0); // placeholder |
| 236 | gguf_set_val_i32(ctx: ctx_out, key: LLM_KV_SPLIT_TENSORS_COUNT, val: n_tensors); |
| 237 | }; |
| 238 | |
| 239 | // initialize ctx_out for the first split |
| 240 | new_ctx_out(false); |
| 241 | |
| 242 | // skip first split if no_tensor_first_split is set |
| 243 | if (params.no_tensor_first_split) { |
| 244 | new_ctx_out(true); |
| 245 | } |
| 246 | |
| 247 | // process tensors one by one |
| 248 | size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata) |
| 249 | for (int i = 0; i < n_tensors; ++i) { |
| 250 | struct ggml_tensor * t = ggml_get_tensor(ctx: ctx_meta, name: gguf_get_tensor_name(ctx: ctx_gguf, tensor_id: i)); |
| 251 | // calculate the "imaginary" size = the current size + next tensor size |
| 252 | size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT); |
| 253 | size_t next_tensors_size = curr_tensors_size + n_bytes; |
| 254 | if (should_split(i_tensor: i, next_size: next_tensors_size)) { |
| 255 | new_ctx_out(false); |
| 256 | curr_tensors_size = n_bytes; |
| 257 | } else { |
| 258 | curr_tensors_size = next_tensors_size; |
| 259 | } |
| 260 | gguf_add_tensor(ctx: ctx_out, tensor: t); |
| 261 | } |
| 262 | |
| 263 | // push the last ctx_out |
| 264 | ctx_outs.push_back(x: ctx_out); |
| 265 | |
| 266 | // set the correct n_split for all ctx_out |
| 267 | for (auto & ctx : ctx_outs) { |
| 268 | gguf_set_val_u16(ctx, key: LLM_KV_SPLIT_COUNT, val: ctx_outs.size()); |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | ~split_strategy() { |
| 273 | for (auto & ctx_out : ctx_outs) { |
| 274 | gguf_free(ctx: ctx_out); |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | bool should_split(int i_tensor, size_t next_size) { |
| 279 | if (params.mode == MODE_SIZE) { |
| 280 | // split by max size per file |
| 281 | return next_size > params.n_bytes_split; |
| 282 | } else if (params.mode == MODE_TENSOR) { |
| 283 | // split by number of tensors per file |
| 284 | return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0; |
| 285 | } |
| 286 | // should never happen |
| 287 | GGML_ABORT("invalid mode" ); |
| 288 | } |
| 289 | |
| 290 | void print_info() { |
| 291 | printf(format: "n_split: %zu\n" , ctx_outs.size()); |
| 292 | int i_split = 0; |
| 293 | for (auto & ctx_out : ctx_outs) { |
| 294 | // re-calculate the real gguf size for each split (= metadata size + total size of all tensors) |
| 295 | size_t total_size = gguf_get_meta_size(ctx: ctx_out); |
| 296 | for (int i = 0; i < gguf_get_n_tensors(ctx: ctx_out); ++i) { |
| 297 | struct ggml_tensor * t = ggml_get_tensor(ctx: ctx_meta, name: gguf_get_tensor_name(ctx: ctx_out, tensor_id: i)); |
| 298 | total_size += ggml_nbytes(tensor: t); |
| 299 | } |
| 300 | total_size = total_size / 1000 / 1000; // convert to megabytes |
| 301 | printf(format: "split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n" , i_split + 1, gguf_get_n_tensors(ctx: ctx_out), total_size); |
| 302 | i_split++; |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | void write() { |
| 307 | int i_split = 0; |
| 308 | int n_split = ctx_outs.size(); |
| 309 | for (auto & ctx_out : ctx_outs) { |
| 310 | // construct file path |
| 311 | char split_path[PATH_MAX] = {0}; |
| 312 | llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: params.output.c_str(), split_no: i_split, split_count: n_split); |
| 313 | |
| 314 | // open the output file |
| 315 | printf(format: "Writing file %s ... " , split_path); |
| 316 | fflush(stdout); |
| 317 | std::ofstream fout = std::ofstream(split_path, std::ios::binary); |
| 318 | fout.exceptions(except: std::ofstream::failbit); // fail fast on write errors |
| 319 | |
| 320 | // write metadata |
| 321 | std::vector<uint8_t> data(gguf_get_meta_size(ctx: ctx_out)); |
| 322 | gguf_get_meta_data(ctx: ctx_out, data: data.data()); |
| 323 | fout.write(s: (const char *)data.data(), n: data.size()); |
| 324 | |
| 325 | // write tensors |
| 326 | for (int i = 0; i < gguf_get_n_tensors(ctx: ctx_out); ++i) { |
| 327 | // read tensor meta and prepare buffer |
| 328 | const char * t_name = gguf_get_tensor_name(ctx: ctx_out, tensor_id: i); |
| 329 | struct ggml_tensor * t = ggml_get_tensor(ctx: ctx_meta, name: t_name); |
| 330 | auto n_bytes = ggml_nbytes(tensor: t); |
| 331 | read_buf.resize(new_size: n_bytes); |
| 332 | |
| 333 | // calculate offset |
| 334 | auto i_tensor_in = gguf_find_tensor(ctx: ctx_gguf, name: t_name); // idx of tensor in the input file |
| 335 | auto offset = gguf_get_data_offset(ctx: ctx_gguf) + gguf_get_tensor_offset(ctx: ctx_gguf, tensor_id: i_tensor_in); |
| 336 | |
| 337 | // copy tensor from input to output file |
| 338 | copy_file_to_file(f_in&: f_input, f_out&: fout, in_offset: offset, len: n_bytes); |
| 339 | zeros(file&: fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); |
| 340 | } |
| 341 | |
| 342 | printf(format: "done\n" ); |
| 343 | // close the file |
| 344 | fout.close(); |
| 345 | i_split++; |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) { |
| 350 | // TODO: detect OS and use copy_file_range() here for better performance |
| 351 | if (read_buf.size() < len) { |
| 352 | read_buf.resize(new_size: len); |
| 353 | } |
| 354 | f_in.seekg(in_offset); |
| 355 | f_in.read(s: (char *)read_buf.data(), n: len); |
| 356 | f_out.write(s: (const char *)read_buf.data(), n: len); |
| 357 | } |
| 358 | }; |
| 359 | |
| 360 | static void gguf_split(const split_params & split_params) { |
| 361 | struct ggml_context * ctx_meta = NULL; |
| 362 | |
| 363 | struct gguf_init_params params = { |
| 364 | /*.no_alloc = */ true, |
| 365 | /*.ctx = */ &ctx_meta, |
| 366 | }; |
| 367 | |
| 368 | std::ifstream f_input(split_params.input.c_str(), std::ios::binary); |
| 369 | if (!f_input.is_open()) { |
| 370 | fprintf(stderr, format: "%s: failed to open input GGUF from %s\n" , __func__, split_params.input.c_str()); |
| 371 | exit(EXIT_FAILURE); |
| 372 | } |
| 373 | |
| 374 | auto * ctx_gguf = gguf_init_from_file(fname: split_params.input.c_str(), params); |
| 375 | if (!ctx_gguf) { |
| 376 | fprintf(stderr, format: "%s: failed to load input GGUF from %s\n" , __func__, split_params.input.c_str()); |
| 377 | exit(EXIT_FAILURE); |
| 378 | } |
| 379 | |
| 380 | // prepare the strategy |
| 381 | split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta); |
| 382 | int n_split = strategy.ctx_outs.size(); |
| 383 | strategy.print_info(); |
| 384 | |
| 385 | if (!split_params.dry_run) { |
| 386 | // write all output splits |
| 387 | strategy.write(); |
| 388 | } |
| 389 | |
| 390 | // done, clean up |
| 391 | gguf_free(ctx: ctx_gguf); |
| 392 | f_input.close(); |
| 393 | |
| 394 | fprintf(stderr, format: "%s: %d gguf split written with a total of %d tensors.\n" , |
| 395 | __func__, n_split, strategy.n_tensors); |
| 396 | } |
| 397 | |
| 398 | static void gguf_merge(const split_params & split_params) { |
| 399 | fprintf(stderr, format: "%s: %s -> %s\n" , |
| 400 | __func__, split_params.input.c_str(), |
| 401 | split_params.output.c_str()); |
| 402 | int n_split = 1; |
| 403 | int total_tensors = 0; |
| 404 | |
| 405 | // avoid overwriting existing output file |
| 406 | if (std::ifstream(split_params.output.c_str())) { |
| 407 | fprintf(stderr, format: "%s: output file %s already exists\n" , __func__, split_params.output.c_str()); |
| 408 | exit(EXIT_FAILURE); |
| 409 | } |
| 410 | |
| 411 | |
| 412 | auto * ctx_out = gguf_init_empty(); |
| 413 | |
| 414 | std::vector<uint8_t> read_data; |
| 415 | std::vector<ggml_context *> ctx_metas; |
| 416 | std::vector<gguf_context *> ctx_ggufs; |
| 417 | |
| 418 | char split_path[PATH_MAX] = {0}; |
| 419 | strncpy(dest: split_path, src: split_params.input.c_str(), n: sizeof(split_path) - 1); |
| 420 | char split_prefix[PATH_MAX] = {0}; |
| 421 | |
| 422 | // First pass to find KV and tensors metadata |
| 423 | for (int i_split = 0; i_split < n_split; i_split++) { |
| 424 | struct ggml_context * ctx_meta = NULL; |
| 425 | |
| 426 | struct gguf_init_params params = { |
| 427 | /*.no_alloc = */ true, |
| 428 | /*.ctx = */ &ctx_meta, |
| 429 | }; |
| 430 | |
| 431 | if (i_split > 0) { |
| 432 | llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: split_prefix, split_no: i_split, split_count: n_split); |
| 433 | } |
| 434 | fprintf(stderr, format: "%s: reading metadata %s ..." , __func__, split_path); |
| 435 | |
| 436 | auto * ctx_gguf = gguf_init_from_file(fname: split_path, params); |
| 437 | if (!ctx_gguf) { |
| 438 | fprintf(stderr, format: "\n%s: failed to load input GGUF from %s\n" , __func__, split_params.input.c_str()); |
| 439 | exit(EXIT_FAILURE); |
| 440 | } |
| 441 | ctx_ggufs.push_back(x: ctx_gguf); |
| 442 | ctx_metas.push_back(x: ctx_meta); |
| 443 | |
| 444 | if (i_split == 0) { |
| 445 | auto key_n_split = gguf_find_key(ctx: ctx_gguf, key: LLM_KV_SPLIT_COUNT); |
| 446 | if (key_n_split < 0) { |
| 447 | fprintf(stderr, |
| 448 | format: "\n%s: input file does not contain %s metadata\n" , |
| 449 | __func__, |
| 450 | LLM_KV_SPLIT_COUNT); |
| 451 | gguf_free(ctx: ctx_gguf); |
| 452 | ggml_free(ctx: ctx_meta); |
| 453 | gguf_free(ctx: ctx_out); |
| 454 | exit(EXIT_FAILURE); |
| 455 | } |
| 456 | |
| 457 | n_split = gguf_get_val_u16(ctx: ctx_gguf, key_id: key_n_split); |
| 458 | if (n_split < 1) { |
| 459 | fprintf(stderr, |
| 460 | format: "\n%s: input file does not contain a valid split count %d\n" , |
| 461 | __func__, |
| 462 | n_split); |
| 463 | gguf_free(ctx: ctx_gguf); |
| 464 | ggml_free(ctx: ctx_meta); |
| 465 | gguf_free(ctx: ctx_out); |
| 466 | exit(EXIT_FAILURE); |
| 467 | } |
| 468 | |
| 469 | // Verify the file naming and extract split_prefix |
| 470 | if (!llama_split_prefix(split_prefix, maxlen: sizeof (split_prefix), split_path, split_no: i_split, split_count: n_split)) { |
| 471 | fprintf(stderr, format: "\n%s: unexpected input file name: %s" |
| 472 | " i_split=%d" |
| 473 | " n_split=%d\n" , __func__, |
| 474 | split_path, i_split, n_split); |
| 475 | gguf_free(ctx: ctx_gguf); |
| 476 | ggml_free(ctx: ctx_meta); |
| 477 | gguf_free(ctx: ctx_out); |
| 478 | exit(EXIT_FAILURE); |
| 479 | } |
| 480 | |
| 481 | // Do not trigger merge if we try to merge again the output |
| 482 | gguf_set_val_u16(ctx: ctx_gguf, key: LLM_KV_SPLIT_COUNT, val: 0); |
| 483 | |
| 484 | // Set metadata from the first split |
| 485 | gguf_set_kv(ctx: ctx_out, src: ctx_gguf); |
| 486 | } |
| 487 | |
| 488 | auto n_tensors = gguf_get_n_tensors(ctx: ctx_gguf); |
| 489 | for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) { |
| 490 | const char * t_name = gguf_get_tensor_name(ctx: ctx_gguf, tensor_id: i_tensor); |
| 491 | struct ggml_tensor * t = ggml_get_tensor(ctx: ctx_meta, name: t_name); |
| 492 | gguf_add_tensor(ctx: ctx_out, tensor: t); |
| 493 | } |
| 494 | total_tensors += n_tensors; |
| 495 | |
| 496 | fprintf(stderr, format: "\033[3Ddone\n" ); |
| 497 | } |
| 498 | std::ofstream fout; |
| 499 | if (!split_params.dry_run) { |
| 500 | fout.open(s: split_params.output.c_str(), mode: std::ios::binary); |
| 501 | fout.exceptions(except: std::ofstream::failbit); // fail fast on write errors |
| 502 | // placeholder for the meta data |
| 503 | auto meta_size = gguf_get_meta_size(ctx: ctx_out); |
| 504 | ::zeros(file&: fout, n: meta_size); |
| 505 | } |
| 506 | |
| 507 | // Write tensors data |
| 508 | for (int i_split = 0; i_split < n_split; i_split++) { |
| 509 | llama_split_path(split_path, maxlen: sizeof(split_path), path_prefix: split_prefix, split_no: i_split, split_count: n_split); |
| 510 | std::ifstream f_input(split_path, std::ios::binary); |
| 511 | if (!f_input.is_open()) { |
| 512 | fprintf(stderr, format: "%s: failed to open input GGUF from %s\n" , __func__, split_path); |
| 513 | for (uint32_t i = 0; i < ctx_ggufs.size(); i++) { |
| 514 | gguf_free(ctx: ctx_ggufs[i]); |
| 515 | ggml_free(ctx: ctx_metas[i]); |
| 516 | } |
| 517 | gguf_free(ctx: ctx_out); |
| 518 | if (!split_params.dry_run) { |
| 519 | fout.close(); |
| 520 | } |
| 521 | exit(EXIT_FAILURE); |
| 522 | } |
| 523 | fprintf(stderr, format: "%s: writing tensors %s ..." , __func__, split_path); |
| 524 | |
| 525 | auto * ctx_gguf = ctx_ggufs[i_split]; |
| 526 | auto * ctx_meta = ctx_metas[i_split]; |
| 527 | |
| 528 | auto n_tensors = gguf_get_n_tensors(ctx: ctx_gguf); |
| 529 | for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) { |
| 530 | const char * t_name = gguf_get_tensor_name(ctx: ctx_gguf, tensor_id: i_tensor); |
| 531 | struct ggml_tensor * t = ggml_get_tensor(ctx: ctx_meta, name: t_name); |
| 532 | |
| 533 | auto n_bytes = ggml_nbytes(tensor: t); |
| 534 | |
| 535 | if (read_data.size() < n_bytes) { |
| 536 | read_data.resize(new_size: n_bytes); |
| 537 | } |
| 538 | |
| 539 | auto offset = gguf_get_data_offset(ctx: ctx_gguf) + gguf_get_tensor_offset(ctx: ctx_gguf, tensor_id: i_tensor); |
| 540 | f_input.seekg(offset); |
| 541 | f_input.read(s: (char *)read_data.data(), n: n_bytes); |
| 542 | if (!split_params.dry_run) { |
| 543 | // write tensor data + padding |
| 544 | fout.write(s: (const char *)read_data.data(), n: n_bytes); |
| 545 | zeros(file&: fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes); |
| 546 | } |
| 547 | } |
| 548 | |
| 549 | gguf_free(ctx: ctx_gguf); |
| 550 | ggml_free(ctx: ctx_meta); |
| 551 | f_input.close(); |
| 552 | fprintf(stderr, format: "\033[3Ddone\n" ); |
| 553 | } |
| 554 | |
| 555 | if (!split_params.dry_run) { |
| 556 | // go back to beginning of file and write the updated metadata |
| 557 | fout.seekp(0); |
| 558 | std::vector<uint8_t> data(gguf_get_meta_size(ctx: ctx_out)); |
| 559 | gguf_get_meta_data(ctx: ctx_out, data: data.data()); |
| 560 | fout.write(s: (const char *)data.data(), n: data.size()); |
| 561 | fout.close(); |
| 562 | } |
| 563 | gguf_free(ctx: ctx_out); |
| 564 | |
| 565 | fprintf(stderr, format: "%s: %s merged from %d split with %d tensors.\n" , |
| 566 | __func__, split_params.output.c_str(), n_split, total_tensors); |
| 567 | } |
| 568 | |
| 569 | int main(int argc, const char ** argv) { |
| 570 | split_params params; |
| 571 | split_params_parse(argc, argv, params); |
| 572 | |
| 573 | switch (params.operation) { |
| 574 | case OP_SPLIT: gguf_split(split_params: params); |
| 575 | break; |
| 576 | case OP_MERGE: gguf_merge(split_params: params); |
| 577 | break; |
| 578 | default: split_print_usage(executable: argv[0]); |
| 579 | exit(EXIT_FAILURE); |
| 580 | } |
| 581 | |
| 582 | return 0; |
| 583 | } |
| 584 | |