#include "ggml.h"
#include "llama.h"
#include "common.h"
#include "ngram-cache.h"

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Print a one-line description of the tool followed by its command-line
// syntax to stderr. argv0 is substituted into the usage line as the program name.
static void print_usage(char* argv0) {
    fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
    fprintf(stderr, "Usage: %s [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n", argv0);
}

| 19 | int main(int argc, char ** argv){ |
| 20 | if (argc < 3) { |
| 21 | print_usage(argv0: argv[0]); |
| 22 | exit(status: 1); |
| 23 | } |
| 24 | |
| 25 | std::vector<std::string> args; |
| 26 | args.resize(new_size: argc-1); |
| 27 | for (int i = 0; i < argc-1; ++i) { |
| 28 | args[i] = argv[i+1]; |
| 29 | if (args[i] == "-h" || args[i] == "--help" ) { |
| 30 | print_usage(argv0: argv[0]); |
| 31 | exit(status: 0); |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | fprintf(stderr, format: "lookup-merge: loading file %s\n" , args[0].c_str()); |
| 36 | common_ngram_cache ngram_cache_merged = common_ngram_cache_load(filename&: args[0]); |
| 37 | |
| 38 | for (size_t i = 1; i < args.size()-1; ++i) { |
| 39 | fprintf(stderr, format: "lookup-merge: loading file %s\n" , args[i].c_str()); |
| 40 | common_ngram_cache ngram_cache = common_ngram_cache_load(filename&: args[i]); |
| 41 | |
| 42 | common_ngram_cache_merge(ngram_cache_target&: ngram_cache_merged, ngram_cache_add&: ngram_cache); |
| 43 | } |
| 44 | |
| 45 | fprintf(stderr, format: "lookup-merge: saving file %s\n" , args.back().c_str()); |
| 46 | common_ngram_cache_save(ngram_cache&: ngram_cache_merged, filename&: args.back()); |
| 47 | } |
| 48 | |