#pragma once

#include "llama.h"

#include <unordered_map>
#include <string>
#include <vector>

#define LLAMA_NGRAM_MIN    1
#define LLAMA_NGRAM_MAX    4
#define LLAMA_NGRAM_STATIC 2

// Data structures to map n-grams to empirical token probabilities:

struct common_ngram {
    llama_token tokens[LLAMA_NGRAM_MAX];

    common_ngram() {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            tokens[i] = LLAMA_TOKEN_NULL;
        }
    }

    common_ngram(const llama_token * input, const int ngram_size) {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
        }
    }

    bool operator==(const common_ngram & other) const {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            if (tokens[i] != other.tokens[i]) {
                return false;
            }
        }
        return true;
    }
};
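
// Example (illustrative only, not part of the API): a 2-gram is stored with
// its trailing slots padded with LLAMA_TOKEN_NULL, so n-grams of different
// sizes share one fixed-size representation:
//
//   llama_token toks[2] = {15043, 2787};  // placeholder token ids
//   common_ngram ng(toks, 2);             // ng.tokens == {15043, 2787, LLAMA_TOKEN_NULL, LLAMA_TOKEN_NULL}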

struct common_token_hash_function {
    size_t operator()(const llama_token token) const {
        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
        return token * 11400714819323198485llu;
    }
};

struct common_ngram_hash_function {
    size_t operator()(const common_ngram & ngram) const {
        size_t hash = common_token_hash_function{}(ngram.tokens[0]);
        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
            hash ^= common_token_hash_function{}(ngram.tokens[i]);
        }
        return hash;
    }
};
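
// Minimal sketch of how the functors above plug into std::unordered_map
// (the typedefs below do exactly this wiring; shown here for illustration):
//
//   std::unordered_map<common_ngram, int, common_ngram_hash_function> counts;
//   llama_token toks[2] = {42, 7};        // placeholder token ids
//   counts[common_ngram(toks, 2)] += 1;   // well-defined because operator== matches the hash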

// token -> number of times token has been seen
typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;

// n-gram -> empirical distribution of following tokens
typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
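
// Lookup sketch (assumes `cache` was populated, e.g. via common_ngram_cache_update below;
// `ng` and `next` are placeholders):
//
//   common_ngram_cache::const_iterator part_it = cache.find(ng);
//   if (part_it != cache.end()) {
//       // how often `next` followed the n-gram `ng`:
//       common_ngram_cache_part::const_iterator token_it = part_it->second.find(next);
//       const int32_t count = token_it == part_it->second.end() ? 0 : token_it->second;
//   }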

// Update an ngram cache with tokens.
// ngram_cache: the cache to modify.
// ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.
// inp_data: the token sequence with which to update ngram_cache.
// nnew: how many new tokens have been appended to inp_data since the last call to this function.
// print_progress: whether to print progress to stderr.
//
// To get correct results, inp_data may ONLY BE APPENDED TO.
// Changes in the middle require a complete rebuild of the cache.
void common_ngram_cache_update(
    common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
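
// Usage sketch (token values are placeholders):
//
//   common_ngram_cache nc_context;
//   std::vector<llama_token> inp = {1, 15043, 2787};  // tokens seen so far
//   common_ngram_cache_update(nc_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, inp.size(), false);
//
//   inp.push_back(29892);                             // exactly one token appended since the last call
//   common_ngram_cache_update(nc_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, 1, false);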

// Try to draft tokens from ngram caches.
// inp: the tokens generated so far.
// draft: the token sequence to draft. Expected to initially contain the previously sampled token.
// n_draft: maximum number of tokens to add to draft.
// ngram_min/ngram_max: the min/max size of the ngrams in nc_context and nc_dynamic.
// nc_context: ngram cache based on the current context.
// nc_dynamic: ngram cache based on previous user generations.
// nc_static: ngram cache generated from a large text corpus, used for validation.
void common_ngram_cache_draft(
    std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
    common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
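
// Usage sketch: draft up to 16 tokens following the previously sampled token
// (`last_sampled_token` and the caches are placeholders):
//
//   std::vector<llama_token> draft = {last_sampled_token};  // must start with the previously sampled token
//   common_ngram_cache_draft(inp, draft, 16, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, nc_context, nc_dynamic, nc_static);
//   // draft[1..] now holds whatever speculative continuation was found (possibly none)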

// Save an ngram cache to a file.
// ngram_cache: the ngram cache to save.
// filename: the path under which to save the ngram cache.
void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);

// Load an ngram cache saved with common_ngram_cache_save.
// filename: the path from which to load the ngram cache.
// returns: an ngram cache containing the information saved to filename.
common_ngram_cache common_ngram_cache_load(std::string & filename);
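
// Round-trip sketch (the filename is a placeholder):
//
//   std::string filename = "ngrams_dynamic.bin";
//   common_ngram_cache_save(nc_dynamic, filename);
//   common_ngram_cache nc_loaded = common_ngram_cache_load(filename);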

// Merge two ngram caches.
// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
// ngram_cache_add: the ngram cache to add to ngram_cache_target.
void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);
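
// Usage sketch: fold the information from a per-context cache into the
// persistent dynamic cache (nc_context and nc_dynamic are placeholders):
//
//   common_ngram_cache_merge(nc_dynamic, nc_context);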