llama-impl.cpp source code [llama.cpp/src/llama-impl.cpp]

1	#include "llama-impl.h"
2
3	#include "gguf.h"
4	#include "llama.h"
5
6	#include <cinttypes>
7	#include <climits>
8	#include <cstdarg>
9	#include <cstring>
10	#include <vector>
11	#include <sstream>
12
13	struct llama_logger_state {
14	ggml_log_callback log_callback = llama_log_callback_default;
15	void * log_callback_user_data = nullptr;
16	};
17
18	static llama_logger_state g_logger_state;
19
20	time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -`1` : ggml_time_us()), t_acc(t_acc) {}
21
22	time_meas::~time_meas() {
23	if (t_start_us >= `0`) {
24	t_acc += ggml_time_us() - t_start_us;
25	}
26	}
27
28	void llama_log_set(ggml_log_callback log_callback, void * user_data) {
29	ggml_log_set(log_callback, user_data);
30	g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
31	g_logger_state.log_callback_user_data = user_data;
32	}
33
34	static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
35	va_list args_copy;
36	va_copy(args_copy, args);
37	char buffer[`128`];
38	int len = vsnprintf(s: buffer, maxlen: `128`, format: format, arg: args);
39	if (len < `128`) {
40	g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
41	} else {
42	char * buffer2 = new char[len + `1`];
43	vsnprintf(s: buffer2, maxlen: len + `1`, format: format, arg: args_copy);
44	buffer2[len] = `0`;
45	g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
46	delete[] buffer2;
47	}
48	va_end(args_copy);
49	}
50
51	void llama_log_internal(ggml_log_level level, const char * format, ...) {
52	va_list args;
53	va_start(args, format);
54	llama_log_internal_v(level, format, args);
55	va_end(args);
56	}
57
58	void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
59	(void) level;
60	(void) user_data;
61	fputs(s: text, stderr);
62	fflush(stderr);
63	}
64
// Replaces every non-overlapping occurrence of `search` in `s` with `replace`,
// scanning left to right. Replacement text is never re-scanned, so the result
// is stable even when `replace` contains `search`.
//
// s:       string modified in place.
// search:  substring to look for; an empty value leaves `s` untouched.
// replace: text substituted for each occurrence.
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }

    const size_t needle_len = search.length();

    // The result is assembled in a separate string and swapped in at the end,
    // so `s` is only read while searching.
    std::string result;
    result.reserve(s.length());

    size_t cursor = 0;
    for (size_t hit = s.find(search, cursor); hit != std::string::npos; hit = s.find(search, cursor)) {
        // copy the untouched text between the previous match and this one
        result.append(s, cursor, hit - cursor);
        result.append(replace);
        cursor = hit + needle_len;
    }

    // remaining tail after the last match
    result.append(s, cursor, std::string::npos);

    s = std::move(result);
}
81
82	std::string format(const char * fmt, ...) {
83	va_list ap;
84	va_list ap2;
85	va_start(ap, fmt);
86	va_copy(ap2, ap);
87	int size = vsnprintf(NULL, maxlen: `0`, format: fmt, arg: ap);
88	GGML_ASSERT(size >= `0` && size < INT_MAX); // NOLINT
89	std::vector<char> buf(size + `1`);
90	int size2 = vsnprintf(s: buf.data(), maxlen: size + `1`, format: fmt, arg: ap2);
91	GGML_ASSERT(size2 == size);
92	va_end(ap2);
93	va_end(ap);
94	return std::string (buf.data(), size);
95	}
96
97	std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
98	char buf[`256`];
99	snprintf(s: buf, maxlen: sizeof(buf), format: "%5" PRId64, ne.at(n: `0`));
100	for (size_t i = `1`; i < ne.size(); i++) {
101	snprintf(s: buf + strlen(s: buf), maxlen: sizeof(buf) - strlen(s: buf), format: ", %5" PRId64, ne.at(n: i));
102	}
103	return buf;
104	}
105
106	std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
107	char buf[`256`];
108	snprintf(s: buf, maxlen: sizeof(buf), format: "%5" PRId64, t->ne[`0`]);
109	for (int i = `1`; i < GGML_MAX_DIMS; i++) {
110	snprintf(s: buf + strlen(s: buf), maxlen: sizeof(buf) - strlen(s: buf), format: ", %5" PRId64, t->ne[i]);
111	}
112	return buf;
113	}
114
115	static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
116	switch (type) {
117	case GGUF_TYPE_UINT8: return std::to_string(val: ((const uint8_t *)data)[i]);
118	case GGUF_TYPE_INT8: return std::to_string(val: ((const int8_t *)data)[i]);
119	case GGUF_TYPE_UINT16: return std::to_string(val: ((const uint16_t *)data)[i]);
120	case GGUF_TYPE_INT16: return std::to_string(val: ((const int16_t *)data)[i]);
121	case GGUF_TYPE_UINT32: return std::to_string(val: ((const uint32_t *)data)[i]);
122	case GGUF_TYPE_INT32: return std::to_string(val: ((const int32_t *)data)[i]);
123	case GGUF_TYPE_UINT64: return std::to_string(val: ((const uint64_t *)data)[i]);
124	case GGUF_TYPE_INT64: return std::to_string(val: ((const int64_t *)data)[i]);
125	case GGUF_TYPE_FLOAT32: return std::to_string(val: ((const float *)data)[i]);
126	case GGUF_TYPE_FLOAT64: return std::to_string(val: ((const double *)data)[i]);
127	case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
128	default: return format(fmt: "unknown type %d", type);
129	}
130	}
131
132	std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
133	const enum gguf_type type = gguf_get_kv_type(ctx: ctx_gguf, key_id: i);
134
135	switch (type) {
136	case GGUF_TYPE_STRING:
137	return gguf_get_val_str(ctx: ctx_gguf, key_id: i);
138	case GGUF_TYPE_ARRAY:
139	{
140	const enum gguf_type arr_type = gguf_get_arr_type(ctx: ctx_gguf, key_id: i);
141	int arr_n = gguf_get_arr_n(ctx: ctx_gguf, key_id: i);
142	const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx: ctx_gguf, key_id: i);
143	std::stringstream ss;
144	ss << "[";
145	for (int j = `0`; j < arr_n; j++) {
146	if (arr_type == GGUF_TYPE_STRING) {
147	std::string val = gguf_get_arr_str(ctx: ctx_gguf, key_id: i, i: j);
148	// escape quotes
149	replace_all(s&: val, search: "\\", replace: "\\\\");
150	replace_all(s&: val, search: "\"", replace: "\\\"");
151	ss << `'"'` << val << `'"'`;
152	} else if (arr_type == GGUF_TYPE_ARRAY) {
153	ss << "???";
154	} else {
155	ss << gguf_data_to_str(type: arr_type, data, i: j);
156	}
157	if (j < arr_n - `1`) {
158	ss << ", ";
159	}
160	}
161	ss << "]";
162	return ss.str();
163	}
164	default:
165	return gguf_data_to_str(type, data: gguf_get_val_data(ctx: ctx_gguf, key_id: i), i: `0`);
166	}
167	}
168

Browse the source code of llama.cpp/src/llama-impl.cpp