1#include "llama-impl.h"
2
3#include "gguf.h"
4#include "llama.h"
5
6#include <cinttypes>
7#include <climits>
8#include <cstdarg>
9#include <cstring>
10#include <vector>
11#include <sstream>
12
13struct llama_logger_state {
14 ggml_log_callback log_callback = llama_log_callback_default;
15 void * log_callback_user_data = nullptr;
16};
17
18static llama_logger_state g_logger_state;
19
20time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}
21
22time_meas::~time_meas() {
23 if (t_start_us >= 0) {
24 t_acc += ggml_time_us() - t_start_us;
25 }
26 }
27
28void llama_log_set(ggml_log_callback log_callback, void * user_data) {
29 ggml_log_set(log_callback, user_data);
30 g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
31 g_logger_state.log_callback_user_data = user_data;
32}
33
34static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) {
35 va_list args_copy;
36 va_copy(args_copy, args);
37 char buffer[128];
38 int len = vsnprintf(s: buffer, maxlen: 128, format: format, arg: args);
39 if (len < 128) {
40 g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
41 } else {
42 char * buffer2 = new char[len + 1];
43 vsnprintf(s: buffer2, maxlen: len + 1, format: format, arg: args_copy);
44 buffer2[len] = 0;
45 g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
46 delete[] buffer2;
47 }
48 va_end(args_copy);
49}
50
51void llama_log_internal(ggml_log_level level, const char * format, ...) {
52 va_list args;
53 va_start(args, format);
54 llama_log_internal_v(level, format, args);
55 va_end(args);
56}
57
58void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
59 (void) level;
60 (void) user_data;
61 fputs(s: text, stderr);
62 fflush(stderr);
63}
64
// Replaces every non-overlapping occurrence of `search` in `s` with `replace`.
//
// Matching proceeds left to right over the original text, so text inserted
// by a replacement is never rescanned. An empty `search` is a no-op, and `s`
// is left untouched (no reallocation) when `search` does not occur at all.
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }

    size_t pos = s.find(search);
    if (pos == std::string::npos) {
        return; // nothing to replace
    }

    std::string builder;
    builder.reserve(s.length());

    size_t last_pos = 0;
    do {
        // copy the unmatched stretch, then the replacement
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
        pos = s.find(search, last_pos);
    } while (pos != std::string::npos);

    // tail after the last match
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}
81
82std::string format(const char * fmt, ...) {
83 va_list ap;
84 va_list ap2;
85 va_start(ap, fmt);
86 va_copy(ap2, ap);
87 int size = vsnprintf(NULL, maxlen: 0, format: fmt, arg: ap);
88 GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
89 std::vector<char> buf(size + 1);
90 int size2 = vsnprintf(s: buf.data(), maxlen: size + 1, format: fmt, arg: ap2);
91 GGML_ASSERT(size2 == size);
92 va_end(ap2);
93 va_end(ap);
94 return std::string(buf.data(), size);
95}
96
// Formats a list of dimensions as "%5d, %5d, ..." (each value right-aligned
// to a minimum width of 5, separated by ", ").
//
// The result grows with the input, so shapes of any length are formatted in
// full. Throws std::out_of_range when `ne` is empty.
std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
    // an int64_t needs at most 20 characters, so 32 bytes can never truncate
    char item[32];

    snprintf(item, sizeof(item), "%5" PRId64, ne.at(0));
    std::string result = item;

    for (size_t i = 1; i < ne.size(); i++) {
        snprintf(item, sizeof(item), "%5" PRId64, ne[i]);
        result += ", ";
        result += item;
    }
    return result;
}
105
106std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
107 char buf[256];
108 snprintf(s: buf, maxlen: sizeof(buf), format: "%5" PRId64, t->ne[0]);
109 for (int i = 1; i < GGML_MAX_DIMS; i++) {
110 snprintf(s: buf + strlen(s: buf), maxlen: sizeof(buf) - strlen(s: buf), format: ", %5" PRId64, t->ne[i]);
111 }
112 return buf;
113}
114
115static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
116 switch (type) {
117 case GGUF_TYPE_UINT8: return std::to_string(val: ((const uint8_t *)data)[i]);
118 case GGUF_TYPE_INT8: return std::to_string(val: ((const int8_t *)data)[i]);
119 case GGUF_TYPE_UINT16: return std::to_string(val: ((const uint16_t *)data)[i]);
120 case GGUF_TYPE_INT16: return std::to_string(val: ((const int16_t *)data)[i]);
121 case GGUF_TYPE_UINT32: return std::to_string(val: ((const uint32_t *)data)[i]);
122 case GGUF_TYPE_INT32: return std::to_string(val: ((const int32_t *)data)[i]);
123 case GGUF_TYPE_UINT64: return std::to_string(val: ((const uint64_t *)data)[i]);
124 case GGUF_TYPE_INT64: return std::to_string(val: ((const int64_t *)data)[i]);
125 case GGUF_TYPE_FLOAT32: return std::to_string(val: ((const float *)data)[i]);
126 case GGUF_TYPE_FLOAT64: return std::to_string(val: ((const double *)data)[i]);
127 case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
128 default: return format(fmt: "unknown type %d", type);
129 }
130}
131
132std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
133 const enum gguf_type type = gguf_get_kv_type(ctx: ctx_gguf, key_id: i);
134
135 switch (type) {
136 case GGUF_TYPE_STRING:
137 return gguf_get_val_str(ctx: ctx_gguf, key_id: i);
138 case GGUF_TYPE_ARRAY:
139 {
140 const enum gguf_type arr_type = gguf_get_arr_type(ctx: ctx_gguf, key_id: i);
141 int arr_n = gguf_get_arr_n(ctx: ctx_gguf, key_id: i);
142 const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx: ctx_gguf, key_id: i);
143 std::stringstream ss;
144 ss << "[";
145 for (int j = 0; j < arr_n; j++) {
146 if (arr_type == GGUF_TYPE_STRING) {
147 std::string val = gguf_get_arr_str(ctx: ctx_gguf, key_id: i, i: j);
148 // escape quotes
149 replace_all(s&: val, search: "\\", replace: "\\\\");
150 replace_all(s&: val, search: "\"", replace: "\\\"");
151 ss << '"' << val << '"';
152 } else if (arr_type == GGUF_TYPE_ARRAY) {
153 ss << "???";
154 } else {
155 ss << gguf_data_to_str(type: arr_type, data, i: j);
156 }
157 if (j < arr_n - 1) {
158 ss << ", ";
159 }
160 }
161 ss << "]";
162 return ss.str();
163 }
164 default:
165 return gguf_data_to_str(type, data: gguf_get_val_data(ctx: ctx_gguf, key_id: i), i: 0);
166 }
167}
168