gguf.h source code [llama.cpp/ggml/include/gguf.h]

1	// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
2	// GGUF files have the following structure:
3	//
4	// 1. File magic "GGUF" (4 bytes).
5	// 2. File version (uint32_t).
6	// 3. Number of ggml tensors in file (int64_t).
7	// 4. Number of key-value-pairs in file (int64_t).
8	// 5. For each KV pair:
9	//   1. The key (string).
10	//   2. The value type (gguf_type).
11	//   3a. If the value type is GGUF_TYPE_ARRAY:
12	//     1. The type of the array (gguf_type).
13	//     2. The number of elements in the array (uint64_t).
14	//     3. The binary representation of each element in the array.
15	//   3b. Otherwise:
16	//     1. The binary representation of the value.
17	// 6. For each ggml tensor:
18	//   1. The tensor name (string).
19	//   2. The number of dimensions of the tensor (uint32_t).
20	//   3. For each dimension:
21	//     1. The size of the tensor in the dimension (int64_t).
22	//   4. The tensor data type (ggml_type).
23	//   5. The tensor data offset in the tensor data binary blob (uint64_t).
24	// 7. The tensor data binary blob (optional, aligned).
25	//
26	// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
27	// All enums are stored as int32_t.
28	// All bool values are stored as int8_t.
29	// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
30	// otherwise GGUF_DEFAULT_ALIGNMENT is used.
31	//
32	// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
33
34	#pragma once
35
36	#include "ggml.h"
37
38	#include <stdbool.h>
39	#include <stdint.h>
40
// File magic found at the very start of every GGUF file (4 bytes).
#define GGUF_MAGIC   "GGUF"
// File version, stored in the file as uint32_t right after the magic.
#define GGUF_VERSION 3

// Special KV key (uint32_t value): if it is defined in a file, its value is used for alignment.
#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"

// Alignment used when GGUF_KEY_GENERAL_ALIGNMENT is not defined.
#define GGUF_DEFAULT_ALIGNMENT 32
47
48	#ifdef __cplusplus
49	extern "C" {
50	#endif
51
// types that can be stored as GGUF KV data
// The value type of each KV pair is written to the file and, like all enums in the format,
// is stored as int32_t, so the explicit values below are what appears on disk.
enum gguf_type {
    GGUF_TYPE_UINT8   = 0,
    GGUF_TYPE_INT8    = 1,
    GGUF_TYPE_UINT16  = 2,
    GGUF_TYPE_INT16   = 3,
    GGUF_TYPE_UINT32  = 4,
    GGUF_TYPE_INT32   = 5,
    GGUF_TYPE_FLOAT32 = 6,
    GGUF_TYPE_BOOL    = 7,
    GGUF_TYPE_STRING  = 8,
    GGUF_TYPE_ARRAY   = 9,
    GGUF_TYPE_UINT64  = 10,
    GGUF_TYPE_INT64   = 11,
    GGUF_TYPE_FLOAT64 = 12,
    GGUF_TYPE_COUNT,       // marks the end of the enum
};
69
// Opaque handle: only declared here, the definition is not part of this header.
struct gguf_context;

// Options passed by value to gguf_init_from_file.
struct gguf_init_params {
    // NOTE(review): presumably suppresses allocation of the tensor data when true - confirm against the implementation
    bool no_alloc;

    // if not NULL, create a ggml_context and allocate the tensor data in it
    struct ggml_context ** ctx;
};
78
79	GGML_API struct gguf_context * gguf_init_empty(void);
80	GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
81	//GGML_API struct gguf_context * gguf_init_from_buffer(..);
82
83	GGML_API void gguf_free(struct gguf_context * ctx);
84
85	GGML_API const char * gguf_type_name(enum gguf_type type);
86
87	GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
88	GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
89	GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
90
91	GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
92	GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
93	GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
94
95	GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
96	GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
97
98	// will abort if the wrong type is used for the key
99	GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id);
100	GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id);
101	GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
102	GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
103	GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
104	GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
105	GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
106	GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
107	GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
108	GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
109	GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
110	GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
111	GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
112	GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id);
113
114	// get raw pointer to the first element of the array with the given key_id
115	// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
116	GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
117
118	// get ith C string from array with given key_id
119	GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
120
121	GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx);
122	GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
123	GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
124	GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id);
125	GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id);
126	GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id);
127
128	// removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
129	GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
130
131	// overrides an existing KV pair or adds a new one, the new KV pair is always at the back
132	GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
133	GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
134	GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
135	GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
136	GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
137	GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
138	GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
139	GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
140	GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
141	GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
142	GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
143	GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
144
145	// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
146	GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
147
148	// creates a new array with n strings and copies the corresponding strings from data
149	GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
150
151	// set or add KV pairs from another context
152	GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
153
154	// add tensor to GGUF context, tensor name must be unique
155	GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
156
157	// after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
158	// in such a way that the tensor data remains as one contiguous block (except for padding)
159	GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
160
161	// assumes that at least gguf_get_tensor_size bytes can be read from data
162	GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
163
164	// writing gguf files can be done in 3 ways:
165	//
166	// - write the entire gguf_context to a binary file in a single pass:
167	//
168	//       gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
169	//
170	// - write only the meta data to a file, then re-open the file and append the tensor data:
171	//
172	//       gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
173	//       FILE * f = fopen(fname, "ab");
174	//       fwrite(..., f); // write tensor data
175	//       fclose(f);
176	//
177	// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
178	//
179	//       FILE * f = fopen(fname, "wb");
180	//       const size_t size_meta = gguf_get_meta_size(ctx);
181	//       fseek(f, size_meta, SEEK_SET);
182	//       fwrite(..., f); // write tensor data
183	//       void * data = malloc(size_meta);
184	//       gguf_get_meta_data(ctx, data);
185	//       rewind(f);
186	//       fwrite(data, 1, size_meta, f);
187	//       free(data);
188	//       fclose(f);
189	//
190
191	// write the entire context to a binary file
192	GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
193
194	// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
195	GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
196
197	// writes the meta data to pointer "data"
198	GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
199
200	#ifdef __cplusplus
201	}
202	#endif
203

Browse the source code of llama.cpp/ggml/include/gguf.h