test-gguf.cpp source code [llama.cpp/tests/test-gguf.cpp]

1	#include "ggml.h"
2	#include "ggml-backend.h"
3	#include "../ggml/src/ggml-impl.h"
4
5	#include <algorithm>
6	#include <array>
7	#include <cstdint>
8	#include <cstdio>
9	#include <random>
10	#include <string>
11	#include <vector>
12
// Section thresholds for handcrafted_file_type values:
//   value >= offset_has_kv      -> the generated file contains KV pairs
//   value >= offset_has_tensors -> it additionally contains tensor infos
//   value >= offset_has_data    -> it additionally contains a tensor data section
constexpr int offset_has_kv      = 1000;
constexpr int offset_has_tensors = 2000;
constexpr int offset_has_data    = 3000;

// Kinds of GGUF files that get_handcrafted_file can generate.
// Within each section the enumerators >= 800 (+ section offset) are the ones for which
// expect_context_not_null returns true; all smaller ones are expected to fail to load.
enum handcrafted_file_type {
    // header only
    HANDCRAFTED_HEADER_BAD_MAGIC           =  10,
    HANDCRAFTED_HEADER_BAD_VERSION_0       =  15,
    HANDCRAFTED_HEADER_BAD_VERSION_1       =  20,
    HANDCRAFTED_HEADER_BAD_VERSION_FUTURE  =  30,
    HANDCRAFTED_HEADER_BAD_N_TENSORS       =  40,
    HANDCRAFTED_HEADER_BAD_N_KV            =  50,
    HANDCRAFTED_HEADER_EMPTY               = 800,

    // header + KV pairs
    HANDCRAFTED_KV_BAD_KEY_SIZE            =  10 + offset_has_kv,
    HANDCRAFTED_KV_BAD_TYPE                =  20 + offset_has_kv,
    // HANDCRAFTED_KV_BAD_VALUE_SIZE       =  30 + offset_has_kv, // removed because it can result in allocations > 1 TB (default sanitizer limit)
    HANDCRAFTED_KV_DUPLICATE_KEY           =  40 + offset_has_kv,
    HANDCRAFTED_KV_BAD_ALIGN               =  50 + offset_has_kv,
    HANDCRAFTED_KV_SUCCESS                 = 800 + offset_has_kv,

    // header + KV pairs + tensor infos
    HANDCRAFTED_TENSORS_BAD_NAME_SIZE      =  10 + offset_has_tensors,
    HANDCRAFTED_TENSORS_BAD_N_DIMS         =  20 + offset_has_tensors,
    HANDCRAFTED_TENSORS_BAD_SHAPE          =  30 + offset_has_tensors,
    HANDCRAFTED_TENSORS_NE_TOO_BIG         =  40 + offset_has_tensors,
    HANDCRAFTED_TENSORS_BAD_TYPE           =  50 + offset_has_tensors,
    HANDCRAFTED_TENSORS_BAD_OFFSET         =  60 + offset_has_tensors,
    HANDCRAFTED_TENSORS_DUPLICATE_NAME     =  70 + offset_has_tensors,
    HANDCRAFTED_TENSORS_BAD_ALIGN          =  75 + offset_has_tensors,
    HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN =  80 + offset_has_tensors,
    HANDCRAFTED_TENSORS_SUCCESS            = 800 + offset_has_tensors,
    HANDCRAFTED_TENSORS_CUSTOM_ALIGN       = 810 + offset_has_tensors,

    // header + KV pairs + tensor infos + tensor data
    HANDCRAFTED_DATA_NOT_ENOUGH_DATA       =  10 + offset_has_data,
    HANDCRAFTED_DATA_BAD_ALIGN             =  15 + offset_has_data,
    HANDCRAFTED_DATA_INCONSISTENT_ALIGN    =  20 + offset_has_data,
    HANDCRAFTED_DATA_SUCCESS               = 800 + offset_has_data,
    HANDCRAFTED_DATA_CUSTOM_ALIGN          = 810 + offset_has_data,
};
51
52	static std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) {
53	switch (hft) {
54	case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC";
55	case HANDCRAFTED_HEADER_BAD_VERSION_0: return "HEADER_BAD_VERSION_0";
56	case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1";
57	case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE";
58	case HANDCRAFTED_HEADER_BAD_N_KV: return "HEADER_BAD_N_KV";
59	case HANDCRAFTED_HEADER_BAD_N_TENSORS: return "HEADER_BAD_N_TENSORS";
60	case HANDCRAFTED_HEADER_EMPTY: return "HEADER_EMPTY";
61
62	case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE";
63	case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE";
64	case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY";
65	case HANDCRAFTED_KV_BAD_ALIGN: return "KV_BAD_ALIGN";
66	case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV";
67
68	case HANDCRAFTED_TENSORS_BAD_NAME_SIZE: return "TENSORS_BAD_NAME_SIZE";
69	case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS";
70	case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE";
71	case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG";
72	case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE";
73	case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET";
74	case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME";
75	case HANDCRAFTED_TENSORS_BAD_ALIGN: return "TENSORS_BAD_ALIGN";
76	case HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN: return "TENSORS_INCONSISTENT_ALIGN";
77	case HANDCRAFTED_TENSORS_SUCCESS: return "TENSORS_SUCCESS";
78	case HANDCRAFTED_TENSORS_CUSTOM_ALIGN: return "TENSORS_CUSTOM_ALIGN";
79
80	case HANDCRAFTED_DATA_NOT_ENOUGH_DATA: return "DATA_NOT_ENOUGH_DATA";
81	case HANDCRAFTED_DATA_BAD_ALIGN: return "DATA_BAD_ALIGN";
82	case HANDCRAFTED_DATA_INCONSISTENT_ALIGN: return "DATA_INCONSISTENT_ALIGN";
83	case HANDCRAFTED_DATA_SUCCESS: return "DATA_SUCCESS";
84	case HANDCRAFTED_DATA_CUSTOM_ALIGN: return "DATA_CUSTOM_ALIGN";
85	}
86	GGML_ABORT("fatal error");
87	}
88
89	static bool expect_context_not_null(const enum handcrafted_file_type hft) {
90	if (hft < offset_has_kv) {
91	return hft >= HANDCRAFTED_HEADER_EMPTY;
92	}
93	if (hft < offset_has_tensors) {
94	return hft >= HANDCRAFTED_KV_SUCCESS;
95	}
96	if (hft < offset_has_data) {
97	return hft >= HANDCRAFTED_TENSORS_SUCCESS;
98	}
99	return hft >= HANDCRAFTED_DATA_SUCCESS;
100	}
101
102	typedef std::pair<enum ggml_type, std::array<int64_t, GGML_MAX_DIMS>> tensor_config_t;
103
104	static std::vector<tensor_config_t> get_tensor_configs(std::mt19937 & rng) {
105	std::vector<tensor_config_t> tensor_configs;
106	tensor_configs.reserve(n: `100`);
107
108	for (int i = `0`; i < `100`; ++i) {
109	const enum ggml_type type = ggml_type(rng () % GGML_TYPE_COUNT);
110	if (ggml_type_size(type) == `0`) {
111	continue;
112	}
113
114	std::array<int64_t, GGML_MAX_DIMS> shape = {`1`, `1`, `1`, `1`};
115	shape [`0`] = (`1` + rng () % `10`) * ggml_blck_size(type);
116	const int n_dims = `1` + rng () % GGML_MAX_DIMS;
117	for (int i = `1`; i < n_dims; ++i) {
118	shape [i] = `1` + rng () % `10`;
119	}
120
121	tensor_configs.push_back(x: std::make_pair(x: type, y&: shape));
122	}
123
124	return tensor_configs;
125	}
126
127	static std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::mt19937 rng) {
128	std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
129	kv_types.reserve(n: `100`);
130
131	for (int i = `0`; i < `100`; ++i) {
132	const gguf_type type = gguf_type(rng () % GGUF_TYPE_COUNT);
133
134	if (type == GGUF_TYPE_ARRAY) {
135	const gguf_type type_arr = gguf_type(rng () % GGUF_TYPE_COUNT);
136	if (type_arr == GGUF_TYPE_ARRAY) {
137	continue;
138	}
139	kv_types.push_back(x: std::make_pair(x: type, y: type_arr));
140	continue;
141	}
142
143	kv_types.push_back(x: std::make_pair(x: type, y: gguf_type(-`1`)));
144	}
145	std::shuffle(first: kv_types.begin(), last: kv_types.end(), g&: rng);
146
147	return kv_types;
148	}
149
// Writes the object representation of val (sizeof(val) bytes) to file.
// Fails a GGML_ASSERT if fewer bytes than requested were written.
template <typename T>
static void helper_write(FILE * file, const T & val) {
    GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
}
154
155	static void helper_write(FILE * file, const void * data, const size_t nbytes) {
156	GGML_ASSERT(fwrite(data, `1`, nbytes, file) == nbytes);
157	}
158
159	static FILE * get_handcrafted_file(const unsigned int seed, const enum handcrafted_file_type hft, const int extra_bytes = `0`) {
160	FILE * file = tmpfile();
161
162	if (!file) {
163	return file;
164	}
165
166	std::mt19937 rng(seed);
167	uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
168
169	if (hft == HANDCRAFTED_HEADER_BAD_MAGIC) {
170	const char bad_magic[`4`] = {`'F'`, `'U'`, `'G'`, `'G'`};
171	helper_write(file, data: bad_magic, nbytes: sizeof(bad_magic));
172	} else {
173	helper_write(file, GGUF_MAGIC, nbytes: `4`);
174	}
175
176	if (hft == HANDCRAFTED_HEADER_BAD_VERSION_0) {
177	const uint32_t version = `0`;
178	helper_write(file, val: version);
179	} else if (hft == HANDCRAFTED_HEADER_BAD_VERSION_1) {
180	const uint32_t version = `1`;
181	helper_write(file, val: version);
182	} else if (hft == HANDCRAFTED_HEADER_BAD_VERSION_FUTURE) {
183	const uint32_t version = GGUF_VERSION + `1`;
184	helper_write(file, val: version);
185	} else {
186	const uint32_t version = GGUF_VERSION;
187	helper_write(file, val: version);
188	}
189
190	std::vector<tensor_config_t> tensor_configs;
191	if (hft >= offset_has_tensors) {
192	tensor_configs = get_tensor_configs(rng);
193	}
194
195	if (hft == HANDCRAFTED_HEADER_BAD_N_TENSORS) {
196	const uint64_t n_tensors = -`1`;
197	helper_write(file, val: n_tensors);
198	} else {
199	const uint64_t n_tensors = tensor_configs.size();
200	helper_write(file, val: n_tensors);
201	}
202
203	std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
204	if (hft >= offset_has_kv) {
205	kv_types = get_kv_types(rng);
206	}
207	{
208	uint64_t n_kv = kv_types.size();
209	if (hft == HANDCRAFTED_KV_BAD_ALIGN \|\|
210	hft == HANDCRAFTED_TENSORS_BAD_ALIGN \|\| hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN \|\|
211	hft == HANDCRAFTED_DATA_BAD_ALIGN \|\| hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
212
213	n_kv += `1`;
214	} else if (hft == HANDCRAFTED_HEADER_BAD_N_KV) {
215	n_kv = -`1`;
216	}
217	helper_write(file, val: n_kv);
218	}
219
220	if (hft < offset_has_kv) {
221	while (ftell(stream: file) % alignment != `0`) {
222	const char pad = `0`;
223	helper_write(file, val: pad);
224	}
225
226	for (int i = `0`; i < extra_bytes; ++i) {
227	const char tmp = `0`;
228	helper_write(file, val: tmp);
229	}
230	rewind(stream: file);
231	return file;
232	}
233
234	for (int i = `0`; i < int(kv_types.size()); ++i) {
235	const enum gguf_type type = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types [i].first);
236	const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types [i].second);
237
238	const std::string key = "my_key_" + std::to_string(val: (hft == HANDCRAFTED_KV_DUPLICATE_KEY ? i/`2` : i));
239
240	if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) {
241	const uint64_t n = -`1`;
242	helper_write(file, val: n);
243	} else {
244	const uint64_t n = key.length();
245	helper_write(file, val: n);
246	}
247	helper_write(file, data: key.data(), nbytes: key.length());
248
249	{
250	const int32_t type32 = int32_t(type);
251	helper_write(file, val: type32);
252	}
253
254	uint32_t data[`16`];
255	for (int j = `0`; j < `16`; ++j) {
256	data[j] = rng ();
257	if (type == GGUF_TYPE_STRING \|\| type_arr == GGUF_TYPE_STRING) {
258	data[j] \|= `0x01010101`; // avoid random null-termination of string
259	}
260	}
261
262	if (type == GGUF_TYPE_STRING) {
263	const uint64_t n = rng () % sizeof(data);
264	helper_write(file, val: n);
265	helper_write(file, data, nbytes: n);
266	continue;
267	}
268
269	if (type == GGUF_TYPE_ARRAY) {
270	{
271	const int32_t type32 = int32_t(type_arr);
272	helper_write(file, val: type32);
273	}
274	if (type_arr == GGUF_TYPE_STRING) {
275	const uint64_t nstr = rng () % (`16` + `1`);
276	helper_write(file, val: nstr);
277	for (uint64_t istr = `0`; istr < nstr; ++istr) {
278	const uint64_t n = rng () % (sizeof(uint32_t) + `1`);
279	helper_write(file, val: n);
280	helper_write(file, data: &data[istr], nbytes: n);
281	}
282	continue;
283	}
284	const size_t type_size = gguf_type_size(type: type_arr);
285	const uint64_t n = (rng () % sizeof(data)) / type_size;
286	helper_write(file, val: n);
287	helper_write(file, data: &data, nbytes: n*type_size);
288	continue;
289	}
290
291	helper_write(file, data, nbytes: hft == HANDCRAFTED_KV_BAD_TYPE ? `1` : gguf_type_size(type));
292	}
293
294	if (hft == HANDCRAFTED_KV_BAD_ALIGN \|\|
295	hft == HANDCRAFTED_TENSORS_BAD_ALIGN \|\| hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN \|\|
296	hft == HANDCRAFTED_DATA_BAD_ALIGN \|\| hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
297
298	const uint64_t n = strlen(GGUF_KEY_GENERAL_ALIGNMENT);
299	helper_write(file, val: n);
300	helper_write(file, GGUF_KEY_GENERAL_ALIGNMENT, nbytes: n);
301
302	const int32_t type = gguf_type(GGUF_TYPE_UINT32);
303	helper_write(file, val: type);
304
305	alignment = expect_context_not_null(hft) ? `1` : `13`;
306	helper_write(file, val: alignment);
307	}
308
309	if (hft < offset_has_tensors) {
310	while (ftell(stream: file) % alignment != `0`) {
311	const char pad = `0`;
312	helper_write(file, val: pad);
313	}
314
315	for (int i = `0`; i < extra_bytes; ++i) {
316	const char tmp = `0`;
317	helper_write(file, val: tmp);
318	}
319	rewind(stream: file);
320	return file;
321	}
322
323	if (hft == HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN \|\| hft == HANDCRAFTED_DATA_INCONSISTENT_ALIGN) {
324	alignment = `1`;
325	}
326
327	uint64_t offset = `0`;
328	for (int i = `0`; i < int(tensor_configs.size()); ++i) {
329	const ggml_type type = tensor_configs [i].first;
330	const std::array<int64_t, GGML_MAX_DIMS> shape = tensor_configs [i].second;
331
332	std::string name = "my_tensor";
333	if (hft != HANDCRAFTED_TENSORS_DUPLICATE_NAME) {
334	name += "_" + std::to_string(val: i);
335	}
336	if (hft == HANDCRAFTED_TENSORS_BAD_NAME_SIZE) {
337	name += "_with_a_very_long_name_which_is_longer_than_what_is_allowed_for_ggml_tensors";
338	GGML_ASSERT(name.length() >= GGML_MAX_NAME);
339	}
340	{
341	const uint64_t n = name.length();
342	helper_write(file, val: n);
343	}
344	helper_write(file, data: name.data(), nbytes: name.length());
345
346	uint32_t n_dims = hft == HANDCRAFTED_TENSORS_NE_TOO_BIG ? `2` : `1`;
347	for (int i = GGML_MAX_DIMS-`1`; i >= `1`; --i) {
348	if (shape [i] != `1`) {
349	n_dims = i + `1`;
350	break;
351	}
352	}
353	if (hft == HANDCRAFTED_TENSORS_BAD_N_DIMS) {
354	const uint32_t n_dims_bad = GGML_MAX_DIMS + `1`;
355	helper_write(file, val: n_dims_bad);
356	} else {
357	helper_write(file, val: n_dims);
358	}
359
360	if (hft == HANDCRAFTED_TENSORS_BAD_SHAPE) {
361	for (uint32_t j = `0`; j < n_dims; ++j) {
362	const int64_t bad_dim = -`1`;
363	helper_write(file, val: bad_dim);
364	}
365	} else if (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG){
366	for (uint32_t j = `0`; j < n_dims; ++j) {
367	const int64_t big_dim = `4`*int64_t(INT32_MAX);
368	helper_write(file, val: big_dim);
369	}
370	} else {
371	helper_write(file, data: shape.data(), nbytes: n_dims*sizeof(int64_t));
372	}
373
374	{
375	const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? GGML_TYPE_COUNT : int32_t(type);
376	helper_write(file, val: type32);
377	}
378
379	if (hft == HANDCRAFTED_TENSORS_BAD_OFFSET) {
380	const uint64_t bad_offset = -`1`;
381	helper_write(file, val: bad_offset);
382	} else {
383	helper_write(file, val: offset);
384	}
385
386	int64_t ne = shape [`0`];
387	for (uint32_t i = `1`; i < n_dims; ++i) {
388	ne *= shape [i];
389	}
390	offset += GGML_PAD(ggml_row_size(type, ne), alignment);
391	}
392
393	while (ftell(stream: file) % alignment != `0`) {
394	const char pad = `0`;
395	helper_write(file, val: pad);
396	}
397
398	if (hft >= offset_has_data) {
399	rng.seed(sd: seed + `1`);
400	uint64_t nbytes = offset;
401	if (hft == HANDCRAFTED_DATA_NOT_ENOUGH_DATA) {
402	nbytes -= `1`;
403	}
404	for (uint64_t i = `0`; i < nbytes; ++i) {
405	const uint8_t random_byte = i % `256`;
406	helper_write(file, val: random_byte);
407	}
408	}
409
410	for (int i = `0`; i < extra_bytes; ++i) {
411	const char tmp = `0`;
412	helper_write(file, val: tmp);
413	}
414	rewind(stream: file);
415	return file;
416	}
417
418	static bool handcrafted_check_header(const gguf_context * gguf_ctx, const unsigned int seed, const bool has_kv, const bool has_tensors, const bool alignment_defined) {
419	if (!gguf_ctx) {
420	return false;
421	}
422
423	std::mt19937 rng(seed);
424
425	std::vector<tensor_config_t> tensor_configs;
426	if (has_tensors) {
427	tensor_configs = get_tensor_configs(rng);
428	}
429	std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
430	if (has_kv) {
431	kv_types = get_kv_types(rng);
432	}
433
434	bool ok = true;
435
436	if (gguf_get_version(ctx: gguf_ctx) != GGUF_VERSION) {
437	ok = false;
438	}
439	if (gguf_get_n_tensors(ctx: gguf_ctx) != int(tensor_configs.size())) {
440	ok = false;
441	}
442	if (gguf_get_n_kv(ctx: gguf_ctx) != int(alignment_defined ? kv_types.size() + `1` : kv_types.size())) {
443	ok = false;
444	}
445
446	return ok;
447	}
448
449	static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned int seed, const bool has_tensors, const bool alignment_defined) {
450	if (!gguf_ctx) {
451	return false;
452	}
453
454	std::mt19937 rng(seed);
455
456	std::vector<tensor_config_t> tensor_configs;
457	if (has_tensors) {
458	tensor_configs = get_tensor_configs(rng);
459	}
460
461	std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types = get_kv_types(rng);
462
463	bool ok = true;
464
465	for (int i = `0`; i < int(kv_types.size()); ++i) {
466	const enum gguf_type type = gguf_type(kv_types [i].first);
467	const enum gguf_type type_arr = gguf_type(kv_types [i].second);
468
469	const std::string key = "my_key_" + std::to_string(val: i);
470
471	uint32_t data[`16`];
472	for (int j = `0`; j < `16`; ++j) {
473	data[j] = rng ();
474	if (type == GGUF_TYPE_STRING \|\| type_arr == GGUF_TYPE_STRING) {
475	data[j] \|= `0x01010101`; // avoid random null-termination of string
476	}
477	}
478
479	const char * data8 = reinterpret_cast<const char *>(data);
480	const int id = gguf_find_key(ctx: gguf_ctx, key: key.c_str());
481
482	if (type == GGUF_TYPE_STRING) {
483	const char * str = gguf_get_val_str(ctx: gguf_ctx, key_id: id);
484	const uint64_t n = strlen(s: str);
485	const uint64_t n_expected = rng () % sizeof(data);
486	if (n != n_expected) {
487	ok = false;
488	continue;
489	}
490	if (!std::equal(first1: str, last1: str + n, first2: data8)) {
491	ok = false;
492	}
493	continue;
494	}
495
496	if (type == GGUF_TYPE_ARRAY) {
497	const size_t type_size = gguf_type_size(type: type_arr);
498	const uint64_t arr_n = gguf_get_arr_n(ctx: gguf_ctx, key_id: id);
499
500	if (type_arr == GGUF_TYPE_STRING) {
501	const uint64_t nstr_expected = rng () % (`16` + `1`);
502	if (arr_n != nstr_expected) {
503	ok = false;
504	continue;
505	}
506	for (uint64_t istr = `0`; istr < nstr_expected; ++istr) {
507	const char * str = gguf_get_arr_str(ctx: gguf_ctx, key_id: id, i: istr);
508	const uint64_t n = strlen(s: str);
509	const uint64_t n_expected = rng () % (sizeof(uint32_t) + `1`);
510
511	if (n != n_expected) {
512	ok = false;
513	continue;
514	}
515	const char * str_expected = reinterpret_cast<const char *>(&data[istr]);
516	if (strncmp(s1: str, s2: str_expected, n: n) != `0`) {
517	ok = false;
518	continue;
519	}
520	}
521	continue;
522	}
523
524	const uint64_t arr_n_expected = (rng () % sizeof(data)) / type_size;
525	if (arr_n != arr_n_expected) {
526	ok = false;
527	continue;
528	}
529
530	const char * data_gguf = reinterpret_cast<const char *>(gguf_get_arr_data(ctx: gguf_ctx, key_id: id));
531
532	if (type_arr == GGUF_TYPE_BOOL) {
533	for (size_t arr_i = `0`; arr_i < arr_n; ++arr_i) {
534	if (bool(data8[arr_i]) != bool(data_gguf[arr_i])) {
535	ok = false;
536	}
537	}
538	continue;
539	}
540
541	if (!std::equal(first1: data8, last1: data8 + arr_n*type_size, first2: data_gguf)) {
542	ok = false;
543	}
544	continue;
545	}
546
547	const char * data_gguf = reinterpret_cast<const char *>(gguf_get_val_data(ctx: gguf_ctx, key_id: id));
548
549	if (type == GGUF_TYPE_BOOL) {
550	if (bool(data8) != bool(data_gguf)) {
551	ok = false;
552	}
553	continue;
554	}
555
556	if (!std::equal(first1: data8, last1: data8 + gguf_type_size(type), first2: data_gguf)) {
557	ok = false;
558	}
559	}
560
561	const uint32_t expected_alignment = alignment_defined ? `1` : GGUF_DEFAULT_ALIGNMENT;
562	if (gguf_get_alignment(ctx: gguf_ctx) != expected_alignment) {
563	ok = false;
564	}
565
566	return ok;
567	}
568
569	static bool handcrafted_check_tensors(const gguf_context * gguf_ctx, const unsigned int seed) {
570	if (!gguf_ctx) {
571	return false;
572	}
573
574	std::mt19937 rng(seed);
575
576	std::vector<tensor_config_t> tensor_configs = get_tensor_configs(rng);
577
578	// Call get_kv_types to get the same RNG state:
579	get_kv_types(rng);
580
581	bool ok = true;
582
583	const int id_alignment = gguf_find_key(ctx: gguf_ctx, GGUF_KEY_GENERAL_ALIGNMENT);
584	const uint32_t alignment = id_alignment >= `0` ? gguf_get_val_u32(ctx: gguf_ctx, key_id: id_alignment) : GGUF_DEFAULT_ALIGNMENT;
585
586	uint64_t expected_offset = `0`;
587	for (int i = `0`; i < int(tensor_configs.size()); ++i) {
588	const ggml_type type = tensor_configs [i].first;
589	const std::array<int64_t, GGML_MAX_DIMS> shape = tensor_configs [i].second;
590
591	const std::string name = "my_tensor_" + std::to_string(val: i);
592	const int id = gguf_find_tensor(ctx: gguf_ctx, name: name.c_str());
593
594	if (id >= `0`) {
595	if (std::string (gguf_get_tensor_name(ctx: gguf_ctx, tensor_id: id)) != name) {
596	ok = false;
597	}
598
599	if (gguf_get_tensor_type(ctx: gguf_ctx, tensor_id: id) != type) {
600	ok = false;
601	}
602	} else {
603	ok = false;
604	continue;
605	}
606
607	const size_t offset = gguf_get_tensor_offset(ctx: gguf_ctx, tensor_id: id);
608
609	if (offset != expected_offset) {
610	ok = false;
611	}
612
613	int64_t ne = shape [`0`];
614	for (size_t j = `1`; j < GGML_MAX_DIMS; ++j) {
615	ne *= shape [j];
616	}
617	expected_offset += GGML_PAD(ggml_row_size(type, ne), alignment);
618	}
619
620	return ok;
621	}
622
623	static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const unsigned int seed, FILE * file) {
624	if (!gguf_ctx) {
625	return false;
626	}
627
628	std::mt19937 rng(seed);
629
630	std::vector<tensor_config_t> tensor_configs = get_tensor_configs(rng);
631
632	bool ok = true;
633
634	for (int i = `0`; i < int(tensor_configs.size()); ++i) {
635	const ggml_type type = tensor_configs [i].first;
636	const std::array<int64_t, GGML_MAX_DIMS> shape = tensor_configs [i].second;
637
638	int64_t ne = shape [`0`];
639	for (size_t j = `1`; j < GGML_MAX_DIMS; ++j) {
640	ne *= shape [j];
641	}
642	const size_t size = ggml_row_size(type, ne);
643
644	const std::string name = "my_tensor_" + std::to_string(val: i);
645	const size_t offset = gguf_get_tensor_offset(ctx: gguf_ctx, tensor_id: gguf_find_tensor(ctx: gguf_ctx, name: name.c_str()));
646
647	std::vector<uint8_t> data(size);
648	GGML_ASSERT(fseek(file, gguf_get_data_offset(gguf_ctx) + offset, SEEK_SET) == `0`);
649	GGML_ASSERT(fread(data.data(), `1`, data.size(), file) == data.size());
650
651	for (size_t j = `0`; j < size; ++j) {
652	const uint8_t expected_byte = (j + offset) % `256`;
653	if (data [j] != expected_byte) {
654	ok = false;
655	}
656	}
657	}
658
659	return ok;
660	}
661
662	static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
663	int npass = `0`;
664	int ntest = `0`;
665
666	const std::vector<handcrafted_file_type> hfts = {
667	HANDCRAFTED_HEADER_BAD_MAGIC,
668	HANDCRAFTED_HEADER_BAD_VERSION_0,
669	HANDCRAFTED_HEADER_BAD_VERSION_1,
670	HANDCRAFTED_HEADER_BAD_VERSION_FUTURE,
671	HANDCRAFTED_HEADER_BAD_N_KV,
672	HANDCRAFTED_HEADER_BAD_N_TENSORS,
673	HANDCRAFTED_HEADER_EMPTY,
674
675	HANDCRAFTED_KV_BAD_KEY_SIZE,
676	HANDCRAFTED_KV_BAD_TYPE,
677	HANDCRAFTED_KV_DUPLICATE_KEY,
678	HANDCRAFTED_KV_BAD_ALIGN,
679	HANDCRAFTED_KV_SUCCESS,
680
681	HANDCRAFTED_TENSORS_BAD_NAME_SIZE,
682	HANDCRAFTED_TENSORS_BAD_N_DIMS,
683	HANDCRAFTED_TENSORS_BAD_SHAPE,
684	HANDCRAFTED_TENSORS_NE_TOO_BIG,
685	HANDCRAFTED_TENSORS_BAD_TYPE,
686	HANDCRAFTED_TENSORS_BAD_OFFSET,
687	HANDCRAFTED_TENSORS_DUPLICATE_NAME,
688	HANDCRAFTED_TENSORS_BAD_ALIGN,
689	HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN,
690	HANDCRAFTED_TENSORS_SUCCESS,
691	HANDCRAFTED_TENSORS_CUSTOM_ALIGN,
692
693	HANDCRAFTED_DATA_NOT_ENOUGH_DATA,
694	HANDCRAFTED_DATA_BAD_ALIGN,
695	HANDCRAFTED_DATA_INCONSISTENT_ALIGN,
696	HANDCRAFTED_DATA_SUCCESS,
697	HANDCRAFTED_DATA_CUSTOM_ALIGN,
698	};
699
700	for (enum handcrafted_file_type hft : hfts) {
701	printf(format: "%s: handcrafted_file_type=%s\n", __func__, handcrafted_file_type_name(hft).c_str());
702	FILE * file = get_handcrafted_file(seed, hft);
703
704	#ifdef _WIN32
705	if (!file) {
706	printf("failed to create tmpfile(), needs elevated privileges on Windows");
707	printf("skipping tests");
708	continue;
709	}
710	#else
711	GGML_ASSERT(file);
712	#endif // _WIN32
713
714	struct ggml_context * ctx = nullptr;
715	struct gguf_init_params gguf_params = {
716	/no_alloc =/ false,
717	/ctx =/ hft >= offset_has_data ? &ctx : nullptr,
718	};
719
720	struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, params: gguf_params);
721
722	if (expect_context_not_null(hft)) {
723	printf(format: "%s: - context_not_null: ", __func__);
724	} else {
725	printf(format: "%s: - context_null: ", __func__);
726	}
727	if (bool(gguf_ctx) == expect_context_not_null(hft)) {
728	printf(format: "\033[1;32mOK\033[0m\n");
729	npass++;
730	} else {
731	printf(format: "\033[1;31mFAIL\033[0m\n");
732	}
733	ntest++;
734
735	if (hft >= offset_has_data && !expect_context_not_null(hft)) {
736	printf(format: "%s: - no_dangling_ggml_context_pointer: ", __func__);
737	if (ctx) {
738	printf(format: "\033[1;31mFAIL\033[0m\n");
739	} else {
740	printf(format: "\033[1;32mOK\033[0m\n");
741	npass++;
742	}
743	ntest++;
744	}
745
746	const bool alignment_defined = hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN \|\| hft == HANDCRAFTED_DATA_CUSTOM_ALIGN;
747
748	if (expect_context_not_null(hft)) {
749	printf(format: "%s: - check_header: ", __func__);
750	if (handcrafted_check_header(gguf_ctx, seed, has_kv: hft >= offset_has_kv, has_tensors: hft >= offset_has_tensors, alignment_defined)) {
751	printf(format: "\033[1;32mOK\033[0m\n");
752	npass++;
753	} else {
754	printf(format: "\033[1;31mFAIL\033[0m\n");
755	}
756	ntest++;
757	}
758
759	if (expect_context_not_null(hft) && hft >= offset_has_kv) {
760	printf(format: "%s: - check_kv: ", __func__);
761	if (handcrafted_check_kv(gguf_ctx, seed, has_tensors: hft >= offset_has_tensors, alignment_defined)) {
762	printf(format: "\033[1;32mOK\033[0m\n");
763	npass++;
764	} else {
765	printf(format: "\033[1;31mFAIL\033[0m\n");
766	}
767	ntest++;
768	}
769
770	if (expect_context_not_null(hft) && hft >= offset_has_tensors) {
771	printf(format: "%s: - check_tensors: ", __func__);
772	if (handcrafted_check_tensors(gguf_ctx, seed)) {
773	printf(format: "\033[1;32mOK\033[0m\n");
774	npass++;
775	} else {
776	printf(format: "\033[1;31mFAIL\033[0m\n");
777	}
778	ntest++;
779	}
780
781	if (expect_context_not_null(hft) && hft >= offset_has_data) {
782	printf(format: "%s: - check_tensor_data: ", __func__);
783	if (handcrafted_check_tensor_data(gguf_ctx, seed, file)) {
784	printf(format: "\033[1;32mOK\033[0m\n");
785	npass++;
786	} else {
787	printf(format: "\033[1;31mFAIL\033[0m\n");
788	}
789	ntest++;
790	}
791
792	fclose(stream: file);
793	if (gguf_ctx) {
794	ggml_free(ctx);
795	gguf_free(ctx: gguf_ctx);
796	}
797	printf(format: "\n");
798	}
799
800
801	return std::make_pair(x&: npass, y&: ntest);
802	}
803
804	struct random_gguf_context_result {
805	struct gguf_context * gguf_ctx;
806	struct ggml_context * ctx;
807	ggml_backend_buffer_t buffer;
808	};
809
810	static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t backend, const unsigned int seed) {
811	std::mt19937 rng(seed);
812
813	struct gguf_context * gguf_ctx = gguf_init_empty();
814
815	for (int i = `0`; i < `256`; ++i) {
816	const std::string key = "my_key_" + std::to_string(val: rng () % `1024`);
817	const enum gguf_type type = gguf_type(rng () % GGUF_TYPE_COUNT);
818
819	switch (type) {
820	case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx: gguf_ctx, key: key.c_str(), val: rng () % (`1` << `7`)); break;
821	case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx: gguf_ctx, key: key.c_str(), val: rng () % (`1` << `7`) - (`1` << `6`)); break;
822	case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx: gguf_ctx, key: key.c_str(), val: rng () % (`1` << `15`)); break;
823	case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx: gguf_ctx, key: key.c_str(), val: rng () % (`1` << `15`) - (`1` << `14`)); break;
824	case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx: gguf_ctx, key: key.c_str(), val: rng ()); break;
825	case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx: gguf_ctx, key: key.c_str(), val: rng () - (`1` << `30`)); break;
826	case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx: gguf_ctx, key: key.c_str(), val: rng () % `1024` - `512`); break;
827	case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx: gguf_ctx, key: key.c_str(), val: rng () % `2` == `0`); break;
828	case GGUF_TYPE_STRING: gguf_set_val_str (ctx: gguf_ctx, key: key.c_str(), val: std::to_string(val: rng ()).c_str()); break;
829	case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx: gguf_ctx, key: key.c_str(), val: rng ()); break;
830	case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx: gguf_ctx, key: key.c_str(), val: rng () - (`1` << `30`)); break;
831	case GGUF_TYPE_FLOAT64: gguf_set_val_f32 (ctx: gguf_ctx, key: key.c_str(), val: rng () % `1024` - `512`); break;
832	case GGUF_TYPE_ARRAY: {
833	const enum gguf_type type_arr = gguf_type(rng () % GGUF_TYPE_COUNT);
834	const uint64_t ne = rng () % `1024`;
835
836	switch (type_arr) {
837	case GGUF_TYPE_UINT8:
838	case GGUF_TYPE_INT8:
839	case GGUF_TYPE_UINT16:
840	case GGUF_TYPE_INT16:
841	case GGUF_TYPE_UINT32:
842	case GGUF_TYPE_INT32:
843	case GGUF_TYPE_FLOAT32:
844	case GGUF_TYPE_BOOL:
845	case GGUF_TYPE_UINT64:
846	case GGUF_TYPE_INT64:
847	case GGUF_TYPE_FLOAT64: {
848	const size_t nbytes = ne*gguf_type_size(type: type_arr);
849	std::vector<uint32_t> random_data((nbytes + sizeof(uint32_t) - `1`) / sizeof(uint32_t));
850	for (size_t j = `0`; j < random_data.size(); ++j) {
851	random_data [j] = rng ();
852	if (type_arr == GGUF_TYPE_BOOL) {
853	random_data [j] &= `0x01010101`; // the sanitizer complains if booleans are not 0 or 1
854	}
855	}
856	gguf_set_arr_data(ctx: gguf_ctx, key: key.c_str(), type: type_arr, data: random_data.data(), n: ne);
857	} break;
858	case GGUF_TYPE_STRING: {
859	std::vector<std::string> data_cpp(ne);
860	std::vector<const char *> data_c(ne);
861	for (size_t j = `0`; j < data_cpp.size(); ++j) {
862	data_cpp [j] = std::to_string(val: rng ());
863	data_c [j] = data_cpp [j].c_str();
864	}
865	gguf_set_arr_str(ctx: gguf_ctx, key: key.c_str(), data: data_c.data(), n: ne);
866	} break;
867	case GGUF_TYPE_ARRAY: {
868	break; // not supported
869	}
870	case GGUF_TYPE_COUNT:
871	default: {
872	GGML_ABORT("fatal error");
873	}
874	}
875	} break;
876	case GGUF_TYPE_COUNT:
877	default: {
878	GGML_ABORT("fatal error");
879	}
880	}
881	}
882
883	struct ggml_init_params ggml_params = {
884	/.mem_size =/ `256`*ggml_tensor_overhead(),
885	/.mem_buffer =/ nullptr,
886	/.no_alloc =/ true,
887	};
888	struct ggml_context * ctx = ggml_init(params: ggml_params);
889
890	for (int i = `0`; i < `256`; ++i) {
891	const std::string name = "my_tensor_" + std::to_string(val: i);
892	const enum ggml_type type = ggml_type(rng () % GGML_TYPE_COUNT);
893	const size_t type_size = ggml_type_size(type);
894
895	if (type_size == `0`) {
896	continue;
897	}
898
899	const int n_dims = `1` + rng () % GGML_MAX_DIMS;
900	int64_t ne[GGML_MAX_DIMS];
901	ne[`0`] = (`1` + rng () % `10`) * ggml_blck_size(type);
902	for (int j = `1`; j < n_dims; ++j) {
903	ne[j] = `1` + rng () % `10`;
904	}
905
906	struct ggml_tensor * tensor = ggml_new_tensor(ctx, type, n_dims, ne);
907	ggml_set_name(tensor, name: name.c_str());
908	}
909
910	ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
911	for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, tensor: t)) {
912	const size_t nbytes = ggml_nbytes(tensor: t);
913	std::vector<uint32_t> random_data((nbytes + sizeof(uint32_t) - `1`) / sizeof(uint32_t));
914	for (size_t j = `0`; j < random_data.size(); ++j) {
915	random_data [j] = rng ();
916	}
917	ggml_backend_tensor_set(tensor: t, data: random_data.data(), offset: `0`, size: nbytes);
918
919	gguf_add_tensor(ctx: gguf_ctx, tensor: t);
920	}
921
922	return {.gguf_ctx: gguf_ctx, .ctx: ctx, .buffer: buf};
923	}
924
925	static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other) {
926	bool ok = true;
927
928	const int n_kv = gguf_get_n_kv(ctx);
929	for (int id = `0`; id < n_kv; ++id) {
930	const char * name = gguf_get_key(ctx, key_id: id);
931
932	const int idx_other = gguf_find_key(ctx: other, key: name);
933	if (idx_other < `0`) {
934	ok = false;
935	continue;
936	}
937
938	const gguf_type type = gguf_get_kv_type(ctx, key_id: id);
939	if (type != gguf_get_kv_type(ctx: other, key_id: idx_other)) {
940	ok = false;
941	continue;
942	}
943
944	if (type == GGUF_TYPE_ARRAY) {
945	const size_t arr_n = gguf_get_arr_n(ctx, key_id: id);
946	if (arr_n != gguf_get_arr_n(ctx: other, key_id: idx_other)) {
947	ok = false;
948	continue;
949	}
950
951	const gguf_type type_arr = gguf_get_arr_type(ctx, key_id: id);
952	if (type_arr != gguf_get_arr_type(ctx: other, key_id: idx_other)) {
953	ok = false;
954	continue;
955	}
956
957	if (type_arr == GGUF_TYPE_BOOL) {
958	const int8_t * data = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx, key_id: id));
959	const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx: other, key_id: idx_other));
960	for (size_t arr_i = `0`; arr_i < arr_n; ++arr_i) {
961	if (bool(data[arr_i]) != bool(data_other[arr_i])) {
962	ok = false;
963	}
964	}
965	continue;
966	}
967
968	if (type_arr == GGUF_TYPE_STRING) {
969	for (size_t arr_i = `0`; arr_i < arr_n; ++arr_i) {
970	const std::string str = gguf_get_arr_str(ctx, key_id: id, i: arr_i);
971	const std::string str_other = gguf_get_arr_str(ctx: other, key_id: idx_other, i: arr_i);
972	if (str != str_other) {
973	ok = false;
974	}
975	}
976	continue;
977	}
978
979	const int8_t * data = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx, key_id: id));
980	const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx: other, key_id: idx_other));
981	if (!std::equal(first1: data, last1: data + arr_n*gguf_type_size(type: type_arr), first2: data_other)) {
982	ok = false;
983	}
984	continue;
985	}
986
987	if (type == GGUF_TYPE_STRING) {
988	const std::string str = gguf_get_val_str(ctx, key_id: id);
989	const std::string str_other = gguf_get_val_str(ctx: other, key_id: idx_other);
990	if (str != str_other) {
991	ok = false;
992	}
993	continue;
994	}
995
996	const char * data = reinterpret_cast<const char *>(gguf_get_val_data(ctx, key_id: id));
997	const char * data_other = reinterpret_cast<const char *>(gguf_get_val_data(ctx: other, key_id: idx_other));
998	if (!std::equal(first1: data, last1: data + gguf_type_size(type), first2: data_other)) {
999	ok = false;
1000	}
1001	}
1002
1003	return ok;
1004	}
1005
1006	static bool all_tensors_in_other(const gguf_context * ctx, const gguf_context * other) {
1007	bool ok = true;
1008
1009	const int n_tensors = gguf_get_n_tensors(ctx);
1010	for (int id = `0`; id < n_tensors; ++id) {
1011	const std::string name = gguf_get_tensor_name(ctx, tensor_id: id);
1012
1013	const int idx_other = gguf_find_tensor(ctx: other, name: name.c_str());
1014	if (id != idx_other) {
1015	ok = false;
1016	if (idx_other < `0`) {
1017	continue;
1018	}
1019	}
1020
1021	const ggml_type type = gguf_get_tensor_type(ctx, tensor_id: id);
1022	if (type != gguf_get_tensor_type(ctx: other, tensor_id: id)) {
1023	ok = false;
1024	}
1025
1026	const size_t offset = gguf_get_tensor_offset(ctx, tensor_id: id);
1027	if (offset != gguf_get_tensor_offset(ctx: other, tensor_id: id)) {
1028	ok = false;
1029	}
1030	}
1031
1032	return ok;
1033	}
1034
1035	static bool same_tensor_data(const struct ggml_context * orig, const struct ggml_context * read) {
1036	bool ok = true;
1037
1038	struct ggml_tensor * t_orig = ggml_get_first_tensor(ctx: orig);
1039	struct ggml_tensor * t_read = ggml_get_first_tensor(ctx: read);
1040
1041	if (std::string (t_read->name) != "GGUF tensor data binary blob") {
1042	return false;
1043	}
1044	t_read = ggml_get_next_tensor(ctx: read, tensor: t_read);
1045
1046	while (t_orig) {
1047	if (!t_read) {
1048	ok = false;
1049	break;
1050	}
1051
1052	const size_t nbytes = ggml_nbytes(tensor: t_orig);
1053	if (ggml_nbytes(tensor: t_read) != nbytes) {
1054	ok = false;
1055	break;
1056	}
1057	std::vector<char> data_orig(nbytes);
1058	ggml_backend_tensor_get(tensor: t_orig, data: data_orig.data(), offset: `0`, size: nbytes);
1059	if (!std::equal(first1: data_orig.data(), last1: data_orig.data() + nbytes, first2: reinterpret_cast<const char *>(t_read->data))) {
1060	ok = false;
1061	}
1062
1063	t_orig = ggml_get_next_tensor(ctx: orig, tensor: t_orig);
1064	t_read = ggml_get_next_tensor(ctx: read, tensor: t_read);
1065	}
1066	if (t_read) {
1067	ok = false;
1068	}
1069
1070	return ok;
1071	}
1072
1073	static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) {
1074	ggml_backend_t backend = ggml_backend_dev_init(device: dev, params: nullptr);
1075	printf(format: "%s: device=%s, backend=%s, only_meta=%s\n",
1076	__func__, ggml_backend_dev_description(device: dev), ggml_backend_name(backend), only_meta ? "yes" : "no");
1077
1078	int npass = `0`;
1079	int ntest = `0`;
1080
1081	struct gguf_context * gguf_ctx_0;
1082	struct ggml_context * ctx_0;
1083	ggml_backend_buffer_t bbuf;
1084	{
1085	struct random_gguf_context_result result = get_random_gguf_context(backend, seed);
1086	gguf_ctx_0 = result.gguf_ctx;
1087	ctx_0 = result.ctx;
1088	bbuf = result.buffer;
1089	}
1090
1091	FILE * file = tmpfile();
1092
1093	#ifdef _WIN32
1094	if (!file) {
1095	printf("failed to create tmpfile(), needs elevated privileges on Windows");
1096	printf("skipping tests");
1097	return std::make_pair(`0`, `0`);
1098	}
1099	#else
1100	GGML_ASSERT(file);
1101	#endif // _WIN32
1102
1103	{
1104	std::vector<int8_t> buf;
1105	gguf_write_to_buf(ctx: gguf_ctx_0, buf, only_meta);
1106	GGML_ASSERT(fwrite(buf.data(), `1`, buf.size(), file) == buf.size());
1107	rewind(stream: file);
1108	}
1109
1110	struct ggml_context * ctx_1 = nullptr;
1111	struct gguf_init_params gguf_params = {
1112	/no_alloc =/ false,
1113	/ctx =/ only_meta ? nullptr : &ctx_1,
1114	};
1115	struct gguf_context * gguf_ctx_1 = gguf_init_from_file_impl(file, params: gguf_params);
1116
1117	printf(format: "%s: same_version: ", __func__);
1118	if (gguf_get_version(ctx: gguf_ctx_0) == gguf_get_version(ctx: gguf_ctx_1)) {
1119	printf(format: "\033[1;32mOK\033[0m\n");
1120	npass++;
1121	} else {
1122	printf(format: "\033[1;31mFAIL\033[0m\n");
1123	}
1124	ntest++;
1125
1126	printf(format: "%s: same_n_kv: ", __func__);
1127	if (gguf_get_n_kv(ctx: gguf_ctx_0) == gguf_get_n_kv(ctx: gguf_ctx_1)) {
1128	printf(format: "\033[1;32mOK\033[0m\n");
1129	npass++;
1130	} else {
1131	printf(format: "\033[1;31mFAIL\033[0m\n");
1132	}
1133	ntest++;
1134
1135	printf(format: "%s: same_n_tensors: ", __func__);
1136	if (gguf_get_n_tensors(ctx: gguf_ctx_0) == gguf_get_n_tensors(ctx: gguf_ctx_1)) {
1137	printf(format: "\033[1;32mOK\033[0m\n");
1138	npass++;
1139	} else {
1140	printf(format: "\033[1;31mFAIL\033[0m\n");
1141	}
1142	ntest++;
1143
1144	printf(format: "%s: all_orig_kv_in_read: ", __func__);
1145	if (all_kv_in_other(ctx: gguf_ctx_0, other: gguf_ctx_1)) {
1146	printf(format: "\033[1;32mOK\033[0m\n");
1147	npass++;
1148	} else {
1149	printf(format: "\033[1;31mFAIL\033[0m\n");
1150	}
1151	ntest++;
1152
1153	printf(format: "%s: all_read_kv_in_orig: ", __func__);
1154	if (all_kv_in_other(ctx: gguf_ctx_1, other: gguf_ctx_0)) {
1155	printf(format: "\033[1;32mOK\033[0m\n");
1156	npass++;
1157	} else {
1158	printf(format: "\033[1;31mFAIL\033[0m\n");
1159	}
1160	ntest++;
1161
1162	printf(format: "%s: all_orig_tensors_in_read: ", __func__);
1163	if (all_tensors_in_other(ctx: gguf_ctx_0, other: gguf_ctx_1)) {
1164	printf(format: "\033[1;32mOK\033[0m\n");
1165	npass++;
1166	} else {
1167	printf(format: "\033[1;31mFAIL\033[0m\n");
1168	}
1169	ntest++;
1170
1171	printf(format: "%s: all_read_tensors_in_orig: ", __func__);
1172	if (all_tensors_in_other(ctx: gguf_ctx_1, other: gguf_ctx_0)) {
1173	printf(format: "\033[1;32mOK\033[0m\n");
1174	npass++;
1175	} else {
1176	printf(format: "\033[1;31mFAIL\033[0m\n");
1177	}
1178	ntest++;
1179
1180	if (!only_meta) {
1181	printf(format: "%s: same_tensor_data: ", __func__);
1182	if (same_tensor_data(orig: ctx_0, read: ctx_1)) {
1183	printf(format: "\033[1;32mOK\033[0m\n");
1184	npass++;
1185	} else {
1186	printf(format: "\033[1;31mFAIL\033[0m\n");
1187	}
1188	ntest++;
1189	}
1190
1191	ggml_backend_buffer_free(buffer: bbuf);
1192	ggml_free(ctx: ctx_0);
1193	ggml_free(ctx: ctx_1);
1194	gguf_free(ctx: gguf_ctx_0);
1195	gguf_free(ctx: gguf_ctx_1);
1196	ggml_backend_free(backend);
1197	fclose(stream: file);
1198
1199	printf(format: "\n");
1200	return std::make_pair(x&: npass, y&: ntest);
1201	}
1202
1203	static std::pair<int, int> test_gguf_set_kv(ggml_backend_dev_t dev, const unsigned int seed) {
1204	ggml_backend_t backend = ggml_backend_dev_init(device: dev, params: nullptr);
1205	printf(format: "%s: device=%s, backend=%s\n", __func__, ggml_backend_dev_description(device: dev), ggml_backend_name(backend));
1206
1207	int npass = `0`;
1208	int ntest = `0`;
1209
1210	struct gguf_context * gguf_ctx_0;
1211	struct ggml_context * ctx_0;
1212	ggml_backend_buffer_t bbuf_0;
1213	{
1214	struct random_gguf_context_result result = get_random_gguf_context(backend, seed);
1215	gguf_ctx_0 = result.gguf_ctx;
1216	ctx_0 = result.ctx;
1217	bbuf_0 = result.buffer;
1218	}
1219
1220	struct gguf_context * gguf_ctx_1;
1221	struct ggml_context * ctx_1;
1222	ggml_backend_buffer_t bbuf_1;
1223	{
1224	struct random_gguf_context_result result = get_random_gguf_context(backend, seed: seed + `1`);
1225	gguf_ctx_1 = result.gguf_ctx;
1226	ctx_1 = result.ctx;
1227	bbuf_1 = result.buffer;
1228	}
1229
1230	struct gguf_context * gguf_ctx_2 = gguf_init_empty();
1231
1232	gguf_set_kv(ctx: gguf_ctx_1, src: gguf_ctx_0);
1233	gguf_set_kv(ctx: gguf_ctx_2, src: gguf_ctx_0);
1234
1235	printf(format: "%s: same_n_kv: ", __func__);
1236	if (gguf_get_n_kv(ctx: gguf_ctx_0) == gguf_get_n_kv(ctx: gguf_ctx_2)) {
1237	printf(format: "\033[1;32mOK\033[0m\n");
1238	npass++;
1239	} else {
1240	printf(format: "\033[1;31mFAIL\033[0m\n");
1241	}
1242	ntest++;
1243
1244	printf(format: "%s: all_kv_0_in_1: ", __func__);
1245	if (all_kv_in_other(ctx: gguf_ctx_0, other: gguf_ctx_1)) {
1246	printf(format: "\033[1;32mOK\033[0m\n");
1247	npass++;
1248	} else {
1249	printf(format: "\033[1;31mFAIL\033[0m\n");
1250	}
1251	ntest++;
1252
1253	printf(format: "%s: all_kv_0_in_2: ", __func__);
1254	if (all_kv_in_other(ctx: gguf_ctx_0, other: gguf_ctx_2)) {
1255	printf(format: "\033[1;32mOK\033[0m\n");
1256	npass++;
1257	} else {
1258	printf(format: "\033[1;31mFAIL\033[0m\n");
1259	}
1260	ntest++;
1261
1262	gguf_set_kv(ctx: gguf_ctx_0, src: gguf_ctx_1);
1263
1264	printf(format: "%s: same_n_kv_after_double_copy: ", __func__);
1265	if (gguf_get_n_kv(ctx: gguf_ctx_0) == gguf_get_n_kv(ctx: gguf_ctx_1)) {
1266	printf(format: "\033[1;32mOK\033[0m\n");
1267	npass++;
1268	} else {
1269	printf(format: "\033[1;31mFAIL\033[0m\n");
1270	}
1271	ntest++;
1272
1273	printf(format: "%s: all_kv_1_in_0_after_double_copy: ", __func__);
1274	if (all_kv_in_other(ctx: gguf_ctx_1, other: gguf_ctx_0)) {
1275	printf(format: "\033[1;32mOK\033[0m\n");
1276	npass++;
1277	} else {
1278	printf(format: "\033[1;31mFAIL\033[0m\n");
1279	}
1280	ntest++;
1281
1282	ggml_backend_buffer_free(buffer: bbuf_0);
1283	ggml_backend_buffer_free(buffer: bbuf_1);
1284	ggml_free(ctx: ctx_0);
1285	ggml_free(ctx: ctx_1);
1286	gguf_free(ctx: gguf_ctx_0);
1287	gguf_free(ctx: gguf_ctx_1);
1288	gguf_free(ctx: gguf_ctx_2);
1289	ggml_backend_free(backend);
1290
1291	printf(format: "\n");
1292	return std::make_pair(x&: npass, y&: ntest);
1293	}
1294
// Prints the command line usage of the test binary to stdout.
static void print_usage() {
    printf("usage: test-gguf [seed]\n");
    printf(" if no seed is specified then a random seed is used\n");
}
1299
1300	int main(int argc, char ** argv) {
1301	if (argc > `2`) {
1302	print_usage();
1303	return `1`;
1304	}
1305
1306	std::random_device rd;
1307	const unsigned int seed = argc < `2` ? rd () : std::stoi(str: argv[`1`]);
1308
1309	// Initialize ggml backends early so the prints aren't interleaved with the test results:
1310	ggml_backend_dev_count();
1311	fprintf(stderr, format: "\n");
1312
1313	int npass = `0`;
1314	int ntest = `0`;
1315	{
1316	std::pair<int, int> result = test_handcrafted_file(seed);
1317	npass += result.first;
1318	ntest += result.second;
1319	}
1320
1321	for (size_t i = `0`; i < ggml_backend_dev_count(); ++i) {
1322	ggml_backend_dev_t dev = ggml_backend_dev_get(index: i);
1323
1324	for (bool only_meta : {true, false}) {
1325	std::pair<int, int> result = test_roundtrip(dev, seed, only_meta);
1326	npass += result.first;
1327	ntest += result.second;
1328	}
1329
1330	{
1331	std::pair<int, int> result = test_gguf_set_kv(dev, seed);
1332	npass += result.first;
1333	ntest += result.second;
1334	}
1335	}
1336
1337	printf(format: "%d/%d tests passed\n", npass, ntest);
1338	if (npass != ntest) {
1339	printf(format: "\033[1;31mFAIL\033[0m\n");
1340	return `1`;
1341	}
1342	printf(format: "\033[1;32mOK\033[0m\n");
1343	return `0`;
1344	}
1345

Browse the source code of llama.cpp/tests/test-gguf.cpp