// File: llama.cpp/tests/test-quantize-fns.cpp

// Unit tests for quantization specific functions - quantize, dequantize and dot product
2
3	#include "ggml.h"
4	#include "ggml-cpu.h"
5
6	#undef NDEBUG
7	#include <assert.h>
8	#include <math.h>
9	#include <stdio.h>
10	#include <string>
11	#include <vector>
12
13	#if defined(_MSC_VER)
14	#pragma warning(disable: 4244 4267) // possible loss of data
15	#endif
16
// Error thresholds checked in main(). A check fails unless the measured error
// is strictly below the applicable threshold.

// Limit for the difference between the CPU quantizer and the reference quantizer.
constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
// Default limit for the quantize -> dequantize round-trip error.
constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
// Round-trip limit for TQ1_0 and TQ2_0.
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_TERNARY = 0.01f;
// Round-trip limit for Q2_K and IQ2_S.
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
// Round-trip limit for Q3_K and IQ3_S.
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
// Round-trip limit for IQ3_XXS.
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f;
// Default limit for the dot product error.
constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
// Dot product limit for Q2_K, IQ2_XS, IQ2_XXS, IQ3_XXS, IQ3_S and IQ2_S.
constexpr float MAX_DOT_PRODUCT_ERROR_LOWBIT = 0.04f;
// Dot product limit for TQ1_0 and TQ2_0.
constexpr float MAX_DOT_PRODUCT_ERROR_TERNARY = 0.15f;

// Human-readable verdict, indexed by the boolean "failed" flag (false -> "ok", true -> "FAILED").
static const char* RESULT_STR[] = {"ok", "FAILED"};
28
29
// Generate synthetic data.
//
// Fills dst[0..n) with 0.1 + 2*cos(i + offset), where i is the element index.
// Different offsets give different, deterministic sequences.
static void generate_data(float offset, size_t n, float * dst) {
    for (size_t i = 0; i < n; i++) {
        // the sum is evaluated in double (0.1 is a double literal) and then narrowed to float
        dst[i] = 0.1 + 2*cosf(i + offset);
    }
}
36
// Error metric between two float arrays of length n.
//
// Returns sqrt(sum of squared differences) / n. Note that this is the
// Euclidean norm of the difference divided by n, not the textbook RMSE
// sqrt(sum / n); the error thresholds in this file are compared against
// this value, so the formula is kept as is.
static float array_rmse(const float * a1, const float * a2, size_t n) {
    // accumulate in double to limit rounding error over long arrays
    double sum = 0;
    for (size_t i = 0; i < n; i++) {
        double diff = a1[i] - a2[i];
        sum += diff * diff;
    }
    return sqrtf(sum) / n;
}
46
47	// Total quantization error on test data
48	static float total_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
49	std::vector<uint8_t> tmp_q(`2`*test_size);
50	std::vector<float> tmp_out(test_size);
51
52	qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
53	qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
54	return array_rmse(a1: test_data, a2: tmp_out.data(), n: test_size);
55	}
56
57	// Total quantization error on test data
58	static float reference_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
59	std::vector<uint8_t> tmp_q(`2`*test_size);
60	std::vector<float> tmp_out(test_size);
61	std::vector<float> tmp_out_ref(test_size);
62
63	// FIXME: why is done twice?
64	qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
65	qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
66
67	qfns->from_float_ref(test_data, tmp_q.data(), test_size);
68	qfns->to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
69
70	return array_rmse(a1: tmp_out.data(), a2: tmp_out_ref.data(), n: test_size);
71	}
72
// Reference dot product of two float arrays of length test_size.
//
// Each product is computed in float and accumulated in a double; the final
// sum is narrowed to float on return.
static float dot_product(const float * a1, const float * a2, size_t test_size) {
    double sum = 0;
    for (size_t i = 0; i < test_size; i++) {
        sum += a1[i] * a2[i];
    }
    return sum;
}
80
81	// Total dot product error
82	static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2) {
83	GGML_UNUSED(qfns);
84
85	std::vector<uint8_t> tmp_q1(`2`*test_size);
86	std::vector<uint8_t> tmp_q2(`2`*test_size);
87
88	const auto * vdot = ggml_get_type_traits_cpu(type: qfns_cpu->vec_dot_type);
89
90	qfns_cpu->from_float(test_data1, tmp_q1.data(), test_size);
91	vdot->from_float(test_data2, tmp_q2.data(), test_size);
92
93	float result = INFINITY;
94	qfns_cpu->vec_dot(test_size, &result, `0`, tmp_q1.data(), `0`, tmp_q2.data(), `0`, `1`);
95
96	const float dot_ref = dot_product(a1: test_data1, a2: test_data2, test_size);
97
98	return fabsf(x: result - dot_ref) / test_size;
99	}
100
101	int main(int argc, char * argv[]) {
102	bool verbose = false;
103	const size_t test_size = `32` * `128`;
104
105	std::string arg;
106	for (int i = `1`; i < argc; i++) {
107	arg = argv[i];
108
109	if (arg == "-v") {
110	verbose = true;
111	} else {
112	fprintf(stderr, format: "error: unknown argument: %s\n", arg.c_str());
113	return `1`;
114	}
115	}
116
117	std::vector<float> test_data(test_size);
118	std::vector<float> test_data2(test_size);
119
120	generate_data(offset: `0.0`, n: test_data.size(), dst: test_data.data());
121	generate_data(offset: `1.0`, n: test_data2.size(), dst: test_data2.data());
122
123	ggml_cpu_init();
124
125	int num_failed = `0`;
126	bool failed = false;
127
128	for (int i = `0`; i < GGML_TYPE_COUNT; i++) {
129	ggml_type type = (ggml_type) i;
130	const auto * qfns = ggml_get_type_traits(type);
131	const auto * qfns_cpu = ggml_get_type_traits_cpu(type);
132
133	// deprecated - skip
134	if (qfns->blck_size == `0`) {
135	continue;
136	}
137
138	const ggml_type ei = (ggml_type)i;
139
140	printf(format: "Testing %s\n", ggml_type_name(type: (ggml_type) i));
141	ggml_quantize_init(type: ei);
142
143	if (qfns_cpu->from_float && qfns->to_float) {
144	const float total_error = total_quantization_error(qfns, qfns_cpu, test_size, test_data: test_data.data());
145	const float max_quantization_error =
146	type == GGML_TYPE_TQ1_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
147	type == GGML_TYPE_TQ2_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
148	type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
149	type == GGML_TYPE_IQ2_S ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
150	type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
151	type == GGML_TYPE_IQ3_S ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
152	type == GGML_TYPE_IQ3_XXS ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS : MAX_QUANTIZATION_TOTAL_ERROR;
153	failed = !(total_error < max_quantization_error);
154	num_failed += failed;
155	if (failed \|\| verbose) {
156	printf(format: "%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
157	}
158
159	const float reference_error = reference_quantization_error(qfns, qfns_cpu, test_size, test_data: test_data.data());
160	failed = !(reference_error < MAX_QUANTIZATION_REFERENCE_ERROR);
161	num_failed += failed;
162	if (failed \|\| verbose) {
163	printf(format: "%5s reference implementation error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], reference_error);
164	}
165
166	const float vec_dot_error = dot_product_error(qfns, qfns_cpu, test_size, test_data1: test_data.data(), test_data2: test_data2.data());
167	const float max_allowed_error = type == GGML_TYPE_Q2_K \|\| type == GGML_TYPE_IQ2_XS \|\| type == GGML_TYPE_IQ2_XXS \|\|
168	type == GGML_TYPE_IQ3_XXS \|\| type == GGML_TYPE_IQ3_S \|\| type == GGML_TYPE_IQ2_S
169	? MAX_DOT_PRODUCT_ERROR_LOWBIT
170	: type == GGML_TYPE_TQ1_0 \|\| type == GGML_TYPE_TQ2_0
171	? MAX_DOT_PRODUCT_ERROR_TERNARY
172	: MAX_DOT_PRODUCT_ERROR;
173	failed = !(vec_dot_error < max_allowed_error);
174	num_failed += failed;
175	if (failed \|\| verbose) {
176	printf(format: "%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error);
177	}
178	}
179	}
180
181	if (num_failed \|\| verbose) {
182	printf(format: "%d tests failed\n", num_failed);
183	}
184
185	return num_failed > `0`;
186	}
187

// Browse the source code of llama.cpp/tests/test-quantize-fns.cpp