// test-rope.cpp source code [llama.cpp/tests/test-rope.cpp]

#include "ggml.h"
#include "ggml-cpu.h"

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>
9
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

#define MAX_NARGS 3

// Redefine MIN/MAX locally in case a previously included header already
// provided them. Note: each argument may be evaluated twice.
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//
// The GGML_PRINT_DEBUG* macros forward to printf only when GGML_DEBUG is at
// least 1, 5 or 10 respectively; otherwise they expand to nothing.
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)
50
// Returns a pseudo-random float in [0, 1], computed as rand() / RAND_MAX.
static float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}
54
// Returns rand() % n, or 0 when n == 0 (avoids a modulo by zero).
static int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}
59
60	static void get_random_dims(int64_t * dims, int ndims) {
61	dims[`0`] = dims[`1`] = dims[`2`] = dims[`3`] = `1`;
62
63	for (int i = `0`; i < ndims; i++) {
64	dims[i] = `1` + irand(n: `4`);
65	}
66	}
67
68	static struct ggml_tensor * get_random_tensor_f32(
69	struct ggml_context * ctx0,
70	int ndims,
71	const int64_t ne[],
72	float fmin,
73	float fmax) {
74	struct ggml_tensor * result = ggml_new_tensor(ctx: ctx0, type: GGML_TYPE_F32, n_dims: ndims, ne);
75
76	switch (ndims) {
77	case `1`:
78	for (int i0 = `0`; i0 < ne[`0`]; i0++) {
79	((float )result->data)[i0] = frand()(fmax - fmin) + fmin;
80	}
81	break;
82	case `2`:
83	for (int i1 = `0`; i1 < ne[`1`]; i1++) {
84	for (int i0 = `0`; i0 < ne[`0`]; i0++) {
85	((float )result->data)[i1ne[`0`] + i0] = frand()*(fmax - fmin) + fmin;
86	}
87	}
88	break;
89	case `3`:
90	for (int i2 = `0`; i2 < ne[`2`]; i2++) {
91	for (int i1 = `0`; i1 < ne[`1`]; i1++) {
92	for (int i0 = `0`; i0 < ne[`0`]; i0++) {
93	((float )result->data)[i2ne[`1`]ne[`0`] + i1ne[`0`] + i0] = frand()*(fmax - fmin) + fmin;
94	}
95	}
96	}
97	break;
98	case `4`:
99	for (int i3 = `0`; i3 < ne[`3`]; i3++) {
100	for (int i2 = `0`; i2 < ne[`2`]; i2++) {
101	for (int i1 = `0`; i1 < ne[`1`]; i1++) {
102	for (int i0 = `0`; i0 < ne[`0`]; i0++) {
103	((float )result->data)[i3ne[`2`]ne[`1`]ne[`0`] + i2ne[`1`]ne[`0`] + i1ne[`0`] + i0] = frand()(fmax - fmin) + fmin;
104	}
105	}
106	}
107	}
108	break;
109	default:
110	assert(false);
111	};
112
113	return result;
114	}
115
116	static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
117	struct ggml_cplan plan = ggml_graph_plan(cgraph: graph, n_threads, threadpool: nullptr);
118
119	if (plan.work_size > `0`) {
120	buf.resize(new_size: plan.work_size);
121	plan.work_data = buf.data();
122	}
123
124	ggml_graph_compute(cgraph: graph, cplan: &plan);
125	}
126
127	int main(int /argc/, const char ** /argv/) {
128	struct ggml_init_params params = {
129	/ .mem_size = / `128``1024``1024`,
130	/ .mem_buffer = / NULL,
131	/ .no_alloc = / false,
132	};
133
134	std::vector<uint8_t> work_buffer;
135
136	struct ggml_context * ctx0 = ggml_init(params);
137
138	struct ggml_tensor * x;
139
140	// rope f32
141	for (int m = `0`; m < `6`; ++m) {
142	const int ndims = `4`;
143
144	const int64_t n_rot = `128`;
145	const int64_t ne[`4`] = { `2`*n_rot, `32`, `73`, `1` };
146
147	const int n_past_0 = `100`;
148	const int n_past_2 = `33`;
149
150	struct ggml_tensor * r0;
151	struct ggml_tensor * r1;
152	struct ggml_tensor * r2;
153	x = get_random_tensor_f32(ctx0, ndims, ne, fmin: -`1.0f`, fmax: `1.0f`);
154	int mode = -`1`;
155
156	if (m < `3`) {
157	struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`]);
158	struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`]);
159	struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`]);
160
161	for (int i = `0`; i < ne[`2`]; ++i) {
162	((int32_t *) p0->data)[i] = n_past_0 + i;
163	((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
164	((int32_t *) p2->data)[i] = n_past_2 + i;
165	}
166	// test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
167	mode = m == `0` ? `0` : m == `1` ? `2` : `4`;
168
169	// 100, 101, 102, ..., 172
170	r0 = ggml_rope(ctx: ctx0, a: x, b: p0, n_dims: n_rot, mode);
171	// -67, -67, -67, ..., -67
172	r1 = ggml_rope(ctx: ctx0, a: r0, b: p1, n_dims: n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
173
174	// 33, 34, 35, ..., 105
175	r2 = ggml_rope(ctx: ctx0, a: x, b: p2, n_dims: n_rot, mode);
176	} else {
177	// testing multi-dimension rope position embedding mode
178	struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`] * `4`);
179	struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`] * `4`);
180	struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx: ctx0, type: GGML_TYPE_I32, ne0: ne[`2`] * `4`);
181
182	int sections[`4`] = {`16`, `24`, `24`, `0`};
183	mode = (m == `3`) ? GGML_ROPE_TYPE_MROPE : (m == `4`) ? GGML_ROPE_TYPE_VISION : GGML_ROPE_TYPE_IMROPE;
184
185	for (int i = `0`; i < ne[`2`]; ++i) {
186	for (int j = `0`; j < `4`; ++j) {
187	((int32_t ) p0->data)[i + ne[`2`] j] = n_past_0 + i + j;
188	((int32_t ) p1->data)[i + ne[`2`] j] = n_past_2 - n_past_0;
189	((int32_t ) p2->data)[i + ne[`2`] j] = n_past_2 + i + j;
190	}
191	}
192
193	// [[100, 101, 102, ..., 172],
194	// [101, 102, 103, ..., 173],
195	// [102, 103, 104, ..., 174]]
196	r0 = ggml_rope_multi(
197	ctx: ctx0, a: x, b: p0, c: nullptr,
198	n_dims: n_rot, sections, mode, n_ctx_orig: `32768`, freq_base: `1000000`, freq_scale: `1`, ext_factor: `0`, attn_factor: `1`, beta_fast: `32`, beta_slow: `1`);
199	// [[-67, -67, -67, ..., -67]
200	// [-67, -67, -67, ..., -67]
201	// [-67, -67, -67, ..., -67]]
202	r1 = ggml_rope_multi(
203	ctx: ctx0, a: r0, b: p1, c: nullptr,
204	n_dims: n_rot, sections, mode, n_ctx_orig: `32768`, freq_base: `1000000`, freq_scale: `1`, ext_factor: `0`, attn_factor: `1`, beta_fast: `32`, beta_slow: `1`);
205
206	// [[33, 34, 35, ..., 105]
207	// [34, 35, 36, ..., 106]
208	// [35, 36, 37, ..., 107]]
209	r2 = ggml_rope_multi(
210	ctx: ctx0, a: x, b: p2, c: nullptr,
211	n_dims: n_rot, sections, mode, n_ctx_orig: `32768`, freq_base: `1000000`, freq_scale: `1`, ext_factor: `0`, attn_factor: `1`, beta_fast: `32`, beta_slow: `1`);
212	}
213
214	ggml_cgraph * gf = ggml_new_graph(ctx: ctx0);
215
216	ggml_build_forward_expand(cgraph: gf, tensor: r0);
217	ggml_build_forward_expand(cgraph: gf, tensor: r1);
218	ggml_build_forward_expand(cgraph: gf, tensor: r2);
219
220	ggml_graph_compute_helper(buf&: work_buffer, graph: gf, n_threads: `4`);
221
222	// check that r1 and r2 are the same
223	{
224	double sum0 = `0.0f`;
225	double sum1 = `0.0f`;
226	double diff = `0.0f`;
227
228	const float * r1_data = (float *) r1->data;
229	const float * r2_data = (float *) r2->data;
230
231	const int n_elements = ggml_nelements(tensor: r1);
232
233	for (int i = `0`; i < n_elements; ++i) {
234	sum0 += fabs(x: r1_data[i]);
235	sum1 += fabs(x: r2_data[i]);
236	diff += fabs(x: r1_data[i] - r2_data[i]);
237	//if (fabs(r1_data[i] - r2_data[i]) > 0.0001f) {
238	// printf("%d: %f %f\n", i, r1_data[i], r2_data[i]);
239	// printf("diff: %f\n", fabs(r1_data[i] - r2_data[i]));
240	//}
241	}
242
243	//for (int i = 4096; i < 4096 + 128; ++i) {
244	// printf("%f %f\n", r1_data[i], r2_data[i]);
245	//}
246
247	printf(format: "mode: %d\n", mode);
248	printf(format: "sum0: %f\n", sum0);
249	printf(format: "sum1: %f\n", sum1);
250	printf(format: "diff: %f\n", diff);
251	printf(format: "rel err: %f\n", diff / sum0);
252	printf(format: "rel err: %f\n", diff / sum1);
253
254	GGML_ASSERT(diff / sum0 < `0.0001f`);
255	GGML_ASSERT(diff / sum1 < `0.0001f`);
256	}
257	}
258
259	ggml_free(ctx: ctx0);
260
261	return `0`;
262	}
263

// Browse the source code of llama.cpp/tests/test-rope.cpp