#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-cpu.h"
#include "repack.h"
#include "traits.h"
#include "ggml-impl.h"
#include "amx/amx.h"

#include <cctype>
#include <cstdio>   // fopen, fgets, fclose (used for /proc/cpuinfo parsing)
#include <cstring>  // strcmp, strncmp, strchr, strlen, memcpy
#include <string>
#include <vector>

#ifdef GGML_USE_CPU_HBM
#  include "hbm.h"
#endif

#ifdef GGML_USE_CPU_KLEIDIAI
#  include "kleidiai/kleidiai.h"
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
#  include "spacemit/ime.h"
#endif

#if defined(_WIN32)
#  define WIN32_LEAN_AND_MEAN
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  include <windows.h>
#else
#  include <unistd.h>
#endif

#if defined(__APPLE__)
#  include <sys/sysctl.h>
#  include <sys/types.h>
#endif

// ggml-backend interface
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
    static std::vector<ggml_backend_buffer_type_t> bufts = []() {
        std::vector<ggml_backend_buffer_type_t> bufts;

#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
        if (ggml_backend_amx_buffer_type()) {
            bufts.push_back(ggml_backend_amx_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
        if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_KLEIDIAI
        if (ggml_backend_cpu_kleidiai_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_REPACK
        if (ggml_backend_cpu_repack_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_repack_buffer_type());
        }
#endif

        return bufts;
    }();

    return bufts;
}

static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
    static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
        std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
        bufts.push_back(nullptr);
        return bufts;
    }();

    return extra_bufts.data();

    GGML_UNUSED(device);
}

static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
    for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
        if (extra == buft) {
            return true;
        }
    }
    return false;
}

// CPU backend - backend (stream)

struct ggml_backend_cpu_context {
    int n_threads;
    ggml_threadpool_t threadpool;

    uint8_t * work_data;
    size_t work_size;

    ggml_abort_callback abort_callback;
    void * abort_callback_data;
};

static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
    return "CPU";

    GGML_UNUSED(backend);
}

static void ggml_backend_cpu_free(ggml_backend_t backend) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
    delete[] cpu_ctx->work_data;
    delete cpu_ctx;
    delete backend;
}

struct ggml_backend_plan_cpu {
    struct ggml_cplan cplan;
    struct ggml_cgraph cgraph;
};

static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;

    struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;

    cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
    cpu_plan->cgraph = *cgraph; // FIXME: deep copy

    if (cpu_plan->cplan.work_size > 0) {
        cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
        if (cpu_plan->cplan.work_data == NULL) {
            delete cpu_plan;
            return NULL;
        }
    }

    cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
    cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;

    return cpu_plan;
}

static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
    struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;

    delete[] cpu_plan->cplan.work_data;
    delete cpu_plan;

    GGML_UNUSED(backend);
}

static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
    struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;

    return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);

    GGML_UNUSED(backend);
}

static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;

    struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);

    if (cpu_ctx->work_size < cplan.work_size) {
        delete[] cpu_ctx->work_data;
        cpu_ctx->work_data = new uint8_t[cplan.work_size];
        if (cpu_ctx->work_data == NULL) {
            cpu_ctx->work_size = 0;
            return GGML_STATUS_ALLOC_FAILED;
        }
        cpu_ctx->work_size = cplan.work_size;
    }
    cplan.work_data = (uint8_t *)cpu_ctx->work_data;

    cplan.abort_callback = cpu_ctx->abort_callback;
    cplan.abort_callback_data = cpu_ctx->abort_callback_data;

    return ggml_graph_compute(cgraph, &cplan);
}

static const struct ggml_backend_i ggml_backend_cpu_i = {
    /* .get_name = */ ggml_backend_cpu_get_name,
    /* .free = */ ggml_backend_cpu_free,
    /* .set_tensor_async = */ NULL,
    /* .get_tensor_async = */ NULL,
    /* .cpy_tensor_async = */ NULL,
    /* .synchronize = */ NULL,
    /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
    /* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
    /* .graph_plan_update = */ NULL,
    /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
    /* .graph_compute = */ ggml_backend_cpu_graph_compute,
    /* .event_record = */ NULL,
    /* .event_wait = */ NULL,
    /* .graph_optimize = */ NULL,
};

static ggml_guid_t ggml_backend_cpu_guid(void) {
    static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
    return &guid;
}

ggml_backend_t ggml_backend_cpu_init(void) {
    // initialize CPU backend now to avoid slowing the first graph computation
    ggml_cpu_init();

    struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
    if (ctx == NULL) {
        return NULL;
    }

    ctx->n_threads = GGML_DEFAULT_N_THREADS;
    ctx->threadpool = NULL;
    ctx->work_data = NULL;
    ctx->work_size = 0;
    ctx->abort_callback = NULL;
    ctx->abort_callback_data = NULL;

    ggml_backend_t cpu_backend = new ggml_backend {
        /* .guid = */ ggml_backend_cpu_guid(),
        /* .iface = */ ggml_backend_cpu_i,
        /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ ctx,
    };

    if (cpu_backend == NULL) {
        delete ctx;
        return NULL;
    }

    return cpu_backend;
}
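
// Usage sketch (illustrative, not part of this file): a minimal sequence for
// driving the CPU backend directly, assuming a graph `gf` of type
// `struct ggml_cgraph *` built elsewhere with the ggml graph API.
//
//     ggml_backend_t backend = ggml_backend_cpu_init();
//     ggml_backend_cpu_set_n_threads(backend, 8);
//     enum ggml_status status = ggml_backend_graph_compute(backend, gf);
//     GGML_ASSERT(status == GGML_STATUS_SUCCESS);
//     ggml_backend_free(backend);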

bool ggml_backend_is_cpu(ggml_backend_t backend) {
    return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
}

void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
    ctx->n_threads = n_threads;
}

void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;

    if (ctx->threadpool && ctx->threadpool != threadpool) {
        // already had a different threadpool, pause/suspend it before switching
        ggml_threadpool_pause(ctx->threadpool);
    }
    ctx->threadpool = threadpool;
}

void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
    ctx->abort_callback = abort_callback;
    ctx->abort_callback_data = abort_callback_data;
}
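
// Usage sketch (illustrative): an abort callback returns true to cancel the
// current graph computation; it is checked between graph nodes. The callback
// name and its `deadline_us` payload below are hypothetical.
//
//     static bool abort_after_deadline(void * data) {
//         const int64_t deadline_us = *(const int64_t *) data;
//         return ggml_time_us() > deadline_us; // returning true aborts
//     }
//
//     int64_t deadline_us = ggml_time_us() + 5 * 1000 * 1000; // 5 s from now
//     ggml_backend_cpu_set_abort_callback(backend, abort_after_deadline, &deadline_us);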

// CPU backend - device

struct ggml_backend_cpu_device_context {
    std::string description = "CPU";

    ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
            description.resize(len);
            sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) == 0) {
                    char * p = strchr(buf, ':');
                    if (p) {
                        p++;
                        while (std::isspace(*p)) {
                            p++;
                        }
                        while (std::isspace(p[strlen(p) - 1])) {
                            p[strlen(p) - 1] = '\0';
                        }
                        description = p;
                        break;
                    }
                }
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                         TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                         0,
                         KEY_READ,
                         &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            if (RegQueryValueExA(hKey,
                                 "ProcessorNameString",
                                 NULL,
                                 NULL,
                                 NULL,
                                 &cpu_brand_size) == ERROR_SUCCESS) {
                description.resize(cpu_brand_size);
                if (RegQueryValueExA(hKey,
                                     "ProcessorNameString",
                                     NULL,
                                     NULL,
                                     (LPBYTE)&description[0], // NOLINT
                                     &cpu_brand_size) == ERROR_SUCCESS) {
                    if (description.find('\0') != std::string::npos) {
                        description.resize(description.find('\0'));
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};

static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
    return "CPU";

    GGML_UNUSED(dev);
}

static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
    struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;

    return ctx->description.c_str();
}

static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
#ifdef _WIN32
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    GlobalMemoryStatusEx(&status);
    *total = status.ullTotalPhys;
    *free = status.ullAvailPhys;
#else
    long pages = sysconf(_SC_PHYS_PAGES);
    long page_size = sysconf(_SC_PAGE_SIZE);
    *total = pages * page_size;

    // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
    *free = *total;
#endif // _WIN32

    GGML_UNUSED(dev);
}

static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_CPU;

    GGML_UNUSED(dev);
}

static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name = ggml_backend_cpu_device_get_name(dev);
    props->description = ggml_backend_cpu_device_get_description(dev);
    props->type = ggml_backend_cpu_device_get_type(dev);
    ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* .async = */ false,
        /* .host_buffer = */ false,
        /* .buffer_from_host_ptr = */ true,
        /* .events = */ false,
    };
}

static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_cpu_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}

static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_cpu_buffer_type();

    GGML_UNUSED(dev);
}

static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);

    GGML_UNUSED(dev);
    GGML_UNUSED(max_tensor_size);
}

static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
    const struct ggml_tensor * src0 = op->src[0];
    const struct ggml_tensor * src1 = op->src[1];

    if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
        return true;
    }

    // check extra buffer types
    // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
    for (int i = 0; i < 4; i++) {
        if (op->src[i] && op->src[i]->buffer &&
            ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
            auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
            return buf_extra->supports_op(dev, op);
        }
    }

    switch (op->op) {
        case GGML_OP_CPY:
        case GGML_OP_SET_ROWS:
            return
                op->type != GGML_TYPE_IQ3_XXS &&
                op->type != GGML_TYPE_IQ3_S &&
                op->type != GGML_TYPE_IQ2_XXS &&
                op->type != GGML_TYPE_IQ2_XS &&
                op->type != GGML_TYPE_IQ2_S &&
                op->type != GGML_TYPE_IQ1_S &&
                op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
        case GGML_OP_MUL_MAT:
            return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
        case GGML_OP_SOFT_MAX_BACK: {
            if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
                return false;
            }
            float max_bias = 0.0f;

            memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));

            return max_bias == 0.0f;
        }
        case GGML_OP_IM2COL_BACK:
            return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
        case GGML_OP_GET_ROWS_BACK:
            return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
        case GGML_OP_OUT_PROD:
            return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
                   src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
        default:
            return true;
    }
}
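
// Usage sketch (illustrative): applications and the scheduler reach this check
// through the public device API rather than calling the function above
// directly; `node` is a hypothetical tensor from a graph.
//
//     ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
//     if (dev && ggml_backend_dev_supports_op(dev, node)) {
//         // the CPU backend can compute this node
//     }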

static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
    GGML_UNUSED(dev);
}

static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
    /* .get_name = */ ggml_backend_cpu_device_get_name,
    /* .get_description = */ ggml_backend_cpu_device_get_description,
    /* .get_memory = */ ggml_backend_cpu_device_get_memory,
    /* .get_type = */ ggml_backend_cpu_device_get_type,
    /* .get_props = */ ggml_backend_cpu_device_get_props,
    /* .init_backend = */ ggml_backend_cpu_device_init_backend,
    /* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
    /* .supports_op = */ ggml_backend_cpu_device_supports_op,
    /* .supports_buft = */ ggml_backend_cpu_device_supports_buft,
    /* .offload_op = */ NULL,
    /* .event_new = */ NULL,
    /* .event_free = */ NULL,
    /* .event_synchronize = */ NULL,
};

// CPU backend - backend (reg)

static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
    return "CPU";

    GGML_UNUSED(reg);
}

static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
    return 1;

    GGML_UNUSED(reg);
}

static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_cpu_device_context ctx;
    static ggml_backend_device ggml_backend_cpu_device = {
        /* .iface = */ ggml_backend_cpu_device_i,
        /* .reg = */ reg,
        /* .context = */ &ctx,
    };

    return &ggml_backend_cpu_device;
}

// This is intended to replace the ggml_cpu_has_* functions when loading the CPU backend dynamically,
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
    static std::vector<ggml_backend_feature> features = []() {
        ggml_cpu_init();

        std::vector<ggml_backend_feature> features;
        if (ggml_cpu_has_sse3()) {
            features.push_back({ "SSE3", "1" });
        }
        if (ggml_cpu_has_ssse3()) {
            features.push_back({ "SSSE3", "1" });
        }
        if (ggml_cpu_has_avx()) {
            features.push_back({ "AVX", "1" });
        }
        if (ggml_cpu_has_avx_vnni()) {
            features.push_back({ "AVX_VNNI", "1" });
        }
        if (ggml_cpu_has_avx2()) {
            features.push_back({ "AVX2", "1" });
        }
        if (ggml_cpu_has_f16c()) {
            features.push_back({ "F16C", "1" });
        }
        if (ggml_cpu_has_fma()) {
            features.push_back({ "FMA", "1" });
        }
        if (ggml_cpu_has_bmi2()) {
            features.push_back({ "BMI2", "1" });
        }
        if (ggml_cpu_has_avx512()) {
            features.push_back({ "AVX512", "1" });
        }
        if (ggml_cpu_has_avx512_vbmi()) {
            features.push_back({ "AVX512_VBMI", "1" });
        }
        if (ggml_cpu_has_avx512_vnni()) {
            features.push_back({ "AVX512_VNNI", "1" });
        }
        if (ggml_cpu_has_avx512_bf16()) {
            features.push_back({ "AVX512_BF16", "1" });
        }
        if (ggml_cpu_has_amx_int8()) {
            features.push_back({ "AMX_INT8", "1" });
        }
        if (ggml_cpu_has_neon()) {
            features.push_back({ "NEON", "1" });
        }
        if (ggml_cpu_has_arm_fma()) {
            features.push_back({ "ARM_FMA", "1" });
        }
        if (ggml_cpu_has_fp16_va()) {
            features.push_back({ "FP16_VA", "1" });
        }
        if (ggml_cpu_has_matmul_int8()) {
            features.push_back({ "MATMUL_INT8", "1" });
        }
        if (ggml_cpu_has_sve()) {
            features.push_back({ "SVE", "1" });
        }
        if (ggml_cpu_has_dotprod()) {
            features.push_back({ "DOTPROD", "1" });
        }
        if (ggml_cpu_get_sve_cnt() > 0) {
            static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
            features.push_back({ "SVE_CNT", sve_cnt.c_str() });
        }
        if (ggml_cpu_has_sme()) {
            features.push_back({ "SME", "1" });
        }
        if (ggml_cpu_has_riscv_v()) {
            features.push_back({ "RISCV_V", "1" });
        }
        if (ggml_cpu_has_vsx()) {
            features.push_back({ "VSX", "1" });
        }
        if (ggml_cpu_has_vxe()) {
            features.push_back({ "VXE", "1" });
        }
        if (ggml_cpu_has_wasm_simd()) {
            features.push_back({ "WASM_SIMD", "1" });
        }
        if (ggml_cpu_has_llamafile()) {
            features.push_back({ "LLAMAFILE", "1" });
        }
    #ifdef GGML_USE_ACCELERATE
        features.push_back({ "ACCELERATE", "1" });
    #endif
    #ifdef GGML_USE_CPU_HBM
        features.push_back({ "CPU_HBM", "1" });
    #endif
    #ifdef GGML_USE_OPENMP
        features.push_back({ "OPENMP", "1" });
    #endif
    #ifdef GGML_USE_CPU_KLEIDIAI
        features.push_back({ "KLEIDIAI", "1" });
    #endif
    #ifdef GGML_USE_CPU_REPACK
        features.push_back({ "REPACK", "1" });
    #endif

        features.push_back({ nullptr, nullptr });

        return features;
    }();

    return features.data();

    GGML_UNUSED(reg);
}
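
// Usage sketch (illustrative): when the backend is loaded dynamically, the
// feature table above is reached via ggml_backend_reg_get_proc_address() and
// walked until the { nullptr, nullptr } sentinel pushed at the end.
//
//     ggml_backend_reg_t reg = ggml_backend_cpu_reg();
//     auto get_features = (ggml_backend_get_features_t)
//         ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
//     if (get_features) {
//         for (ggml_backend_feature * f = get_features(reg); f->name; f++) {
//             printf("%s = %s\n", f->name, f->value);
//         }
//     }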

static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
        ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
        ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_get_features") == 0) {
        return (void *)ggml_backend_cpu_get_features;
    }
    if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
        return (void *)ggml_backend_cpu_set_abort_callback;
    }
    if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
        return (void *)ggml_numa_init;
    }
    if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
        return (void *)ggml_is_numa;
    }

    // threadpool - TODO: move to ggml-base
    if (strcmp(name, "ggml_threadpool_new") == 0) {
        return (void *)ggml_threadpool_new;
    }
    if (strcmp(name, "ggml_threadpool_free") == 0) {
        return (void *)ggml_threadpool_free;
    }
    if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
        return (void *)ggml_backend_cpu_set_threadpool;
    }

    return NULL;

    GGML_UNUSED(reg);
}

static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};

ggml_backend_reg_t ggml_backend_cpu_reg(void) {
    // init CPU feature detection
    ggml_cpu_init();

    static struct ggml_backend_reg ggml_backend_cpu_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface = */ ggml_backend_cpu_reg_i,
        /* .context = */ NULL,
    };

    return &ggml_backend_cpu_reg;
}

GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)