#include "ggml-backend.h"
#include "ggml-backend-impl.h"
#include "ggml-cpu.h"
#include "repack.h"
#include "traits.h"
#include "ggml-impl.h"
#include "amx/amx.h"

#include <cctype>
#include <string>
#include <vector>

#ifdef GGML_USE_CPU_HBM
#    include "hbm.h"
#endif

#ifdef GGML_USE_CPU_KLEIDIAI
#    include "kleidiai/kleidiai.h"
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
#    include "spacemit/ime.h"
#endif

#if defined(_WIN32)
#    define WIN32_LEAN_AND_MEAN
#    ifndef NOMINMAX
#        define NOMINMAX
#    endif
#    include <windows.h>
#else
#    include <unistd.h>
#endif

#if defined(__APPLE__)
#    include <sys/sysctl.h>
#    include <sys/types.h>
#endif

// ggml-backend interface

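// lazily-built list of the extra (optimized) buffer types compiled into this
// CPU backend; each candidate is guarded by its compile-time feature flag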
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
    static std::vector<ggml_backend_buffer_type_t> bufts = []() {
        std::vector<ggml_backend_buffer_type_t> bufts;

#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
        if (ggml_backend_amx_buffer_type()) {
            bufts.push_back(ggml_backend_amx_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
        if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_KLEIDIAI
        if (ggml_backend_cpu_kleidiai_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
        }
#endif

#ifdef GGML_USE_CPU_REPACK
        if (ggml_backend_cpu_repack_buffer_type()) {
            bufts.push_back(ggml_backend_cpu_repack_buffer_type());
        }
#endif

        return bufts;
    }();

    return bufts;
}

static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
    static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
        std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
        bufts.push_back(nullptr);
        return bufts;
    }();

    return extra_bufts.data();

    GGML_UNUSED(device);
}

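// true if buft is one of the extra buffer types registered above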
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
    for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
        if (extra == buft) {
            return true;
        }
    }
    return false;
}

// CPU backend - backend (stream)

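// per-stream state: thread configuration plus a scratch work buffer that is
// reused (and grown as needed) across graph computations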
struct ggml_backend_cpu_context {
    int                 n_threads;
    ggml_threadpool_t   threadpool;

    uint8_t *           work_data;
    size_t              work_size;

    ggml_abort_callback abort_callback;
    void *              abort_callback_data;
};

static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
    return "CPU";

    GGML_UNUSED(backend);
}

static void ggml_backend_cpu_free(ggml_backend_t backend) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
    delete[] cpu_ctx->work_data;
    delete cpu_ctx;
    delete backend;
}

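// a prepared plan bundles the cplan (threading + work buffer layout) with a
// copy of the graph it was planned for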
struct ggml_backend_plan_cpu {
    struct ggml_cplan cplan;
    struct ggml_cgraph cgraph;
};

static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;

    struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;

    cpu_plan->cplan  = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
    cpu_plan->cgraph = *cgraph; // FIXME: deep copy

    if (cpu_plan->cplan.work_size > 0) {
        cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
        if (cpu_plan->cplan.work_data == NULL) {
            delete cpu_plan;
            return NULL;
        }
    }

    cpu_plan->cplan.abort_callback      = cpu_ctx->abort_callback;
    cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;

    return cpu_plan;
}

static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
    struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;

    delete[] cpu_plan->cplan.work_data;
    delete cpu_plan;

    GGML_UNUSED(backend);
}

static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
    struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;

    return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);

    GGML_UNUSED(backend);
}

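// plans and computes the graph in one call, growing the cached work buffer on demand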
static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;

    struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);

    if (cpu_ctx->work_size < cplan.work_size) {
        delete[] cpu_ctx->work_data;
        cpu_ctx->work_data = new uint8_t[cplan.work_size];
        if (cpu_ctx->work_data == NULL) {
            cpu_ctx->work_size = 0;
            return GGML_STATUS_ALLOC_FAILED;
        }
        cpu_ctx->work_size = cplan.work_size;
    }
    cplan.work_data = (uint8_t *)cpu_ctx->work_data;

    cplan.abort_callback      = cpu_ctx->abort_callback;
    cplan.abort_callback_data = cpu_ctx->abort_callback_data;

    return ggml_graph_compute(cgraph, &cplan);
}

static const struct ggml_backend_i ggml_backend_cpu_i = {
    /* .get_name           = */ ggml_backend_cpu_get_name,
    /* .free               = */ ggml_backend_cpu_free,
    /* .set_tensor_async   = */ NULL,
    /* .get_tensor_async   = */ NULL,
    /* .cpy_tensor_async   = */ NULL,
    /* .synchronize        = */ NULL,
    /* .graph_plan_create  = */ ggml_backend_cpu_graph_plan_create,
    /* .graph_plan_free    = */ ggml_backend_cpu_graph_plan_free,
    /* .graph_plan_update  = */ NULL,
    /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
    /* .graph_compute      = */ ggml_backend_cpu_graph_compute,
    /* .event_record       = */ NULL,
    /* .event_wait         = */ NULL,
    /* .graph_optimize     = */ NULL,
};

static ggml_guid_t ggml_backend_cpu_guid(void) {
    static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
    return &guid;
}

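// minimal usage sketch (assumes a ggml_cgraph * graph built elsewhere):
//   ggml_backend_t backend = ggml_backend_cpu_init();
//   ggml_backend_cpu_set_n_threads(backend, 4);
//   ggml_backend_graph_compute(backend, graph);
//   ggml_backend_free(backend);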
ggml_backend_t ggml_backend_cpu_init(void) {
    // initialize CPU backend now to avoid slowing the first graph computation
    ggml_cpu_init();

    struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
    if (ctx == NULL) {
        return NULL;
    }

    ctx->n_threads           = GGML_DEFAULT_N_THREADS;
    ctx->threadpool          = NULL;
    ctx->work_data           = NULL;
    ctx->work_size           = 0;
    ctx->abort_callback      = NULL;
    ctx->abort_callback_data = NULL;

    ggml_backend_t cpu_backend = new ggml_backend {
        /* .guid    = */ ggml_backend_cpu_guid(),
        /* .iface   = */ ggml_backend_cpu_i,
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ ctx,
    };

    if (cpu_backend == NULL) {
        delete ctx;
        return NULL;
    }

    return cpu_backend;
}

bool ggml_backend_is_cpu(ggml_backend_t backend) {
    return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
}

void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
    ctx->n_threads = n_threads;
}

void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;

    if (ctx->threadpool && ctx->threadpool != threadpool) {
        // already had a different threadpool, pause/suspend it before switching
        ggml_threadpool_pause(ctx->threadpool);
    }
    ctx->threadpool = threadpool;
}

void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
    GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));

    struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
    ctx->abort_callback      = abort_callback;
    ctx->abort_callback_data = abort_callback_data;
}

// CPU backend - device

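// resolves a human-readable CPU description at construction time:
// sysctl on macOS, /proc/cpuinfo on Linux, the registry on Windows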
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";

    ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) {
            description.resize(len);
            sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) == 0) {
                    char * p = strchr(buf, ':');
                    if (p) {
                        p++;
                        while (std::isspace(*p)) {
                            p++;
                        }
                        while (std::isspace(p[strlen(p) - 1])) {
                            p[strlen(p) - 1] = '\0';
                        }
                        description = p;
                        break;
                    }
                }
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                        TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                        0,
                        KEY_READ,
                        &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            if (RegQueryValueExA(hKey,
                                "ProcessorNameString",
                                NULL,
                                NULL,
                                NULL,
                                &cpu_brand_size) == ERROR_SUCCESS) {
                description.resize(cpu_brand_size);
                if (RegQueryValueExA(hKey,
                                    "ProcessorNameString",
                                    NULL,
                                    NULL,
                                    (LPBYTE)&description[0], // NOLINT
                                    &cpu_brand_size) == ERROR_SUCCESS) {
                    if (description.find('\0') != std::string::npos) {
                        description.resize(description.find('\0'));
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};

static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
    return "CPU";

    GGML_UNUSED(dev);
}

static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
    struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;

    return ctx->description.c_str();
}

static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
#ifdef _WIN32
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    GlobalMemoryStatusEx(&status);
    *total = status.ullTotalPhys;
    *free  = status.ullAvailPhys;
#else
    long pages     = sysconf(_SC_PHYS_PAGES);
    long page_size = sysconf(_SC_PAGE_SIZE);
    *total = pages * page_size;

    // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
    *free = *total;
#endif // _WIN32

    GGML_UNUSED(dev);
}

static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
    return GGML_BACKEND_DEVICE_TYPE_CPU;

    GGML_UNUSED(dev);
}

static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_cpu_device_get_name(dev);
    props->description = ggml_backend_cpu_device_get_description(dev);
    props->type        = ggml_backend_cpu_device_get_type(dev);
    ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* .async                = */ false,
        /* .host_buffer          = */ false,
        /* .buffer_from_host_ptr = */ true,
        /* .events               = */ false,
    };
}

static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_cpu_init();

    GGML_UNUSED(dev);
    GGML_UNUSED(params);
}

static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
    return ggml_backend_cpu_buffer_type();

    GGML_UNUSED(dev);
}

static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);

    GGML_UNUSED(dev);
    GGML_UNUSED(max_tensor_size);
}

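// reports whether the CPU backend can execute the given op; if a source
// tensor lives in an extra buffer type, that buffer type decides instead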
static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
    const struct ggml_tensor * src0 = op->src[0];
    const struct ggml_tensor * src1 = op->src[1];

    if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
        return true;
    }

    // check extra buffer types
    // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
    for (int i = 0; i < 4; i++) {
        if (op->src[i] && op->src[i]->buffer &&
            ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
            auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
            return buf_extra->supports_op(dev, op);
        }
    }

    switch (op->op) {
        case GGML_OP_CPY:
        case GGML_OP_SET_ROWS:
            return
                op->type != GGML_TYPE_IQ3_XXS &&
                op->type != GGML_TYPE_IQ3_S   &&
                op->type != GGML_TYPE_IQ2_XXS &&
                op->type != GGML_TYPE_IQ2_XS  &&
                op->type != GGML_TYPE_IQ2_S   &&
                op->type != GGML_TYPE_IQ1_S   &&
                op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
        case GGML_OP_MUL_MAT:
            return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
        case GGML_OP_SOFT_MAX_BACK: {
            if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
                return false;
            }
            float max_bias = 0.0f;

            memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));

            return max_bias == 0.0f;
        }
        case GGML_OP_IM2COL_BACK:
            return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
        case GGML_OP_GET_ROWS_BACK:
            return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
        case GGML_OP_OUT_PROD:
            return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
                   src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
        default:
            return true;
    }
}

static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
    GGML_UNUSED(dev);
}

static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
    /* .get_name             = */ ggml_backend_cpu_device_get_name,
    /* .get_description      = */ ggml_backend_cpu_device_get_description,
    /* .get_memory           = */ ggml_backend_cpu_device_get_memory,
    /* .get_type             = */ ggml_backend_cpu_device_get_type,
    /* .get_props            = */ ggml_backend_cpu_device_get_props,
    /* .init_backend         = */ ggml_backend_cpu_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_cpu_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
    /* .supports_op          = */ ggml_backend_cpu_device_supports_op,
    /* .supports_buft        = */ ggml_backend_cpu_device_supports_buft,
    /* .offload_op           = */ NULL,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};

// CPU backend - backend (reg)

static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
    return "CPU";

    GGML_UNUSED(reg);
}

static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
    return 1;

    GGML_UNUSED(reg);
}

static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    GGML_ASSERT(index == 0);

    static ggml_backend_cpu_device_context ctx;
    static ggml_backend_device ggml_backend_cpu_device = {
        /* .iface   = */ ggml_backend_cpu_device_i,
        /* .reg     = */ reg,
        /* .context = */ &ctx,
    };

    return &ggml_backend_cpu_device;
}

// This is intended to replace the ggml_cpu_has_* functions when loading the CPU backend dynamically,
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
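// query sketch (ggml_backend_get_features_t is declared in ggml-backend.h):
//   ggml_backend_get_features_t get_features = (ggml_backend_get_features_t)
//       ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
//   for (ggml_backend_feature * f = get_features(reg); f->name; f++) {
//       printf("%s = %s\n", f->name, f->value);
//   }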
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
    static std::vector<ggml_backend_feature> features = []() {
        ggml_cpu_init();

        std::vector<ggml_backend_feature> features;
        if (ggml_cpu_has_sse3()) {
            features.push_back({ "SSE3", "1" });
        }
        if (ggml_cpu_has_ssse3()) {
            features.push_back({ "SSSE3", "1" });
        }
        if (ggml_cpu_has_avx()) {
            features.push_back({ "AVX", "1" });
        }
        if (ggml_cpu_has_avx_vnni()) {
            features.push_back({ "AVX_VNNI", "1" });
        }
        if (ggml_cpu_has_avx2()) {
            features.push_back({ "AVX2", "1" });
        }
        if (ggml_cpu_has_f16c()) {
            features.push_back({ "F16C", "1" });
        }
        if (ggml_cpu_has_fma()) {
            features.push_back({ "FMA", "1" });
        }
        if (ggml_cpu_has_bmi2()) {
            features.push_back({ "BMI2", "1" });
        }
        if (ggml_cpu_has_avx512()) {
            features.push_back({ "AVX512", "1" });
        }
        if (ggml_cpu_has_avx512_vbmi()) {
            features.push_back({ "AVX512_VBMI", "1" });
        }
        if (ggml_cpu_has_avx512_vnni()) {
            features.push_back({ "AVX512_VNNI", "1" });
        }
        if (ggml_cpu_has_avx512_bf16()) {
            features.push_back({ "AVX512_BF16", "1" });
        }
        if (ggml_cpu_has_amx_int8()) {
            features.push_back({ "AMX_INT8", "1" });
        }
        if (ggml_cpu_has_neon()) {
            features.push_back({ "NEON", "1" });
        }
        if (ggml_cpu_has_arm_fma()) {
            features.push_back({ "ARM_FMA", "1" });
        }
        if (ggml_cpu_has_fp16_va()) {
            features.push_back({ "FP16_VA", "1" });
        }
        if (ggml_cpu_has_matmul_int8()) {
            features.push_back({ "MATMUL_INT8", "1" });
        }
        if (ggml_cpu_has_sve()) {
            features.push_back({ "SVE", "1" });
        }
        if (ggml_cpu_has_dotprod()) {
            features.push_back({ "DOTPROD", "1" });
        }
        if (ggml_cpu_get_sve_cnt() > 0) {
            static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
            features.push_back({ "SVE_CNT", sve_cnt.c_str() });
        }
        if (ggml_cpu_has_sme()) {
            features.push_back({ "SME", "1" });
        }
        if (ggml_cpu_has_riscv_v()) {
            features.push_back({ "RISCV_V", "1" });
        }
        if (ggml_cpu_has_vsx()) {
            features.push_back({ "VSX", "1" });
        }
        if (ggml_cpu_has_vxe()) {
            features.push_back({ "VXE", "1" });
        }
        if (ggml_cpu_has_wasm_simd()) {
            features.push_back({ "WASM_SIMD", "1" });
        }
        if (ggml_cpu_has_llamafile()) {
            features.push_back({ "LLAMAFILE", "1" });
        }
    #ifdef GGML_USE_ACCELERATE
        features.push_back({ "ACCELERATE", "1" });
    #endif
    #ifdef GGML_USE_CPU_HBM
        features.push_back({ "CPU_HBM", "1" });
    #endif
    #ifdef GGML_USE_OPENMP
        features.push_back({ "OPENMP", "1" });
    #endif
    #ifdef GGML_USE_CPU_KLEIDIAI
        features.push_back({ "KLEIDIAI", "1" });
    #endif
    #ifdef GGML_USE_CPU_REPACK
        features.push_back({ "REPACK", "1" });
    #endif

        features.push_back({ nullptr, nullptr });

        return features;
    }();

    return features.data();

    GGML_UNUSED(reg);
}

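// maps optional, name-based entry points to function pointers so applications
// can reach these APIs when the backend is loaded dynamically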
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
        ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
        ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
        return (void *)fct;
    }
    if (strcmp(name, "ggml_backend_get_features") == 0) {
        return (void *)ggml_backend_cpu_get_features;
    }
    if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
        return (void *)ggml_backend_cpu_set_abort_callback;
    }
    if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
        return (void *)ggml_numa_init;
    }
    if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
        return (void *)ggml_is_numa;
    }

    // threadpool - TODO: move to ggml-base
    if (strcmp(name, "ggml_threadpool_new") == 0) {
        return (void *)ggml_threadpool_new;
    }
    if (strcmp(name, "ggml_threadpool_free") == 0) {
        return (void *)ggml_threadpool_free;
    }
    if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
        return (void *)ggml_backend_cpu_set_threadpool;
    }

    return NULL;

    GGML_UNUSED(reg);
}

static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name         = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};

ggml_backend_reg_t ggml_backend_cpu_reg(void) {
    // init CPU feature detection
    ggml_cpu_init();

    static struct ggml_backend_reg ggml_backend_cpu_reg = {
        /* .api_version = */ GGML_BACKEND_API_VERSION,
        /* .iface       = */ ggml_backend_cpu_reg_i,
        /* .context     = */ NULL,
    };

    return &ggml_backend_cpu_reg;
}

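// when built as a dynamically loadable backend, this emits the exported entry
// point that returns the registry above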
GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)