#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <memory>
#include <string>
#include <system_error>
#include <type_traits>
#include <vector>
12
13#ifdef _WIN32
14# define WIN32_LEAN_AND_MEAN
15# ifndef NOMINMAX
16# define NOMINMAX
17# endif
18# include <windows.h>
19#elif defined(__APPLE__)
20# include <mach-o/dyld.h>
21# include <dlfcn.h>
22#else
23# include <dlfcn.h>
24# include <unistd.h>
25#endif
26
27// Backend registry
28#ifdef GGML_USE_CPU
29#include "ggml-cpu.h"
30#endif
31
32#ifdef GGML_USE_CUDA
33#include "ggml-cuda.h"
34#endif
35
36#ifdef GGML_USE_METAL
37#include "ggml-metal.h"
38#endif
39
40#ifdef GGML_USE_SYCL
41#include "ggml-sycl.h"
42#endif
43
44#ifdef GGML_USE_VULKAN
45#include "ggml-vulkan.h"
46#endif
47
48#ifdef GGML_USE_WEBGPU
49#include "ggml-webgpu.h"
50#endif
51
52#ifdef GGML_USE_ZDNN
53#include "ggml-zdnn.h"
54#endif
55
56#ifdef GGML_USE_OPENCL
57#include "ggml-opencl.h"
58#endif
59
60#ifdef GGML_USE_HEXAGON
61#include "ggml-hexagon.h"
62#endif
63
64#ifdef GGML_USE_BLAS
65#include "ggml-blas.h"
66#endif
67
68#ifdef GGML_USE_RPC
69#include "ggml-rpc.h"
70#endif
71
72#ifdef GGML_USE_CANN
73#include "ggml-cann.h"
74#endif
75
// suppress deprecation warnings for the path-to-UTF-8 conversion in path_str() below
77#if defined(__clang__)
78# pragma clang diagnostic push
79# pragma clang diagnostic ignored "-Wdeprecated-declarations"
80#elif defined(__GNUC__)
81# pragma GCC diagnostic push
82# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
83#endif
84
85namespace fs = std::filesystem;
86
// Converts a filesystem path into a UTF-8 encoded std::string (used for log messages).
// If the conversion throws, the exception is swallowed and an empty string is returned.
static std::string path_str(const fs::path & path) {
    try {
#if defined(__cpp_lib_char8_t)
        // C++20 and later: u8string() yields std::u8string, so view its bytes as plain char
        const std::u8string utf8 = path.u8string();
        return std::string(reinterpret_cast<const char *>(utf8.c_str()));
#else
        // C++17: u8string() already yields std::string
        return path.u8string();
#endif
    } catch (...) {
        // best effort only: fall through and hand back an empty string
    }
    return std::string();
}
102
103#if defined(__clang__)
104# pragma clang diagnostic pop
105#elif defined(__GNUC__)
106# pragma GCC diagnostic pop
107#endif
108
109#ifdef _WIN32
110
111using dl_handle = std::remove_pointer_t<HMODULE>;
112
113struct dl_handle_deleter {
114 void operator()(HMODULE handle) {
115 FreeLibrary(handle);
116 }
117};
118
119static dl_handle * dl_load_library(const fs::path & path) {
120 // suppress error dialogs for missing DLLs
121 DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
122 SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
123
124 HMODULE handle = LoadLibraryW(path.wstring().c_str());
125
126 SetErrorMode(old_mode);
127
128 return handle;
129}
130
131static void * dl_get_sym(dl_handle * handle, const char * name) {
132 DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
133 SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
134
135 void * p = (void *) GetProcAddress(handle, name);
136
137 SetErrorMode(old_mode);
138
139 return p;
140}
141
// Windows counterpart of the dlerror()-based helper: no error text is produced here,
// so callers always receive an empty string.
static const char * dl_error() {
    return "";
}
145
146#else
147
// With the dlfcn API a library handle is an opaque void pointer.
using dl_handle = void;

// Deleter for owning library handles: closes the library with dlclose.
struct dl_handle_deleter {
    void operator()(void * lib) {
        dlclose(lib);
    }
};
155
// Loads the dynamic library at `path` with dlopen (RTLD_NOW | RTLD_LOCAL).
// Returns the library handle, or nullptr if loading failed.
static void * dl_load_library(const fs::path & path) {
    const std::string native_path = path.string();
    return dlopen(native_path.c_str(), RTLD_NOW | RTLD_LOCAL);
}
161
162static void * dl_get_sym(dl_handle * handle, const char * name) {
163 return dlsym(handle: handle, name: name);
164}
165
// Returns the message reported by dlerror(), or an empty string when dlerror() returns
// a null pointer. The result is never null, so it can be passed straight to a "%s" format.
static const char * dl_error() {
    const char * message = dlerror();
    if (message == nullptr) {
        return "";
    }
    return message;
}
170
171#endif
172
173using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
174
175struct ggml_backend_reg_entry {
176 ggml_backend_reg_t reg;
177 dl_handle_ptr handle;
178};
179
180struct ggml_backend_registry {
181 std::vector<ggml_backend_reg_entry> backends;
182 std::vector<ggml_backend_dev_t> devices;
183
184 ggml_backend_registry() {
185#ifdef GGML_USE_CUDA
186 register_backend(reg: ggml_backend_cuda_reg());
187#endif
188#ifdef GGML_USE_METAL
189 register_backend(ggml_backend_metal_reg());
190#endif
191#ifdef GGML_USE_SYCL
192 register_backend(ggml_backend_sycl_reg());
193#endif
194#ifdef GGML_USE_VULKAN
195 register_backend(reg: ggml_backend_vk_reg());
196#endif
197#ifdef GGML_USE_WEBGPU
198 register_backend(ggml_backend_webgpu_reg());
199#endif
200#ifdef GGML_USE_ZDNN
201 register_backend(ggml_backend_zdnn_reg());
202#endif
203#ifdef GGML_USE_OPENCL
204 register_backend(ggml_backend_opencl_reg());
205#endif
206#ifdef GGML_USE_HEXAGON
207 register_backend(ggml_backend_hexagon_reg());
208#endif
209#ifdef GGML_USE_CANN
210 register_backend(ggml_backend_cann_reg());
211#endif
212#ifdef GGML_USE_BLAS
213 register_backend(ggml_backend_blas_reg());
214#endif
215#ifdef GGML_USE_RPC
216 register_backend(ggml_backend_rpc_reg());
217#endif
218#ifdef GGML_USE_CPU
219 register_backend(reg: ggml_backend_cpu_reg());
220#endif
221 }
222
223 ~ggml_backend_registry() {
224 // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
225 // since backend threads may still be running and accessing resources from the dynamic library
226 for (auto & entry : backends) {
227 if (entry.handle) {
228 entry.handle.release(); // NOLINT
229 }
230 }
231 }
232
233 void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
234 if (!reg) {
235 return;
236 }
237
238#ifndef NDEBUG
239 GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
240 __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
241#endif
242 backends.push_back(x: { .reg: reg, .handle: std::move(handle) });
243 for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
244 register_device(device: ggml_backend_reg_dev_get(reg, index: i));
245 }
246 }
247
248 void register_device(ggml_backend_dev_t device) {
249#ifndef NDEBUG
250 GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
251#endif
252 devices.push_back(x: device);
253 }
254
255 ggml_backend_reg_t load_backend(const fs::path & path, bool silent) {
256 dl_handle_ptr handle { dl_load_library(path) };
257 if (!handle) {
258 if (!silent) {
259 GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
260 }
261 return nullptr;
262 }
263
264 auto score_fn = (ggml_backend_score_t) dl_get_sym(handle: handle.get(), name: "ggml_backend_score");
265 if (score_fn && score_fn() == 0) {
266 if (!silent) {
267 GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path_str(path).c_str());
268 }
269 return nullptr;
270 }
271
272 auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle: handle.get(), name: "ggml_backend_init");
273 if (!backend_init_fn) {
274 if (!silent) {
275 GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path_str(path).c_str());
276 }
277 return nullptr;
278 }
279
280 ggml_backend_reg_t reg = backend_init_fn();
281 if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
282 if (!silent) {
283 if (!reg) {
284 GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n",
285 __func__, path_str(path).c_str());
286 } else {
287 GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
288 __func__, path_str(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
289 }
290 }
291 return nullptr;
292 }
293
294 GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path_str(path).c_str());
295
296 register_backend(reg, handle: std::move(handle));
297
298 return reg;
299 }
300
301 void unload_backend(ggml_backend_reg_t reg, bool silent) {
302 auto it = std::find_if(first: backends.begin(), last: backends.end(),
303 pred: [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
304
305 if (it == backends.end()) {
306 if (!silent) {
307 GGML_LOG_ERROR("%s: backend not found\n", __func__);
308 }
309 return;
310 }
311
312 if (!silent) {
313 GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
314 }
315
316 // remove devices
317 devices.erase(
318 first: std::remove_if(first: devices.begin(), last: devices.end(),
319 pred: [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(device: dev) == reg; }),
320 last: devices.end());
321
322 // remove backend
323 backends.erase(position: it);
324 }
325};
326
327static ggml_backend_registry & get_reg() {
328 static ggml_backend_registry reg;
329 return reg;
330}
331
332// Internal API
333void ggml_backend_register(ggml_backend_reg_t reg) {
334 get_reg().register_backend(reg);
335}
336
337void ggml_backend_device_register(ggml_backend_dev_t device) {
338 get_reg().register_device(device);
339}
340
341// Backend (reg) enumeration
// Case-insensitive equality of two NUL-terminated strings, compared byte by byte
// with std::tolower.
// Returns true when both strings have the same length and every pair of bytes matches
// after lowercasing. A null pointer only compares equal to another null pointer.
static bool striequals(const char * a, const char * b) {
    if (a == nullptr || b == nullptr) {
        return a == b;
    }
    for (; *a && *b; a++, b++) {
        // std::tolower has undefined behavior for negative arguments other than EOF,
        // so bytes >= 0x80 must go through unsigned char first
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // equal only if both strings ended at the same position
    return *a == *b;
}
350
351size_t ggml_backend_reg_count() {
352 return get_reg().backends.size();
353}
354
355ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
356 GGML_ASSERT(index < ggml_backend_reg_count());
357 return get_reg().backends[index].reg;
358}
359
360ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
361 for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
362 ggml_backend_reg_t reg = ggml_backend_reg_get(index: i);
363 if (striequals(a: ggml_backend_reg_name(reg), b: name)) {
364 return reg;
365 }
366 }
367 return nullptr;
368}
369
370// Device enumeration
371size_t ggml_backend_dev_count() {
372 return get_reg().devices.size();
373}
374
375ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
376 GGML_ASSERT(index < ggml_backend_dev_count());
377 return get_reg().devices[index];
378}
379
380ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
381 for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
382 ggml_backend_dev_t dev = ggml_backend_dev_get(index: i);
383 if (striequals(a: ggml_backend_dev_name(device: dev), b: name)) {
384 return dev;
385 }
386 }
387 return nullptr;
388}
389
390ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
391 for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
392 ggml_backend_dev_t dev = ggml_backend_dev_get(index: i);
393 if (ggml_backend_dev_type(device: dev) == type) {
394 return dev;
395 }
396 }
397 return nullptr;
398}
399
400// Convenience functions
401ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
402 ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
403 if (!dev) {
404 return nullptr;
405 }
406 return ggml_backend_dev_init(device: dev, params);
407}
408
409ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
410 ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
411 if (!dev) {
412 return nullptr;
413 }
414 return ggml_backend_dev_init(device: dev, params);
415}
416
417ggml_backend_t ggml_backend_init_best(void) {
418 ggml_backend_dev_t dev = ggml_backend_dev_by_type(type: GGML_BACKEND_DEVICE_TYPE_GPU);
419 dev = dev ? dev : ggml_backend_dev_by_type(type: GGML_BACKEND_DEVICE_TYPE_IGPU);
420 dev = dev ? dev : ggml_backend_dev_by_type(type: GGML_BACKEND_DEVICE_TYPE_CPU);
421 if (!dev) {
422 return nullptr;
423 }
424 return ggml_backend_dev_init(device: dev, params: nullptr);
425}
426
427// Dynamic loading
428ggml_backend_reg_t ggml_backend_load(const char * path) {
429 return get_reg().load_backend(path, silent: false);
430}
431
432void ggml_backend_unload(ggml_backend_reg_t reg) {
433 get_reg().unload_backend(reg, silent: true);
434}
435
// Returns the directory that contains the running executable, including a trailing
// path separator.
// On Linux/FreeBSD the result falls back to "./" when the executable link cannot be read.
// On Windows an empty path is returned when the module file name cannot be queried.
// On platforms without an implementation an empty path is returned.
static fs::path get_executable_path() {
#if defined(__APPLE__)
    // get executable path: the first call fails and reports the required buffer size,
    // the loop then retries with a buffer of that size
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    // the buffer holds a NUL-terminated string; stop at the terminator rather than copying
    // the whole buffer, so that the result never contains an embedded NUL
    std::string base_path = path.empty() ? std::string() : std::string(path.data());
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__) || defined(__FreeBSD__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
#    if defined(__linux__)
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
#    elif defined(__FreeBSD__)
        ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
#    endif
        if (len == -1) {
            // keep the "." fallback
            break;
        }
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        // readlink filled the whole buffer, so the result may be truncated: grow and retry
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    // upper bound for the retry loop; far above the longest path Windows supports
    const size_t max_buffer_size = 65536;

    std::vector<wchar_t> path(MAX_PATH);
    DWORD len = 0;
    while (true) {
        len = GetModuleFileNameW(NULL, path.data(), static_cast<DWORD>(path.size()));
        if (len == 0) {
            return {};
        }
        if (len < path.size()) {
            break;
        }
        // a return value equal to the buffer size means the path was truncated: grow and retry
        if (path.size() >= max_buffer_size) {
            return {};
        }
        path.resize(path.size() * 2);
    }
    std::wstring base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of(L'\\');
    if (last_slash != std::wstring::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + L"\\";
#else
    return {};
#endif
}
498
// File name prefix of backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
static fs::path backend_filename_prefix() {
#ifdef _WIN32
    const char * prefix = "ggml-";
#else
    const char * prefix = "libggml-";
#endif
    return fs::u8path(prefix);
}
506
// File name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
static fs::path backend_filename_extension() {
#ifdef _WIN32
    const char * extension = ".dll";
#else
    const char * extension = ".so";
#endif
    return fs::u8path(extension);
}
514
515static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
516 // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
517 const fs::path name_path = fs::u8path(source: name);
518 const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path(source: "-").native();
519 const fs::path file_extension = backend_filename_extension();
520
521 std::vector<fs::path> search_paths;
522 if (user_search_path == nullptr) {
523#ifdef GGML_BACKEND_DIR
524 search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
525#endif
526 // default search paths: executable directory, current directory
527 search_paths.push_back(x: get_executable_path());
528 search_paths.push_back(x: fs::current_path());
529 } else {
530 search_paths.push_back(x: fs::u8path(source: user_search_path));
531 }
532
533 int best_score = 0;
534 fs::path best_path;
535
536 for (const auto & search_path : search_paths) {
537 if (!fs::exists(p: search_path)) {
538 GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
539 continue;
540 }
541 fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
542 for (const auto & entry : dir_it) {
543 if (entry.is_regular_file()) {
544 auto filename = entry.path().filename();
545 auto ext = entry.path().extension();
546 if (filename.native().find(str: file_prefix) == 0 && ext == file_extension) {
547 dl_handle_ptr handle { dl_load_library(path: entry) };
548 if (!handle && !silent) {
549 GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
550 }
551 if (handle) {
552 auto score_fn = (ggml_backend_score_t) dl_get_sym(handle: handle.get(), name: "ggml_backend_score");
553 if (score_fn) {
554 int s = score_fn();
555#ifndef NDEBUG
556 GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, path_str(entry.path()).c_str(), s);
557#endif
558 if (s > best_score) {
559 best_score = s;
560 best_path = entry.path();
561 }
562 } else {
563 if (!silent) {
564 GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, path_str(entry.path()).c_str());
565 }
566 }
567 }
568 }
569 }
570 }
571 }
572
573 if (best_score == 0) {
574 // try to load the base backend
575 for (const auto & search_path : search_paths) {
576 fs::path filename = backend_filename_prefix().native() + name_path.native() + backend_filename_extension().native();
577 fs::path path = search_path / filename;
578 if (fs::exists(p: path)) {
579 return get_reg().load_backend(path, silent);
580 }
581 }
582 return nullptr;
583 }
584
585 return get_reg().load_backend(path: best_path, silent);
586}
587
588void ggml_backend_load_all() {
589 ggml_backend_load_all_from_path(dir_path: nullptr);
590}
591
592void ggml_backend_load_all_from_path(const char * dir_path) {
593#ifdef NDEBUG
594 bool silent = true;
595#else
596 bool silent = false;
597#endif
598
599 ggml_backend_load_best(name: "blas", silent, user_search_path: dir_path);
600 ggml_backend_load_best(name: "cann", silent, user_search_path: dir_path);
601 ggml_backend_load_best(name: "cuda", silent, user_search_path: dir_path);
602 ggml_backend_load_best(name: "hip", silent, user_search_path: dir_path);
603 ggml_backend_load_best(name: "metal", silent, user_search_path: dir_path);
604 ggml_backend_load_best(name: "rpc", silent, user_search_path: dir_path);
605 ggml_backend_load_best(name: "sycl", silent, user_search_path: dir_path);
606 ggml_backend_load_best(name: "vulkan", silent, user_search_path: dir_path);
607 ggml_backend_load_best(name: "opencl", silent, user_search_path: dir_path);
608 ggml_backend_load_best(name: "hexagon", silent, user_search_path: dir_path);
609 ggml_backend_load_best(name: "musa", silent, user_search_path: dir_path);
610 ggml_backend_load_best(name: "cpu", silent, user_search_path: dir_path);
611 // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
612 const char * backend_path = std::getenv(name: "GGML_BACKEND_PATH");
613 if (backend_path) {
614 ggml_backend_load(path: backend_path);
615 }
616}
617