#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_BACKEND_API_VERSION 2

//
// Backend buffer type
//

struct ggml_backend_buffer_type_i {
    const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
    // allocate a buffer of this type
    ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
    // tensor alignment
    size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
    // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
    size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
    // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
    size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
    // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
    bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
};

struct ggml_backend_buffer_type {
    struct ggml_backend_buffer_type_i iface;
    ggml_backend_dev_t device;
    void * context;
};
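
// Example: a minimal buffer type interface (illustrative sketch only; the
// my_backend_* identifiers are hypothetical and not part of ggml):
//
//     static const char * my_backend_buft_get_name(ggml_backend_buffer_type_t buft) {
//         return "MyBackend";
//     }
//
//     static size_t my_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
//         return 64; // assumed device alignment
//     }
//
//     static struct ggml_backend_buffer_type_i my_backend_buft_iface = {
//         /* .get_name       = */ my_backend_buft_get_name,
//         /* .alloc_buffer   = */ my_backend_buft_alloc_buffer, // see the sketch after ggml_backend_buffer_init below
//         /* .get_alignment  = */ my_backend_buft_get_alignment,
//         /* .get_max_size   = */ NULL, // optional, defaults to SIZE_MAX
//         /* .get_alloc_size = */ NULL, // optional, defaults to ggml_nbytes
//         /* .is_host        = */ NULL, // optional, defaults to false
//     };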

//
// Backend buffer
//

struct ggml_backend_buffer_i {
    // (optional) free the buffer
    void             (*free_buffer)  (ggml_backend_buffer_t buffer);
    // base address of the buffer
    void *           (*get_base)     (ggml_backend_buffer_t buffer);
    // (optional) initialize a tensor in the buffer (e.g. add tensor extras)
    enum ggml_status (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
    // tensor data access
    void             (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
    void             (*set_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
    void             (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
    // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
    bool             (*cpy_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
    // clear the entire buffer
    void             (*clear)        (ggml_backend_buffer_t buffer, uint8_t value);
    // (optional) reset any internal state created by tensor initialization, such as tensor extras
    void             (*reset)        (ggml_backend_buffer_t buffer);
};

struct ggml_backend_buffer {
    struct ggml_backend_buffer_i iface;
    ggml_backend_buffer_type_t buft;
    void * context;
    size_t size;
    enum ggml_backend_buffer_usage usage;
};
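
// Example: a minimal buffer interface where the buffer context is the raw base
// pointer (illustrative sketch only; the my_backend_* identifiers are hypothetical):
//
//     static void * my_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
//         return buffer->context;
//     }
//
//     static struct ggml_backend_buffer_i my_backend_buffer_iface = {
//         /* .free_buffer   = */ my_backend_buffer_free,          // defined by the backend
//         /* .get_base      = */ my_backend_buffer_get_base,
//         /* .init_tensor   = */ NULL,                            // optional
//         /* .memset_tensor = */ my_backend_buffer_memset_tensor, // defined by the backend
//         /* .set_tensor    = */ my_backend_buffer_set_tensor,    // defined by the backend
//         /* .get_tensor    = */ my_backend_buffer_get_tensor,    // defined by the backend
//         /* .cpy_tensor    = */ NULL,                            // optional
//         /* .clear         = */ my_backend_buffer_clear,         // defined by the backend
//         /* .reset         = */ NULL,                            // optional
//     };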

GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
        ggml_backend_buffer_type_t   buft,
        struct ggml_backend_buffer_i iface,
        void *                       context,
        size_t                       size);
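
// Example: a backend's alloc_buffer implementation typically allocates device
// memory and wraps it with ggml_backend_buffer_init (illustrative sketch;
// my_backend_* and my_device_malloc are hypothetical):
//
//     static ggml_backend_buffer_t my_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
//         void * data = my_device_malloc(size); // hypothetical device allocator
//         if (data == NULL) {
//             return NULL;
//         }
//         return ggml_backend_buffer_init(buft, my_backend_buffer_iface, data, size);
//     }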

// do not use directly; use ggml_backend_tensor_copy instead
GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

// multi-buffer
// buffer that contains a collection of buffers
GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
GGML_API bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
GGML_API void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
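
// Example usage (sketch): combine per-device buffers into a single handle so
// they can be freed and tagged together (buf_gpu0/buf_gpu1 are hypothetical):
//
//     ggml_backend_buffer_t bufs[2] = { buf_gpu0, buf_gpu1 };
//     ggml_backend_buffer_t multi   = ggml_backend_multi_buffer_alloc_buffer(bufs, 2);
//     ggml_backend_multi_buffer_set_usage(multi, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);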

//
// Backend (stream)
//

struct ggml_backend_i {
    const char * (*get_name)(ggml_backend_t backend);

    void (*free)(ggml_backend_t backend);

    // (optional) asynchronous tensor data access
    void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
    void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
    bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);

    // (optional) complete all pending operations (required if the backend supports async operations)
    void (*synchronize)(ggml_backend_t backend);

    // (optional) graph plans (not used currently)
    // compute graph with a plan
    ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
    void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
    // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
    void                      (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
    // compute the graph with the plan
    enum ggml_status          (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

    // compute graph (always async if supported by the backend)
    enum ggml_status (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

    // (optional) event synchronization
    // record an event on this stream
    void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
    // wait for an event on a different stream
    void (*event_wait)  (ggml_backend_t backend, ggml_backend_event_t event);

    // (optional) sort/optimize the nodes in the graph
    void (*graph_optimize)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
};

struct ggml_backend {
    ggml_guid_t guid;
    struct ggml_backend_i iface;
    ggml_backend_dev_t device;
    void * context;
};
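
// Example: the compute flow from the caller's side, using the public API from
// ggml-backend.h (sketch): graph_compute is asynchronous on backends that
// support it, so callers must synchronize before reading results:
//
//     enum ggml_status status = ggml_backend_graph_compute_async(backend, cgraph);
//     // ... optionally overlap host-side work here ...
//     ggml_backend_synchronize(backend); // no-op for fully synchronous backends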

struct ggml_backend_event {
    struct ggml_backend_device * device;
    void * context;
};
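
// Example: cross-stream synchronization with events, using the public API from
// ggml-backend.h (sketch; dev, backend_src and backend_dst are assumed to exist):
//
//     ggml_backend_event_t ev = ggml_backend_event_new(dev);
//     ggml_backend_event_record(ev, backend_src); // record on the source stream
//     ggml_backend_event_wait(backend_dst, ev);   // destination stream waits for it
//     ggml_backend_event_free(ev);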

//
// Backend device
//

// Note: if additional properties are needed, we should add a struct with all of them;
// the current property getters can remain, since they are more convenient for frequently used properties
struct ggml_backend_device_i {
    // device name: short identifier for this device, such as "CPU" or "CUDA0"
    const char * (*get_name)(ggml_backend_dev_t dev);

    // device description: short informative description of the device, could be the model name
    const char * (*get_description)(ggml_backend_dev_t dev);

    // device memory in bytes
    void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);

    // device type
    enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);

    // device properties
    void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);

    // backend (stream) initialization
    ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);

    // preferred buffer type
    ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);

    // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
    ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);

    // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
    ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);

    // check if the backend can compute an operation
    bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

    // check if the backend can use tensors allocated in a buffer type
    bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

    // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
    // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
    bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

    // (optional) event synchronization
    ggml_backend_event_t (*event_new)        (ggml_backend_dev_t dev);
    void                 (*event_free)       (ggml_backend_dev_t dev, ggml_backend_event_t event);
    void                 (*event_synchronize)(ggml_backend_dev_t dev, ggml_backend_event_t event);
};

struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t reg;
    void * context;
};
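
// Example: a typical get_props implementation delegates to the other getters and
// fills in the capability flags (sketch; my_backend_* is hypothetical, and the
// exact ggml_backend_dev_props fields are declared in ggml-backend.h):
//
//     static void my_backend_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
//         props->name        = my_backend_get_name(dev);
//         props->description = my_backend_get_description(dev);
//         props->type        = my_backend_get_type(dev);
//         my_backend_get_memory(dev, &props->memory_free, &props->memory_total);
//         props->caps.async                = false; // true only if *_tensor_async/synchronize are implemented
//         props->caps.host_buffer          = false;
//         props->caps.buffer_from_host_ptr = false;
//         props->caps.events               = false;
//     }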

//
// Backend (reg)
//

struct ggml_backend_reg_i {
    const char * (*get_name)(ggml_backend_reg_t reg);

    // enumerate available devices
    size_t             (*get_device_count)(ggml_backend_reg_t reg);
    ggml_backend_dev_t (*get_device)      (ggml_backend_reg_t reg, size_t index);

    // (optional) get a pointer to a function in the backend
    // backends can add custom functions that are not part of the standard ggml-backend interface
    void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
};

struct ggml_backend_reg {
    int api_version; // initialize to GGML_BACKEND_API_VERSION
    struct ggml_backend_reg_i iface;
    void * context;
};
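
// Example: defining and exposing a backend registry (sketch; the my_backend_*
// identifiers are hypothetical):
//
//     static struct ggml_backend_reg my_backend_reg = {
//         /* .api_version = */ GGML_BACKEND_API_VERSION,
//         /* .iface       = */ {
//             /* .get_name         = */ my_backend_reg_get_name,
//             /* .get_device_count = */ my_backend_reg_get_device_count,
//             /* .get_device       = */ my_backend_reg_get_device,
//             /* .get_proc_address = */ NULL, // optional
//         },
//         /* .context     = */ NULL,
//     };
//
//     ggml_backend_reg_t ggml_backend_my_backend_reg(void) {
//         return &my_backend_reg;
//     }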

// Dynamic loading support for backends

// Initialize the backend
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
// Optional: obtain a score for the backend based on the system configuration
// Higher scores are preferred; 0 means the backend is not supported on the current system
typedef int (*ggml_backend_score_t)(void);

#ifdef GGML_BACKEND_DL
#    ifdef __cplusplus
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
            extern "C" {                                                      \
                GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);  \
            }                                                                 \
            ggml_backend_reg_t ggml_backend_init(void) {                      \
                return reg_fn();                                              \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
            extern "C" {                                                      \
                GGML_BACKEND_API int ggml_backend_score(void);                \
            }                                                                 \
            int ggml_backend_score(void) {                                    \
                return score_fn();                                            \
            }
#    else
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);      \
            ggml_backend_reg_t ggml_backend_init(void) {                      \
                return reg_fn();                                              \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
            GGML_BACKEND_API int ggml_backend_score(void);                    \
            int ggml_backend_score(void) {                                    \
                return score_fn();                                            \
            }
#    endif
#else
#    define GGML_BACKEND_DL_IMPL(reg_fn)
#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
#endif
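
// Example: a dynamically loaded backend exposes its entry points by invoking
// these macros once in a source file (sketch; the function names are hypothetical):
//
//     GGML_BACKEND_DL_IMPL(ggml_backend_my_backend_reg)
//     GGML_BACKEND_DL_SCORE_IMPL(my_backend_score)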

#ifdef __cplusplus
}
#endif