#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_BACKEND_API_VERSION 2

//
// Backend buffer type
//

struct ggml_backend_buffer_type_i {
    const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
    // allocate a buffer of this type
    ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
    // tensor alignment
    size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
    // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
    size_t                (*get_max_size)  (ggml_backend_buffer_type_t buft);
    // (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
    size_t                (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
    // (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
    bool                  (*is_host)       (ggml_backend_buffer_type_t buft);
};

struct ggml_backend_buffer_type {
    struct ggml_backend_buffer_type_i iface;
    ggml_backend_dev_t device;
    void * context;
};
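
// Example: a minimal buffer type interface (illustrative sketch only; the
// my_backend_* identifiers are hypothetical and not part of ggml):
//
//     static const char * my_backend_buft_get_name(ggml_backend_buffer_type_t buft) {
//         return "MyBackend";
//     }
//
//     static size_t my_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
//         return 64; // assumed device alignment
//     }
//
//     static struct ggml_backend_buffer_type_i my_backend_buft_iface = {
//         /* .get_name       = */ my_backend_buft_get_name,
//         /* .alloc_buffer   = */ my_backend_buft_alloc_buffer, // see the sketch after ggml_backend_buffer_init below
//         /* .get_alignment  = */ my_backend_buft_get_alignment,
//         /* .get_max_size   = */ NULL, // optional, defaults to SIZE_MAX
//         /* .get_alloc_size = */ NULL, // optional, defaults to ggml_nbytes
//         /* .is_host        = */ NULL, // optional, defaults to false
//     };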

//
// Backend buffer
//

struct ggml_backend_buffer_i {
    // (optional) free the buffer
    void             (*free_buffer)  (ggml_backend_buffer_t buffer);
    // base address of the buffer
    void *           (*get_base)     (ggml_backend_buffer_t buffer);
    // (optional) initialize a tensor in the buffer (e.g. add tensor extras)
    enum ggml_status (*init_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
    // tensor data access
    void             (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
    void             (*set_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
    void             (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
    // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
    bool             (*cpy_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
    // clear the entire buffer
    void             (*clear)        (ggml_backend_buffer_t buffer, uint8_t value);
    // (optional) reset any internal state created by tensor initialization, such as tensor extras
    void             (*reset)        (ggml_backend_buffer_t buffer);
};

struct ggml_backend_buffer {
    struct ggml_backend_buffer_i iface;
    ggml_backend_buffer_type_t buft;
    void * context;
    size_t size;
    enum ggml_backend_buffer_usage usage;
};
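
// Example: a minimal buffer interface where the buffer context is the raw base
// pointer (illustrative sketch only; the my_backend_* identifiers are hypothetical):
//
//     static void * my_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
//         return buffer->context;
//     }
//
//     static struct ggml_backend_buffer_i my_backend_buffer_iface = {
//         /* .free_buffer   = */ my_backend_buffer_free,          // defined by the backend
//         /* .get_base      = */ my_backend_buffer_get_base,
//         /* .init_tensor   = */ NULL,                            // optional
//         /* .memset_tensor = */ my_backend_buffer_memset_tensor, // defined by the backend
//         /* .set_tensor    = */ my_backend_buffer_set_tensor,    // defined by the backend
//         /* .get_tensor    = */ my_backend_buffer_get_tensor,    // defined by the backend
//         /* .cpy_tensor    = */ NULL,                            // optional
//         /* .clear         = */ my_backend_buffer_clear,         // defined by the backend
//         /* .reset         = */ NULL,                            // optional
//     };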

GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
        ggml_backend_buffer_type_t   buft,
        struct ggml_backend_buffer_i iface,
        void *                       context,
        size_t                       size);
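
// Example: a backend's alloc_buffer implementation typically allocates device
// memory and wraps it with ggml_backend_buffer_init (illustrative sketch;
// my_backend_* and my_device_malloc are hypothetical):
//
//     static ggml_backend_buffer_t my_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
//         void * data = my_device_malloc(size); // hypothetical device allocator
//         if (data == NULL) {
//             return NULL;
//         }
//         return ggml_backend_buffer_init(buft, my_backend_buffer_iface, data, size);
//     }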

// do not use directly; use ggml_backend_tensor_copy instead
GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);

// multi-buffer
// buffer that contains a collection of buffers
GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
GGML_API bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
GGML_API void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
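
// Example usage (sketch): combine per-device buffers into a single handle so
// they can be freed and tagged together (buf_gpu0/buf_gpu1 are hypothetical):
//
//     ggml_backend_buffer_t bufs[2] = { buf_gpu0, buf_gpu1 };
//     ggml_backend_buffer_t multi   = ggml_backend_multi_buffer_alloc_buffer(bufs, 2);
//     ggml_backend_multi_buffer_set_usage(multi, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);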

//
// Backend (stream)
//

struct ggml_backend_i {
    const char * (*get_name)(ggml_backend_t backend);

    void (*free)(ggml_backend_t backend);

    // (optional) asynchronous tensor data access
    void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
    void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
    bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);

    // (optional) complete all pending operations (required if the backend supports async operations)
    void (*synchronize)(ggml_backend_t backend);

    // (optional) graph plans (not used currently)
    // compute graph with a plan
    ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
    void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
    // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
    void                      (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
    // compute the graph with the plan
    enum ggml_status          (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

    // compute graph (always async if supported by the backend)
    enum ggml_status (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

    // (optional) event synchronization
    // record an event on this stream
    void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
    // wait for an event on a different stream
    void (*event_wait)  (ggml_backend_t backend, ggml_backend_event_t event);

    // (optional) sort/optimize the nodes in the graph
    void (*graph_optimize)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
};

struct ggml_backend {
    ggml_guid_t guid;
    struct ggml_backend_i iface;
    ggml_backend_dev_t device;
    void * context;
};
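
// Example: the compute flow from the caller's side, using the public API from
// ggml-backend.h (sketch): graph_compute is asynchronous on backends that
// support it, so callers must synchronize before reading results:
//
//     enum ggml_status status = ggml_backend_graph_compute_async(backend, cgraph);
//     // ... optionally overlap host-side work here ...
//     ggml_backend_synchronize(backend); // no-op for fully synchronous backends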

struct ggml_backend_event {
    struct ggml_backend_device * device;
    void * context;
};
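
// Example: cross-stream synchronization with events, using the public API from
// ggml-backend.h (sketch; dev, backend_src and backend_dst are assumed to exist):
//
//     ggml_backend_event_t ev = ggml_backend_event_new(dev);
//     ggml_backend_event_record(ev, backend_src); // record on the source stream
//     ggml_backend_event_wait(backend_dst, ev);   // destination stream waits for it
//     ggml_backend_event_free(ev);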

//
// Backend device
//

// Note: if additional properties are needed, we should add a struct with all of them;
// the current property getters can remain, since they are more convenient for frequently used properties
struct ggml_backend_device_i {
    // device name: short identifier for this device, such as "CPU" or "CUDA0"
    const char * (*get_name)(ggml_backend_dev_t dev);

    // device description: short informative description of the device, could be the model name
    const char * (*get_description)(ggml_backend_dev_t dev);

    // device memory in bytes
    void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);

    // device type
    enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);

    // device properties
    void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);

    // backend (stream) initialization
    ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);

    // preferred buffer type
    ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);

    // (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
    ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);

    // (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
    ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);

    // check if the backend can compute an operation
    bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

    // check if the backend can use tensors allocated in a buffer type
    bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);

    // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
    // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
    bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);

    // (optional) event synchronization
    ggml_backend_event_t (*event_new)        (ggml_backend_dev_t dev);
    void                 (*event_free)       (ggml_backend_dev_t dev, ggml_backend_event_t event);
    void                 (*event_synchronize)(ggml_backend_dev_t dev, ggml_backend_event_t event);
};

struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t reg;
    void * context;
};
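
// Example: a typical get_props implementation delegates to the other getters and
// fills in the capability flags (sketch; my_backend_* is hypothetical, and the
// exact ggml_backend_dev_props fields are declared in ggml-backend.h):
//
//     static void my_backend_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
//         props->name        = my_backend_get_name(dev);
//         props->description = my_backend_get_description(dev);
//         props->type        = my_backend_get_type(dev);
//         my_backend_get_memory(dev, &props->memory_free, &props->memory_total);
//         props->caps.async                = false; // true only if *_tensor_async/synchronize are implemented
//         props->caps.host_buffer          = false;
//         props->caps.buffer_from_host_ptr = false;
//         props->caps.events               = false;
//     }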

//
// Backend (reg)
//

struct ggml_backend_reg_i {
    const char * (*get_name)(ggml_backend_reg_t reg);

    // enumerate available devices
    size_t             (*get_device_count)(ggml_backend_reg_t reg);
    ggml_backend_dev_t (*get_device)      (ggml_backend_reg_t reg, size_t index);

    // (optional) get a pointer to a function in the backend
    // backends can add custom functions that are not part of the standard ggml-backend interface
    void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
};

struct ggml_backend_reg {
    int api_version; // initialize to GGML_BACKEND_API_VERSION
    struct ggml_backend_reg_i iface;
    void * context;
};
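
// Example: defining and exposing a backend registry (sketch; the my_backend_*
// identifiers are hypothetical):
//
//     static struct ggml_backend_reg my_backend_reg = {
//         /* .api_version = */ GGML_BACKEND_API_VERSION,
//         /* .iface       = */ {
//             /* .get_name         = */ my_backend_reg_get_name,
//             /* .get_device_count = */ my_backend_reg_get_device_count,
//             /* .get_device       = */ my_backend_reg_get_device,
//             /* .get_proc_address = */ NULL, // optional
//         },
//         /* .context     = */ NULL,
//     };
//
//     ggml_backend_reg_t ggml_backend_my_backend_reg(void) {
//         return &my_backend_reg;
//     }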

// Dynamic loading support for backends

// Initialize the backend
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
// Optional: obtain a score for the backend based on the system configuration
// Higher scores are preferred; 0 means the backend is not supported on the current system
typedef int (*ggml_backend_score_t)(void);

#ifdef GGML_BACKEND_DL
#    ifdef __cplusplus
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
            extern "C" {                                                      \
                GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);  \
            }                                                                 \
            ggml_backend_reg_t ggml_backend_init(void) {                      \
                return reg_fn();                                              \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
            extern "C" {                                                      \
                GGML_BACKEND_API int ggml_backend_score(void);                \
            }                                                                 \
            int ggml_backend_score(void) {                                    \
                return score_fn();                                            \
            }
#    else
#        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
            GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);      \
            ggml_backend_reg_t ggml_backend_init(void) {                      \
                return reg_fn();                                              \
            }
#        define GGML_BACKEND_DL_SCORE_IMPL(score_fn)                          \
            GGML_BACKEND_API int ggml_backend_score(void);                    \
            int ggml_backend_score(void) {                                    \
                return score_fn();                                            \
            }
#    endif
#else
#    define GGML_BACKEND_DL_IMPL(reg_fn)
#    define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
#endif
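
// Example: a dynamically loaded backend exposes its entry points by invoking
// these macros once in a source file (sketch; the function names are hypothetical):
//
//     GGML_BACKEND_DL_IMPL(ggml_backend_my_backend_reg)
//     GGML_BACKEND_DL_SCORE_IMPL(my_backend_score)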

#ifdef __cplusplus
}
#endif