#pragma once

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;

// Tensor allocator
// a simple bump allocator that places tensors sequentially in an existing backend buffer
struct ggml_tallocr {
    ggml_backend_buffer_t buffer;
    void * base;
    size_t alignment;
    size_t offset;
};

GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API enum ggml_status    ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
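
// Example usage (a minimal sketch, not part of the API; assumes a buffer type `buft`
// and a size `buf_size` chosen by the caller, and tensors created in a no_alloc context):
/*
    struct ggml_init_params params = {
        .mem_size   = ggml_tensor_overhead() * 8,
        .mem_buffer = NULL,
        .no_alloc   = true, // tensor data will be placed in the backend buffer below
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);

    // place the tensors sequentially in a single backend buffer
    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, buf_size);
    struct ggml_tallocr talloc = ggml_tallocr_new(buffer);
    ggml_tallocr_alloc(&talloc, a);
    ggml_tallocr_alloc(&talloc, b);
*/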

// Graph allocator
/*
  Example usage:
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());

    // optional: create a worst-case graph and reserve the buffers to avoid reallocations
    ggml_gallocr_reserve(galloc, build_graph(max_batch));

    // allocate the graph
    struct ggml_cgraph * graph = build_graph(batch);
    ggml_gallocr_alloc_graph(galloc, graph);

    printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));

    // evaluate the graph
    ggml_backend_graph_compute(backend, graph);
*/

// special tensor flags for use with the graph allocator (see the sketch below):
//   ggml_set_input(): all input tensors are allocated at the beginning of the graph at non-overlapping addresses
//   ggml_set_output(): output tensors are never freed and never overwritten
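
// Example (a sketch of how the flags are typically set while building a graph;
// `ctx`, `w`, and `n` are assumed to exist in the calling code):
/*
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n);
    ggml_set_input(inp);  // allocated first, at an address not shared with other inputs

    struct ggml_tensor * out = ggml_mul_mat(ctx, w, inp);
    ggml_set_output(out); // kept alive so the result can be read back after compute
*/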

typedef struct ggml_gallocr * ggml_gallocr_t;

GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
GGML_API void           ggml_gallocr_free(ggml_gallocr_t galloc);
// pre-allocate the backend buffers from a measure (worst-case) graph - does not allocate the graph tensors or modify the graph
// call with a worst-case graph to avoid buffer reallocations
// not strictly required for single-buffer usage: ggml_gallocr_alloc_graph will reallocate the buffer automatically if needed
// returns false if the buffer allocation failed
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
GGML_API bool ggml_gallocr_reserve_n(
    ggml_gallocr_t galloc,
    struct ggml_cgraph * graph,
    const int * node_buffer_ids,
    const int * leaf_buffer_ids);
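
// Example (a sketch of multi-buffer reservation; `gpu_buft`/`cpu_buft` and the
// `node_buffer_ids`/`leaf_buffer_ids` arrays are assumed to be provided by the
// caller, e.g. by a graph-splitting pass that assigns each node to a buffer):
/*
    ggml_backend_buffer_type_t bufts[2] = { gpu_buft, cpu_buft };
    ggml_gallocr_t galloc = ggml_gallocr_new_n(bufts, 2);

    // node_buffer_ids[i] / leaf_buffer_ids[i] select the buffer (here 0 or 1)
    // used for the i-th node / leaf of the graph
    if (!ggml_gallocr_reserve_n(galloc, graph, node_buffer_ids, leaf_buffer_ids)) {
        // reservation failed, e.g. not enough device memory
    }
*/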

// allocates the tensors in the graph
// when a single buffer is used, it is reallocated automatically if the graph topology changes
// returns false if using multiple buffers and a reallocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);

GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);

// Utils
// Create a buffer and allocate all the tensors in a ggml_context
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
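
// Example (a sketch; allocates every tensor of a no_alloc context in one backend
// buffer, as is typically done for model weights; `ctx` is assumed to exist and
// the CPU backend header to be included):
/*
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    if (buf == NULL) {
        // buffer allocation failed
    }
*/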

#ifdef __cplusplus
}
#endif