#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggml-org/ggml/issues/287
struct ggml_cplan {
    size_t    work_size; // size of the work buffer, calculated by `ggml_graph_plan()`
    uint8_t * work_data; // work buffer, to be allocated by the caller before calling `ggml_graph_compute()`

    int n_threads;
    struct ggml_threadpool * threadpool;

    // abort ggml_graph_compute when true
    ggml_abort_callback abort_callback;
    void *              abort_callback_data;
};

// NUMA strategies
enum ggml_numa_strategy {
    GGML_NUMA_STRATEGY_DISABLED   = 0,
    GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
    GGML_NUMA_STRATEGY_ISOLATE    = 2,
    GGML_NUMA_STRATEGY_NUMACTL    = 3,
    GGML_NUMA_STRATEGY_MIRROR     = 4,
    GGML_NUMA_STRATEGY_COUNT
};

GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
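
// example (illustrative sketch): opt in once at startup
//
//   ggml_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
//   if (ggml_is_numa()) {
//       // more than one NUMA node was detected
//   }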

// create a new 1-element tensor initialized to `value`
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

// set every element of `tensor` to `value`; returns `tensor`
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

// get/set a single element, addressed by a flattened 1d index (_1d) or by per-dimension indices (_nd)
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);

GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);

GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);

GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
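
// example (illustrative sketch; `ctx` is assumed to be an initialized ggml context):
//
//   struct ggml_tensor * t = ggml_new_f32(ctx, 0.0f); // 1-element f32 tensor
//   ggml_set_f32(t, 1.5f);                            // fill every element
//   float v = ggml_get_f32_1d(t, 0);                  // v == 1.5f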

GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new          (struct ggml_threadpool_params * params);
GGML_BACKEND_API void                     ggml_threadpool_free         (struct ggml_threadpool * threadpool);
GGML_BACKEND_API int                      ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
GGML_BACKEND_API void                     ggml_threadpool_pause        (struct ggml_threadpool * threadpool);
GGML_BACKEND_API void                     ggml_threadpool_resume       (struct ggml_threadpool * threadpool);
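
// example (illustrative sketch; `ggml_threadpool_params_default()` is declared in ggml.h,
// and `graph` is assumed to be a built ggml_cgraph):
//
//   struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
//   struct ggml_threadpool * tp = ggml_threadpool_new(&tpp);
//   struct ggml_cplan plan = ggml_graph_plan(graph, 8, tp); // see ggml_graph_plan() below
//   // ... allocate plan.work_data, compute, free ...
//   ggml_threadpool_free(tp);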

// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
    const struct ggml_cgraph * cgraph,
    int                        n_threads, /* = GGML_DEFAULT_N_THREADS */
    struct ggml_threadpool *   threadpool /* = NULL */ );
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
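
// example (illustrative sketch; `graph` is assumed to be a built ggml_cgraph):
//
//   struct ggml_cplan plan = ggml_graph_plan(graph, GGML_DEFAULT_N_THREADS, NULL);
//   if (plan.work_size > 0) {
//       plan.work_data = malloc(plan.work_size); // caller owns this buffer
//   }
//   enum ggml_status status = ggml_graph_compute(graph, &plan);
//   free(plan.work_data);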

// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must ensure the context has enough memory for the work data
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
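
// example (illustrative; assumes `ctx` was created with enough spare memory for the work data):
//
//   enum ggml_status status = ggml_graph_compute_with_ctx(ctx, graph, 4);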

//
// system info
//

// x86
GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
GGML_BACKEND_API int ggml_cpu_has_avx        (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
GGML_BACKEND_API int ggml_cpu_has_fma        (void);
GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
// ARM
GGML_BACKEND_API int ggml_cpu_has_neon       (void);
GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve        (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // SVE vector length in bytes
GGML_BACKEND_API int ggml_cpu_has_sme        (void);
// other
GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
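
// example (illustrative): logging a few feature flags at startup
//
//   printf("AVX2: %d, NEON: %d, SVE: %d\n",
//          ggml_cpu_has_avx2(), ggml_cpu_has_neon(), ggml_cpu_has_sve());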

// Internal types and functions exposed for tests and benchmarks

// vectorized dot-product kernel: computes the dot product of n elements of x and y into s
// (bs/bx/by are strides; nrc is the number of result rows computed at once)
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                const void * GGML_RESTRICT y, size_t by, int nrc);

struct ggml_type_traits_cpu {
    ggml_from_float_t from_float;
    ggml_vec_dot_t    vec_dot;
    enum ggml_type    vec_dot_type;
    int64_t           nrows; // number of rows to process simultaneously
};

GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
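
// example (illustrative sketch): looking up the dot-product kernel for a quantized type
//
//   const struct ggml_type_traits_cpu * traits = ggml_get_type_traits_cpu(GGML_TYPE_Q4_0);
//   enum ggml_type dot_type = traits->vec_dot_type; // type the second operand must be converted to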

// one-time initialization of CPU-specific internal state (e.g. lookup tables)
GGML_BACKEND_API void ggml_cpu_init(void);

//
// CPU backend
//

GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);

GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
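
// example (illustrative sketch; `ggml_backend_graph_compute()` and `ggml_backend_free()`
// are declared in ggml-backend.h):
//
//   ggml_backend_t backend = ggml_backend_cpu_init();
//   ggml_backend_cpu_set_n_threads(backend, 4);
//   ggml_backend_graph_compute(backend, graph);
//   ggml_backend_free(backend);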

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

// contiguous buffer conversions; each converts n elements from src to dst (fp32_to_fp32 is a plain copy)
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float * src, float * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float * src, int32_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float * src, ggml_fp16_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * src, float * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float * src, ggml_bf16_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * src, float * dst, int64_t n);
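
// example (illustrative): round-tripping four floats through half precision
//
//   float src[4] = { 1.0f, 2.0f, 3.0f, 4.0f }, back[4];
//   ggml_fp16_t tmp[4];
//   ggml_cpu_fp32_to_fp16(src, tmp, 4);
//   ggml_cpu_fp16_to_fp32(tmp, back, 4); // lossy for values not exactly representable in fp16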

#ifdef __cplusplus
}
#endif