#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggml-org/ggml/issues/287
struct ggml_cplan {
    size_t    work_size; // size of the work buffer, calculated by `ggml_graph_plan()`
    uint8_t * work_data; // work buffer, to be allocated by the caller before calling `ggml_graph_compute()`

    int n_threads;
    struct ggml_threadpool * threadpool;

    // abort ggml_graph_compute when true
    ggml_abort_callback abort_callback;
    void *              abort_callback_data;
};

// NUMA strategies
enum ggml_numa_strategy {
    GGML_NUMA_STRATEGY_DISABLED   = 0,
    GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
    GGML_NUMA_STRATEGY_ISOLATE    = 2,
    GGML_NUMA_STRATEGY_NUMACTL    = 3,
    GGML_NUMA_STRATEGY_MIRROR     = 4,
    GGML_NUMA_STRATEGY_COUNT
};

GGML_BACKEND_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
GGML_BACKEND_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
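
// example (illustrative sketch): opt in once at startup
//
//   ggml_numa_init(GGML_NUMA_STRATEGY_DISTRIBUTE);
//   if (ggml_is_numa()) {
//       // more than one NUMA node was detected
//   }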

// create a new 1-element tensor initialized to `value`
GGML_BACKEND_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

// set every element of `tensor` to `value`; returns `tensor`
GGML_BACKEND_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
GGML_BACKEND_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

// get/set a single element, addressed by a flattened 1d index (_1d) or by per-dimension indices (_nd)
GGML_BACKEND_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);

GGML_BACKEND_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);

GGML_BACKEND_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
GGML_BACKEND_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);

GGML_BACKEND_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
GGML_BACKEND_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
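
// example (illustrative sketch; `ctx` is assumed to be an initialized ggml context):
//
//   struct ggml_tensor * t = ggml_new_f32(ctx, 0.0f); // 1-element f32 tensor
//   ggml_set_f32(t, 1.5f);                            // fill every element
//   float v = ggml_get_f32_1d(t, 0);                  // v == 1.5f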

GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new          (struct ggml_threadpool_params * params);
GGML_BACKEND_API void                     ggml_threadpool_free         (struct ggml_threadpool * threadpool);
GGML_BACKEND_API int                      ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
GGML_BACKEND_API void                     ggml_threadpool_pause        (struct ggml_threadpool * threadpool);
GGML_BACKEND_API void                     ggml_threadpool_resume       (struct ggml_threadpool * threadpool);
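
// example (illustrative sketch; `ggml_threadpool_params_default()` is declared in ggml.h,
// and `graph` is assumed to be a built ggml_cgraph):
//
//   struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
//   struct ggml_threadpool * tp = ggml_threadpool_new(&tpp);
//   struct ggml_cplan plan = ggml_graph_plan(graph, 8, tp); // see ggml_graph_plan() below
//   // ... allocate plan.work_data, compute, free ...
//   ggml_threadpool_free(tp);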

// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_BACKEND_API struct ggml_cplan ggml_graph_plan(
    const struct ggml_cgraph * cgraph,
    int                        n_threads, /* = GGML_DEFAULT_N_THREADS */
    struct ggml_threadpool *   threadpool /* = NULL */ );
GGML_BACKEND_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
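
// example (illustrative sketch; `graph` is assumed to be a built ggml_cgraph):
//
//   struct ggml_cplan plan = ggml_graph_plan(graph, GGML_DEFAULT_N_THREADS, NULL);
//   if (plan.work_size > 0) {
//       plan.work_data = malloc(plan.work_size); // caller owns this buffer
//   }
//   enum ggml_status status = ggml_graph_compute(graph, &plan);
//   free(plan.work_data);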

// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must ensure the context has enough memory for the work data
GGML_BACKEND_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
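
// example (illustrative; assumes `ctx` was created with enough spare memory for the work data):
//
//   enum ggml_status status = ggml_graph_compute_with_ctx(ctx, graph, 4);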

//
// system info
//

// x86
GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
GGML_BACKEND_API int ggml_cpu_has_avx        (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2       (void);
GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
GGML_BACKEND_API int ggml_cpu_has_fma        (void);
GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
GGML_BACKEND_API int ggml_cpu_has_avx512_bf16(void);
GGML_BACKEND_API int ggml_cpu_has_amx_int8   (void);
// ARM
GGML_BACKEND_API int ggml_cpu_has_neon       (void);
GGML_BACKEND_API int ggml_cpu_has_arm_fma    (void);
GGML_BACKEND_API int ggml_cpu_has_fp16_va    (void);
GGML_BACKEND_API int ggml_cpu_has_dotprod    (void);
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
GGML_BACKEND_API int ggml_cpu_has_sve        (void);
GGML_BACKEND_API int ggml_cpu_get_sve_cnt    (void); // SVE vector length in bytes
GGML_BACKEND_API int ggml_cpu_has_sme        (void);
// other
GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
GGML_BACKEND_API int ggml_cpu_has_llamafile  (void);
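
// example (illustrative): logging a few feature flags at startup
//
//   printf("AVX2: %d, NEON: %d, SVE: %d\n",
//          ggml_cpu_has_avx2(), ggml_cpu_has_neon(), ggml_cpu_has_sve());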

// Internal types and functions exposed for tests and benchmarks

// vectorized dot-product kernel: computes the dot product of n elements of x and y into s
// (bs/bx/by are strides; nrc is the number of result rows computed at once)
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                const void * GGML_RESTRICT y, size_t by, int nrc);

struct ggml_type_traits_cpu {
    ggml_from_float_t from_float;
    ggml_vec_dot_t    vec_dot;
    enum ggml_type    vec_dot_type;
    int64_t           nrows; // number of rows to process simultaneously
};

GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
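
// example (illustrative sketch): looking up the dot-product kernel for a quantized type
//
//   const struct ggml_type_traits_cpu * traits = ggml_get_type_traits_cpu(GGML_TYPE_Q4_0);
//   enum ggml_type dot_type = traits->vec_dot_type; // type the second operand must be converted to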

// one-time initialization of CPU-specific internal state (e.g. lookup tables)
GGML_BACKEND_API void ggml_cpu_init(void);

//
// CPU backend
//

GGML_BACKEND_API ggml_backend_t ggml_backend_cpu_init(void);

GGML_BACKEND_API bool ggml_backend_is_cpu                (ggml_backend_t backend);
GGML_BACKEND_API void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
GGML_BACKEND_API void ggml_backend_cpu_set_threadpool    (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
GGML_BACKEND_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
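
// example (illustrative sketch; `ggml_backend_graph_compute()` and `ggml_backend_free()`
// are declared in ggml-backend.h):
//
//   ggml_backend_t backend = ggml_backend_cpu_init();
//   ggml_backend_cpu_set_n_threads(backend, 4);
//   ggml_backend_graph_compute(backend, graph);
//   ggml_backend_free(backend);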

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

// contiguous buffer conversions; each converts n elements from src to dst (fp32_to_fp32 is a plain copy)
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float * src, float * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float * src, int32_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float * src, ggml_fp16_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * src, float * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float * src, ggml_bf16_t * dst, int64_t n);
GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * src, float * dst, int64_t n);
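
// example (illustrative): round-tripping four floats through half precision
//
//   float src[4] = { 1.0f, 2.0f, 3.0f, 4.0f }, back[4];
//   ggml_fp16_t tmp[4];
//   ggml_cpu_fp32_to_fp16(src, tmp, 4);
//   ggml_cpu_fp16_to_fp32(tmp, back, 4); // lossy for values not exactly representable in fp16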

#ifdef __cplusplus
}
#endif