#pragma once

#include "llama.h"

#include <cstdint>

#define LLAMA_MAX_SEQ 256
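// compile-time upper bound on the number of parallel sequences;
// cparams.n_seq_max below must not exceed this value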

struct llama_cparams {
    uint32_t n_ctx;           // context size used during inference
    uint32_t n_ctx_seq;       // context size available to a single sequence
    uint32_t n_batch;         // logical maximum batch size submitted to llama_decode
    uint32_t n_ubatch;        // physical maximum batch size (micro-batch)
    uint32_t n_seq_max;       // max number of sequences (i.e. distinct states for recurrent models)
    int32_t  n_threads;       // number of threads to use for generation
    int32_t  n_threads_batch; // number of threads to use for batch processing

    float rope_freq_base;  // RoPE base frequency
    float rope_freq_scale; // RoPE frequency scaling factor

    uint32_t n_ctx_orig_yarn; // original context size used during training (for YaRN scaling)
    // These hyperparameters are not exposed in GGUF, because all
    // existing YaRN models use the same values for them.
    float yarn_ext_factor;
    float yarn_attn_factor;
    float yarn_beta_fast;
    float yarn_beta_slow;
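    // For reference, the defaults in llama_context_default_params() at the time
    // of writing are: yarn_ext_factor = -1.0f (auto), yarn_attn_factor = 1.0f,
    // yarn_beta_fast = 32.0f, yarn_beta_slow = 1.0f.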

    bool embeddings;  // if true, extract embeddings (together with logits)
    bool causal_attn; // whether to use causal attention
    bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
    bool flash_attn;  // whether to use flash attention
    bool no_perf;     // disable internal performance timing measurements
    bool warmup;      // internal: context is performing a warmup run (see llama_set_warmup())
    bool op_offload;  // whether to offload host tensor operations to device
    bool kv_unified;  // use a unified KV buffer shared across the input sequences

    enum llama_pooling_type pooling_type; // pooling strategy used for embeddings

    ggml_backend_sched_eval_callback cb_eval; // called for each graph node during evaluation (e.g. for debugging)
    void * cb_eval_user_data;
};
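
// Illustrative sketch (an assumption for documentation, not part of this header):
// when a context is created via llama_init_from_model(), the public
// llama_context_params are resolved into this struct roughly as follows,
// with 0 meaning "take the model's training-time default":
//
//   llama_cparams cparams;
//   cparams.n_ctx     = params.n_ctx == 0 ? hparams.n_ctx_train : params.n_ctx;
//   cparams.n_batch   = params.n_batch;
//   cparams.n_ubatch  = params.n_ubatch == 0 ? params.n_batch : params.n_ubatch;
//   cparams.n_seq_max = params.n_seq_max; // must be in [1, LLAMA_MAX_SEQ]
//
//   cparams.rope_freq_base  = params.rope_freq_base  == 0.0f ? hparams.rope_freq_base_train  : params.rope_freq_base;
//   cparams.rope_freq_scale = params.rope_freq_scale == 0.0f ? hparams.rope_freq_scale_train : params.rope_freq_scale;
//
//   // with a unified KV cache all sequences share the full context window,
//   // otherwise each sequence gets an equal slice of it
//   cparams.n_ctx_seq = cparams.kv_unified ? cparams.n_ctx : cparams.n_ctx / cparams.n_seq_max;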