1#pragma once
2
3#include "ggml.h" // ggml_op
4
5#include <string>
6
7//
8// gguf constants (sync with gguf.py)
9//
10
// model architectures
//
// the enum values are internal identifiers only; conversion to/from the
// architecture name strings stored in GGUF metadata is done via
// llm_arch_name() / llm_arch_from_string() (declared at the bottom of this
// header). LLM_ARCH_UNKNOWN serves as the "not recognized" sentinel — keep
// it last when adding new entries.
enum llm_arch {
    LLM_ARCH_CLIP,
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_JINA_BERT_V3,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_QWEN3VL,
    LLM_ARCH_QWEN3VLMOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_MAMBA2,
    LLM_ARCH_JAMBA,
    LLM_ARCH_FALCON_H1,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_GLM4_MOE,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_NEMOTRON_H,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_GRANITE_HYBRID,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_BAILINGMOE2,
    LLM_ARCH_DOTS1,
    LLM_ARCH_ARCEE,
    LLM_ARCH_ERNIE4_5,
    LLM_ARCH_ERNIE4_5_MOE,
    LLM_ARCH_HUNYUAN_MOE,
    LLM_ARCH_HUNYUAN_DENSE,
    LLM_ARCH_SMOLLM3,
    LLM_ARCH_OPENAI_MOE,
    LLM_ARCH_LFM2,
    LLM_ARCH_LFM2MOE,
    LLM_ARCH_DREAM,
    LLM_ARCH_SMALLTHINKER,
    LLM_ARCH_LLADA,
    LLM_ARCH_LLADA_MOE,
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_GROVEMOE,
    LLM_ARCH_APERTUS,
    LLM_ARCH_MINIMAX_M2,
    LLM_ARCH_COGVLM,
    LLM_ARCH_PANGU_EMBED,
    LLM_ARCH_UNKNOWN, // sentinel — keep last
};
117
// GGUF metadata key identifiers
//
// each value names one metadata key; the concrete key string is produced by
// the LLM_KV functor declared below (the string presumably embeds the
// architecture name for per-model keys — confirm in the implementation)
enum llm_kv {
    // general.* keys (model-wide information)
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    // model hyperparameters
    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_GROUP_COUNT,
    LLM_KV_EXPERT_GROUP_USED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_EXPERT_GROUP_SCALE,
    LLM_KV_EXPERTS_PER_GROUP,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_NEXTN_PREDICT_LAYERS,
    LLM_KV_NUM_DEEPSTACK_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_DECODER_BLOCK_COUNT,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,

    // attention hyperparameters
    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    // RoPE (rotary position embedding) hyperparameters
    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
    LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
    LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,

    // multi-file (split) model metadata
    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    // SSM (state-space model, e.g. Mamba) hyperparameters
    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_GROUP_COUNT,
    LLM_KV_SSM_DT_B_C_RMS,

    // WKV (RWKV-family) hyperparameters
    LLM_KV_WKV_HEAD_SIZE,

    // tokenizer metadata
    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_SEP,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    // adapter (LoRA) metadata
    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,
    LLM_KV_ADAPTER_LORA_TASK_NAME,
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,
    LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS,

    // posnet (position network) metadata
    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    // convnext metadata
    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    // classifier metadata
    LLM_KV_CLASSIFIER_OUTPUT_LABELS,

    // short-convolution (LFM2) metadata
    LLM_KV_SHORTCONV_L_CACHE,

    // xIELU activation parameters
    LLM_KV_XIELU_ALPHA_N,
    LLM_KV_XIELU_ALPHA_P,
    LLM_KV_XIELU_BETA,
    LLM_KV_XIELU_EPS,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,

    // sentence-transformers dense layers in and out features
    LLM_KV_DENSE_2_FEAT_IN,
    LLM_KV_DENSE_2_FEAT_OUT,
    LLM_KV_DENSE_3_FEAT_IN,
    LLM_KV_DENSE_3_FEAT_OUT,
};
291
// tensor identifiers
//
// resolved to concrete tensor name strings (e.g. "blk.3.attn_norm.weight" —
// see the LLM_TN usage example below) via the LLM_TN / LLM_TN_IMPL helpers
enum llm_tensor {
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_DENSE_2_OUT,
    LLM_TENSOR_DENSE_3_OUT,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_ATTN_SINKS,
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_DOWN_CHEXPS,
    LLM_TENSOR_FFN_GATE_CHEXPS,
    LLM_TENSOR_FFN_UP_CHEXPS,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD,  // gemma3n
    LLM_TENSOR_PER_LAYER_MODEL_PROJ,  // gemma3n
    LLM_TENSOR_PER_LAYER_INP_GATE,    // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ,        // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ_NORM,   // gemma3n
    LLM_TENSOR_PER_LAYER_POST_NORM,   // gemma3n
    LLM_TENSOR_ALTUP_PROJ,            // gemma3n
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,     // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_COEF,    // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_SCALE,   // gemma3n
    LLM_TENSOR_ALTUP_PREDICT_COEF,    // gemma3n
    LLM_TENSOR_ALTUP_ROUTER,          // gemma3n
    LLM_TENSOR_ALTUP_ROUTER_NORM,     // gemma3n
    LLM_TENSOR_LAUREL_L,              // gemma3n
    LLM_TENSOR_LAUREL_R,              // gemma3n
    LLM_TENSOR_LAUREL_POST_NORM,      // gemma3n
    // state-space model (Mamba-family) tensors
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_NORM,
    LLM_TENSOR_SSM_OUT,
    // RWKV-family time-mix / channel-mix tensors
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    // low-rank / MLA attention tensors (e.g. DeepSeek-style)
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    // encoder/decoder (T5-style) tensors
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    // classifier head tensors
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    // convnext / posnet (audio decoder) tensors
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
    // short-convolution tensors
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
    // vision-expert tensors
    LLM_TENSOR_VISEXP_ATTN_QKV,
    LLM_TENSOR_VISEXP_ATTN_OUT,
    LLM_TENSOR_VISEXP_FFN_GATE,
    LLM_TENSOR_VISEXP_FFN_DOWN,
    LLM_TENSOR_VISEXP_FFN_UP,
    // next-token-prediction (NextN/MTP) tensors
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,
    LLM_TENSOR_NEXTN_HNORM,
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
};
476
// coarse classification of where in the model a tensor lives
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,     // input-side tensors (embeddings, etc.)
    LLM_TENSOR_LAYER_REPEATING, // tensors repeated once per block ("blk.N.*")
    LLM_TENSOR_LAYER_OUTPUT,    // output-side tensors (final norm / head)
};
482
// functor mapping an llm_kv identifier to the concrete GGUF key string for a
// given architecture (implemented elsewhere)
struct LLM_KV {
    // optional suffix: presumably appended to the generated key — confirm in
    // the implementation
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch arch;
    const char * suffix;

    // produce the key string for the given metadata key id
    std::string operator()(llm_kv kv) const;
};
491
492// helper to handle gguf constants
493// usage:
494//
495// const auto tn = LLM_TN(LLM_ARCH_LLAMA);
496//
497// std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
498// std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
499// std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
500//
501struct LLM_TN_IMPL {
502 const llm_arch arch;
503 const llm_tensor tensor;
504 const char * const suffix;
505 const int bid;
506 const int xid;
507
508 std::string str() const;
509
510 operator std::string() const {
511 return str();
512 }
513
514 friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
515 return str == tn.str();
516 }
517
518 friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
519 return str != tn.str();
520 }
521};
522
523struct LLM_TN {
524 LLM_TN(llm_arch arch) : arch(arch) {}
525
526 llm_arch arch;
527
528 LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
529 return { .arch: arch, .tensor: tensor, .suffix: suffix, .bid: bid, .xid: xid };
530 }
531
532 LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
533 return { .arch: arch, .tensor: tensor, .suffix: nullptr, .bid: bid, .xid: xid };
534 }
535};
536
537
// static per-tensor-type metadata: layer classification plus the ggml
// operation associated with the tensor (looked up via llm_tensor_info_for)
struct llm_tensor_info {
    llm_tensor_layer layer;
    ggml_op op;
};
542
// architecture id -> name string
const char * llm_arch_name(llm_arch arch);

// name string -> architecture id; presumably returns LLM_ARCH_UNKNOWN for
// unrecognized names — confirm against the implementation
llm_arch llm_arch_from_string(const std::string & name);

// lookup of static per-tensor-type metadata
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

// architecture capability queries
bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid   (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);
552