1#include "llama-arch.h"
2
3#include "llama-impl.h"
4
5#include <map>
6
// Canonical string name for every supported model architecture.
// These strings are serialized identifiers (presumably the value of the
// "general.architecture" GGUF key — see LLM_KV_GENERAL_ARCHITECTURE below);
// they must never be renamed, or existing model files will stop resolving.
// NOTE(review): the enum name and the string do not always match verbatim
// (e.g. LLM_ARCH_OPENAI_MOE -> "gpt-oss"); the string is authoritative.
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_CLIP, "clip" }, // dummy, only used by llama-quantize
    { LLM_ARCH_LLAMA, "llama" },
    { LLM_ARCH_LLAMA4, "llama4" },
    { LLM_ARCH_DECI, "deci" },
    { LLM_ARCH_FALCON, "falcon" },
    { LLM_ARCH_GROK, "grok" },
    { LLM_ARCH_GPT2, "gpt2" },
    { LLM_ARCH_GPTJ, "gptj" },
    { LLM_ARCH_GPTNEOX, "gptneox" },
    { LLM_ARCH_MPT, "mpt" },
    { LLM_ARCH_BAICHUAN, "baichuan" },
    { LLM_ARCH_STARCODER, "starcoder" },
    { LLM_ARCH_REFACT, "refact" },
    { LLM_ARCH_BERT, "bert" },
    { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
    { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
    { LLM_ARCH_NEO_BERT, "neo-bert" },
    { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
    { LLM_ARCH_JINA_BERT_V3, "jina-bert-v3" },
    { LLM_ARCH_BLOOM, "bloom" },
    { LLM_ARCH_STABLELM, "stablelm" },
    { LLM_ARCH_QWEN, "qwen" },
    { LLM_ARCH_QWEN2, "qwen2" },
    { LLM_ARCH_QWEN2MOE, "qwen2moe" },
    { LLM_ARCH_QWEN2VL, "qwen2vl" },
    { LLM_ARCH_QWEN3, "qwen3" },
    { LLM_ARCH_QWEN3MOE, "qwen3moe" },
    { LLM_ARCH_QWEN3VL, "qwen3vl" },
    { LLM_ARCH_QWEN3VLMOE, "qwen3vlmoe" },
    { LLM_ARCH_PHI2, "phi2" },
    { LLM_ARCH_PHI3, "phi3" },
    { LLM_ARCH_PHIMOE, "phimoe" },
    { LLM_ARCH_PLAMO, "plamo" },
    { LLM_ARCH_PLAMO2, "plamo2" },
    { LLM_ARCH_CODESHELL, "codeshell" },
    { LLM_ARCH_ORION, "orion" },
    { LLM_ARCH_INTERNLM2, "internlm2" },
    { LLM_ARCH_MINICPM, "minicpm" },
    { LLM_ARCH_MINICPM3, "minicpm3" },
    { LLM_ARCH_GEMMA, "gemma" },
    { LLM_ARCH_GEMMA2, "gemma2" },
    { LLM_ARCH_GEMMA3, "gemma3" },
    { LLM_ARCH_GEMMA3N, "gemma3n" },
    { LLM_ARCH_GEMMA_EMBEDDING, "gemma-embedding" },
    { LLM_ARCH_STARCODER2, "starcoder2" },
    { LLM_ARCH_MAMBA, "mamba" },
    { LLM_ARCH_MAMBA2, "mamba2" },
    { LLM_ARCH_JAMBA, "jamba" },
    { LLM_ARCH_FALCON_H1, "falcon-h1" },
    { LLM_ARCH_XVERSE, "xverse" },
    { LLM_ARCH_COMMAND_R, "command-r" },
    { LLM_ARCH_COHERE2, "cohere2" },
    { LLM_ARCH_DBRX, "dbrx" },
    { LLM_ARCH_OLMO, "olmo" },
    { LLM_ARCH_OLMO2, "olmo2" },
    { LLM_ARCH_OLMOE, "olmoe" },
    { LLM_ARCH_OPENELM, "openelm" },
    { LLM_ARCH_ARCTIC, "arctic" },
    { LLM_ARCH_DEEPSEEK, "deepseek" },
    { LLM_ARCH_DEEPSEEK2, "deepseek2" },
    { LLM_ARCH_CHATGLM, "chatglm" },
    { LLM_ARCH_GLM4, "glm4" },
    { LLM_ARCH_GLM4_MOE, "glm4moe" },
    { LLM_ARCH_BITNET, "bitnet" },
    { LLM_ARCH_T5, "t5" },
    { LLM_ARCH_T5ENCODER, "t5encoder" },
    { LLM_ARCH_JAIS, "jais" },
    { LLM_ARCH_NEMOTRON, "nemotron" },
    { LLM_ARCH_NEMOTRON_H, "nemotron_h" },
    { LLM_ARCH_EXAONE, "exaone" },
    { LLM_ARCH_EXAONE4, "exaone4" },
    { LLM_ARCH_RWKV6, "rwkv6" },
    { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
    { LLM_ARCH_RWKV7, "rwkv7" },
    { LLM_ARCH_ARWKV7, "arwkv7" },
    { LLM_ARCH_GRANITE, "granite" },
    { LLM_ARCH_GRANITE_MOE, "granitemoe" },
    { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
    { LLM_ARCH_CHAMELEON, "chameleon" },
    { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
    { LLM_ARCH_PLM, "plm" },
    { LLM_ARCH_BAILINGMOE, "bailingmoe" },
    { LLM_ARCH_BAILINGMOE2, "bailingmoe2" },
    { LLM_ARCH_DOTS1, "dots1" },
    { LLM_ARCH_ARCEE, "arcee" },
    { LLM_ARCH_ERNIE4_5, "ernie4_5" },
    { LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
    { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
    { LLM_ARCH_HUNYUAN_DENSE, "hunyuan-dense" },
    { LLM_ARCH_SMOLLM3, "smollm3" },
    { LLM_ARCH_OPENAI_MOE, "gpt-oss" },
    { LLM_ARCH_LFM2, "lfm2" },
    { LLM_ARCH_LFM2MOE, "lfm2moe" },
    { LLM_ARCH_DREAM, "dream" },
    { LLM_ARCH_SMALLTHINKER, "smallthinker" },
    { LLM_ARCH_LLADA, "llada" },
    { LLM_ARCH_LLADA_MOE, "llada-moe" },
    { LLM_ARCH_SEED_OSS, "seed_oss" },
    { LLM_ARCH_GROVEMOE, "grovemoe" },
    { LLM_ARCH_APERTUS, "apertus" },
    { LLM_ARCH_MINIMAX_M2, "minimax-m2" },
    { LLM_ARCH_COGVLM, "cogvlm" },
    { LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
    // fallback entry so unknown architectures still map to a printable name
    { LLM_ARCH_UNKNOWN, "(unknown)" },
};
113
// GGUF metadata key string for every llm_kv value.
// Keys containing "%s" are printf-style templates; the placeholder is
// presumably substituted with the architecture name from LLM_ARCH_NAMES
// (e.g. "llama.context_length") by the lookup helper — confirm at call site.
// All strings here are part of the on-disk GGUF format: never rename them,
// including the known misspellings flagged below.
static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    // general.* — model-wide, architecture-independent metadata
    { LLM_KV_GENERAL_TYPE, "general.type" },
    { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
    { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
    { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
    { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
    { LLM_KV_GENERAL_NAME, "general.name" },
    { LLM_KV_GENERAL_AUTHOR, "general.author" },
    { LLM_KV_GENERAL_VERSION, "general.version" },
    { LLM_KV_GENERAL_URL, "general.url" },
    { LLM_KV_GENERAL_DESCRIPTION, "general.description" },
    { LLM_KV_GENERAL_LICENSE, "general.license" },
    { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" },
    { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" },

    // %s.* — per-architecture hyperparameters (sizes, MoE config, scales, ...)
    { LLM_KV_VOCAB_SIZE, "%s.vocab_size" },
    { LLM_KV_CONTEXT_LENGTH, "%s.context_length" },
    { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" },
    { LLM_KV_FEATURES_LENGTH, "%s.features_length" },
    { LLM_KV_BLOCK_COUNT, "%s.block_count" },
    { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
    { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
    { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, "%s.expert_feed_forward_length" },
    { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
    { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH, "%s.expert_chunk_feed_forward_length" },
    { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
    { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
    { LLM_KV_EXPERT_COUNT, "%s.expert_count" },
    { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
    { LLM_KV_EXPERT_SHARED_COUNT, "%s.expert_shared_count" },
    { LLM_KV_EXPERT_GROUP_COUNT, "%s.expert_group_count" },
    { LLM_KV_EXPERT_GROUP_USED_COUNT, "%s.expert_group_used_count" },
    { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
    { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
    { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
    { LLM_KV_EXPERT_GROUP_SCALE, "%s.expert_group_scale" },
    { LLM_KV_EXPERTS_PER_GROUP, "%s.experts_per_group" },
    { LLM_KV_MOE_EVERY_N_LAYERS, "%s.moe_every_n_layers" },
    { LLM_KV_NEXTN_PREDICT_LAYERS, "%s.nextn_predict_layers" },
    // NOTE(review): enum says NUM_DEEPSTACK, key says n_deepstack — string is canonical
    { LLM_KV_NUM_DEEPSTACK_LAYERS, "%s.n_deepstack_layers" },
    { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
    { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
    { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
    { LLM_KV_DECODER_BLOCK_COUNT, "%s.decoder_block_count" },
    { LLM_KV_ATTN_LOGIT_SOFTCAPPING, "%s.attn_logit_softcapping" },
    { LLM_KV_ROUTER_LOGIT_SOFTCAPPING, "%s.router_logit_softcapping" },
    { LLM_KV_FINAL_LOGIT_SOFTCAPPING, "%s.final_logit_softcapping" },
    { LLM_KV_SWIN_NORM, "%s.swin_norm" },
    { LLM_KV_RESCALE_EVERY_N_LAYERS, "%s.rescale_every_n_layers" },
    { LLM_KV_TIME_MIX_EXTRA_DIM, "%s.time_mix_extra_dim" },
    { LLM_KV_TIME_DECAY_EXTRA_DIM, "%s.time_decay_extra_dim" },
    { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
    { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
    { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },

    // %s.attention.* — attention hyperparameters
    { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
    { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
    { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
    { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
    { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
    { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
    { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
    { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
    { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
    { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
    { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
    { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
    { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
    { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
    { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
    { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
    { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
    { LLM_KV_ATTENTION_OUTPUT_SCALE, "%s.attention.output_scale" },
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH, "%s.attention.temperature_length" },
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },

    // %s.rope.* — rotary position embedding configuration and scaling
    { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
    { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
    { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
    { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
    { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
    { LLM_KV_ROPE_SCALING_FACTOR, "%s.rope.scaling.factor" },
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR, "%s.rope.scaling.attn_factor" },
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
    { LLM_KV_ROPE_SCALING_FINETUNED, "%s.rope.scaling.finetuned" },
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL, "%s.rope.scaling.yarn_log_multiplier" },
    { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR, "%s.rope.scaling.yarn_ext_factor" },
    { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, "%s.rope.scaling.yarn_attn_factor" },
    { LLM_KV_ROPE_SCALING_YARN_BETA_FAST, "%s.rope.scaling.yarn_beta_fast" },
    { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, "%s.rope.scaling.yarn_beta_slow" },

    // split.* — multi-file (sharded) model metadata; not arch-templated
    { LLM_KV_SPLIT_NO, "split.no" },
    { LLM_KV_SPLIT_COUNT, "split.count" },
    { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },

    // %s.ssm.* — state-space model (e.g. Mamba-style) hyperparameters
    { LLM_KV_SSM_CONV_KERNEL, "%s.ssm.conv_kernel" },
    { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
    { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
    { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
    { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
    { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },

    // %s.wkv.* — RWKV-family attention-replacement parameters
    { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },

    // %s.posnet.* / %s.convnext.* — sub-module hyperparameters
    { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
    { LLM_KV_POSNET_BLOCK_COUNT, "%s.posnet.block_count" },

    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
    { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },

    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
    // sentence-transformers dense modules feature dims
    { LLM_KV_DENSE_2_FEAT_IN, "%s.dense_2_feat_in" },
    { LLM_KV_DENSE_2_FEAT_OUT, "%s.dense_2_feat_out" },
    { LLM_KV_DENSE_3_FEAT_IN, "%s.dense_3_feat_in" },
    { LLM_KV_DENSE_3_FEAT_OUT, "%s.dense_3_feat_out" },

    // tokenizer.* — vocabulary, special-token ids, and tokenizer flags
    { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
    { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
    { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
    { LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
    { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
    { LLM_KV_TOKENIZER_SCORES, "tokenizer.ggml.scores" },
    { LLM_KV_TOKENIZER_MERGES, "tokenizer.ggml.merges" },
    { LLM_KV_TOKENIZER_BOS_ID, "tokenizer.ggml.bos_token_id" },
    { LLM_KV_TOKENIZER_EOS_ID, "tokenizer.ggml.eos_token_id" },
    { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
    { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
    { LLM_KV_TOKENIZER_UNK_ID, "tokenizer.ggml.unknown_token_id" },
    // "seperator" is misspelled in the serialized key; do NOT correct it —
    // existing GGUF files were written with this exact spelling
    { LLM_KV_TOKENIZER_SEP_ID, "tokenizer.ggml.seperator_token_id" },
    { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" },
    { LLM_KV_TOKENIZER_CLS_ID, "tokenizer.ggml.cls_token_id" },
    { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
    { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
    { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
    { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
    { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
    { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
    { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
    { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
    { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
    { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
    // fill-in-the-middle special-token ids
    { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
    { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
    { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
    { LLM_KV_TOKENIZER_FIM_PAD_ID, "tokenizer.ggml.fim_pad_token_id" },
    { LLM_KV_TOKENIZER_FIM_REP_ID, "tokenizer.ggml.fim_rep_token_id" },
    { LLM_KV_TOKENIZER_FIM_SEP_ID, "tokenizer.ggml.fim_sep_token_id" },

    // adapter.* — LoRA / aLoRA adapter metadata
    { LLM_KV_ADAPTER_TYPE, "adapter.type" },
    { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
    { LLM_KV_ADAPTER_LORA_TASK_NAME, "adapter.lora.task_name" },
    { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX, "adapter.lora.prompt_prefix" },
    { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },

    // xielu.* — xIELU activation parameters (note: not arch-templated)
    { LLM_KV_XIELU_ALPHA_N, "xielu.alpha_n" },
    { LLM_KV_XIELU_ALPHA_P, "xielu.alpha_p" },
    { LLM_KV_XIELU_BETA, "xielu.beta" },
    { LLM_KV_XIELU_EPS, "xielu.eps" },

    // deprecated
    // (kept so older files remain readable; superseded by the FIM_* keys above)
    { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
    { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
    { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
};
286
287static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
288 {
289 LLM_ARCH_CLIP,
290 {},
291 },
292 {
293 LLM_ARCH_LLAMA,
294 {
295 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
296 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
297 { LLM_TENSOR_OUTPUT, "output" },
298 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
299 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
300 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
301 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
302 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
303 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
304 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
305 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
306 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
307 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
308 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
309 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
310 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
311 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
312 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
313 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
314 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
315 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
316 },
317 },
318 {
319 LLM_ARCH_ARCEE,
320 {
321 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
322 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
323 { LLM_TENSOR_OUTPUT, "output" },
324 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
325 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
326 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
327 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
328 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
329 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
330 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
331 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
332 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
333 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
334 },
335 },
336 {
337 LLM_ARCH_LLAMA4,
338 {
339 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
340 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
341 { LLM_TENSOR_OUTPUT, "output" },
342 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
343 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
344 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
345 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
346 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
347 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
348 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
349 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
350 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
351 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
352 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
353 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
354 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
355 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
356 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
357 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
358 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
359 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
360 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
361 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
362 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
363 },
364 },
365 {
366 LLM_ARCH_DECI,
367 {
368 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
369 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
370 { LLM_TENSOR_OUTPUT, "output" },
371 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
372 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
373 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
374 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
375 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
376 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
377 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
378 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
379 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
380 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
381 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
382 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
383 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
384 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
385 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
386 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
387 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
388 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
389 },
390 },
391 {
392 LLM_ARCH_BAICHUAN,
393 {
394 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
395 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
396 { LLM_TENSOR_OUTPUT, "output" },
397 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
398 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
399 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
400 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
401 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
402 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
403 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
404 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
405 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
406 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
407 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
408 },
409 },
410 {
411 LLM_ARCH_FALCON,
412 {
413 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
414 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
415 { LLM_TENSOR_OUTPUT, "output" },
416 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
417 { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
418 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
419 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
420 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
421 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
422 },
423 },
424 {
425 LLM_ARCH_GROK,
426 {
427 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
428 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
429 { LLM_TENSOR_OUTPUT, "output" },
430 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
431 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
432 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
433 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
434 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
435 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
436 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
437 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
438 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
439 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
440 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
441 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
442 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
443 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
444 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
445 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
446 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
447 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
448 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
449 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
450 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
451 },
452 },
453 {
454 LLM_ARCH_GPT2,
455 {
456 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
457 { LLM_TENSOR_POS_EMBD, "position_embd" },
458 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
459 { LLM_TENSOR_OUTPUT, "output" },
460 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
461 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
462 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
463 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
464 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
465 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
466 },
467 },
468 {
469 LLM_ARCH_GPTJ,
470 {
471 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
472 },
473 },
474 {
475 LLM_ARCH_GPTNEOX,
476 {
477 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
478 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
479 { LLM_TENSOR_OUTPUT, "output" },
480 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
481 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
482 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
483 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
484 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
485 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
486 },
487 },
488 {
489 LLM_ARCH_MPT,
490 {
491 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
492 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
493 { LLM_TENSOR_OUTPUT, "output"},
494 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
495 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
496 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
497 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
498 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
499 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
500 { LLM_TENSOR_FFN_ACT, "blk.%d.ffn.act" },
501 { LLM_TENSOR_POS_EMBD, "position_embd" },
502 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm"},
503 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm"},
504 },
505 },
506 {
507 LLM_ARCH_STARCODER,
508 {
509 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
510 { LLM_TENSOR_POS_EMBD, "position_embd" },
511 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
512 { LLM_TENSOR_OUTPUT, "output" },
513 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
514 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
515 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
516 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
517 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
518 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
519 },
520 },
521 {
522 LLM_ARCH_REFACT,
523 {
524 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
525 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
526 { LLM_TENSOR_OUTPUT, "output" },
527 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
528 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
529 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
530 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
531 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
532 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
533 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
534 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
535 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
536 },
537 },
538 {
539 LLM_ARCH_BERT,
540 {
541 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
542 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
543 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
544 { LLM_TENSOR_POS_EMBD, "position_embd" },
545 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
546 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
547 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
548 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
549 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
550 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
551 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
552 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
553 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
554 { LLM_TENSOR_CLS, "cls" },
555 { LLM_TENSOR_CLS_OUT, "cls.output" },
556 },
557 },
558 {
559 LLM_ARCH_NOMIC_BERT,
560 {
561 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
562 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
563 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
564 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
565 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
566 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
567 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
568 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
569 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
570 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
571 },
572 },
573 {
574 LLM_ARCH_NOMIC_BERT_MOE,
575 {
576 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
577 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
578 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
579 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
580 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
581 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
582 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
583 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
584 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
585 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
586 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
587 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
588 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
589 },
590 },
591 {
592 LLM_ARCH_NEO_BERT,
593 {
594 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
595 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
596 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
597 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
598 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
599 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
600 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
601 { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
602 { LLM_TENSOR_CLS, "cls" },
603 { LLM_TENSOR_CLS_OUT, "cls.output" },
604 },
605 },
606 {
607 LLM_ARCH_JINA_BERT_V2,
608 {
609 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
610 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
611 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
612 { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
613 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
614 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
615 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
616 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
617 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
618 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
619 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
620 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
621 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
622 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
623 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
624 { LLM_TENSOR_CLS, "cls" },
625 },
626 },
627 {
628 LLM_ARCH_JINA_BERT_V3,
629 {
630 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
631 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
632 { LLM_TENSOR_TOKEN_TYPES, "token_types" },
633 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
634 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
635 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
636 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
637 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
638 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
639 },
640 },
641 {
642 LLM_ARCH_BLOOM,
643 {
644 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
645 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
646 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
647 { LLM_TENSOR_OUTPUT, "output" },
648 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
649 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
650 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
651 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
652 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
653 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
654 },
655 },
656 {
657 LLM_ARCH_STABLELM,
658 {
659 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
660 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
661 { LLM_TENSOR_OUTPUT, "output" },
662 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
663 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
664 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
665 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
666 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
667 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
668 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
669 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
670 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
671 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
672 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
673 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
674 },
675 },
676 {
677 LLM_ARCH_QWEN,
678 {
679 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
680 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
681 { LLM_TENSOR_OUTPUT, "output" },
682 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
683 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
684 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
685 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
686 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
687 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
688 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
689 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
690 },
691 },
692 {
693 LLM_ARCH_QWEN2,
694 {
695 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
696 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
697 { LLM_TENSOR_OUTPUT, "output" },
698 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
699 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
700 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
701 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
702 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
703 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
704 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
705 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
706 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
707 },
708 },
709 {
710 LLM_ARCH_QWEN2VL,
711 {
712 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
713 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
714 { LLM_TENSOR_OUTPUT, "output" },
715 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
716 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
717 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
718 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
719 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
720 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
721 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
722 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
723 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
724 },
725 },
726 {
727 LLM_ARCH_QWEN2MOE,
728 {
729 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
730 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
731 { LLM_TENSOR_OUTPUT, "output" },
732 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
733 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
734 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
735 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
736 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
737 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
738 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
739 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
740 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
741 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
742 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
743 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
744 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
745 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
746 },
747 },
748 {
749 LLM_ARCH_QWEN3,
750 {
751 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
752 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
753 { LLM_TENSOR_OUTPUT, "output" },
754 { LLM_TENSOR_CLS_OUT, "cls.output" },
755 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
756 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
757 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
758 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
759 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
760 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
761 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
762 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
763 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
764 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
765 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
766 },
767 },
768 {
769 LLM_ARCH_QWEN3MOE,
770 {
771 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
772 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
773 { LLM_TENSOR_OUTPUT, "output" },
774 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
775 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
776 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
777 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
778 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
779 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
780 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
781 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
782 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
783 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
784 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
785 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
786 },
787 },
788 {
789 LLM_ARCH_QWEN3VL,
790 {
791 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
792 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
793 { LLM_TENSOR_OUTPUT, "output" },
794 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
795 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
796 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
797 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
798 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
799 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
800 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
801 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
802 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
803 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
804 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
805 },
806 },
807 {
808 LLM_ARCH_QWEN3VLMOE,
809 {
810 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
811 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
812 { LLM_TENSOR_OUTPUT, "output" },
813 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
814 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
815 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
816 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
817 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
818 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
819 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
820 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
821 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
822 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
823 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
824 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
825 },
826 },
827 {
828 LLM_ARCH_PHI2,
829 {
830 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
831 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
832 { LLM_TENSOR_OUTPUT, "output" },
833 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
834 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
835 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
836 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
837 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
838 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
839 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
840 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
841 },
842 },
843 {
844 LLM_ARCH_PHI3,
845 {
846 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
847 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
848 { LLM_TENSOR_OUTPUT, "output" },
849 { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
850 { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
851 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
852 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
853 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
854 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
855 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
856 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
857 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
858 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
859 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
860 },
861 },
862 {
863 LLM_ARCH_PHIMOE,
864 {
865 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
866 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
867 { LLM_TENSOR_OUTPUT, "output" },
868 { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
869 { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
870 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
871 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
872 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
873 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
874 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
875 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
876 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
877 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
878 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
879 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
880 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
881 },
882 },
883 {
884 LLM_ARCH_PLAMO,
885 {
886 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
887 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
888 { LLM_TENSOR_OUTPUT, "output" },
889 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
890 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
891 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
892 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
893 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
894 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
895 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
896 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
897 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
898 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
899 },
900 },
901 {
902 LLM_ARCH_PLAMO2,
903 {
904 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
905 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
906 { LLM_TENSOR_OUTPUT, "output" },
907 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
908 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
909 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
910 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
911 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
912 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
913 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
914 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
915 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
916 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
917 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
918 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
919 { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
920 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
921 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
922 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
923 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
924 { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
925 { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
926 { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
927 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
928 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
929 },
930 },
931 {
932 LLM_ARCH_CODESHELL,
933 {
934 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
935 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
936 { LLM_TENSOR_OUTPUT, "output" },
937 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
938 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
939 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
940 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
941 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
942 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
943 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
944 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
945 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
946 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
947 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
948 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
949 },
950 },
951 {
952 LLM_ARCH_ORION,
953 {
954 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
955 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
956 { LLM_TENSOR_OUTPUT, "output" },
957 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
958 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
959 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
960 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
961 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
962 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
963 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
964 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
965 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
966 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
967 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
968 },
969 },
970 {
971 LLM_ARCH_INTERNLM2,
972 {
973 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
974 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
975 { LLM_TENSOR_OUTPUT, "output" },
976 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
977 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
978 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
979 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
980 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
981 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
982 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
983 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
984 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
985 },
986 },
987 {
988 LLM_ARCH_MINICPM,
989 {
990 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
991 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
992 { LLM_TENSOR_OUTPUT, "output" },
993 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
994 { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
995 { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
996 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
997 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
998 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
999 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1000 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1001 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1002 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1003 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1004 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1005 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1006 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1007 { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
1008 { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
1009 { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
1010 },
1011 },
1012 {
1013 LLM_ARCH_MINICPM3,
1014 {
1015 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1016 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1017 { LLM_TENSOR_OUTPUT, "output" },
1018 { LLM_TENSOR_ROPE_FACTORS_LONG, "rope_factors_long" },
1019 { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
1020 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1021 { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1022 { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1023 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1024 { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1025 { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1026 { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1027 { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1028 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1029 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1030 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1031 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1032 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1033 },
1034 },
1035 {
1036 LLM_ARCH_GEMMA,
1037 {
1038 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1039 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1040 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1041 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1042 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1043 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1044 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1045 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1046 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1047 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1048 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1049 },
1050 },
1051 {
1052 LLM_ARCH_GEMMA2,
1053 {
1054 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1055 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1056 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1057 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1058 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1059 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1060 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1061 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1062 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1063 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1064 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1065 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1066 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1067 },
1068 },
1069 {
1070 LLM_ARCH_GEMMA3,
1071 {
1072 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1073 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1074 { LLM_TENSOR_OUTPUT, "output" },
1075 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1076 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1077 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1078 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1079 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1080 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1081 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1082 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1083 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1084 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1085 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1086 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1087 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1088 },
1089 },
1090 {
1091 LLM_ARCH_GEMMA3N,
1092 {
1093 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1094 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1095 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1096 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1097 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1098 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1099 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1100 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1101 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1102 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1103 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1104 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1105 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1106 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1107 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1108 { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
1109 { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
1110 { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
1111 { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
1112 { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
1113 { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
1114 { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
1115 { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
1116 { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
1117 { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
1118 { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
1119 { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
1120 { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
1121 { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
1122 { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
1123 { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
1124 },
1125 },
1126 {
1127 LLM_ARCH_GEMMA_EMBEDDING,
1128 {
1129 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1130 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1131 { LLM_TENSOR_OUTPUT, "output" },
1132 { LLM_TENSOR_DENSE_2_OUT, "dense_2" },
1133 { LLM_TENSOR_DENSE_3_OUT, "dense_3" },
1134 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1135 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1136 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1137 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1138 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1139 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1140 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1141 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1142 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1143 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1144 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1145 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1146 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1147 },
1148 },
1149 {
1150 LLM_ARCH_STARCODER2,
1151 {
1152 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1153 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1154 { LLM_TENSOR_OUTPUT, "output" },
1155 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1156 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1157 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1158 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1159 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1160 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1161 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1162 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1163 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1164 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1165 },
1166 },
1167 {
1168 LLM_ARCH_MAMBA,
1169 {
1170 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1171 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1172 { LLM_TENSOR_OUTPUT, "output" },
1173 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1174 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1175 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1176 { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
1177 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1178 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1179 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1180 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1181 },
1182 },
1183 {
1184 LLM_ARCH_MAMBA2,
1185 {
1186 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1187 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1188 { LLM_TENSOR_OUTPUT, "output" },
1189 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1190 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1191 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1192 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1193 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1194 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1195 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1196 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1197 },
1198 },
1199 {
1200 LLM_ARCH_JAMBA,
1201 {
1202 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1203 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1204 { LLM_TENSOR_OUTPUT, "output" },
1205 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1206 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1207 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1208 { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
1209 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1210 { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
1211 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1212 { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
1213 { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
1214 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1215 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1216 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1217 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1218 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1219 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1220 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1221 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1222 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1223 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1224 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1225 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1226 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1227 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1228 },
1229 },
1230 {
1231 LLM_ARCH_FALCON_H1,
1232 {
1233 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1234 { LLM_TENSOR_OUTPUT, "output" },
1235 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1236 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1237 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1238 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1239 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1240 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1241 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1242 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1243 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1244 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1245 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1246 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1247 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1248 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1249 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1250 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1251 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1252 },
1253 },
1254 {
1255 LLM_ARCH_XVERSE,
1256 {
1257 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1258 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1259 { LLM_TENSOR_OUTPUT, "output" },
1260 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1261 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1262 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1263 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1264 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1265 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1266 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1267 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1268 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1269 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1270 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1271 },
1272 },
1273 {
1274 LLM_ARCH_COMMAND_R,
1275 {
1276 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1277 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1278 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1279 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1280 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1281 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1282 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1283 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1284 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1285 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1286 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1287 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1288 },
1289 },
1290 {
1291 LLM_ARCH_COHERE2,
1292 {
1293 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1294 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1295 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1296 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1297 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1298 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1299 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1300 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1301 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1302 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1303 },
1304 },
1305 {
1306 LLM_ARCH_DBRX,
1307 {
1308 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1309 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1310 { LLM_TENSOR_OUTPUT, "output" },
1311 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1312 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1313 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1314 { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
1315 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1316 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1317 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1318 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1319 },
1320 },
1321 {
1322 LLM_ARCH_OLMO,
1323 {
1324 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1325 { LLM_TENSOR_OUTPUT, "output" },
1326 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1327 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1328 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1329 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1330 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1331 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1332 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1333 },
1334 },
1335 {
1336 LLM_ARCH_OLMO2,
1337 {
1338 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1339 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1340 { LLM_TENSOR_OUTPUT, "output" },
1341 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1342 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1343 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1344 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1345 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1346 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1347 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1348 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1349 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1350 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1351 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1352 },
1353 },
1354 {
1355 LLM_ARCH_OLMOE,
1356 {
1357 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1358 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1359 { LLM_TENSOR_OUTPUT, "output" },
1360 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1361 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1362 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1363 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1364 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1365 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1366 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1367 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1368 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1369 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1370 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1371 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1372 },
1373 },
1374 {
1375 LLM_ARCH_OPENELM,
1376 {
1377 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1378 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1379 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1380 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1381 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1382 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1383 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1384 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1385 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1386 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1387 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1388 },
1389 },
1390 {
1391 LLM_ARCH_ARCTIC,
1392 {
1393 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1394 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1395 { LLM_TENSOR_OUTPUT, "output" },
1396 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1397 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1398 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1399 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1400 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1401 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1402 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1403 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1404 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1405 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1406 { LLM_TENSOR_FFN_NORM_EXPS, "blk.%d.ffn_norm_exps" },
1407 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1408 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1409 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1410 },
1411 },
1412 {
1413 LLM_ARCH_DEEPSEEK,
1414 {
1415 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1416 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1417 { LLM_TENSOR_OUTPUT, "output" },
1418 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1419 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1420 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1421 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1422 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1423 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1424 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1425 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1426 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1427 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1428 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1429 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1430 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1431 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1432 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1433 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1434 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1435 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1436 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1437 },
1438 },
1439 {
1440 LLM_ARCH_DEEPSEEK2,
1441 {
1442 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1443 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1444 { LLM_TENSOR_OUTPUT, "output" },
1445 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1446 { LLM_TENSOR_ATTN_Q_A_NORM, "blk.%d.attn_q_a_norm" },
1447 { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1448 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1449 { LLM_TENSOR_ATTN_Q_A, "blk.%d.attn_q_a" },
1450 { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
1451 { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1452 { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1453 { LLM_TENSOR_ATTN_K_B, "blk.%d.attn_k_b" },
1454 { LLM_TENSOR_ATTN_V_B, "blk.%d.attn_v_b" },
1455 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1456 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1457 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1458 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1459 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1460 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1461 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1462 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1463 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1464 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1465 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1466 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1467 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1468 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1469 },
1470 },
1471 {
1472 LLM_ARCH_PLM,
1473 {
1474 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1475 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1476 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1477 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1478 { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
1479 { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
1480 { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
1481 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1482 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1483 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1484 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1485 },
1486 },
1487 {
1488 LLM_ARCH_CHATGLM,
1489 {
1490 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1491 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1492 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1493 { LLM_TENSOR_OUTPUT, "output" },
1494 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1495 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1496 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1497 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1498 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1499 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1500 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1501 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1502 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1503 },
1504 },
1505 {
1506 LLM_ARCH_GLM4,
1507 {
1508 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1509 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1510 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1511 { LLM_TENSOR_OUTPUT, "output" },
1512 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1513 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1514 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1515 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1516 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1517 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1518 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1519 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1520 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1521 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1522 },
1523 },
1524 {
1525 LLM_ARCH_GLM4_MOE,
1526 {
1527 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1528 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1529 { LLM_TENSOR_OUTPUT, "output" },
1530 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1531 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1532 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1533 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1534 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1535 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1536 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1537 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1538 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1539 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1540 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1541 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1542 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1543 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1544 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1545 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1546 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1547 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1548 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
            // NextN/MTP tensors — preserved in the model but currently unused (they live in the final layer, whose index is dynamic)
1550 { LLM_TENSOR_NEXTN_EH_PROJ, "blk.%d.nextn.eh_proj" },
1551 { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
1552 { LLM_TENSOR_NEXTN_ENORM, "blk.%d.nextn.enorm" },
1553 { LLM_TENSOR_NEXTN_HNORM, "blk.%d.nextn.hnorm" },
1554 { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
1555 { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
1556 },
1557 },
1558 {
1559 LLM_ARCH_BITNET,
1560 {
1561 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1562 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1563 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1564 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1565 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1566 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1567 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1568 { LLM_TENSOR_ATTN_SUB_NORM, "blk.%d.attn_sub_norm" },
1569 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1570 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1571 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1572 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1573 { LLM_TENSOR_FFN_SUB_NORM, "blk.%d.ffn_sub_norm" },
1574 },
1575 },
1576 {
1577 LLM_ARCH_T5,
1578 {
1579 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1580 { LLM_TENSOR_OUTPUT, "output" },
1581 { LLM_TENSOR_DEC_OUTPUT_NORM, "dec.output_norm" },
1582 { LLM_TENSOR_DEC_ATTN_NORM, "dec.blk.%d.attn_norm" },
1583 { LLM_TENSOR_DEC_ATTN_Q, "dec.blk.%d.attn_q" },
1584 { LLM_TENSOR_DEC_ATTN_K, "dec.blk.%d.attn_k" },
1585 { LLM_TENSOR_DEC_ATTN_V, "dec.blk.%d.attn_v" },
1586 { LLM_TENSOR_DEC_ATTN_OUT, "dec.blk.%d.attn_o" },
1587 { LLM_TENSOR_DEC_ATTN_REL_B, "dec.blk.%d.attn_rel_b" },
1588 { LLM_TENSOR_DEC_CROSS_ATTN_NORM, "dec.blk.%d.cross_attn_norm" },
1589 { LLM_TENSOR_DEC_CROSS_ATTN_Q, "dec.blk.%d.cross_attn_q" },
1590 { LLM_TENSOR_DEC_CROSS_ATTN_K, "dec.blk.%d.cross_attn_k" },
1591 { LLM_TENSOR_DEC_CROSS_ATTN_V, "dec.blk.%d.cross_attn_v" },
1592 { LLM_TENSOR_DEC_CROSS_ATTN_OUT, "dec.blk.%d.cross_attn_o" },
1593 { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1594 { LLM_TENSOR_DEC_FFN_NORM, "dec.blk.%d.ffn_norm" },
1595 { LLM_TENSOR_DEC_FFN_GATE, "dec.blk.%d.ffn_gate" },
1596 { LLM_TENSOR_DEC_FFN_DOWN, "dec.blk.%d.ffn_down" },
1597 { LLM_TENSOR_DEC_FFN_UP, "dec.blk.%d.ffn_up" },
1598 { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1599 { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1600 { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1601 { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1602 { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1603 { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1604 { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1605 { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1606 { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1607 { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1608 { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1609 },
1610 },
1611 {
1612 LLM_ARCH_T5ENCODER,
1613 {
1614 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1615 { LLM_TENSOR_OUTPUT, "output" },
1616 { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
1617 { LLM_TENSOR_ENC_ATTN_NORM, "enc.blk.%d.attn_norm" },
1618 { LLM_TENSOR_ENC_ATTN_Q, "enc.blk.%d.attn_q" },
1619 { LLM_TENSOR_ENC_ATTN_K, "enc.blk.%d.attn_k" },
1620 { LLM_TENSOR_ENC_ATTN_V, "enc.blk.%d.attn_v" },
1621 { LLM_TENSOR_ENC_ATTN_OUT, "enc.blk.%d.attn_o" },
1622 { LLM_TENSOR_ENC_ATTN_REL_B, "enc.blk.%d.attn_rel_b" },
1623 { LLM_TENSOR_ENC_FFN_NORM, "enc.blk.%d.ffn_norm" },
1624 { LLM_TENSOR_ENC_FFN_GATE, "enc.blk.%d.ffn_gate" },
1625 { LLM_TENSOR_ENC_FFN_DOWN, "enc.blk.%d.ffn_down" },
1626 { LLM_TENSOR_ENC_FFN_UP, "enc.blk.%d.ffn_up" },
1627 },
1628 },
1629 {
1630 LLM_ARCH_JAIS,
1631 {
1632 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1633 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1634 { LLM_TENSOR_OUTPUT, "output" },
1635 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1636 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
1637 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1638 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1639 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1640 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1641 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1642 },
1643 },
1644 {
1645 LLM_ARCH_NEMOTRON,
1646 {
1647 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1648 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1649 { LLM_TENSOR_OUTPUT, "output" },
1650 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1651 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1652 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1653 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1654 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1655 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1656 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1657 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1658 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1659 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1660 },
1661 },
1662 {
1663 LLM_ARCH_NEMOTRON_H,
1664 {
1665 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1666 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1667 { LLM_TENSOR_OUTPUT, "output" },
1668 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1669 // mamba(2) ssm layers
1670 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1671 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1672 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1673 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1674 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1675 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1676 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1677 // attention layers
1678 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1679 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1680 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1681 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1682 // dense FFN
1683 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1684 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1685 },
1686 },
1687 {
1688 LLM_ARCH_EXAONE,
1689 {
1690 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1691 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1692 { LLM_TENSOR_OUTPUT, "output" },
1693 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1694 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1695 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1696 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1697 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1698 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1699 { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
1700 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1701 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1702 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1703 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1704 },
1705 },
1706 {
1707 LLM_ARCH_EXAONE4,
1708 {
1709 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1710 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1711 { LLM_TENSOR_OUTPUT, "output" },
1712 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1713 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1714 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1715 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1716 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1717 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1718 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1719 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1720 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1721 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1722 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1723 { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1724 }
1725 },
1726 {
1727 LLM_ARCH_RWKV6,
1728 {
1729 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1730 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1731 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1732 { LLM_TENSOR_OUTPUT, "output" },
1733 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1734 { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1735 { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1736 { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1737 { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1738 { LLM_TENSOR_TIME_MIX_LERP_W, "blk.%d.time_mix_lerp_w" },
1739 { LLM_TENSOR_TIME_MIX_LERP_K, "blk.%d.time_mix_lerp_k" },
1740 { LLM_TENSOR_TIME_MIX_LERP_V, "blk.%d.time_mix_lerp_v" },
1741 { LLM_TENSOR_TIME_MIX_LERP_R, "blk.%d.time_mix_lerp_r" },
1742 { LLM_TENSOR_TIME_MIX_LERP_G, "blk.%d.time_mix_lerp_g" },
1743 { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1744 { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1745 { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1746 { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1747 { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1748 { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1749 { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1750 { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1751 { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1752 { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1753 { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1754 { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1755 { LLM_TENSOR_CHANNEL_MIX_LERP_R, "blk.%d.channel_mix_lerp_r" },
1756 { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1757 { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1758 { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, "blk.%d.channel_mix_receptance" },
1759 },
1760 },
1761 {
1762 LLM_ARCH_RWKV6QWEN2,
1763 {
1764 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1765 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1766 { LLM_TENSOR_OUTPUT, "output" },
1767 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1768 { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1769 { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1770 { LLM_TENSOR_TIME_MIX_LERP_X, "blk.%d.time_mix_lerp_x" },
1771 { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1772 { LLM_TENSOR_TIME_MIX_FIRST, "blk.%d.time_mix_first" },
1773 { LLM_TENSOR_TIME_MIX_DECAY, "blk.%d.time_mix_decay" },
1774 { LLM_TENSOR_TIME_MIX_DECAY_W1, "blk.%d.time_mix_decay_w1" },
1775 { LLM_TENSOR_TIME_MIX_DECAY_W2, "blk.%d.time_mix_decay_w2" },
1776 { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1777 { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1778 { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1779 { LLM_TENSOR_TIME_MIX_GATE, "blk.%d.time_mix_gate" },
1780 { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1781 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1782 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1783 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1784 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1785 },
1786 },
1787 {
1788 LLM_ARCH_RWKV7,
1789 {
1790 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1791 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1792 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1793 { LLM_TENSOR_OUTPUT, "output" },
1794 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1795 { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
1796 { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1797 { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1798 { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1799 { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1800 { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1801 { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1802 { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1803 { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1804 { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1805 { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1806 { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1807 { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1808 { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1809 { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1810 { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1811 { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1812 { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1813 { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1814 { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1815 { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1816 { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
1817 { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
1818 { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
1819 },
1820 },
1821 {
1822 LLM_ARCH_ARWKV7,
1823 {
1824 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1825 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1826 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1827 { LLM_TENSOR_OUTPUT, "output" },
1828 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1829 { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
1830 { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
1831 { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
1832 { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
1833 { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
1834 { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
1835 { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
1836 { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
1837 { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
1838 { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
1839 { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
1840 { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
1841 { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
1842 { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
1843 { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
1844 { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
1845 { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
1846 { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
1847 { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
1848 { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
1849 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1850 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1851 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1852 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1853 },
1854 },
1855 {
1856 LLM_ARCH_GRANITE,
1857 {
1858 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1859 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1860 { LLM_TENSOR_OUTPUT, "output" },
1861 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1862 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1863 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1864 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1865 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1866 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1867 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1868 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1869 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1870 },
1871 },
1872 {
1873 LLM_ARCH_GRANITE_MOE,
1874 {
1875 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1876 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1877 { LLM_TENSOR_OUTPUT, "output" },
1878 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1879 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1880 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1881 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1882 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1883 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1884 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1885 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1886 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1887 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1888 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1889 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1890 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1891 },
1892 },
1893 {
1894 LLM_ARCH_GRANITE_HYBRID,
1895 {
1896 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1897 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1898 { LLM_TENSOR_OUTPUT, "output" },
1899 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1900 // mamba(2) ssm layers
1901 { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
1902 { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
1903 { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
1904 { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
1905 { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
1906 { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
1907 { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
1908 // attention layers
1909 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1910 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1911 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1912 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1913 // dense FFN
1914 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1915 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1916 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1917 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1918 // moe FFN
1919 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1920 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1921 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1922 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1923 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1924 // shared expert
1925 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1926 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1927 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1928 },
1929 },
1930 {
1931 LLM_ARCH_CHAMELEON,
1932 {
1933 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1934 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1935 { LLM_TENSOR_OUTPUT, "output" },
1936 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1937 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1938 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1939 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1940 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1941 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1942 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1943 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1944 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1945 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1946 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1947 },
1948 },
1949 {
1950 LLM_ARCH_WAVTOKENIZER_DEC,
1951 {
1952 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1953 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
1954 { LLM_TENSOR_CONV1D, "conv1d" },
1955 { LLM_TENSOR_CONVNEXT_DW, "convnext.%d.dw" },
1956 { LLM_TENSOR_CONVNEXT_NORM, "convnext.%d.norm" },
1957 { LLM_TENSOR_CONVNEXT_PW1, "convnext.%d.pw1" },
1958 { LLM_TENSOR_CONVNEXT_PW2, "convnext.%d.pw2" },
1959 { LLM_TENSOR_CONVNEXT_GAMMA, "convnext.%d.gamma" },
1960 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1961 { LLM_TENSOR_OUTPUT, "output" },
1962 { LLM_TENSOR_POS_NET_CONV1, "posnet.%d.conv1" },
1963 { LLM_TENSOR_POS_NET_CONV2, "posnet.%d.conv2" },
1964 { LLM_TENSOR_POS_NET_NORM, "posnet.%d.norm" },
1965 { LLM_TENSOR_POS_NET_NORM1, "posnet.%d.norm1" },
1966 { LLM_TENSOR_POS_NET_NORM2, "posnet.%d.norm2" },
1967 { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
1968 { LLM_TENSOR_POS_NET_ATTN_Q, "posnet.%d.attn_q" },
1969 { LLM_TENSOR_POS_NET_ATTN_K, "posnet.%d.attn_k" },
1970 { LLM_TENSOR_POS_NET_ATTN_V, "posnet.%d.attn_v" },
1971 { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
1972 },
1973 },
1974 {
1975 LLM_ARCH_BAILINGMOE,
1976 {
1977 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1978 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1979 { LLM_TENSOR_OUTPUT, "output" },
1980 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1981 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1982 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1983 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1984 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1985 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1986 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1987 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1988 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1989 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1990 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1991 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1992 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1993 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1994 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1995 },
1996 },
1997 {
1998 LLM_ARCH_BAILINGMOE2,
1999 {
2000 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2001 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2002 { LLM_TENSOR_OUTPUT, "output" },
2003 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2004 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2005 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2006 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
2007 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2008 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2009 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
2010 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2011 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2012 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2013 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2014 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2015 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2016 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2017 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
2018 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
2019 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
2020 { LLM_TENSOR_NEXTN_EH_PROJ, "blk.%d.nextn.eh_proj" },
2021 { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
2022 { LLM_TENSOR_NEXTN_ENORM, "blk.%d.nextn.enorm" },
2023 { LLM_TENSOR_NEXTN_HNORM, "blk.%d.nextn.hnorm" },
2024 { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
2025 { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
2026 { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
2027 },
2028 },
2029 {
2030 LLM_ARCH_DOTS1,
2031 {
2032 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2033 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2034 { LLM_TENSOR_OUTPUT, "output" },
2035 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2036 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2037 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2038 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2039 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2040 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2041 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2042 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2043 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2044 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2045 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2046 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2047 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2048 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2049 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2050 { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
2051 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
2052 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
2053 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
2054 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
2055 }
2056 },
2057 {
2058 LLM_ARCH_ERNIE4_5,
2059 {
2060 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2061 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2062 { LLM_TENSOR_OUTPUT, "output" },
2063 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2064 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2065 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2066 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2067 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2068 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2069 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2070 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2071 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2072 },
2073 },
2074 {
2075 LLM_ARCH_ERNIE4_5_MOE,
2076 {
2077 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2078 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2079 { LLM_TENSOR_OUTPUT, "output" },
2080 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2081 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2082 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2083 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2084 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2085 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2086 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2087 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2088 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2089 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2090 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
2091 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
2092 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
2093 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2094 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2095 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2096 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
2097 },
2098 },
2099 {
2100 LLM_ARCH_HUNYUAN_MOE,
2101 {
2102 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2103 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2104 { LLM_TENSOR_OUTPUT, "output" },
2105 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2106 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2107 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2108 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2109 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2110 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2111 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2112 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2113 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2114 { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
2115 { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
2116 { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
2117 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2118 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2119 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2120 },
2121 },
2122 {
2123 LLM_ARCH_HUNYUAN_DENSE,
2124 {
2125 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2126 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2127 { LLM_TENSOR_OUTPUT, "output" },
2128 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2129 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2130 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2131 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2132 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2133 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2134 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2135 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2136 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2137 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2138 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2139
2140 },
2141 },
2142 {
2143 LLM_ARCH_SMOLLM3,
2144 {
2145 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2146 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2147 { LLM_TENSOR_OUTPUT, "output" },
2148 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2149 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2150 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2151 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2152 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2153 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2154 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2155 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2156 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2157 },
2158 },
2159 {
2160 LLM_ARCH_OPENAI_MOE,
2161 {
2162 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2163 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2164 { LLM_TENSOR_OUTPUT, "output" },
2165 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2166 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
2167 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2168 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2169 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2170 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2171 { LLM_TENSOR_ATTN_SINKS, "blk.%d.attn_sinks" },
2172 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2173 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2174 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2175 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2176 },
2177 },
2178 {
2179 LLM_ARCH_LFM2,
2180 {
2181 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2182 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2183 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2184 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2185 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2186 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2187 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2188 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2189 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2190 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2191 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2192 { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
2193 { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
2194 { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
2195 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2196 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
2197 { LLM_TENSOR_OUTPUT, "output" },
2198 }
2199 },
2200 {
2201 LLM_ARCH_LFM2MOE,
2202 {
2203 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2204 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2205 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2206 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2207 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2208 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2209 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2210 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2211 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2212 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2213 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2214 { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
2215 { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
2216 { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
2217 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2218 { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
2219 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2220 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2221 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2222 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2223 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
2224 }
2225 },
2226 {
2227 LLM_ARCH_SMALLTHINKER,
2228 {
2229 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2230 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2231 { LLM_TENSOR_OUTPUT, "output" },
2232 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2233 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2234 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2235 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2236 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2237 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2238 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2239 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2240 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2241 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2242 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2243 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2244 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }
2245 },
2246 },
2247 {
2248 LLM_ARCH_APERTUS,
2249 {
2250 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2251 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2252 { LLM_TENSOR_OUTPUT, "output" },
2253 { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
2254 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2255 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2256 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2257 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2258 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2259 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2260 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2261 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2262 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2263 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2264 },
2265 },
2266 {
2267 LLM_ARCH_DREAM,
2268 {
2269 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2270 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2271 { LLM_TENSOR_OUTPUT, "output" },
2272 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2273 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2274 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2275 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2276 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2277 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2278 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2279 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2280 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2281 },
2282 },
2283 {
2284 LLM_ARCH_LLADA,
2285 {
2286 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2287 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2288 { LLM_TENSOR_OUTPUT, "output" },
2289 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2290 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2291 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2292 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2293 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2294 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2295 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2296 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2297 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2298 },
2299 },
2300 {
2301 LLM_ARCH_LLADA_MOE,
2302 {
2303 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2304 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2305 { LLM_TENSOR_OUTPUT, "output" },
2306 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2307 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2308 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2309 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2310 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2311 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2312 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2313 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2314 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2315 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2316 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2317 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2318 },
2319 },
2320 {
2321 LLM_ARCH_SEED_OSS,
2322 {
2323 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2324 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2325 { LLM_TENSOR_OUTPUT, "output" },
2326 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2327 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2328 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2329 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2330 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2331 { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
2332 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2333 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2334 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2335 },
2336 },
2337 {
2338 LLM_ARCH_GROVEMOE,
2339 {
2340 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2341 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2342 { LLM_TENSOR_OUTPUT, "output" },
2343 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2344 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2345 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2346 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2347 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2348 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2349 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2350 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2351 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2352 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2353 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2354 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2355 { LLM_TENSOR_FFN_GATE_CHEXPS, "blk.%d.ffn_gate_chexps" },
2356 { LLM_TENSOR_FFN_DOWN_CHEXPS, "blk.%d.ffn_down_chexps" },
2357 { LLM_TENSOR_FFN_UP_CHEXPS, "blk.%d.ffn_up_chexps" },
2358 },
2359 },
2360 {
2361 LLM_ARCH_MINIMAX_M2,
2362 {
2363 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2364 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2365 { LLM_TENSOR_OUTPUT, "output" },
2366 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2367 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2368 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2369 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2370 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2371 { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
2372 { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
2373 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2374 { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
2375 { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
2376 { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
2377 { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
2378 { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
2379 },
2380 },
2381 {
2382 LLM_ARCH_PANGU_EMBED,
2383 {
2384 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2385 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2386 { LLM_TENSOR_OUTPUT, "output" },
2387 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2388 { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
2389 { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
2390 { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
2391 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2392 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2393 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2394 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2395 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2396 },
2397 },
2398 {
2399 LLM_ARCH_COGVLM,
2400 {
2401 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2402 { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
2403 { LLM_TENSOR_OUTPUT, "output" },
2404 { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
2405 { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
2406 { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
2407 { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
2408 { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
2409 { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
2410 { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
2411 { LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
2412 { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
2413 { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
2414 { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
2415 { LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
2416 },
2417 },
2418 {
2419 LLM_ARCH_UNKNOWN,
2420 {
2421 { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
2422 },
2423 },
2424};
2425
2426static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
2427 {LLM_TENSOR_TOKEN_EMBD, {.layer: LLM_TENSOR_LAYER_INPUT, .op: GGML_OP_GET_ROWS}},
2428 {LLM_TENSOR_POS_EMBD, {.layer: LLM_TENSOR_LAYER_INPUT, .op: GGML_OP_GET_ROWS}},
2429 {LLM_TENSOR_TOKEN_EMBD_NORM, {.layer: LLM_TENSOR_LAYER_INPUT, .op: GGML_OP_GET_ROWS}},
2430 {LLM_TENSOR_TOKEN_TYPES, {.layer: LLM_TENSOR_LAYER_INPUT, .op: GGML_OP_GET_ROWS}},
2431 {LLM_TENSOR_OUTPUT, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2432 {LLM_TENSOR_CLS, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2433 {LLM_TENSOR_CLS_OUT, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2434 {LLM_TENSOR_DENSE_2_OUT, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}}, // Dense layer output
2435 {LLM_TENSOR_DENSE_3_OUT, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}}, // Dense layer output
2436 {LLM_TENSOR_OUTPUT_NORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2437 {LLM_TENSOR_DEC_OUTPUT_NORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2438 {LLM_TENSOR_ENC_OUTPUT_NORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2439 {LLM_TENSOR_ROPE_FREQS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ROPE}},
2440 {LLM_TENSOR_ROPE_FACTORS_LONG, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ROPE}},
2441 {LLM_TENSOR_ROPE_FACTORS_SHORT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ROPE}},
2442 {LLM_TENSOR_ATTN_Q, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2443 {LLM_TENSOR_ATTN_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2444 {LLM_TENSOR_ATTN_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2445 {LLM_TENSOR_ATTN_QKV, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2446 {LLM_TENSOR_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2447 {LLM_TENSOR_FFN_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2448 {LLM_TENSOR_FFN_DOWN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2449 {LLM_TENSOR_FFN_UP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2450 {LLM_TENSOR_FFN_DOWN_SHEXP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2451 {LLM_TENSOR_FFN_GATE_SHEXP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2452 {LLM_TENSOR_FFN_UP_SHEXP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2453 {LLM_TENSOR_ATTN_Q_A, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2454 {LLM_TENSOR_ATTN_Q_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2455 {LLM_TENSOR_ATTN_KV_A_MQA, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2456 {LLM_TENSOR_ATTN_KV_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2457 {LLM_TENSOR_ATTN_K_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2458 {LLM_TENSOR_ATTN_V_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2459 {LLM_TENSOR_ATTN_SINKS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_SCALE}},
2460 {LLM_TENSOR_DEC_ATTN_Q, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2461 {LLM_TENSOR_DEC_ATTN_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2462 {LLM_TENSOR_DEC_ATTN_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2463 {LLM_TENSOR_DEC_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2464 {LLM_TENSOR_DEC_CROSS_ATTN_Q, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2465 {LLM_TENSOR_DEC_CROSS_ATTN_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2466 {LLM_TENSOR_DEC_CROSS_ATTN_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2467 {LLM_TENSOR_DEC_CROSS_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2468 {LLM_TENSOR_DEC_FFN_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2469 {LLM_TENSOR_DEC_FFN_DOWN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2470 {LLM_TENSOR_DEC_FFN_UP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2471 {LLM_TENSOR_ENC_ATTN_Q, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2472 {LLM_TENSOR_ENC_ATTN_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2473 {LLM_TENSOR_ENC_ATTN_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2474 {LLM_TENSOR_ENC_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2475 {LLM_TENSOR_ENC_FFN_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2476 {LLM_TENSOR_ENC_FFN_DOWN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2477 {LLM_TENSOR_ENC_FFN_UP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2478 {LLM_TENSOR_FFN_GATE_INP_SHEXP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2479 {LLM_TENSOR_FFN_GATE_INP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2480 {LLM_TENSOR_SSM_IN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2481 {LLM_TENSOR_SSM_X, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2482 {LLM_TENSOR_SSM_DT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2483 {LLM_TENSOR_SSM_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2484 {LLM_TENSOR_TIME_MIX_W1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2485 {LLM_TENSOR_TIME_MIX_W2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2486 {LLM_TENSOR_TIME_MIX_A1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2487 {LLM_TENSOR_TIME_MIX_A2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2488 {LLM_TENSOR_TIME_MIX_V1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2489 {LLM_TENSOR_TIME_MIX_V2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2490 {LLM_TENSOR_TIME_MIX_G1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2491 {LLM_TENSOR_TIME_MIX_G2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2492 {LLM_TENSOR_TIME_MIX_DECAY_W1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2493 {LLM_TENSOR_TIME_MIX_DECAY_W2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2494 {LLM_TENSOR_TIME_MIX_KEY, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2495 {LLM_TENSOR_TIME_MIX_VALUE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2496 {LLM_TENSOR_TIME_MIX_RECEPTANCE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2497 {LLM_TENSOR_TIME_MIX_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2498 {LLM_TENSOR_TIME_MIX_OUTPUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2499 {LLM_TENSOR_CHANNEL_MIX_KEY, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2500 {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2501 {LLM_TENSOR_CHANNEL_MIX_VALUE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2502 {LLM_TENSOR_FFN_ACT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_DIV}},
2503 {LLM_TENSOR_SSM_CONV1D, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_SSM_CONV}},
2504 {LLM_TENSOR_SSM_A, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_SSM_SCAN}},
2505 {LLM_TENSOR_SSM_DT_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2506 {LLM_TENSOR_SSM_B_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2507 {LLM_TENSOR_SSM_C_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2508 {LLM_TENSOR_SSM_D, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2509 {LLM_TENSOR_SSM_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2510 {LLM_TENSOR_TIME_MIX_LERP_X, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2511 {LLM_TENSOR_TIME_MIX_LN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2512 {LLM_TENSOR_CHANNEL_MIX_LERP_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2513 {LLM_TENSOR_CHANNEL_MIX_LERP_R, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2514 {LLM_TENSOR_TIME_MIX_K_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2515 {LLM_TENSOR_TIME_MIX_K_A, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2516 {LLM_TENSOR_TIME_MIX_R_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2517 {LLM_TENSOR_TIME_MIX_LERP_W, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2518 {LLM_TENSOR_TIME_MIX_LERP_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2519 {LLM_TENSOR_TIME_MIX_LERP_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2520 {LLM_TENSOR_TIME_MIX_LERP_R, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2521 {LLM_TENSOR_TIME_MIX_LERP_G, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2522 {LLM_TENSOR_TIME_MIX_LERP_FUSED, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2523 {LLM_TENSOR_TIME_MIX_DECAY, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2524 {LLM_TENSOR_TIME_MIX_W0, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2525 {LLM_TENSOR_TIME_MIX_A0, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2526 {LLM_TENSOR_TIME_MIX_V0, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2527 {LLM_TENSOR_TIME_MIX_FIRST, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_RWKV_WKV6}},
2528 {LLM_TENSOR_ATTN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2529 {LLM_TENSOR_ATTN_NORM_2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2530 {LLM_TENSOR_ATTN_OUT_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2531 {LLM_TENSOR_ATTN_POST_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2532 {LLM_TENSOR_FFN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2533 {LLM_TENSOR_FFN_POST_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2534 {LLM_TENSOR_FFN_NORM_EXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2535 {LLM_TENSOR_ATTN_Q_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2536 {LLM_TENSOR_ATTN_K_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2537 {LLM_TENSOR_LAYER_OUT_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2538 {LLM_TENSOR_ATTN_Q_A_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2539 {LLM_TENSOR_ATTN_KV_A_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2540 {LLM_TENSOR_ATTN_SUB_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2541 {LLM_TENSOR_FFN_SUB_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2542 {LLM_TENSOR_DEC_ATTN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2543 {LLM_TENSOR_DEC_CROSS_ATTN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2544 {LLM_TENSOR_DEC_FFN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2545 {LLM_TENSOR_ENC_ATTN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2546 {LLM_TENSOR_ENC_FFN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2547 {LLM_TENSOR_DEC_ATTN_REL_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_GET_ROWS}},
2548 {LLM_TENSOR_ENC_ATTN_REL_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_GET_ROWS}},
2549 {LLM_TENSOR_FFN_DOWN_EXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2550 {LLM_TENSOR_FFN_GATE_EXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2551 {LLM_TENSOR_FFN_UP_EXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2552 {LLM_TENSOR_FFN_DOWN_CHEXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2553 {LLM_TENSOR_FFN_GATE_CHEXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2554 {LLM_TENSOR_FFN_UP_CHEXPS, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT_ID}},
2555 {LLM_TENSOR_FFN_EXP_PROBS_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_ADD}},
2556 // altup / laurel (gemma 3n)
2557 {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_GET_ROWS}},
2558 {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2559 {LLM_TENSOR_PER_LAYER_PROJ_NORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2560 {LLM_TENSOR_ALTUP_PROJ, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2561 {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2562 {LLM_TENSOR_PER_LAYER_INP_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2563 {LLM_TENSOR_PER_LAYER_PROJ, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2564 {LLM_TENSOR_PER_LAYER_POST_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2565 {LLM_TENSOR_ALTUP_CORRECT_COEF, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2566 {LLM_TENSOR_ALTUP_CORRECT_SCALE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2567 {LLM_TENSOR_ALTUP_PREDICT_COEF, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2568 {LLM_TENSOR_ALTUP_ROUTER, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2569 {LLM_TENSOR_ALTUP_ROUTER_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2570 {LLM_TENSOR_LAUREL_L, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2571 {LLM_TENSOR_LAUREL_R, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2572 {LLM_TENSOR_LAUREL_POST_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2573 // this tensor is loaded for T5, but never used
2574 {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_NONE}},
2575 {LLM_TENSOR_CONV1D, {.layer: LLM_TENSOR_LAYER_INPUT, .op: GGML_OP_IM2COL}},
2576 {LLM_TENSOR_POS_NET_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2577 {LLM_TENSOR_POS_NET_NORM1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2578 {LLM_TENSOR_POS_NET_NORM2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2579 {LLM_TENSOR_POS_NET_CONV1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_IM2COL}},
2580 {LLM_TENSOR_POS_NET_CONV2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_IM2COL}},
2581 {LLM_TENSOR_POS_NET_ATTN_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2582 {LLM_TENSOR_POS_NET_ATTN_Q, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2583 {LLM_TENSOR_POS_NET_ATTN_K, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2584 {LLM_TENSOR_POS_NET_ATTN_V, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2585 {LLM_TENSOR_POS_NET_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2586 {LLM_TENSOR_CONVNEXT_DW, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_IM2COL}},
2587 {LLM_TENSOR_CONVNEXT_NORM, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2588 {LLM_TENSOR_CONVNEXT_PW1, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2589 {LLM_TENSOR_CONVNEXT_PW2, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2590 {LLM_TENSOR_CONVNEXT_GAMMA, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL}},
2591 {LLM_TENSOR_SHORTCONV_CONV, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_SSM_CONV}},
2592 {LLM_TENSOR_SHORTCONV_INPROJ, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2593 {LLM_TENSOR_SHORTCONV_OUTPROJ, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2594 {LLM_TENSOR_VISEXP_ATTN_QKV, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2595 {LLM_TENSOR_VISEXP_ATTN_OUT, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2596 {LLM_TENSOR_VISEXP_FFN_GATE, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2597 {LLM_TENSOR_VISEXP_FFN_DOWN, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2598 {LLM_TENSOR_VISEXP_FFN_UP, {.layer: LLM_TENSOR_LAYER_REPEATING, .op: GGML_OP_MUL_MAT}},
2599 // NextN/MTP tensors are currently ignored (reserved for future MTP support)
2600 // These tensors only exist in the last layer(s) and are treated as output tensors
2601 {LLM_TENSOR_NEXTN_EH_PROJ, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2602 {LLM_TENSOR_NEXTN_EMBED_TOKENS, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_GET_ROWS}},
2603 {LLM_TENSOR_NEXTN_ENORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_GET_ROWS}},
2604 {LLM_TENSOR_NEXTN_HNORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2605 {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL_MAT}},
2606 {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, {.layer: LLM_TENSOR_LAYER_OUTPUT, .op: GGML_OP_MUL}},
2607};
2608
2609LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
2610
2611std::string LLM_KV::operator()(llm_kv kv) const {
2612 std::string name = ::format(fmt: LLM_KV_NAMES.at(k: kv), LLM_ARCH_NAMES.at(k: arch));
2613
2614 if (suffix != nullptr) {
2615 name += ".";
2616 name += suffix;
2617 }
2618
2619 return name;
2620}
2621
2622std::string LLM_TN_IMPL::str() const {
2623 if (LLM_TENSOR_NAMES.at(k: arch).find(x: tensor) == LLM_TENSOR_NAMES.at(k: arch).end()) {
2624 return "__missing__";
2625 }
2626
2627 std::string name = ::format(fmt: LLM_TENSOR_NAMES.at(k: arch).at(k: tensor), bid, xid);
2628
2629 if (suffix != nullptr) {
2630 name += ".";
2631 name += suffix;
2632 }
2633
2634 return name;
2635}
2636
2637const char * llm_arch_name(llm_arch arch) {
2638 auto it = LLM_ARCH_NAMES.find(x: arch);
2639 if (it == LLM_ARCH_NAMES.end()) {
2640 return "unknown";
2641 }
2642 return it->second;
2643}
2644
2645llm_arch llm_arch_from_string(const std::string & name) {
2646 for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
2647 if (kv.second == name) {
2648 return kv.first;
2649 }
2650 }
2651
2652 return LLM_ARCH_UNKNOWN;
2653}
2654
2655const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
2656 return LLM_TENSOR_INFOS.at(k: tensor);
2657}
2658
2659bool llm_arch_is_recurrent(const llm_arch & arch) {
2660 switch (arch) {
2661 case LLM_ARCH_MAMBA:
2662 case LLM_ARCH_MAMBA2:
2663 case LLM_ARCH_RWKV6:
2664 case LLM_ARCH_RWKV6QWEN2:
2665 case LLM_ARCH_RWKV7:
2666 case LLM_ARCH_ARWKV7:
2667 return true;
2668 default:
2669 return false;
2670 }
2671}
2672
2673bool llm_arch_is_hybrid(const llm_arch & arch) {
2674 switch (arch) {
2675 case LLM_ARCH_JAMBA:
2676 case LLM_ARCH_FALCON_H1:
2677 case LLM_ARCH_PLAMO2:
2678 case LLM_ARCH_GRANITE_HYBRID:
2679 case LLM_ARCH_LFM2:
2680 case LLM_ARCH_LFM2MOE:
2681 case LLM_ARCH_NEMOTRON_H:
2682 return true;
2683 default:
2684 return false;
2685 }
2686}
2687
2688bool llm_arch_is_diffusion(const llm_arch & arch) {
2689 switch (arch) {
2690 case LLM_ARCH_DREAM:
2691 case LLM_ARCH_LLADA:
2692 case LLM_ARCH_LLADA_MOE:
2693 return true;
2694 default:
2695 return false;
2696 }
2697}
2698