#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype>

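// LU8: keep u8"" string literals usable as `const char *`. Since C++20, u8 literals
// have type `const char8_t[]`, so an explicit cast is needed when passing them to
// APIs that expect plain C strings.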
#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml", LLM_CHAT_TEMPLATE_CHATML },
    { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
    { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
    { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
    { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
    { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
    { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
    { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
    { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
    { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
    { "orion", LLM_CHAT_TEMPLATE_ORION },
    { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
    { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
    { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
    { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
    { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
    { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
    { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
    { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
    { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
    { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
    { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
    { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 },
    { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
    { "granite", LLM_CHAT_TEMPLATE_GRANITE },
    { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
    { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
    { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
    { "bailing", LLM_CHAT_TEMPLATE_BAILING },
    { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK },
    { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 },
    { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
    { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
    { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
    { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE },
    { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE },
    { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
    { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
    { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
    { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
};

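// look up a chat template by its canonical name; throws std::out_of_range for unknown names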
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

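// best-effort detection: first try an exact name match, then fall back to
// substring heuristics on the Jinja template source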
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_apply_chat_template" that only works with strings
// It renders a fixed set of known templates into plain text; it is not a jinja parser.
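// add_ass: when true, append the template's assistant-turn prefix so that generation
//          continues as the assistant
// returns the length of the formatted prompt written to `dest`, or -1 if the template is not supported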
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // Phi 4 (ChatML-style, but uses <|im_sep|> instead of a newline after the role)
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }

        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)

        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content;
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";

            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }

        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }

            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }

            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // [unused9]系统:xxx[unused10]
        // [unused9]用户:xxx[unused10]
        // [unused9]助手:xxx[unused10]
        // ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;

            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }

            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface

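// copies up to `len` built-in template names into `output` and returns the total number
// of built-in templates, so callers can query the required buffer size with len == 0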
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}