#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype>

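// LU8: keep u8"" string literals usable as `const char *`. Since C++20, u8 literals
// have type `const char8_t[]`, so an explicit cast is needed when passing them to
// APIs that expect plain C strings.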
#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml", LLM_CHAT_TEMPLATE_CHATML },
    { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 },
    { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS },
    { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS },
    { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 },
    { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
    { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
    { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR },
    { "monarch", LLM_CHAT_TEMPLATE_MONARCH },
    { "gemma", LLM_CHAT_TEMPLATE_GEMMA },
    { "orion", LLM_CHAT_TEMPLATE_ORION },
    { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT },
    { "vicuna", LLM_CHAT_TEMPLATE_VICUNA },
    { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
    { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
    { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
    { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
    { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
    { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
    { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
    { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
    { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
    { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 },
    { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
    { "granite", LLM_CHAT_TEMPLATE_GRANITE },
    { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
    { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
    { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
    { "bailing", LLM_CHAT_TEMPLATE_BAILING },
    { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK },
    { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 },
    { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
    { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
    { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
    { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE },
    { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE },
    { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
    { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
    { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
    { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED },
};

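// look up a chat template by its canonical name; throws std::out_of_range for unknown names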
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

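// best-effort detection: first try an exact name match, then fall back to
// substring heuristics on the Jinja template source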
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_apply_chat_template" that only works with strings
// It renders a fixed set of known templates into plain text; it is not a jinja parser.
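// add_ass: when true, append the template's assistant-turn prefix so that generation
//          continues as the assistant
// returns the length of the formatted prompt written to `dest`, or -1 if the template is not supported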
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // Phi 4 (ChatML-style, but uses <|im_sep|> instead of a newline after the role)
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }

        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)

        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content;
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";

            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }

        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }

            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }

            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // [unused9]系统:xxx[unused10]
        // [unused9]用户:xxx[unused10]
        // [unused9]助手:xxx[unused10]
        // ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;

            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }

            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface

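// copies up to `len` built-in template names into `output` and returns the total number
// of built-in templates, so callers can query the required buffer size with len == 0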
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}