1#include "chat.h"
2#include "chat-parser.h"
3#include "common.h"
4#include "json-partial.h"
5#include "json-schema-to-grammar.h"
6#include "log.h"
7#include "regex-partial.h"
8
9#include <minja/chat-template.hpp>
10#include <minja/minja.hpp>
11
12#include <algorithm>
13#include <cstdio>
14#include <cctype>
15#include <exception>
16#include <functional>
17#include <iostream>
18#include <optional>
19#include <stdexcept>
20#include <string>
21#include <vector>
22
23using json = nlohmann::ordered_json;
24
25static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
26 auto time = std::chrono::system_clock::to_time_t(t: now);
27 auto local_time = *std::localtime(timer: &time);
28 std::ostringstream ss;
29 ss << std::put_time(tmb: &local_time, fmt: format.c_str());
30 auto res = ss.str();
31 return res;
32}
33
34static std::string string_diff(const std::string & last, const std::string & current) {
35 if (last.empty()) {
36 return current;
37 }
38 if (!string_starts_with(str: current, prefix: last)) {
39 if (string_starts_with(str: last, prefix: current)) {
40 // This happens if the last generation ended on a partial stop word (not erased),
41 // and the current ended on a stop word (erased).
42 return "";
43 }
44 throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
45 }
46 return current.substr(pos: last.size());
47}
48
49static bool has_content_or_tool_calls(const common_chat_msg & msg) {
50 return !msg.content.empty() || !msg.tool_calls.empty();
51}
52
53template <>
54json common_chat_msg::to_json_oaicompat() const
55{
56 json message {
57 {"role", "assistant"},
58 };
59 if (!reasoning_content.empty()) {
60 message["reasoning_content"] = reasoning_content;
61 }
62 if (content.empty() && !tool_calls.empty()) {
63 message["content"] = json();
64 } else {
65 message["content"] = content;
66 }
67 if (!tool_calls.empty()) {
68 auto arr = json::array();
69 for (const auto & tc : tool_calls) {
70 arr.push_back(init: {
71 {"type", "function"},
72 {"function", {
73 {"name", tc.name},
74 {"arguments", tc.arguments},
75 }},
76 {"id", tc.id},
77 // // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
78 // // We only generate a random id for the ones that don't generate one by themselves
79 // // (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
80 // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
81 });
82 }
83 message["tool_calls"] = arr;
84 }
85 return message;
86}
87
88std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
89 std::vector<common_chat_msg_diff> diffs;
90 if (previous_msg.reasoning_content != new_msg.reasoning_content) {
91 auto & diff = diffs.emplace_back();
92 diff.reasoning_content_delta = string_diff(last: previous_msg.reasoning_content, current: new_msg.reasoning_content);
93 }
94 if (previous_msg.content != new_msg.content) {
95 auto & diff = diffs.emplace_back();
96 diff.content_delta = string_diff(last: previous_msg.content, current: new_msg.content);
97 }
98
99 if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
100 throw std::runtime_error("Invalid diff: now finding less tool calls!");
101 }
102
103 if (!previous_msg.tool_calls.empty()) {
104 auto idx = previous_msg.tool_calls.size() - 1;
105 const auto & pref = previous_msg.tool_calls[idx];
106 const auto & newf = new_msg.tool_calls[idx];
107 if (pref.name != newf.name) {
108 throw std::runtime_error("Invalid diff: tool call mismatch!");
109 }
110 auto args_diff = string_diff(last: pref.arguments, current: newf.arguments);
111 if (!args_diff.empty() || pref.id != newf.id) {
112 auto & diff = diffs.emplace_back();
113 diff.tool_call_index = idx;
114 if (pref.id != newf.id) {
115 diff.tool_call_delta.id = newf.id;
116 diff.tool_call_delta.name = newf.name;
117 }
118 diff.tool_call_delta.arguments = args_diff;
119 }
120 }
121 for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
122 auto & diff = diffs.emplace_back();
123 diff.tool_call_index = idx;
124 diff.tool_call_delta = new_msg.tool_calls[idx];
125 }
126 return diffs;
127}
128
129typedef minja::chat_template common_chat_template;
130
131struct common_chat_templates {
132 bool add_bos;
133 bool add_eos;
134 bool has_explicit_template; // Model had builtin template or template overridde was specified.
135 std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
136 std::unique_ptr<common_chat_template> template_tool_use;
137};
138
139struct templates_params {
140 json messages;
141 json tools;
142 common_chat_tool_choice tool_choice;
143 json json_schema;
144 bool parallel_tool_calls;
145 bool stream;
146 std::string grammar;
147 bool add_generation_prompt = true;
148 bool enable_thinking = true;
149 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
150 json extra_context;
151 bool add_bos;
152 bool add_eos;
153 bool is_inference = true;
154};
155
156common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
157 if (tool_choice == "auto") {
158 return COMMON_CHAT_TOOL_CHOICE_AUTO;
159 }
160 if (tool_choice == "none") {
161 return COMMON_CHAT_TOOL_CHOICE_NONE;
162 }
163 if (tool_choice == "required") {
164 return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
165 }
166 throw std::runtime_error("Invalid tool_choice: " + tool_choice);
167}
168
169bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
170 common_chat_templates_inputs dummy_inputs;
171 common_chat_msg msg;
172 msg.role = "user";
173 msg.content = "test";
174 dummy_inputs.messages = {msg};
175 dummy_inputs.enable_thinking = false;
176 const auto rendered_no_thinking = common_chat_templates_apply(tmpls: chat_templates, inputs: dummy_inputs);
177 dummy_inputs.enable_thinking = true;
178 const auto rendered_with_thinking = common_chat_templates_apply(tmpls: chat_templates, inputs: dummy_inputs);
179 return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
180}
181
182template <>
183std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
184 std::vector<common_chat_msg> msgs;
185
186 try {
187
188 if (!messages.is_array()) {
189 throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
190 }
191
192 for (const auto & message : messages) {
193 if (!message.is_object()) {
194 throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
195 }
196
197 common_chat_msg msg;
198 if (!message.contains(key: "role")) {
199 throw std::runtime_error("Missing 'role' in message: " + message.dump());
200 }
201 msg.role = message.at(key: "role");
202
203 auto has_content = message.contains(key: "content");
204 auto has_tool_calls = message.contains(key: "tool_calls");
205 if (has_content) {
206 const auto & content = message.at(key: "content");
207 if (content.is_string()) {
208 msg.content = content;
209 } else if (content.is_array()) {
210 for (const auto & part : content) {
211 if (!part.contains(key: "type")) {
212 throw std::runtime_error("Missing content part type: " + part.dump());
213 }
214 const auto & type = part.at(key: "type");
215 if (type != "text") {
216 throw std::runtime_error("Unsupported content part type: " + type.dump());
217 }
218 common_chat_msg_content_part msg_part;
219 msg_part.type = type;
220 msg_part.text = part.at(key: "text");
221 msg.content_parts.push_back(x: msg_part);
222 }
223 } else if (!content.is_null()) {
224 throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
225 }
226 }
227 if (has_tool_calls) {
228 for (const auto & tool_call : message.at(key: "tool_calls")) {
229 common_chat_tool_call tc;
230 if (!tool_call.contains(key: "type")) {
231 throw std::runtime_error("Missing tool call type: " + tool_call.dump());
232 }
233 const auto & type = tool_call.at(key: "type");
234 if (type != "function") {
235 throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
236 }
237 if (!tool_call.contains(key: "function")) {
238 throw std::runtime_error("Missing tool call function: " + tool_call.dump());
239 }
240 const auto & fc = tool_call.at(key: "function");
241 if (!fc.contains(key: "name")) {
242 throw std::runtime_error("Missing tool call name: " + tool_call.dump());
243 }
244 tc.name = fc.at(key: "name");
245 tc.arguments = fc.at(key: "arguments");
246 if (tool_call.contains(key: "id")) {
247 tc.id = tool_call.at(key: "id");
248 }
249 msg.tool_calls.push_back(x: tc);
250 }
251 }
252 if (!has_content && !has_tool_calls) {
253 throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
254 }
255 if (message.contains(key: "reasoning_content")) {
256 msg.reasoning_content = message.at(key: "reasoning_content");
257 }
258 if (message.contains(key: "name")) {
259 msg.tool_name = message.at(key: "name");
260 }
261 if (message.contains(key: "tool_call_id")) {
262 msg.tool_call_id = message.at(key: "tool_call_id");
263 }
264
265 msgs.push_back(x: msg);
266 }
267 } catch (const std::exception & e) {
268 // @ngxson : disable otherwise it's bloating the API response
269 // printf("%s\n", std::string("; messages = ") + messages.dump(2));
270 throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
271 }
272
273 return msgs;
274}
275
276template <>
277json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text) {
278 json messages = json::array();
279 for (const auto & msg : msgs) {
280 if (!msg.content.empty() && !msg.content_parts.empty()) {
281 throw std::runtime_error("Cannot specify both content and content_parts");
282 }
283 json jmsg {
284 {"role", msg.role},
285 };
286 if (!msg.content.empty()) {
287 jmsg["content"] = msg.content;
288 } else if (!msg.content_parts.empty()) {
289 if (concat_typed_text) {
290 std::string text;
291 for (const auto & part : msg.content_parts) {
292 if (part.type != "text") {
293 LOG_WRN("Ignoring content part type: %s\n", part.type.c_str());
294 continue;
295 }
296 if (!text.empty()) {
297 text += '\n';
298 }
299 text += part.text;
300 }
301 jmsg["content"] = text;
302 } else {
303 auto & parts = jmsg["content"] = json::array();
304 for (const auto & part : msg.content_parts) {
305 parts.push_back(init: {
306 {"type", part.type},
307 {"text", part.text},
308 });
309 }
310 }
311 } else {
312 jmsg["content"] = json(); // null
313 }
314 if (!msg.reasoning_content.empty()) {
315 jmsg["reasoning_content"] = msg.reasoning_content;
316 }
317 if (!msg.tool_name.empty()) {
318 jmsg["name"] = msg.tool_name;
319 }
320 if (!msg.tool_call_id.empty()) {
321 jmsg["tool_call_id"] = msg.tool_call_id;
322 }
323 if (!msg.tool_calls.empty()) {
324 auto & tool_calls = jmsg["tool_calls"] = json::array();
325 for (const auto & tool_call : msg.tool_calls) {
326 json tc {
327 {"type", "function"},
328 {"function", {
329 {"name", tool_call.name},
330 {"arguments", tool_call.arguments},
331 }},
332 };
333 if (!tool_call.id.empty()) {
334 tc["id"] = tool_call.id;
335 }
336 tool_calls.push_back(val: tc);
337 }
338 }
339 messages.push_back(val: jmsg);
340 }
341 return messages;
342}
343
344template <>
345std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const std::string & messages) {
346 return common_chat_msgs_parse_oaicompat(messages: json::parse(i: messages));
347}
348
349template <>
350std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & tools) {
351 std::vector<common_chat_tool> result;
352
353 try {
354 if (!tools.is_null()) {
355 if (!tools.is_array()) {
356 throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
357 }
358 for (const auto & tool : tools) {
359 if (!tool.contains(key: "type")) {
360 throw std::runtime_error("Missing tool type: " + tool.dump());
361 }
362 const auto & type = tool.at(key: "type");
363 if (!type.is_string() || type != "function") {
364 throw std::runtime_error("Unsupported tool type: " + tool.dump());
365 }
366 if (!tool.contains(key: "function")) {
367 throw std::runtime_error("Missing tool function: " + tool.dump());
368 }
369
370 const auto & function = tool.at(key: "function");
371 result.push_back(x: {
372 /* .name = */ function.at(key: "name"),
373 /* .description = */ function.at(key: "description"),
374 /* .parameters = */ function.at(key: "parameters").dump(),
375 });
376 }
377 }
378 } catch (const std::exception & e) {
379 throw std::runtime_error("Failed to parse tools: " + std::string(e.what()) + "; tools = " + tools.dump(indent: 2));
380 }
381
382 return result;
383}
384
385template <>
386std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const std::string & tools) {
387 return common_chat_tools_parse_oaicompat(tools: json::parse(i: tools));
388}
389
390template <>
391json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools) {
392 if (tools.empty()) {
393 return json();
394 }
395
396 auto result = json::array();
397 for (const auto & tool : tools) {
398 result.push_back(init: {
399 {"type", "function"},
400 {"function", {
401 {"name", tool.name},
402 {"description", tool.description},
403 {"parameters", json::parse(i: tool.parameters)},
404 }},
405 });
406 }
407 return result;
408}
409
410template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
411 json delta = json::object();
412 if (!diff.reasoning_content_delta.empty()) {
413 delta["reasoning_content"] = diff.reasoning_content_delta;
414 }
415 if (!diff.content_delta.empty()) {
416 delta["content"] = diff.content_delta;
417 }
418 if (diff.tool_call_index != std::string::npos) {
419 json tool_call;
420 tool_call["index"] = diff.tool_call_index;
421 if (!diff.tool_call_delta.id.empty()) {
422 tool_call["id"] = diff.tool_call_delta.id;
423 tool_call["type"] = "function";
424 }
425 json function = json::object();
426 if (!diff.tool_call_delta.name.empty()) {
427 function["name"] = diff.tool_call_delta.name;
428 }
429 function["arguments"] = diff.tool_call_delta.arguments;
430 tool_call["function"] = function;
431 delta["tool_calls"] = json::array(init: {tool_call});
432 }
433 return delta;
434}
435
436bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
437 if (use_jinja) {
438 try {
439 common_chat_msg msg;
440 msg.role = "user";
441 msg.content = "test";
442
443 auto tmpls = common_chat_templates_init(/* model= */ nullptr, chat_template_override: tmpl);
444
445 common_chat_templates_inputs inputs;
446 inputs.messages = {msg};
447
448 common_chat_templates_apply(tmpls: tmpls.get(), inputs);
449 return true;
450 } catch (const std::exception & e) {
451 LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
452 return false;
453 }
454 }
455 llama_chat_message chat[] = {{.role: "user", .content: "test"}};
456 const int res = llama_chat_apply_template(tmpl: tmpl.c_str(), chat, n_msg: 1, add_ass: true, buf: nullptr, length: 0);
457 return res >= 0;
458}
459
460std::string common_chat_format_single(
461 const struct common_chat_templates * tmpls,
462 const std::vector<common_chat_msg> & past_msg,
463 const common_chat_msg & new_msg,
464 bool add_ass,
465 bool use_jinja) {
466
467 common_chat_templates_inputs inputs;
468 inputs.use_jinja = use_jinja;
469 inputs.add_bos = tmpls->add_bos;
470 inputs.add_eos = tmpls->add_eos;
471
472 std::string fmt_past_msg;
473 if (!past_msg.empty()) {
474 inputs.messages = past_msg;
475 inputs.add_generation_prompt = false;
476 fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
477 }
478 std::ostringstream ss;
479 // if the past_msg ends with a newline, we must preserve it in the formatted version
480 if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
481 ss << "\n";
482 };
483 // format chat with new_msg
484 inputs.messages.push_back(x: new_msg);
485 inputs.add_generation_prompt = add_ass;
486 auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
487 // get the diff part
488 ss << fmt_new_msg.substr(pos: fmt_past_msg.size(), n: fmt_new_msg.size() - fmt_past_msg.size());
489 return ss.str();
490}
491
492std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
493 common_chat_templates_inputs inputs;
494 inputs.use_jinja = use_jinja;
495 inputs.add_bos = tmpls->add_bos;
496 inputs.add_eos = tmpls->add_eos;
497 inputs.chat_template_kwargs = chat_template_kwargs;
498 auto add_simple_msg = [&](auto role, auto content) {
499 common_chat_msg msg;
500 msg.role = role;
501 msg.content = content;
502 inputs.messages.push_back(x: msg);
503 };
504 add_simple_msg("system", "You are a helpful assistant");
505 add_simple_msg("user", "Hello");
506 add_simple_msg("assistant", "Hi there");
507 add_simple_msg("user", "How are you?");
508 return common_chat_templates_apply(tmpls, inputs).prompt;
509}
510
511#define CHATML_TEMPLATE_SRC \
512 "{%- for message in messages -%}\n" \
513 " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
514 "{%- endfor -%}\n" \
515 "{%- if add_generation_prompt -%}\n" \
516 " {{- '<|im_start|>assistant\n' -}}\n" \
517 "{%- endif -%}"
518
519void common_chat_templates_free(struct common_chat_templates * tmpls) {
520 delete tmpls;
521}
522
523bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls) {
524 return tmpls->has_explicit_template;
525}
526
527const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) {
528 if (variant != nullptr) {
529 if (strcmp(s1: variant, s2: "tool_use") == 0) {
530 if (tmpls->template_tool_use) {
531 return tmpls->template_tool_use->source().c_str();
532 }
533 return nullptr;
534 } else {
535 LOG_DBG("%s: unknown template variant: %s\n", __func__, variant);
536 }
537 }
538 return tmpls->template_default->source().c_str();
539}
540
541common_chat_templates_ptr common_chat_templates_init(
542 const struct llama_model * model,
543 const std::string & chat_template_override,
544 const std::string & bos_token_override,
545 const std::string & eos_token_override)
546{
547 std::string default_template_src;
548 std::string template_tool_use_src;
549
550 bool has_explicit_template = !chat_template_override.empty();
551 if (chat_template_override.empty()) {
552 GGML_ASSERT(model != nullptr);
553 const auto * str = llama_model_chat_template(model, /* name */ nullptr);
554 if (str) {
555 default_template_src = str;
556 has_explicit_template = true;
557 }
558 str = llama_model_chat_template(model, /* name */ "tool_use");
559 if (str) {
560 template_tool_use_src = str;
561 has_explicit_template = true;
562 }
563 } else {
564 default_template_src = chat_template_override;
565 }
566 if (default_template_src.empty() || default_template_src == "chatml") {
567 if (!template_tool_use_src.empty()) {
568 default_template_src = template_tool_use_src;
569 } else {
570 default_template_src = CHATML_TEMPLATE_SRC;
571 }
572 }
573
574 // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
575 // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
576 if (default_template_src.find(s: "<|channel|>") != std::string::npos
577 // search for the error message and patch it
578 && default_template_src.find(s: "in message.content or") != std::string::npos) {
579 string_replace_all(s&: default_template_src,
580 search: "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
581 replace: "{%- if false %}");
582 }
583
584 std::string token_bos = bos_token_override;
585 std::string token_eos = eos_token_override;
586 bool add_bos = false;
587 bool add_eos = false;
588 if (model) {
589 const auto * vocab = llama_model_get_vocab(model);
590 const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
591 if (token == LLAMA_TOKEN_NULL) {
592 if (default_template_src.find(s: jinja_variable_name) != std::string::npos
593 || template_tool_use_src.find(s: jinja_variable_name) != std::string::npos) {
594 LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
595 }
596 return std::string();
597 }
598 return common_token_to_piece(vocab, token, special: true);
599 };
600 token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
601 token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
602 add_bos = llama_vocab_get_add_bos(vocab);
603 add_eos = llama_vocab_get_add_eos(vocab);
604 }
605 common_chat_templates_ptr tmpls(new common_chat_templates());
606 tmpls->has_explicit_template = has_explicit_template;
607 tmpls->add_bos = add_bos;
608 tmpls->add_eos = add_eos;
609 try {
610 tmpls->template_default = std::make_unique<minja::chat_template>(args&: default_template_src, args&: token_bos, args&: token_eos);
611 } catch (const std::exception & e) {
612 LOG_ERR("%s: failed to parse chat template (defaulting to chatml): %s \n", __func__, e.what());
613 tmpls->template_default = std::make_unique<minja::chat_template>(CHATML_TEMPLATE_SRC, args&: token_bos, args&: token_eos);
614 }
615 if (!template_tool_use_src.empty()) {
616 try {
617 tmpls->template_tool_use = std::make_unique<minja::chat_template>(args&: template_tool_use_src, args&: token_bos, args&: token_eos);
618 } catch (const std::exception & e) {
619 LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n", __func__, e.what());
620 }
621 }
622 return tmpls;
623}
624
625const char * common_chat_format_name(common_chat_format format) {
626 switch (format) {
627 case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
628 case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
629 case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
630 case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
631 case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
632 case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
633 case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
634 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
635 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
636 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
637 case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
638 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
639 case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
640 case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
641 case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
642 case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
643 case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
644 case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
645 case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
646 default:
647 throw std::runtime_error("Unknown chat format");
648 }
649}
650
651const char * common_reasoning_format_name(common_reasoning_format format) {
652 switch (format) {
653 case COMMON_REASONING_FORMAT_NONE: return "none";
654 case COMMON_REASONING_FORMAT_AUTO: return "auto";
655 case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
656 case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
657 default:
658 throw std::runtime_error("Unknown reasoning format");
659 }
660}
661
662common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
663 if (format == "none") {
664 return COMMON_REASONING_FORMAT_NONE;
665 } else if (format == "auto") {
666 return COMMON_REASONING_FORMAT_AUTO;
667 } else if (format == "deepseek") {
668 return COMMON_REASONING_FORMAT_DEEPSEEK;
669 } else if (format == "deepseek-legacy") {
670 return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
671 }
672 throw std::runtime_error("Unknown reasoning format: " + format);
673}
674
675static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
676 std::string arguments;
677 if (builder.is_partial()) {
678 arguments = (json {{"code", code + builder.healing_marker()}}).dump();
679 auto idx = arguments.find(str: builder.healing_marker());
680 if (idx != std::string::npos) {
681 arguments.resize(n: idx);
682 }
683 } else {
684 arguments = (json {{"code", code}}).dump();
685 }
686 return arguments;
687}
688
689/**
690 * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
691 * Aggregates the prefix, suffix and in-between text into the content.
692 */
693static void parse_json_tool_calls(
694 common_chat_msg_parser & builder,
695 const std::optional<common_regex> & block_open,
696 const std::optional<common_regex> & function_regex_start_only,
697 const std::optional<common_regex> & function_regex,
698 const common_regex & close_regex,
699 const std::optional<common_regex> & block_close,
700 bool allow_raw_python = false,
701 const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
702
703 auto parse_tool_calls = [&]() {
704 size_t from = std::string::npos;
705 auto first = true;
706 while (true) {
707 auto start_pos = builder.pos();
708 auto res = function_regex_start_only && first
709 ? builder.try_consume_regex(regex: *function_regex_start_only)
710 : function_regex
711 ? builder.try_find_regex(regex: *function_regex, from)
712 : std::nullopt;
713
714 if (res) {
715 std::string name;
716 if (get_function_name) {
717 name = get_function_name(*res);
718 } else {
719 GGML_ASSERT(res->groups.size() == 2);
720 name = builder.str(rng: res->groups[1]);
721 }
722 first = false;
723 if (name.empty()) {
724 // get_function_name signalled us that we should skip this match and treat it as content.
725 from = res->groups[0].begin + 1;
726 continue;
727 }
728 from = std::string::npos;
729
730 auto maybe_raw_python = name == "python" && allow_raw_python;
731 if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
732 if (auto arguments = builder.try_consume_json_with_dumped_args(args_paths: {{}})) {
733 if (!builder.add_tool_call(name, id: "", arguments: arguments->value) || arguments->is_partial) {
734 throw common_chat_msg_partial_exception("incomplete tool call");
735 }
736 builder.consume_regex(regex: close_regex);
737 }
738 continue;
739 }
740 if (maybe_raw_python) {
741 auto arguments = wrap_code_as_arguments(builder, code: builder.consume_rest());
742 if (!builder.add_tool_call(name, id: "", arguments)) {
743 throw common_chat_msg_partial_exception("incomplete tool call");
744 }
745 return;
746 }
747 throw common_chat_msg_partial_exception("incomplete tool call");
748 } else {
749 builder.move_to(pos: start_pos);
750 }
751 break;
752 }
753 if (block_close) {
754 builder.consume_regex(regex: *block_close);
755 }
756 builder.consume_spaces();
757 builder.add_content(content: builder.consume_rest());
758 };
759 if (block_open) {
760 if (auto res = builder.try_find_regex(regex: *block_open)) {
761 parse_tool_calls();
762 } else {
763 builder.add_content(content: builder.consume_rest());
764 }
765 } else {
766 parse_tool_calls();
767 }
768}
769
770static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) {
771 static const std::vector<std::vector<std::string>> args_paths = {{"arguments"}};
772 if (auto res = builder.try_find_regex(regex: prefix)) {
773 builder.move_back(n: rstrip_prefix);
774 auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
775 if (!builder.add_tool_calls(arr: tool_calls.value) || tool_calls.is_partial) {
776 throw common_chat_msg_partial_exception("incomplete tool call array");
777 }
778 } else {
779 builder.add_content(content: builder.consume_rest());
780 }
781}
782
783static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
784 for (const auto & tool : tools) {
785 if (!tool.contains(key: "type") || tool.at(key: "type") != "function" || !tool.contains(key: "function")) {
786 LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
787 continue;
788 }
789 fn(tool);
790 }
791}
792
793static std::string apply(
794 const common_chat_template & tmpl,
795 const struct templates_params & inputs,
796 const std::optional<json> & messages_override = std::nullopt,
797 const std::optional<json> & tools_override = std::nullopt,
798 const std::optional<json> & additional_context = std::nullopt)
799{
800 minja::chat_template_inputs tmpl_inputs;
801 tmpl_inputs.messages = messages_override ? *messages_override : inputs.messages;
802 if (tools_override) {
803 tmpl_inputs.tools = *tools_override;
804 } else {
805 tmpl_inputs.tools = inputs.tools.empty() ? json() : inputs.tools;
806 }
807 tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt;
808 tmpl_inputs.extra_context = inputs.extra_context;
809 tmpl_inputs.extra_context["enable_thinking"] = inputs.enable_thinking;
810 if (additional_context) {
811 tmpl_inputs.extra_context.merge_patch(apply_patch: *additional_context);
812 }
813 // TODO: add flag to control date/time, if only for testing purposes.
814 // tmpl_inputs.now = std::chrono::system_clock::now();
815
816 minja::chat_template_options tmpl_opts;
817 // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
818 // instead of using `chat_template_options.use_bos_token = false`, since these tokens
819 // may be needed inside the template / between messages too.
820 auto result = tmpl.apply(inputs: tmpl_inputs, opts: tmpl_opts);
821 if (inputs.add_bos && string_starts_with(str: result, prefix: tmpl.bos_token())) {
822 result = result.substr(pos: tmpl.bos_token().size());
823 }
824 if (inputs.add_eos && string_ends_with(str: result, suffix: tmpl.eos_token())) {
825 result = result.substr(pos: 0, n: result.size() - tmpl.eos_token().size());
826 }
827 return result;
828}
829
830static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
831 common_chat_params data;
832
833 auto tool_call_schemas = json::array();
834 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
835 const auto & function = tool.at(key: "function");
836 auto tool_schema = json {
837 {"type", "object"},
838 {"properties", {
839 {"name", {
840 {"type", "string"},
841 {"const", function.at(key: "name")},
842 }},
843 {"arguments", function.at(key: "parameters")},
844 }},
845 {"required", json::array(init: {"name", "arguments"})},
846 };
847 if (function.contains(key: "description")) {
848 tool_schema["description"] = function.at(key: "description");
849 }
850 if (inputs.parallel_tool_calls) {
851 tool_schema.at(key: "properties")["id"] = {
852 {"type", "string"},
853 {"minLength", 4},
854 };
855 tool_schema.at(key: "required").push_back(val: "id");
856 }
857 tool_call_schemas.emplace_back(args&: tool_schema);
858 });
859 const auto tool_call =
860 inputs.parallel_tool_calls
861 ? json {
862 {"type", "object"},
863 {"properties", {
864 {"tool_calls", {
865 {"type", "array"},
866 {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
867 {"anyOf", tool_call_schemas},
868 }},
869 {"minItems", 1},
870 }},
871 }},
872 {"required", json::array(init: {"tool_calls"})},
873 }
874 : json {
875 {"type", "object"},
876 {"properties", {
877 {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
878 {"anyOf", tool_call_schemas},
879 }},
880 }},
881 {"required", json::array(init: {"tool_call"})},
882 };
883 const auto schema =
884 inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
885 ? json {
886 {"anyOf", json::array(init: {
887 tool_call,
888 {
889 {"type", "object"},
890 {"properties", {
891 {"response", inputs.json_schema.is_null()
892 ? json {{"type", "string"}}
893 : inputs.json_schema
894 },
895 }},
896 {"required", json::array(init: {"response"})},
897 },
898 })}
899 }
900 : tool_call;
901
902 data.grammar_lazy = false;
903 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
904 builder.add_schema("root", schema);
905 });
906
907 auto tweaked_messages = common_chat_template::add_system(
908 messages: inputs.messages,
909 system_prompt: "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
910
911 data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
912 data.format = COMMON_CHAT_FORMAT_GENERIC;
913 return data;
914}
915static void common_chat_parse_generic(common_chat_msg_parser & builder) {
916 if (!builder.syntax().parse_tool_calls) {
917 builder.add_content(content: builder.consume_rest());
918 return;
919 }
920 static const std::vector<std::vector<std::string>> content_paths = {
921 {"response"},
922 };
923 static const std::vector<std::vector<std::string>> args_paths = {
924 {"tool_call", "arguments"},
925 {"tool_calls", "arguments"},
926 };
927 auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
928 if (data.value.contains(key: "tool_calls")) {
929 if (!builder.add_tool_calls(arr: data.value.at(key: "tool_calls")) || data.is_partial) {
930 throw common_chat_msg_partial_exception("incomplete tool calls");
931 }
932 } else if (data.value.contains(key: "tool_call")) {
933 if (!builder.add_tool_call(tool_call: data.value.at(key: "tool_call")) || data.is_partial) {
934 throw common_chat_msg_partial_exception("incomplete tool call");
935 }
936 } else if (data.value.contains(key: "response")) {
937 const auto & response = data.value.at(key: "response");
938 builder.add_content(content: response.is_string() ? response.template get<std::string>() : response.dump(indent: 2));
939 if (data.is_partial) {
940 throw common_chat_msg_partial_exception("incomplete response");
941 }
942 } else {
943 throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
944 }
945}
946
947static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
948 common_chat_params data;
949 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
950 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
951 auto schemas = json::array();
952 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
953 const auto & function = tool.at(key: "function");
954 schemas.push_back(init: {
955 {"type", "object"},
956 {"properties", {
957 // Important note: the model is probably trained to take a JSON stringified arguments value.
958 // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
959 {"name", {
960 {"type", "string"},
961 {"const", function.at(key: "name")},
962 }},
963 {"arguments", function.at(key: "parameters")},
964 {"id", {
965 {"type", "string"},
966 // Nemo's template expects a 9-character alphanumeric ID.
967 {"pattern", "^[a-zA-Z0-9]{9}$"},
968 }},
969 }},
970 {"required", json::array(init: {"name", "arguments", "id"})},
971 });
972 });
973 auto schema = json {
974 {"type", "array"},
975 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
976 {"minItems", 1},
977 };
978 if (!inputs.parallel_tool_calls) {
979 schema["maxItems"] = 1;
980 }
981 builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
982 });
983 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "[TOOL_CALLS]"});
984 data.preserved_tokens = {
985 "[TOOL_CALLS]",
986 };
987 data.prompt = apply(tmpl, inputs);
988 data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
989 return data;
990}
991
992
993// Case-insensitive find
994static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
995 auto it = std::search(
996 first1: haystack.begin() + pos, last1: haystack.end(),
997 first2: needle.begin(), last2: needle.end(),
998 predicate: [](char a, char b) { return std::tolower(c: a) == std::tolower(c: b); }
999 );
1000 return (it == haystack.end()) ? std::string::npos : std::distance(first: haystack.begin(), last: it);
1001}
1002
1003static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1004 common_chat_params data;
1005 const auto is_json_schema_provided = !inputs.json_schema.is_null();
1006 const auto is_grammar_provided = !inputs.grammar.empty();
1007 const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
1008
1009 // the logic requires potentially modifying the messages
1010 auto tweaked_messages = inputs.messages;
1011
1012 auto replace_json_schema_marker = [](json & messages) -> bool {
1013 static std::string marker1 = "force json schema.\n";
1014 static std::string marker2 = "force json schema.";
1015
1016 if (messages.empty() || messages.at(idx: 0).at(key: "role") != "system") {
1017 return false;
1018 }
1019
1020 std::string content = messages.at(idx: 0).at(key: "content");
1021
1022 for (const auto & marker : {marker1, marker2}) {
1023 const auto pos = ifind_string(haystack: content, needle: marker);
1024 if (pos != std::string::npos) {
1025 content.replace(pos: pos, n1: marker.length(), s: "");
1026 // inject modified content back into the messages
1027 messages.at(idx: 0).at(key: "content") = content;
1028 return true;
1029 }
1030 }
1031
1032 return false;
1033 };
1034
1035 // Lfm2 model does not natively work with json, but can generally understand the tools structure
1036 //
1037 // Example of the pytorch dialog structure:
1038 // <|startoftext|><|im_start|>system
1039 // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
1040 // <|im_start|>user
1041 // What is the current status of candidate ID 12345?<|im_end|>
1042 // <|im_start|>assistant
1043 // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
1044 // <|im_start|>tool
1045 // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
1046 // <|im_start|>assistant
1047 // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
1048 //
1049 // For the llama server compatibility with json tools semantic,
1050 // the client can add "Follow json schema." line into the system message prompt to force the json output.
1051 //
1052 if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
1053 // server/utils.hpp prohibits that branch for the custom grammar anyways
1054 throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
1055 } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
1056 LOG_INF("%s: Using tools to build a grammar\n", __func__);
1057
1058 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1059 auto schemas = json::array();
1060 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1061 const auto & function = tool.at(key: "function");
1062 schemas.push_back(init: {
1063 {"type", "object"},
1064 {"properties", {
1065 {"name", {
1066 {"type", "string"},
1067 {"const", function.at(key: "name")},
1068 }},
1069 {"arguments", function.at(key: "parameters")},
1070 }},
1071 {"required", json::array(init: {"name", "arguments", "id"})},
1072 });
1073 });
1074 auto schema = json {
1075 {"type", "array"},
1076 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1077 {"minItems", 1},
1078 };
1079 if (!inputs.parallel_tool_calls) {
1080 schema["maxItems"] = 1;
1081 }
1082
1083 builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
1084 });
1085 // model has no concept of tool selection mode choice,
1086 // if the system prompt rendered correctly it will produce a tool call
1087 // the grammar goes inside the tool call body
1088 data.grammar_lazy = true;
1089 data.grammar_triggers = {{.type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, .value: "\\s*<\\|tool_call_start\\|>\\s*\\["}};
1090 data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
1091 data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
1092 } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
1093 LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
1094 // output those tokens
1095 data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
1096 } else if (is_json_schema_provided) {
1097 LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
1098 data.grammar = json_schema_to_grammar(schema: inputs.json_schema);
1099 } else if (is_grammar_provided) {
1100 LOG_INF("%s: Using provided grammar\n", __func__);
1101 data.grammar = inputs.grammar;
1102 } else {
1103 LOG_INF("%s: Using content relying on the template\n", __func__);
1104 }
1105
1106 data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
1107 LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
1108
1109 return data;
1110}
1111
1112static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
1113 common_chat_params data;
1114 data.prompt = apply(tmpl, inputs);
1115 data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
1116 data.preserved_tokens = {
1117 "[THINK]",
1118 "[/THINK]",
1119 };
1120
1121 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1122 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1123 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1124 auto schemas = json::array();
1125 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1126 const auto & function = tool.at(key: "function");
1127 schemas.push_back(init: {
1128 {"type", "object"},
1129 {"properties", {
1130 {"name", {
1131 {"type", "string"},
1132 {"const", function.at(key: "name")},
1133 }},
1134 {"arguments", function.at(key: "parameters")},
1135 {"id", {
1136 {"type", "string"},
1137 {"pattern", "^[a-zA-Z0-9]{9}$"},
1138 }},
1139 }},
1140 {"required", json::array(init: {"name", "arguments", "id"})},
1141 });
1142 });
1143 auto schema = json {
1144 {"type", "array"},
1145 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1146 {"minItems", 1},
1147 };
1148 if (!inputs.parallel_tool_calls) {
1149 schema["maxItems"] = 1;
1150 }
1151 builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
1152 });
1153 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "[TOOL_CALLS]"});
1154 data.preserved_tokens.push_back(x: "[TOOL_CALLS]");
1155 } else {
1156 data.grammar_lazy = false;
1157 if (!inputs.json_schema.is_null()) {
1158 if (!inputs.grammar.empty()) {
1159 throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
1160 }
1161 data.grammar = json_schema_to_grammar(schema: inputs.json_schema);
1162 } else {
1163 data.grammar = inputs.grammar;
1164 }
1165 }
1166
1167 return data;
1168}
1169
1170static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
1171 if (!builder.syntax().parse_tool_calls) {
1172 builder.add_content(content: builder.consume_rest());
1173 return;
1174 }
1175
1176 static const common_regex prefix(regex_escape(s: "[TOOL_CALLS]"));
1177 parse_prefixed_json_tool_call_array(builder, prefix);
1178}
1179
1180static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
1181 builder.try_parse_reasoning(start_think: "[THINK]", end_think: "[/THINK]");
1182
1183 if (!builder.syntax().parse_tool_calls) {
1184 builder.add_content(content: builder.consume_rest());
1185 return;
1186 }
1187
1188 static const common_regex prefix(regex_escape(s: "[TOOL_CALLS]"));
1189 parse_prefixed_json_tool_call_array(builder, prefix);
1190}
1191
1192static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
1193 common_chat_params data;
1194
1195 auto adjusted_messages = json::array();
1196 for (const auto & msg : inputs.messages) {
1197 auto has_reasoning_content = msg.contains(key: "reasoning_content") && msg.at(key: "reasoning_content").is_string();
1198 auto has_tool_calls = msg.contains(key: "tool_calls") && msg.at(key: "tool_calls").is_array();
1199 if (has_reasoning_content && has_tool_calls) {
1200 auto adjusted_message = msg;
1201 adjusted_message["tool_plan"] = msg.at(key: "reasoning_content");
1202 adjusted_message.erase(key: "reasoning_content");
1203 adjusted_messages.push_back(val: adjusted_message);
1204 } else {
1205 adjusted_messages.push_back(val: msg);
1206 }
1207 }
1208 data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
1209 data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
1210 if (string_ends_with(str: data.prompt, suffix: "<|START_THINKING|>")) {
1211 if (!inputs.enable_thinking) {
1212 data.prompt += "<|END_THINKING|>";
1213 } else {
1214 data.thinking_forced_open = true;
1215 }
1216 } else if (!inputs.enable_thinking && string_ends_with(str: data.prompt, suffix: "<|CHATBOT_TOKEN|>")) {
1217 data.prompt += "<|START_THINKING|><|END_THINKING|>";
1218 }
1219
1220 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1221 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1222 auto schemas = json::array();
1223 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1224 const auto & function = tool.at(key: "function");
1225 schemas.push_back(init: {
1226 {"type", "object"},
1227 {"properties", {
1228 {"tool_call_id", {
1229 {"type", "string"},
1230 // Command-R's template expects an integer string.
1231 {"pattern", "^[0-9]{1,10}$"},
1232 }},
1233 {"tool_name", {
1234 {"type", "string"},
1235 {"const", function.at(key: "name")},
1236 }},
1237 {"parameters", function.at(key: "parameters")},
1238 }},
1239 {"required", json::array(init: {"tool_call_id", "tool_name", "parameters"})},
1240 });
1241 });
1242 auto schema = json {
1243 {"type", "array"},
1244 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
1245 {"minItems", 1},
1246 };
1247 if (!inputs.parallel_tool_calls) {
1248 schema["maxItems"] = 1;
1249 }
1250 builder.add_rule("root",
1251 std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
1252 "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
1253 });
1254 data.grammar_triggers.push_back(x: {
1255 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1256 // If thinking_forced_open, then we capture the </think> tag in the grammar,
1257 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1258 .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
1259 "(<\\|START_ACTION\\|>)[\\s\\S]*"
1260 });
1261 data.preserved_tokens = {
1262 "<|START_ACTION|>",
1263 "<|END_ACTION|>",
1264 "<|START_RESPONSE|>",
1265 "<|END_RESPONSE|>",
1266 "<|START_THINKING|>",
1267 "<|END_THINKING|>",
1268 };
1269 return data;
1270}
1271
1272static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
1273 builder.try_parse_reasoning(start_think: "<|START_THINKING|>", end_think: "<|END_THINKING|>");
1274
1275 static const common_regex start_action_regex("<\\|START_ACTION\\|>");
1276 static const common_regex end_action_regex("<\\|END_ACTION\\|>");
1277 static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
1278 static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
1279
1280 if (auto res = builder.try_find_regex(regex: start_action_regex)) {
1281 // If we didn't extract thoughts, prelude includes them.
1282 auto tool_calls = builder.consume_json_with_dumped_args(args_paths: {{"parameters"}});
1283 for (const auto & tool_call : tool_calls.value) {
1284 std::string name = tool_call.contains(key: "tool_name") ? tool_call.at(key: "tool_name") : "";
1285 std::string id = tool_call.contains(key: "tool_call_id") ? tool_call.at(key: "tool_call_id") : "";
1286 std::string arguments = tool_call.contains(key: "parameters") ? tool_call.at(key: "parameters") : "";
1287 if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
1288 throw common_chat_msg_partial_exception("incomplete tool call");
1289 }
1290 }
1291 if (tool_calls.is_partial) {
1292 throw common_chat_msg_partial_exception("incomplete tool call");
1293 }
1294 builder.consume_regex(regex: end_action_regex);
1295 } else if (auto res = builder.try_find_regex(regex: start_response_regex)) {
1296 if (!builder.try_find_regex(regex: end_response_regex)) {
1297 builder.add_content(content: builder.consume_rest());
1298 throw common_chat_msg_partial_exception(end_response_regex.str());
1299 }
1300 } else {
1301 builder.add_content(content: builder.consume_rest());
1302 }
1303}
1304
1305static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
1306 if (!parameters.is_object() || !parameters.contains(key: "type") || parameters.at(key: "type") != "object" || !parameters.contains(key: "properties") || !parameters.contains(key: "required")) {
1307 throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
1308 }
1309 const auto & parameters_properties = parameters.at(key: "properties");
1310 const auto & parameters_required = parameters.at(key: "required");
1311 for (const auto & prop : expected_properties) {
1312 if (!parameters_properties.contains(key: prop)) {
1313 throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
1314 }
1315 if (std::find(first: parameters_required.begin(), last: parameters_required.end(), val: json(prop)) == parameters_required.end()) {
1316 throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
1317 }
1318 }
1319 if (parameters_properties.size() != expected_properties.size()) {
1320 throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(values: expected_properties, separator: ", "));
1321 }
1322}
1323
1324static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
1325 auto builtin_tools = json::array();
1326 common_chat_params data;
1327 if (!inputs.tools.is_null()) {
1328 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1329 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1330 std::vector<std::string> tool_rules;
1331
1332 auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
1333 if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
1334 // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
1335 // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
1336 expect_tool_parameters(name, parameters, expected_properties: {"query"});
1337 } else if (name == "python" || name == "code_interpreter") {
1338 // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
1339 expect_tool_parameters(name, parameters, expected_properties: {"code"});
1340 } else {
1341 return false;
1342 }
1343
1344 std::vector<std::string> kvs;
1345 for (const auto & [key, value] : parameters.at(key: "properties").items()) {
1346 kvs.push_back(x: "\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
1347 }
1348
1349 tool_rules.push_back(
1350 x: builder.add_rule(
1351 name + "-call",
1352 "\"<|python_tag|>" + name + ".call(\" " + string_join(values: kvs, separator: " \", \" ") + " \")\""));
1353 builtin_tools.push_back(val: name);
1354
1355 return true;
1356 };
1357
1358 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1359 const auto & function = tool.at(key: "function");
1360 std::string name = function.at(key: "name");
1361 auto parameters = function.at(key: "parameters");
1362 builder.resolve_refs(parameters);
1363
1364 // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
1365 if (allow_python_tag_builtin_tools) {
1366 handle_builtin_tool(name, parameters);
1367 }
1368 tool_rules.push_back(
1369 x: builder.add_rule(
1370 name + "-call",
1371 "\"{\" space "
1372 "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
1373 " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
1374 " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
1375 "\"}\" space"));
1376 });
1377 // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
1378 data.grammar_triggers.push_back(x: {
1379 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1380 .value: "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
1381 });
1382 if (!builtin_tools.empty()) {
1383 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<|python_tag|>"});
1384 data.preserved_tokens.push_back(x: "<|python_tag|>");
1385 }
1386 // Allow a few empty lines on top of the usual constrained json schema space rule.
1387 builder.add_rule("root", string_join(values: tool_rules, separator: " | "));
1388 data.additional_stops.push_back(x: "<|eom_id|>");
1389 });
1390 data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
1391 ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
1392 : COMMON_CHAT_FORMAT_LLAMA_3_X;
1393 } else {
1394 data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
1395 }
1396 data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, additional_context: json {
1397 {"date_string", format_time(now: inputs.now, format: "%d %b %Y")},
1398 {"tools_in_user_message", false},
1399 {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
1400 });
1401 return data;
1402}
1403
1404static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
1405 common_chat_params data;
1406
1407 // Generate the prompt using the apply() function with the template
1408 data.prompt = apply(tmpl, inputs);
1409 data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
1410
1411 // Handle thinking tags appropriately based on inputs.enable_thinking
1412 if (string_ends_with(str: data.prompt, suffix: "<think>\n")) {
1413 if (!inputs.enable_thinking) {
1414 data.prompt += "</think>";
1415 } else {
1416 data.thinking_forced_open = true;
1417 }
1418 }
1419
1420 // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
1421 if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
1422 data.grammar_lazy = true;
1423 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1424 auto schemas = json::array();
1425 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1426 const auto & function = tool.at(key: "function");
1427 schemas.push_back(init: {
1428 { "type", "object" },
1429 { "properties",
1430 {
1431 { "name",
1432 {
1433 { "type", "string" },
1434 { "const", function.at(key: "name") },
1435 } },
1436 { "arguments", function.at(key: "parameters") },
1437 } },
1438 { "required", json::array(init: { "name", "arguments" }) },
1439 });
1440 });
1441 auto schema = json{
1442 { "type", "array" },
1443 { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
1444 { "minItems", 1 },
1445 };
1446 if (!inputs.parallel_tool_calls) {
1447 schema["maxItems"] = 1;
1448 }
1449 builder.add_rule("root",
1450 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1451 "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
1452 " \"</TOOLCALL>\"");
1453 });
1454 data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1455 // If thinking_forced_open, then we capture the </think> tag in the grammar,
1456 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1457 .value: std::string(data.thinking_forced_open ?
1458 "[\\s\\S]*?(</think>\\s*)" :
1459 "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1460 "(<TOOLCALL>)[\\s\\S]*" });
1461 }
1462 return data;
1463}
1464
1465static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
1466 common_chat_params data;
1467
1468 // Generate the prompt using the apply() function with the template
1469 data.prompt = apply(tmpl, inputs);
1470 data.format = COMMON_CHAT_FORMAT_APERTUS;
1471
1472 // Handle thinking tags appropriately based on inputs.enable_thinking
1473 if (string_ends_with(str: data.prompt, suffix: "<|inner_prefix|>")) {
1474 if (!inputs.enable_thinking) {
1475 data.prompt += "<|inner_suffix|>";
1476 } else {
1477 data.thinking_forced_open = true;
1478 }
1479 }
1480
1481 // When tools are present, build grammar for the <|tools_prefix|> format
1482 if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
1483 data.grammar_lazy = true;
1484 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1485 auto schemas = json::array();
1486 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1487 const auto & function = tool.at(key: "function");
1488 schemas.push_back(init: {
1489 { "type", "object" },
1490 { "properties",
1491 {
1492 { function.at(key: "name"), function.at(key: "parameters") }
1493 } },
1494 { "required", json::array(init: { function.at(key: "name") }) },
1495 });
1496 });
1497 auto schema = json{
1498 { "type", "array" },
1499 { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
1500 { "minItems", 1 },
1501 };
1502 if (!inputs.parallel_tool_calls) {
1503 schema["maxItems"] = 1;
1504 }
1505 builder.add_rule("root",
1506 std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
1507 "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
1508 });
1509 data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1510 // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
1511 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1512 .value: std::string(data.thinking_forced_open ?
1513 "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
1514 "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
1515 "(<\\|tools_prefix\\|>)[\\s\\S]*" });
1516 data.preserved_tokens = {
1517 "<|system_start|>",
1518 "<|system_end|>",
1519 "<|developer_start|>",
1520 "<|developer_end|>",
1521 "<|user_start|>",
1522 "<|user_end|>",
1523 "<|assistant_start|>",
1524 "<|assistant_end|>",
1525 "<|inner_prefix|>",
1526 "<|inner_suffix|>",
1527 "<|tools_prefix|>",
1528 "<|tools_suffix|>",
1529 };
1530 }
1531 return data;
1532}
1533static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
1534 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
1535
1536 if (!builder.syntax().parse_tool_calls) {
1537 builder.add_content(content: builder.consume_rest());
1538 return;
1539 }
1540
1541 static const common_regex function_regex(
1542 "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
1543 static const common_regex close_regex("\\}\\s*");
1544
1545 static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
1546 static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
1547
1548 if (with_builtin_tools) {
1549 static const common_regex builtin_call_regex("<\\|python_tag\\|>");
1550 if (auto res = builder.try_find_regex(regex: builtin_call_regex)) {
1551 auto fun_res = builder.consume_regex(regex: function_name_regex);
1552 auto function_name = builder.str(rng: fun_res.groups[1]);
1553
1554 common_healing_marker healing_marker;
1555 json args = json::object();
1556 while (true) {
1557 if (auto arg_res = builder.try_consume_regex(regex: arg_name_regex)) {
1558 auto arg_name = builder.str(rng: arg_res->groups[1]);
1559 auto partial = builder.consume_json();
1560 args[arg_name] = partial.json;
1561 healing_marker.marker = partial.healing_marker.marker;
1562 healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
1563 builder.consume_spaces();
1564 if (!builder.try_consume_literal(literal: ",")) {
1565 break;
1566 }
1567 } else {
1568 break;
1569 }
1570 }
1571 builder.consume_literal(literal: ")");
1572 builder.consume_spaces();
1573
1574 auto arguments = args.dump();
1575 if (!builder.add_tool_call(name: function_name, id: "", arguments)) {
1576 throw common_chat_msg_partial_exception("Incomplete tool call");
1577 }
1578 return;
1579 }
1580 }
1581 parse_json_tool_calls(
1582 builder,
1583 /* block_open= */ std::nullopt,
1584 /* function_regex_start_only= */ function_regex,
1585 /* function_regex= */ std::nullopt,
1586 close_regex,
1587 block_close: std::nullopt);
1588
1589}
1590
1591static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1592 common_chat_params data;
1593 auto prompt = apply(tmpl, inputs);
1594
1595 // Hacks to fix the official (broken) prompt.
1596 // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
1597 // until the official template is fixed.
1598 if (tmpl.source().find(s: "{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
1599 // Don't leave the chat dangling after tool results
1600 if (string_ends_with(str: prompt, suffix: "<|tool▁outputs▁end|>")) {
1601 prompt += "<|end▁of▁sentence|>";
1602 if (inputs.add_generation_prompt) {
1603 prompt += "<|Assistant|>";
1604 }
1605 }
1606 // Fix up tool call delta example added by Minja
1607 prompt = std::regex_replace(
1608 s: prompt,
1609 e: std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
1610 fmt: "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
1611 }
1612 data.prompt = prompt;
1613 data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
1614 if (string_ends_with(str: data.prompt, suffix: "<think>\n")) {
1615 if (!inputs.enable_thinking) {
1616 data.prompt += "</think>";
1617 } else {
1618 data.thinking_forced_open = true;
1619 }
1620 }
1621
1622 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1623 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
1624 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1625 std::vector<std::string> tool_rules;
1626 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1627 const auto & function = tool.at(key: "function");
1628 std::string name = function.at(key: "name");
1629 auto parameters = function.at(key: "parameters");
1630 builder.resolve_refs(parameters);
1631 tool_rules.push_back(x: builder.add_rule(name + "-call",
1632 "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n"
1633 "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
1634 "\"```<|tool▁call▁end|>\""));
1635 });
1636 // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1637 // so we accept common variants (then it's all constrained)
1638 builder.add_rule("root",
1639 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1640 "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
1641 "(" + string_join(values: tool_rules, separator: " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
1642 "\"<|tool▁calls▁end|>\""
1643 " space");
1644 data.grammar_triggers.push_back(x: {
1645 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1646 // If thinking_forced_open, then we capture the </think> tag in the grammar,
1647 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1648 .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1649 "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
1650 });
1651 data.preserved_tokens = {
1652 "<think>",
1653 "</think>",
1654 "<|tool▁calls▁begin|>",
1655 "<|tool▁call▁begin|>",
1656 "<|tool▁sep|>",
1657 "<|tool▁call▁end|>",
1658 "<|tool▁calls▁end|",
1659 };
1660 });
1661 }
1662 return data;
1663}
1664
1665static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1666 common_chat_params data;
1667
1668 // Pass thinking context for DeepSeek V3.1 template
1669 json additional_context = {
1670 {"thinking", inputs.enable_thinking},
1671 };
1672
1673 auto prompt = apply(tmpl, inputs,
1674 /* messages_override= */ inputs.messages,
1675 /* tools_override= */ std::nullopt,
1676 additional_context);
1677 data.prompt = prompt;
1678 data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
1679 if (string_ends_with(str: data.prompt, suffix: "<think>")) {
1680 if (!inputs.enable_thinking) {
1681 data.prompt += "</think>";
1682 } else {
1683 data.thinking_forced_open = true;
1684 }
1685 }
1686 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1687 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
1688 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1689 std::vector<std::string> tool_rules;
1690 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1691 const auto & function = tool.at(key: "function");
1692 std::string name = function.at(key: "name");
1693 auto parameters = function.at(key: "parameters");
1694 builder.resolve_refs(parameters);
1695 tool_rules.push_back(x: builder.add_rule(name + "-call",
1696 "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
1697 "\" " + builder.add_schema(name + "-args", parameters) + " "
1698 "\"<|tool▁call▁end|>\""));
1699 });
1700 // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1701 // so we accept common variants (then it's all constrained)
1702 builder.add_rule("root",
1703 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
1704 "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
1705 "(" + string_join(values: tool_rules, separator: " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
1706 "\"<|tool▁calls▁end|>\""
1707 " space");
1708 data.grammar_triggers.push_back(x: {
1709 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1710 // If thinking_forced_open, then we capture the </think> tag in the grammar,
1711 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1712 .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
1713 "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
1714 });
1715 data.preserved_tokens = {
1716 "<think>",
1717 "</think>",
1718 "<|tool▁calls▁begin|>",
1719 "<|tool▁call▁begin|>",
1720 "<|tool▁sep|>",
1721 "<|tool▁call▁end|>",
1722 "<|tool▁calls▁end|>",
1723 };
1724 });
1725 }
1726 return data;
1727}
1728
1729static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
1730 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
1731 if (!builder.syntax().parse_tool_calls) {
1732 builder.add_content(content: builder.consume_rest());
1733 return;
1734 }
1735
1736 static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1737 static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1738 static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
1739 static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
1740
1741 parse_json_tool_calls(
1742 builder,
1743 /* block_open= */ tool_calls_begin,
1744 /* function_regex_start_only= */ std::nullopt,
1745 function_regex,
1746 close_regex,
1747 block_close: tool_calls_end);
1748}
1749
1750static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
1751 static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
1752
1753 static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
1754 static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
1755 static const common_regex tool_calls_end("<|tool▁calls▁end|>");
1756
1757 if (!builder.syntax().parse_tool_calls) {
1758 LOG_DBG("%s: not parse_tool_calls\n", __func__);
1759 builder.add_content(content: builder.consume_rest());
1760 return;
1761 }
1762
1763 LOG_DBG("%s: parse_tool_calls\n", __func__);
1764
1765 parse_json_tool_calls(
1766 builder,
1767 /* block_open= */ tool_calls_begin,
1768 /* function_regex_start_only= */ std::nullopt,
1769 function_regex,
1770 close_regex,
1771 block_close: tool_calls_end);
1772}
1773
1774static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
1775 // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1776 // First try to parse using the standard reasoning parsing method
1777 LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
1778
1779 auto start_pos = builder.pos();
1780 auto found_end_think = builder.try_find_literal(literal: "</think>");
1781 builder.move_to(pos: start_pos);
1782
1783 if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
1784 LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
1785 common_chat_parse_deepseek_v3_1_content(builder);
1786 } else if (builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>")) {
1787 // If reasoning was parsed successfully, the remaining content is regular content
1788 LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
1789 // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1790 common_chat_parse_deepseek_v3_1_content(builder);
1791 } else {
1792 if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1793 LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
1794 common_chat_parse_deepseek_v3_1_content(builder);
1795 return;
1796 }
1797 // If no reasoning tags found, check if we should treat everything as reasoning
1798 if (builder.syntax().thinking_forced_open) {
1799 // If thinking is forced open but no tags found, treat everything as reasoning
1800 LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
1801 builder.add_reasoning_content(reasoning_content: builder.consume_rest());
1802 } else {
1803 LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
1804 // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1805 common_chat_parse_deepseek_v3_1_content(builder);
1806 }
1807 }
1808}
1809
1810static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
1811 common_chat_params data;
1812
1813 // Copy reasoning to the "thinking" field as expected by the gpt-oss template
1814 auto adjusted_messages = json::array();
1815 for (const auto & msg : inputs.messages) {
1816 auto has_reasoning_content = msg.contains(key: "reasoning_content") && msg.at(key: "reasoning_content").is_string();
1817 auto has_tool_calls = msg.contains(key: "tool_calls") && msg.at(key: "tool_calls").is_array();
1818
1819 if (has_reasoning_content && has_tool_calls) {
1820 auto adjusted_message = msg;
1821 adjusted_message["thinking"] = msg.at(key: "reasoning_content");
1822 adjusted_messages.push_back(val: adjusted_message);
1823 } else {
1824 adjusted_messages.push_back(val: msg);
1825 }
1826 }
1827
1828 auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
1829
1830 // Check if we need to replace the return token with end token during
1831 // inference and without generation prompt. For more details see:
1832 // https://github.com/ggml-org/llama.cpp/issues/15417
1833 if (inputs.is_inference && !inputs.add_generation_prompt) {
1834 static constexpr std::string_view return_token = "<|return|>";
1835 static constexpr std::string_view end_token = "<|end|>";
1836 if (size_t pos = prompt.rfind(svt: return_token); pos != std::string::npos) {
1837 prompt.replace(pos: pos, n: return_token.length(), svt: end_token);
1838 }
1839 }
1840
1841 data.prompt = prompt;
1842 data.format = COMMON_CHAT_FORMAT_GPT_OSS;
1843
1844 // These special tokens are required to parse properly, so we include them
1845 // even if parse_tool_calls is false.
1846 data.preserved_tokens = {
1847 "<|channel|>",
1848 "<|constrain|>",
1849 "<|message|>",
1850 "<|start|>",
1851 "<|end|>",
1852 };
1853
1854 if (!inputs.json_schema.is_null()) {
1855 data.grammar_lazy = false;
1856 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1857 auto schema = inputs.json_schema;
1858 builder.resolve_refs(schema);
1859
1860 auto not_end = builder.add_rule("not-end",
1861 "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
1862 auto analysis = builder.add_rule("analysis",
1863 "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1864 auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
1865 auto final = builder.add_rule("final",
1866 "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
1867 builder.add_schema("response", schema)
1868 );
1869
1870 builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
1871 });
1872 }
1873
1874 if (inputs.tools.is_array() && !inputs.tools.empty()) {
1875 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1876 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
1877 // tool calls can appear in commentary or analysis channels
1878 auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
1879
1880 std::vector<std::string> tool_rules_recipient_in_role;
1881 std::vector<std::string> tool_rules_recipient_in_channel;
1882 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
1883 const auto & function = tool.at(key: "function");
1884 std::string name = function.at(key: "name");
1885 auto parameters = function.at(key: "parameters");
1886 builder.resolve_refs(parameters);
1887
1888 tool_rules_recipient_in_role.push_back(
1889 x: builder.add_rule(name + "-call",
1890 "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
1891 builder.add_schema(name + "-args", parameters)
1892 )
1893 );
1894
1895 tool_rules_recipient_in_channel.push_back(
1896 x: builder.add_rule(name + "-call",
1897 "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
1898 builder.add_schema(name + "-args", parameters)
1899 )
1900 );
1901 });
1902
1903 auto recipient_in_channel = builder.add_rule("recipient_in_channel",
1904 channel + " \" to=functions.\" ( " +
1905 string_join(values: tool_rules_recipient_in_channel, separator: " | ") + " )"
1906 );
1907
1908 if (data.grammar_lazy) {
1909 auto recipient_in_role = builder.add_rule("recipient_in_role",
1910 "\"<|start|>assistant\"? \" to=functions.\" ( " +
1911 string_join(values: tool_rules_recipient_in_role, separator: " | ") + " )"
1912 );
1913
1914 builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1915 } else {
1916 auto not_end = builder.add_rule("not-end",
1917 "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
1918 auto analysis = builder.add_rule("analysis",
1919 "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1920 auto commentary = builder.add_rule("commentary",
1921 "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1922
1923 auto recipient_in_role = builder.add_rule("recipient_in_role",
1924 "\" to=functions.\" ( " + string_join(values: tool_rules_recipient_in_role, separator: " | ") + " )"
1925 );
1926
1927 builder.add_rule("root",
1928 "( " + analysis + " \"<|start|>assistant\" )? " +
1929 "( " + commentary + " \"<|start|>assistant\" )? " +
1930 "( " + recipient_in_role + " | " + recipient_in_channel + " )"
1931 );
1932 }
1933
1934 // Trigger on tool calls that appear in the commentary channel
1935 data.grammar_triggers.push_back(x: {
1936 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1937 .value: "<\\|channel\\|>(commentary|analysis) to"
1938 });
1939
1940 // Trigger tool calls that appear in the role section, either at the
1941 // start or in the middle.
1942 data.grammar_triggers.push_back(x: {
1943 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1944 .value: "^ to"
1945 });
1946
1947 data.grammar_triggers.push_back(x: {
1948 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1949 .value: "<\\|start\\|>assistant to"
1950 });
1951 });
1952 }
1953
1954 return data;
1955}
1956static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
1957 static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
1958 static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
1959
1960 static const common_regex start_regex("<\\|start\\|>assistant");
1961 static const common_regex analysis_regex("<\\|channel\\|>analysis");
1962 static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
1963 static const common_regex preamble_regex("<\\|channel\\|>commentary");
1964 static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
1965 static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
1966
1967 auto consume_end = [&](bool include_end = false) {
1968 if (auto res = builder.try_find_literal(literal: "<|end|>")) {
1969 return res->prelude + (include_end ? builder.str(rng: res->groups[0]) : "");
1970 }
1971 return builder.consume_rest();
1972 };
1973
1974 auto handle_tool_call = [&](const std::string & name) {
1975 if (auto args = builder.try_consume_json_with_dumped_args(args_paths: {{}})) {
1976 if (builder.syntax().parse_tool_calls) {
1977 if (!builder.add_tool_call(name, id: "", arguments: args->value) || args->is_partial) {
1978 throw common_chat_msg_partial_exception("incomplete tool call");
1979 }
1980 } else if (args->is_partial) {
1981 throw common_chat_msg_partial_exception("incomplete tool call");
1982 }
1983 }
1984 };
1985
1986 auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
1987 auto match = regex.search(input, pos: 0, as_match: true);
1988 if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
1989 return match;
1990 }
1991 return std::nullopt;
1992 };
1993
1994 do {
1995 auto header_start_pos = builder.pos();
1996 auto content_start = builder.try_find_literal(literal: "<|message|>");
1997 if (!content_start) {
1998 throw common_chat_msg_partial_exception("incomplete header");
1999 }
2000
2001 auto header = content_start->prelude;
2002
2003 if (auto match = regex_match(tool_call1_regex, header)) {
2004 auto group = match->groups[1];
2005 auto name = header.substr(pos: group.begin, n: group.end - group.begin);
2006 handle_tool_call(name);
2007 continue;
2008 }
2009
2010 if (auto match = regex_match(tool_call2_regex, header)) {
2011 auto group = match->groups[2];
2012 auto name = header.substr(pos: group.begin, n: group.end - group.begin);
2013 handle_tool_call(name);
2014 continue;
2015 }
2016
2017 if (regex_match(analysis_regex, header)) {
2018 builder.move_to(pos: header_start_pos);
2019 if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
2020 builder.add_content(content: consume_end(true));
2021 } else {
2022 builder.try_parse_reasoning(start_think: "<|channel|>analysis<|message|>", end_think: "<|end|>");
2023 }
2024 continue;
2025 }
2026
2027 if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
2028 builder.add_content(content: consume_end());
2029 continue;
2030 }
2031
2032 // Possibly a malformed message, attempt to recover by rolling
2033 // back to pick up the next <|start|>
2034 LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
2035 builder.move_to(pos: header_start_pos);
2036 } while (builder.try_find_regex(regex: start_regex, from: std::string::npos, add_prelude_to_content: false));
2037
2038 auto remaining = builder.consume_rest();
2039 if (!remaining.empty()) {
2040 LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
2041 }
2042}
2043
2044static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
2045 LOG_DBG("%s\n", __func__);
2046 common_chat_params data;
2047 const std::optional<json> tools_override = json();
2048 const std::optional<json> additional_context = json {
2049 {"datetime", format_time(now: inputs.now, format: "%b %d %Y %H:%M:%S GMT")},
2050 {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(indent: 2))},
2051 };
2052 data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context);
2053 if (inputs.tools.is_array() && !inputs.tools.empty()) {
2054 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2055 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2056 auto schemas = json::array();
2057 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
2058 const auto & function = tool.at(key: "function");
2059 schemas.push_back(init: {
2060 {"type", "object"},
2061 {"properties", {
2062 {"name", {
2063 {"type", "string"},
2064 {"const", function.at(key: "name")},
2065 }},
2066 {"arguments", function.at(key: "parameters")},
2067 }},
2068 {"required", json::array(init: {"name", "arguments", "id"})},
2069 });
2070 });
2071 auto schema = json {
2072 {"type", "array"},
2073 {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
2074 {"minItems", 1},
2075 };
2076 if (!inputs.parallel_tool_calls) {
2077 schema["maxItems"] = 1;
2078 }
2079 builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
2080 });
2081 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: " functools["});
2082 data.preserved_tokens = {
2083 " functools[",
2084 };
2085 data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
2086 } else {
2087 data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
2088 }
2089 return data;
2090}
2091static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
2092 if (!builder.syntax().parse_tool_calls) {
2093 builder.add_content(content: builder.consume_rest());
2094 return;
2095 }
2096 static const common_regex prefix(regex_escape(s: " functools["));
2097 parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
2098}
2099
2100static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
2101 // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
2102 // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
2103 // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
2104 common_chat_params data;
2105 data.prompt = apply(tmpl, inputs);
2106 data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
2107 if (inputs.tools.is_array() && !inputs.tools.empty()) {
2108 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2109 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2110 std::vector<std::string> first_tool_rules;
2111 std::vector<std::string> subsequent_tool_rules;
2112 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
2113 const auto & function = tool.at(key: "function");
2114 std::string name = function.at(key: "name");
2115 auto parameters = function.at(key: "parameters");
2116 builder.resolve_refs(parameters);
2117 std::string args_pattern = "[\\s\\S]*";
2118 auto args_rule = builder.add_schema(name + "-args", parameters);
2119 if (name == "python") {
2120 args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
2121 } else {
2122 args_pattern = "\\{" + args_pattern;
2123 }
2124 auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
2125 first_tool_rules.push_back(x: call_rule);
2126 if (inputs.parallel_tool_calls) {
2127 subsequent_tool_rules.push_back(x: builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
2128 }
2129 data.grammar_triggers.push_back(x: {
2130 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
2131 .value: "((?:[\\s\\S]+?>>>)?" + regex_escape(s: name) + "\n)" + args_pattern,
2132 });
2133 });
2134 data.preserved_tokens = {
2135 "<|end_header_id|>",
2136 };
2137 auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(values: first_tool_rules, separator: " | ")) + " space";
2138 if (inputs.parallel_tool_calls) {
2139 auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(values: subsequent_tool_rules, separator: " | ")) + " space";
2140 builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
2141 } else {
2142 builder.add_rule("root", first_rule);
2143 }
2144
2145 });
2146 }
2147 return data;
2148}
2149static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
2150 static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
2151 static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
2152 static const common_regex close_regex(R"(\s*)");
2153
2154 parse_json_tool_calls(
2155 builder,
2156 block_open: std::nullopt,
2157 function_regex_start_only,
2158 function_regex,
2159 close_regex,
2160 block_close: std::nullopt,
2161 /* allow_raw_python= */ true,
2162 /* get_function_name= */ [&](const auto & res) -> std::string {
2163 auto at_start = res.groups[0].begin == 0;
2164 auto name = builder.str(rng: res.groups[1]);
2165 if (!name.empty() && name.back() == '{') {
2166 // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
2167 builder.move_back(n: 1);
2168 }
2169 auto idx = name.find_last_not_of("\n{");
2170 name = name.substr(0, idx + 1);
2171 if (at_start && name == "all") {
2172 return "";
2173 }
2174 return name;
2175 });
2176}
2177
2178static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
2179 // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
2180 common_chat_params data;
2181
2182 if (!inputs.tools.is_null()) {
2183 std::string python_code_argument_name;
2184 auto has_raw_python = false;
2185
2186 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2187 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2188 std::vector<std::string> tool_rules;
2189 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
2190 const auto & function = tool.at(key: "function");
2191 const auto & parameters = function.at(key: "parameters");
2192 std::string name = function.at(key: "name");
2193 if (name == "python" || name == "ipython") {
2194 if (!parameters.contains(key: "type")) {
2195 throw std::runtime_error("Missing type in python tool");
2196 }
2197 has_raw_python = true;
2198 const auto & type = parameters.at(key: "type");
2199 if (type == "object") {
2200 auto properties = parameters.at(key: "properties");
2201 for (auto it = properties.begin(); it != properties.end(); ++it) {
2202 if (it.value().at(key: "type") == "string") {
2203 if (!python_code_argument_name.empty()) {
2204 throw std::runtime_error("Multiple string arguments found in python tool");
2205 }
2206 python_code_argument_name = it.key();
2207 }
2208 }
2209 if (python_code_argument_name.empty()) {
2210 throw std::runtime_error("No string argument found in python tool");
2211 }
2212 } else if (type != "string") {
2213 throw std::runtime_error("Invalid type in python tool: " + type.dump());
2214 }
2215 }
2216 tool_rules.push_back(x: builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
2217 });
2218 if (has_raw_python) {
2219 tool_rules.push_back(x: builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
2220 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<|python_tag|>"});
2221 data.preserved_tokens.push_back(x: "<|python_tag|>");
2222 }
2223 auto tool_call = builder.add_rule("tool_call", string_join(values: tool_rules, separator: " | ")) + " space";
2224 builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
2225 data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<function="});
2226 });
2227 data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
2228 } else {
2229 data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
2230 }
2231
2232 data.prompt = apply(tmpl, inputs);
2233 // TODO: if (has_raw_python)
2234 return data;
2235}
2236static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
2237 if (!builder.syntax().parse_tool_calls) {
2238 builder.add_content(content: builder.consume_rest());
2239 return;
2240 }
2241 // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
2242 static const common_regex python_tag_regex(regex_escape(s: "<|python_tag|>"));
2243
2244 static const common_regex function_regex(R"(<function=(\w+)>)");
2245 static const common_regex close_regex(R"(</function>)");
2246
2247 parse_json_tool_calls(
2248 builder,
2249 /* block_open= */ std::nullopt,
2250 /* function_regex_start_only= */ std::nullopt,
2251 function_regex,
2252 close_regex,
2253 block_close: std::nullopt);
2254
2255 if (auto res = builder.try_find_regex(regex: python_tag_regex)) {
2256 auto arguments = wrap_code_as_arguments(builder, code: builder.consume_rest());
2257 builder.add_tool_call(name: "python", id: "", arguments);
2258 return;
2259 }
2260}
2261
2262static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
2263 common_chat_params data;
2264
2265 json extra_context = json {
2266 {"enable_thinking", inputs.enable_thinking},
2267 };
2268 extra_context.update(j: inputs.extra_context);
2269
2270 data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, additional_context: extra_context);
2271 data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
2272 if (string_ends_with(str: data.prompt, suffix: "<think>\n")) {
2273 if (!extra_context["enable_thinking"]) {
2274 data.prompt += "</think>";
2275 } else {
2276 data.thinking_forced_open = true;
2277 }
2278 }
2279
2280 if (!inputs.tools.is_null()) {
2281 // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
2282 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2283 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2284 std::vector<std::string> tool_rules;
2285 std::vector<std::string> tool_call_alts;
2286 std::vector<std::string> escaped_names;
2287 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
2288 const auto & function = tool.at(key: "function");
2289 std::string name = function.at(key: "name");
2290 auto parameters = function.at(key: "parameters");
2291 builder.resolve_refs(parameters);
2292 tool_rules.push_back(x: builder.add_schema(name + "-call", {
2293 {"type", "object"},
2294 {"properties", json {
2295 {"name", json {{"const", name}}},
2296 {"arguments", parameters},
2297 }},
2298 {"required", json::array(init: {"name", "arguments"})},
2299 }));
2300 tool_call_alts.push_back(x: builder.add_rule(
2301 name + "-function-tag",
2302 "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
2303 builder.add_schema(name + "-args", parameters) + " "
2304 "\"</function>\" space"));
2305
2306 data.grammar_triggers.push_back(x: {
2307 .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
2308 .value: "<function=" + name + ">",
2309 });
2310 auto escaped_name = regex_escape(s: name);
2311 data.grammar_triggers.push_back(x: {
2312 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
2313 .value: "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
2314 });
2315 escaped_names.push_back(x: escaped_name);
2316 });
2317 auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(values: tool_rules, separator: " | ") + " ) space");
2318 std::vector<std::string> alt_tags {
2319 any_tool_call,
2320 "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
2321 // The rest is just to accommodate common "good bad" outputs.
2322 "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
2323 "\"<response>\" space " + any_tool_call + " \"</response>\"",
2324 "\"<tools>\" space " + any_tool_call + " \"</tools>\"",
2325 "\"<json>\" space " + any_tool_call + " \"</json>\"",
2326 "\"<xml>\" space " + any_tool_call + " \"</xml>\"",
2327 "\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
2328 };
2329 auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(values: alt_tags, separator: " | ") + " ) space");
2330 tool_call_alts.push_back(x: wrappable_tool_call);
2331 tool_call_alts.push_back(
2332 x: "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
2333 auto tool_call = builder.add_rule("tool_call", string_join(values: tool_call_alts, separator: " | "));
2334 builder.add_rule("root",
2335 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
2336 (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
2337 // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
2338 data.grammar_triggers.push_back(x: {
2339 .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
2340 // If thinking_forced_open, then we capture the </think> tag in the grammar,
2341 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
2342 .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") + (
2343 "\\s*("
2344 "(?:<tool_call>"
2345 "|<function"
2346 "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
2347 "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(values: escaped_names, separator: "|") + ")\""
2348 ")"
2349 ")[\\s\\S]*"
2350 ),
2351 });
2352 data.preserved_tokens = {
2353 "<think>",
2354 "</think>",
2355 "<tool_call>",
2356 "</tool_call>",
2357 "<function",
2358 "<tools>",
2359 "</tools>",
2360 "<response>",
2361 "</response>",
2362 "<function_call>",
2363 "</function_call>",
2364 "<json>",
2365 "</json>",
2366 "<JSON>",
2367 "</JSON>",
2368 "```",
2369 "```json",
2370 "```xml",
2371 };
2372 });
2373 }
2374
2375 return data;
2376}
2377static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
2378 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
2379 if (!builder.syntax().parse_tool_calls) {
2380 builder.add_content(content: builder.consume_rest());
2381 return;
2382 }
2383
2384 static const common_regex open_regex(
2385 "(?:"
2386 "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
2387 "(" // match 2 (open_tag)
2388 "<tool_call>"
2389 "|<function_call>"
2390 "|<tool>"
2391 "|<tools>"
2392 "|<response>"
2393 "|<json>"
2394 "|<xml>"
2395 "|<JSON>"
2396 ")?"
2397 "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
2398 ")"
2399 "|<function=([^>]+)>" // match 4 (function name)
2400 "|<function name=\"([^\"]+)\">" // match 5 (function name again)
2401 );
2402
2403 while (auto res = builder.try_find_regex(regex: open_regex)) {
2404 const auto & block_start = res->groups[1];
2405 std::string block_end = block_start.empty() ? "" : "```";
2406
2407 const auto & open_tag = res->groups[2];
2408 std::string close_tag;
2409
2410 if (!res->groups[3].empty()) {
2411 builder.move_to(pos: res->groups[3].begin);
2412 close_tag = open_tag.empty() ? "" : "</" + builder.str(rng: open_tag).substr(pos: 1);
2413
2414 if (auto tool_call = builder.try_consume_json_with_dumped_args(args_paths: {{"arguments"}})) {
2415 if (!builder.add_tool_call(tool_call: tool_call->value) || tool_call->is_partial) {
2416 throw common_chat_msg_partial_exception("incomplete tool call");
2417 }
2418 builder.consume_spaces();
2419 builder.consume_literal(literal: close_tag);
2420 builder.consume_spaces();
2421 if (!block_end.empty()) {
2422 builder.consume_literal(literal: block_end);
2423 builder.consume_spaces();
2424 }
2425 } else {
2426 throw common_chat_msg_partial_exception("failed to parse tool call");
2427 }
2428 } else {
2429 auto function_name = builder.str(rng: res->groups[4]);
2430 if (function_name.empty()) {
2431 function_name = builder.str(rng: res->groups[5]);
2432 }
2433 GGML_ASSERT(!function_name.empty());
2434
2435 close_tag = "</function>";
2436
2437 if (auto arguments = builder.try_consume_json_with_dumped_args(args_paths: {{}})) {
2438 if (!builder.add_tool_call(name: function_name, id: "", arguments: arguments->value) || arguments->is_partial) {
2439 throw common_chat_msg_partial_exception("incomplete tool call");
2440 }
2441 builder.consume_spaces();
2442 builder.consume_literal(literal: close_tag);
2443 builder.consume_spaces();
2444 if (!block_end.empty()) {
2445 builder.consume_literal(literal: block_end);
2446 builder.consume_spaces();
2447 }
2448 }
2449 }
2450 }
2451
2452 builder.add_content(content: builder.consume_rest());
2453}
2454
2455static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
2456 common_chat_params data;
2457
2458 // Pass thinking context for Granite template
2459 json additional_context = {
2460 {"thinking", inputs.enable_thinking},
2461 };
2462
2463 data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
2464 data.format = COMMON_CHAT_FORMAT_GRANITE;
2465
2466 if (string_ends_with(str: data.prompt, suffix: "<think>\n") || string_ends_with(str: data.prompt, suffix: "<think>")) {
2467 if (!inputs.enable_thinking) {
2468 data.prompt += "</think>";
2469 } else {
2470 data.thinking_forced_open = true;
2471 }
2472 }
2473
2474 if (!inputs.tools.is_null()) {
2475 // Granite uses <|tool_call|> followed by JSON list
2476 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2477 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2478 std::vector<std::string> tool_rules;
2479 foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
2480 const auto & function = tool.at(key: "function");
2481 std::string name = function.at(key: "name");
2482 auto parameters = function.at(key: "parameters");
2483 builder.resolve_refs(parameters);
2484 tool_rules.push_back(x: builder.add_rule(name + "-call", builder.add_schema(name +
2485"-args", {
2486 {"type", "object"},
2487 {"properties", {
2488 {"name", {{"const", name}}},
2489 {"arguments", parameters},
2490 }},
2491 {"required", json::array(init: {"name", "arguments"})},
2492 })));
2493 });
2494
2495 auto tool_call = builder.add_rule("tool_call", string_join(values: tool_rules, separator: " | "));
2496 auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
2497
2498 if (data.thinking_forced_open) {
2499 builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
2500 } else {
2501 builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
2502 }
2503
2504 data.grammar_triggers.push_back(x: {
2505 .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
2506 .value: "<|tool_call|>"
2507 });
2508
2509 data.preserved_tokens = {
2510 "<think>",
2511 "</think>",
2512 "<response>",
2513 "</response>",
2514 "<|tool_call|>",
2515 };
2516 });
2517 } else {
2518 // Handle thinking tags for non-tool responses
2519 if (data.thinking_forced_open && inputs.enable_thinking) {
2520 data.grammar_lazy = false;
2521 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2522 builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
2523 });
2524 data.preserved_tokens = {
2525 "<think>",
2526 "</think>",
2527 "<response>",
2528 "</response>",
2529 };
2530 }
2531 }
2532
2533 return data;
2534}
2535
2536static void common_chat_parse_granite(common_chat_msg_parser & builder) {
2537 // Parse thinking tags
2538 static const common_regex start_think_regex(regex_escape(s: "<think>"));
2539 static const common_regex end_think_regex(regex_escape(s: "</think>"));
2540 // Granite models output partial tokens such as "<" and "<think".
2541 // By leveraging try_consume_regex()/try_find_regex() throwing
2542 // common_chat_msg_partial_exception for these partial tokens,
2543 // processing is interrupted and the tokens are not passed to add_content().
2544 if (auto res = builder.try_consume_regex(regex: start_think_regex)) {
2545 // Restore position for try_parse_reasoning()
2546 builder.move_to(pos: res->groups[0].begin);
2547 builder.try_find_regex(regex: end_think_regex, from: std::string::npos, add_prelude_to_content: false);
2548 // Restore position for try_parse_reasoning()
2549 builder.move_to(pos: res->groups[0].begin);
2550 }
2551 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
2552
2553 // Parse response tags
2554 static const common_regex start_response_regex(regex_escape(s: "<response>"));
2555 static const common_regex end_response_regex(regex_escape(s: "</response>"));
2556 // Granite models output partial tokens such as "<" and "<response".
2557 // Same hack as reasoning parsing.
2558 if (builder.try_consume_regex(regex: start_response_regex)) {
2559 builder.try_find_regex(regex: end_response_regex);
2560 }
2561
2562 if (!builder.syntax().parse_tool_calls) {
2563 builder.add_content(content: builder.consume_rest());
2564 return;
2565 }
2566
2567 // Look for tool calls
2568 static const common_regex tool_call_regex(regex_escape(s: "<|tool_call|>"));
2569 if (auto res = builder.try_find_regex(regex: tool_call_regex)) {
2570 builder.move_to(pos: res->groups[0].end);
2571
2572 // Expect JSON array of tool calls
2573 if (auto tool_call = builder.try_consume_json_with_dumped_args(args_paths: {{{"arguments"}}})) {
2574 if (!builder.add_tool_calls(arr: tool_call->value) || tool_call->is_partial) {
2575 throw common_chat_msg_partial_exception("incomplete tool call");
2576 }
2577 }
2578 } else {
2579 builder.add_content(content: builder.consume_rest());
2580 }
2581}
2582
2583static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
2584 // Parse thinking tags
2585 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
2586 if (!builder.syntax().parse_tool_calls) {
2587 builder.add_content(content: builder.consume_rest());
2588 return;
2589 }
2590
2591 // Look for tool calls
2592 static const common_regex tool_call_regex(regex_escape(s: "<TOOLCALL>"));
2593 if (auto res = builder.try_find_regex(regex: tool_call_regex)) {
2594 builder.move_to(pos: res->groups[0].end);
2595
2596 // Expect JSON array of tool calls
2597 auto tool_calls_data = builder.consume_json();
2598 if (tool_calls_data.json.is_array()) {
2599 if (!builder.try_consume_literal(literal: "</TOOLCALL>")) {
2600 throw common_chat_msg_partial_exception("Incomplete tool call");
2601 }
2602 builder.add_tool_calls(arr: tool_calls_data.json);
2603 } else {
2604 throw common_chat_msg_partial_exception("Incomplete tool call");
2605 }
2606 }
2607 builder.add_content(content: builder.consume_rest());
2608}
2609
2610static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
2611 // Parse thinking tags
2612 builder.try_parse_reasoning(start_think: "<|inner_prefix|>", end_think: "<|inner_suffix|>");
2613 if (!builder.syntax().parse_tool_calls) {
2614 builder.add_content(content: builder.consume_rest());
2615 return;
2616 }
2617
2618 // Look for tool calls
2619 static const common_regex tool_call_regex(regex_escape(s: "<|tools_prefix|>"));
2620 if (auto res = builder.try_find_regex(regex: tool_call_regex)) {
2621 builder.move_to(pos: res->groups[0].end);
2622
2623 auto tool_calls_data = builder.consume_json();
2624 if (tool_calls_data.json.is_array()) {
2625 builder.consume_spaces();
2626 if (!builder.try_consume_literal(literal: "<|tools_suffix|>")) {
2627 throw common_chat_msg_partial_exception("Incomplete tool call");
2628 }
2629 for (const auto & value : tool_calls_data.json) {
2630 if (value.is_object()) {
2631 builder.add_tool_call_short_form(tool_call: value);
2632 }
2633 }
2634 } else {
2635 throw common_chat_msg_partial_exception("Incomplete tool call");
2636 }
2637 }
2638 builder.add_content(content: builder.consume_rest());
2639}
2640
2641
2642static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
2643 if (!builder.syntax().parse_tool_calls) {
2644 builder.add_content(content: builder.consume_rest());
2645 return;
2646 }
2647
2648 // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
2649 static const common_regex tool_call_start_regex(regex_escape(s: "<|tool_call_start|>"));
2650 static const common_regex tool_call_end_regex(regex_escape(s: "<|tool_call_end|>"));
2651
2652 // Loop through all tool calls
2653 while (auto res = builder.try_find_regex(regex: tool_call_start_regex, from: std::string::npos, /* add_prelude_to_content= */ true)) {
2654 builder.move_to(pos: res->groups[0].end);
2655
2656 // Parse JSON array format: [{"name": "...", "arguments": {...}}]
2657 auto tool_calls_data = builder.consume_json();
2658
2659 // Consume end marker
2660 builder.consume_spaces();
2661 if (!builder.try_consume_regex(regex: tool_call_end_regex)) {
2662 throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
2663 }
2664
2665 // Process each tool call in the array
2666 if (tool_calls_data.json.is_array()) {
2667 for (const auto & tool_call : tool_calls_data.json) {
2668 if (!tool_call.is_object()) {
2669 throw common_chat_msg_partial_exception("Tool call must be an object");
2670 }
2671
2672 if (!tool_call.contains(key: "name")) {
2673 throw common_chat_msg_partial_exception("Tool call missing 'name' field");
2674 }
2675
2676 std::string function_name = tool_call.at(key: "name");
2677 std::string arguments = "{}";
2678
2679 if (tool_call.contains(key: "arguments")) {
2680 if (tool_call.at(key: "arguments").is_object()) {
2681 arguments = tool_call.at(key: "arguments").dump();
2682 } else if (tool_call.at(key: "arguments").is_string()) {
2683 arguments = tool_call.at(key: "arguments");
2684 }
2685 }
2686
2687 if (!builder.add_tool_call(name: function_name, id: "", arguments)) {
2688 throw common_chat_msg_partial_exception("Incomplete tool call");
2689 }
2690 }
2691 } else {
2692 throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
2693 }
2694
2695 // Consume any trailing whitespace after this tool call
2696 builder.consume_spaces();
2697 }
2698
2699 // Consume any remaining content after all tool calls
2700 auto remaining = builder.consume_rest();
2701 if (!string_strip(str: remaining).empty()) {
2702 builder.add_content(content: remaining);
2703 }
2704}
2705
2706static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
2707 // Parse thinking tags first - this handles the main reasoning content
2708 builder.try_parse_reasoning(start_think: "<seed:think>", end_think: "</seed:think>");
2709
2710 if (!builder.syntax().parse_tool_calls) {
2711 builder.add_content(content: builder.consume_rest());
2712 return;
2713 }
2714
2715 // Parse tool calls - Seed-OSS uses <seed:tool_call> format
2716 static const common_regex tool_call_begin_regex("<seed:tool_call>");
2717 static const common_regex tool_call_end_regex("</seed:tool_call>");
2718 static const common_regex function_regex("<function=([^>]+)>");
2719 static const common_regex param_regex("<parameter=([^>]+)>");
2720
2721 while (auto tool_res = builder.try_find_regex(regex: tool_call_begin_regex)) {
2722 builder.consume_spaces(); // Consume whitespace after <seed:tool_call>
2723
2724 // Look for function call inside tool call, ignore any content before it
2725 if (auto func_res = builder.try_find_regex(regex: function_regex, from: std::string::npos, add_prelude_to_content: false)) {
2726 auto function_name = builder.str(rng: func_res->groups[1]);
2727
2728 // Parse Seed-OSS parameters <parameter=name>value</parameter>
2729 json args = json::object();
2730 // Parse all parameters
2731 while (auto param_res = builder.try_find_regex(regex: param_regex, from: std::string::npos, add_prelude_to_content: false)) {
2732 // again, ignore noise around parameters
2733 auto param_name = builder.str(rng: param_res->groups[1]);
2734 builder.move_to(pos: param_res->groups[0].end);
2735 builder.consume_spaces(); // Consume whitespace after parameter
2736 auto savedPos = builder.pos();
2737 if (auto param_parse = builder.try_find_literal(literal: "</parameter>")) {
2738 auto param = param_parse->prelude;
2739 builder.move_to(pos: savedPos);
2740 try {
2741 if (auto param_res = builder.try_consume_json()) {
2742 args[param_name] = param_res->json;
2743 } else {
2744 args[param_name] = param;
2745 }
2746 } catch (json::exception &) {
2747 args[param_name] = param;
2748 }
2749 } else {
2750 throw common_chat_msg_partial_exception("Incomplete tool parameter");
2751 }
2752 }
2753 // Look for closing function tag
2754 auto end_func = builder.try_find_literal(literal: "</function>");
2755 if (end_func) {
2756 builder.move_to(pos: end_func->groups[0].end);
2757 builder.consume_spaces(); // Consume whitespace after </function>
2758
2759 // Add the tool call with parsed arguments, but only if we REALLY got the literal
2760 auto eaten_fragment = builder.input().substr(pos: end_func->groups[0].begin, n: end_func->groups[0].end);
2761 auto funlen = std::string("</function>").length();
2762 if (eaten_fragment.length() >= funlen && eaten_fragment.substr(pos: 0, n: funlen) == std::string("</function>")) {
2763 if (!builder.add_tool_call(name: function_name, id: "", arguments: args.dump())) {
2764 throw common_chat_msg_partial_exception("Incomplete tool call");
2765 }
2766 } else {
2767 throw common_chat_msg_partial_exception("Incomplete tool call");
2768 }
2769 } else {
2770 throw common_chat_msg_partial_exception("Incomplete tool call");
2771 }
2772 // Look for closing tool call tag
2773 if (auto end_tool = builder.try_find_regex(regex: tool_call_end_regex, from: std::string::npos, add_prelude_to_content: false)) {
2774 builder.move_to(pos: end_tool->groups[0].end);
2775 builder.consume_spaces(); // Consume trailing whitespace after tool call
2776 } else {
2777 throw common_chat_msg_partial_exception("Incomplete tool call");
2778 }
2779 } else {
2780 // No function found - don't consume content here, let it be handled at the end
2781 break;
2782 }
2783 }
2784
2785 // Consume any remaining whitespace after all tool call processing
2786 builder.consume_spaces();
2787 auto remaining = builder.consume_rest();
2788 // If there's any non-whitespace content remaining, add it as content
2789 if (!string_strip(str: remaining).empty()) {
2790 builder.add_content(content: remaining);
2791 }
2792}
2793
2794static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
2795 common_chat_params data;
2796 data.prompt = apply(tmpl, inputs);
2797 data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
2798 data.grammar_lazy = false;
2799 if (!inputs.json_schema.is_null()) {
2800 if (!inputs.grammar.empty()) {
2801 throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
2802 }
2803 data.grammar = json_schema_to_grammar(schema: inputs.json_schema);
2804 } else {
2805 data.grammar = inputs.grammar;
2806 }
2807 return data;
2808}
2809
2810static common_chat_params common_chat_params_init_seed_oss(
2811 const common_chat_template & tmpl,
2812 templates_params & params,
2813 const common_chat_templates_inputs & inputs)
2814{
2815 common_chat_params data;
2816 data.prompt = apply(tmpl, inputs: params);
2817 data.format = COMMON_CHAT_FORMAT_SEED_OSS;
2818 if (string_ends_with(str: data.prompt, suffix: "<seed:think>")) {
2819 if (!inputs.enable_thinking) {
2820 data.prompt += "</seed:think>";
2821 } else {
2822 data.thinking_forced_open = true;
2823 }
2824 }
2825
2826 if (params.tools.is_array() && !params.tools.empty()) {
2827 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
2828 data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
2829 std::vector<std::string> tool_rules;
2830 foreach_function(tools: params.tools, fn: [&](const json & tool) {
2831 const auto & function = tool.at(key: "function");
2832 std::string name = function.at(key: "name");
2833 auto parameters = function.at(key: "parameters");
2834 builder.resolve_refs(parameters);
2835
2836 // Create rule for Seed-OSS function call format
2837 std::string param_rules;
2838 if (parameters.contains(key: "properties")) {
2839 for (const auto & [key, value] : parameters.at(key: "properties").items()) {
2840 param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
2841 "\"</parameter>\"";
2842 }
2843 }
2844
2845 tool_rules.push_back(x: builder.add_rule(name + "-call",
2846 "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
2847 param_rules +
2848 " \"</function>\" space \"</seed:tool_call>\""));
2849 });
2850
2851 data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<seed:tool_call>" });
2852
2853 data.preserved_tokens = {
2854 "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
2855 "<function=", "</function>", "<parameter=", "</parameter>",
2856 };
2857
2858 builder.add_rule("root", string_join(values: tool_rules, separator: " | "));
2859 });
2860 }
2861 return data;
2862}
2863
2864static common_chat_params common_chat_templates_apply_jinja(
2865 const struct common_chat_templates * tmpls,
2866 const struct common_chat_templates_inputs & inputs)
2867{
2868 templates_params params;
2869 params.tools = common_chat_tools_to_json_oaicompat<json>(tools: inputs.tools);
2870 const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
2871 ? *tmpls->template_tool_use
2872 : *tmpls->template_default;
2873 const auto & src = tmpl.source();
2874 const auto & caps = tmpl.original_caps();
2875 params.messages = common_chat_msgs_to_json_oaicompat<json>(msgs: inputs.messages, /* concat_text= */ concat_typed_text: !tmpl.original_caps().requires_typed_content);
2876 params.add_generation_prompt = inputs.add_generation_prompt;
2877 params.tool_choice = inputs.tool_choice;
2878 params.enable_thinking = inputs.enable_thinking;
2879 params.grammar = inputs.grammar;
2880 params.now = inputs.now;
2881 params.add_bos = tmpls->add_bos;
2882 params.add_eos = tmpls->add_eos;
2883
2884 params.extra_context = json::object();
2885 for (auto el : inputs.chat_template_kwargs) {
2886 params.extra_context[el.first] = json::parse(i&: el.second);
2887 }
2888
2889 if (!inputs.json_schema.empty()) {
2890 params.json_schema = json::parse(i: inputs.json_schema);
2891 }
2892
2893 if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
2894 LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
2895 params.parallel_tool_calls = false;
2896 } else {
2897 params.parallel_tool_calls = inputs.parallel_tool_calls;
2898 }
2899
2900 if (params.tools.is_array()) {
2901 if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
2902 throw std::runtime_error("Cannot specify grammar with tools");
2903 }
2904 if (caps.supports_tool_calls && !caps.supports_tools) {
2905 LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
2906 }
2907 }
2908
2909 // DeepSeek V3.1: detect based on specific patterns in the template
2910 if (src.find(s: "message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
2911 params.json_schema.is_null()) {
2912 return common_chat_params_init_deepseek_v3_1(tmpl, inputs: params);
2913 }
2914
2915 // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
2916 if (src.find(s: "<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
2917 return common_chat_params_init_deepseek_r1(tmpl, inputs: params);
2918 }
2919
2920 // Command R7B: : use handler in all cases except json schema (thinking / tools).
2921 if (src.find(s: "<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
2922 return common_chat_params_init_command_r7b(tmpl, inputs: params);
2923 }
2924
2925 // Granite (IBM) - detects thinking / tools support
2926 if (src.find(s: "elif thinking") != std::string::npos && src.find(s: "<|tool_call|>") != std::string::npos) {
2927 return common_chat_params_init_granite(tmpl, inputs: params);
2928 }
2929
2930 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
2931 if (src.find(s: "<tool_call>") != std::string::npos && params.json_schema.is_null()) {
2932 return common_chat_params_init_hermes_2_pro(tmpl, inputs: params);
2933 }
2934
2935 // GPT-OSS
2936 if (src.find(s: "<|channel|>") != std::string::npos) {
2937 return common_chat_params_init_gpt_oss(tmpl, inputs: params);
2938 }
2939
2940 // Seed-OSS
2941 if (src.find(s: "<seed:think>") != std::string::npos) {
2942 return common_chat_params_init_seed_oss(tmpl, params, inputs);
2943 }
2944
2945 // Nemotron v2
2946 if (src.find(s: "<SPECIAL_10>") != std::string::npos) {
2947 return common_chat_params_init_nemotron_v2(tmpl, inputs: params);
2948 }
2949
2950 // Apertus format detection
2951 if (src.find(s: "<|system_start|>") != std::string::npos && src.find(s: "<|tools_prefix|>") != std::string::npos) {
2952 return common_chat_params_init_apertus(tmpl, inputs: params);
2953 }
2954
2955 // LFM2 (w/ tools)
2956 if (src.find(s: "List of tools: <|tool_list_start|>[") != std::string::npos &&
2957 src.find(s: "]<|tool_list_end|>") != std::string::npos) {
2958 return common_chat_params_init_lfm2(tmpl, inputs: params);
2959 }
2960
2961 // Use generic handler when mixing tools + JSON schema.
2962 // TODO: support that mix in handlers below.
2963 if ((params.tools.is_array() && params.json_schema.is_object())) {
2964 return common_chat_params_init_generic(tmpl, inputs: params);
2965 }
2966
2967 // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
2968 if (src.find(s: ">>>all") != std::string::npos) {
2969 return common_chat_params_init_functionary_v3_2(tmpl, inputs: params);
2970 }
2971
2972 // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
2973 if (src.find(s: " functools[") != std::string::npos) {
2974 return common_chat_params_init_firefunction_v2(tmpl, inputs: params);
2975 }
2976
2977 // Functionary v3.1 (w/ tools)
2978 if (src.find(s: "<|start_header_id|>") != std::string::npos
2979 && src.find(s: "<function=") != std::string::npos) {
2980 return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs: params);
2981 }
2982
2983 // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
2984 if (src.find(s: "<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
2985 auto allow_python_tag_builtin_tools = src.find(s: "<|python_tag|>") != std::string::npos;
2986 return common_chat_params_init_llama_3_x(tmpl, inputs: params, allow_python_tag_builtin_tools);
2987 }
2988
2989 if (src.find(s: "[THINK]") != std::string::npos && src.find(s: "[/THINK]") != std::string::npos) {
2990 return common_chat_params_init_magistral(tmpl, inputs: params);
2991 }
2992
2993 // Plain handler (no tools)
2994 if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
2995 return common_chat_params_init_without_tools(tmpl, inputs: params);
2996 }
2997
2998 // Mistral Nemo (w/ tools)
2999 if (src.find(s: "[TOOL_CALLS]") != std::string::npos) {
3000 return common_chat_params_init_mistral_nemo(tmpl, inputs: params);
3001 }
3002
3003 // Generic fallback
3004 return common_chat_params_init_generic(tmpl, inputs: params);
3005}
3006
3007// Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
3008static common_chat_params common_chat_templates_apply_legacy(
3009 const struct common_chat_templates * tmpls,
3010 const struct common_chat_templates_inputs & inputs)
3011{
3012 int alloc_size = 0;
3013 std::vector<llama_chat_message> chat;
3014 std::vector<std::string> contents;
3015
3016 for (const auto & msg : inputs.messages) {
3017 auto content = msg.content;
3018 for (const auto & part : msg.content_parts) {
3019 if (part.type != "text") {
3020 LOG_WRN("Ignoring non-text content part: %s\n", part.type.c_str());
3021 continue;
3022 }
3023 if (!content.empty()) {
3024 content += "\n";;
3025 }
3026 content += part.text;
3027 }
3028 contents.emplace_back(args: std::move(content));
3029 }
3030 for (size_t i = 0; i < contents.size(); ++i) {
3031 const auto & msg = inputs.messages[i];
3032 const auto & content = contents[i];
3033 chat.push_back(x: {.role: msg.role.c_str(), .content: content.c_str()});
3034 alloc_size += (msg.role.size() + content.size()) * 1.25;
3035 }
3036
3037 std::vector<char> buf(alloc_size);
3038
3039 // run the first time to get the total output length
3040 const auto & src = tmpls->template_default->source();
3041 int32_t res = llama_chat_apply_template(tmpl: src.c_str(), chat: chat.data(), n_msg: chat.size(), add_ass: inputs.add_generation_prompt, buf: buf.data(), length: buf.size());
3042
3043 // error: chat template is not supported
3044 if (res < 0) {
3045 // if the custom "tmpl" is not supported, we throw an error
3046 // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
3047 throw std::runtime_error("this custom template is not supported, try using --jinja");
3048 }
3049
3050 // if it turns out that our buffer is too small, we resize it
3051 if ((size_t) res > buf.size()) {
3052 buf.resize(new_size: res);
3053 res = llama_chat_apply_template(tmpl: src.c_str(), chat: chat.data(), n_msg: chat.size(), add_ass: inputs.add_generation_prompt, buf: buf.data(), length: buf.size());
3054 }
3055
3056 common_chat_params params;
3057 params.prompt = std::string(buf.data(), res);
3058 if (!inputs.json_schema.empty()) {
3059 params.grammar = json_schema_to_grammar(schema: json::parse(i: inputs.json_schema));
3060 } else {
3061 params.grammar = inputs.grammar;
3062 }
3063 return params;
3064}
3065
3066common_chat_params common_chat_templates_apply(
3067 const struct common_chat_templates * tmpls,
3068 const struct common_chat_templates_inputs & inputs)
3069{
3070 GGML_ASSERT(tmpls != nullptr);
3071 return inputs.use_jinja
3072 ? common_chat_templates_apply_jinja(tmpls, inputs)
3073 : common_chat_templates_apply_legacy(tmpls, inputs);
3074}
3075
3076static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
3077 builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>");
3078 builder.add_content(content: builder.consume_rest());
3079}
3080
3081static void common_chat_parse(common_chat_msg_parser & builder) {
3082 LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
3083
3084 switch (builder.syntax().format) {
3085 case COMMON_CHAT_FORMAT_CONTENT_ONLY:
3086 common_chat_parse_content_only(builder);
3087 break;
3088 case COMMON_CHAT_FORMAT_GENERIC:
3089 common_chat_parse_generic(builder);
3090 break;
3091 case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
3092 common_chat_parse_mistral_nemo(builder);
3093 break;
3094 case COMMON_CHAT_FORMAT_MAGISTRAL:
3095 common_chat_parse_magistral(builder);
3096 break;
3097 case COMMON_CHAT_FORMAT_LLAMA_3_X:
3098 common_chat_parse_llama_3_1(builder);
3099 break;
3100 case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
3101 common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
3102 break;
3103 case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
3104 common_chat_parse_deepseek_r1(builder);
3105 break;
3106 case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
3107 common_chat_parse_deepseek_v3_1(builder);
3108 break;
3109 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
3110 common_chat_parse_functionary_v3_2(builder);
3111 break;
3112 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
3113 common_chat_parse_functionary_v3_1_llama_3_1(builder);
3114 break;
3115 case COMMON_CHAT_FORMAT_HERMES_2_PRO:
3116 common_chat_parse_hermes_2_pro(builder);
3117 break;
3118 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
3119 common_chat_parse_firefunction_v2(builder);
3120 break;
3121 case COMMON_CHAT_FORMAT_COMMAND_R7B:
3122 common_chat_parse_command_r7b(builder);
3123 break;
3124 case COMMON_CHAT_FORMAT_GRANITE:
3125 common_chat_parse_granite(builder);
3126 break;
3127 case COMMON_CHAT_FORMAT_GPT_OSS:
3128 common_chat_parse_gpt_oss(builder);
3129 break;
3130 case COMMON_CHAT_FORMAT_SEED_OSS:
3131 common_chat_parse_seed_oss(builder);
3132 break;
3133 case COMMON_CHAT_FORMAT_NEMOTRON_V2:
3134 common_chat_parse_nemotron_v2(builder);
3135 break;
3136 case COMMON_CHAT_FORMAT_APERTUS:
3137 common_chat_parse_apertus(builder);
3138 break;
3139 case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
3140 common_chat_parse_lfm2(builder);
3141 break;
3142 default:
3143 throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(format: builder.syntax().format));
3144 }
3145 builder.finish();
3146}
3147
3148common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
3149 common_chat_msg_parser builder(input, is_partial, syntax);
3150 try {
3151 common_chat_parse(builder);
3152 } catch (const common_chat_msg_partial_exception & ex) {
3153 LOG_DBG("Partial parse: %s\n", ex.what());
3154 if (!is_partial) {
3155 builder.clear_tools();
3156 builder.move_to(pos: 0);
3157 common_chat_parse_content_only(builder);
3158 }
3159 }
3160 auto msg = builder.result();
3161 if (!is_partial) {
3162 LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
3163 }
3164 return msg;
3165}
3166