| 1 | #include "chat.h" |
| 2 | #include "chat-parser.h" |
| 3 | #include "common.h" |
| 4 | #include "json-partial.h" |
| 5 | #include "json-schema-to-grammar.h" |
| 6 | #include "log.h" |
| 7 | #include "regex-partial.h" |
| 8 | |
| 9 | #include <minja/chat-template.hpp> |
| 10 | #include <minja/minja.hpp> |
| 11 | |
| 12 | #include <algorithm> |
| 13 | #include <cstdio> |
| 14 | #include <cctype> |
| 15 | #include <exception> |
| 16 | #include <functional> |
| 17 | #include <iostream> |
| 18 | #include <optional> |
| 19 | #include <stdexcept> |
| 20 | #include <string> |
| 21 | #include <vector> |
| 22 | |
| 23 | using json = nlohmann::ordered_json; |
| 24 | |
// Render a time point as text using a strftime-style format string,
// interpreted in the local timezone (see std::put_time for specifiers).
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
    std::time_t t = std::chrono::system_clock::to_time_t(now);
    // NOTE(review): std::localtime is not thread-safe — presumably fine for current callers; confirm.
    std::tm tm_local = *std::localtime(&t);
    std::ostringstream out;
    out << std::put_time(&tm_local, format.c_str());
    return out.str();
}
| 33 | |
// Return the suffix of `current` that extends `last`:
//  - empty `last`                -> all of `current`
//  - `current` a prefix of `last` -> "" (the previous generation ended on a partial
//    stop word that was kept, while the current one ended on a full, erased stop word)
//  - otherwise `last` must be a prefix of `current`, or the diff is invalid.
static std::string string_diff(const std::string & last, const std::string & current) {
    if (last.empty()) {
        return current;
    }
    const bool current_extends_last = current.compare(0, last.size(), last) == 0;
    if (!current_extends_last) {
        const bool last_extends_current = last.compare(0, current.size(), current) == 0;
        if (last_extends_current) {
            // This happens if the last generation ended on a partial stop word (not erased),
            // and the current ended on a stop word (erased).
            return "";
        }
        throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
    }
    return current.substr(last.size());
}
| 48 | |
| 49 | static bool has_content_or_tool_calls(const common_chat_msg & msg) { |
| 50 | return !msg.content.empty() || !msg.tool_calls.empty(); |
| 51 | } |
| 52 | |
| 53 | template <> |
| 54 | json common_chat_msg::to_json_oaicompat() const |
| 55 | { |
| 56 | json message { |
| 57 | {"role" , "assistant" }, |
| 58 | }; |
| 59 | if (!reasoning_content.empty()) { |
| 60 | message["reasoning_content" ] = reasoning_content; |
| 61 | } |
| 62 | if (content.empty() && !tool_calls.empty()) { |
| 63 | message["content" ] = json(); |
| 64 | } else { |
| 65 | message["content" ] = content; |
| 66 | } |
| 67 | if (!tool_calls.empty()) { |
| 68 | auto arr = json::array(); |
| 69 | for (const auto & tc : tool_calls) { |
| 70 | arr.push_back(init: { |
| 71 | {"type" , "function" }, |
| 72 | {"function" , { |
| 73 | {"name" , tc.name}, |
| 74 | {"arguments" , tc.arguments}, |
| 75 | }}, |
| 76 | {"id" , tc.id}, |
| 77 | // // Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo). |
| 78 | // // We only generate a random id for the ones that don't generate one by themselves |
| 79 | // // (they also won't get to see it as their template likely doesn't use it, so it's all for the client) |
| 80 | // {"id", tc.id.empty() ? gen_tool_call_id() : tc.id}, |
| 81 | }); |
| 82 | } |
| 83 | message["tool_calls" ] = arr; |
| 84 | } |
| 85 | return message; |
| 86 | } |
| 87 | |
| 88 | std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) { |
| 89 | std::vector<common_chat_msg_diff> diffs; |
| 90 | if (previous_msg.reasoning_content != new_msg.reasoning_content) { |
| 91 | auto & diff = diffs.emplace_back(); |
| 92 | diff.reasoning_content_delta = string_diff(last: previous_msg.reasoning_content, current: new_msg.reasoning_content); |
| 93 | } |
| 94 | if (previous_msg.content != new_msg.content) { |
| 95 | auto & diff = diffs.emplace_back(); |
| 96 | diff.content_delta = string_diff(last: previous_msg.content, current: new_msg.content); |
| 97 | } |
| 98 | |
| 99 | if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) { |
| 100 | throw std::runtime_error("Invalid diff: now finding less tool calls!" ); |
| 101 | } |
| 102 | |
| 103 | if (!previous_msg.tool_calls.empty()) { |
| 104 | auto idx = previous_msg.tool_calls.size() - 1; |
| 105 | const auto & pref = previous_msg.tool_calls[idx]; |
| 106 | const auto & newf = new_msg.tool_calls[idx]; |
| 107 | if (pref.name != newf.name) { |
| 108 | throw std::runtime_error("Invalid diff: tool call mismatch!" ); |
| 109 | } |
| 110 | auto args_diff = string_diff(last: pref.arguments, current: newf.arguments); |
| 111 | if (!args_diff.empty() || pref.id != newf.id) { |
| 112 | auto & diff = diffs.emplace_back(); |
| 113 | diff.tool_call_index = idx; |
| 114 | if (pref.id != newf.id) { |
| 115 | diff.tool_call_delta.id = newf.id; |
| 116 | diff.tool_call_delta.name = newf.name; |
| 117 | } |
| 118 | diff.tool_call_delta.arguments = args_diff; |
| 119 | } |
| 120 | } |
| 121 | for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) { |
| 122 | auto & diff = diffs.emplace_back(); |
| 123 | diff.tool_call_index = idx; |
| 124 | diff.tool_call_delta = new_msg.tool_calls[idx]; |
| 125 | } |
| 126 | return diffs; |
| 127 | } |
| 128 | |
| 129 | typedef minja::chat_template common_chat_template; |
| 130 | |
// A model's parsed chat template(s) plus the vocab's BOS/EOS insertion flags.
// Created by common_chat_templates_init, freed by common_chat_templates_free.
struct common_chat_templates {
    bool add_bos;
    bool add_eos;
    bool has_explicit_template; // Model had builtin template or template override was specified.
    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
    std::unique_ptr<common_chat_template> template_tool_use; // optional; taken from the model's "tool_use" template variant when present
};
| 138 | |
| 139 | struct templates_params { |
| 140 | json messages; |
| 141 | json tools; |
| 142 | common_chat_tool_choice tool_choice; |
| 143 | json json_schema; |
| 144 | bool parallel_tool_calls; |
| 145 | bool stream; |
| 146 | std::string grammar; |
| 147 | bool add_generation_prompt = true; |
| 148 | bool enable_thinking = true; |
| 149 | std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); |
| 150 | json ; |
| 151 | bool add_bos; |
| 152 | bool add_eos; |
| 153 | bool is_inference = true; |
| 154 | }; |
| 155 | |
| 156 | common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) { |
| 157 | if (tool_choice == "auto" ) { |
| 158 | return COMMON_CHAT_TOOL_CHOICE_AUTO; |
| 159 | } |
| 160 | if (tool_choice == "none" ) { |
| 161 | return COMMON_CHAT_TOOL_CHOICE_NONE; |
| 162 | } |
| 163 | if (tool_choice == "required" ) { |
| 164 | return COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 165 | } |
| 166 | throw std::runtime_error("Invalid tool_choice: " + tool_choice); |
| 167 | } |
| 168 | |
| 169 | bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) { |
| 170 | common_chat_templates_inputs dummy_inputs; |
| 171 | common_chat_msg msg; |
| 172 | msg.role = "user" ; |
| 173 | msg.content = "test" ; |
| 174 | dummy_inputs.messages = {msg}; |
| 175 | dummy_inputs.enable_thinking = false; |
| 176 | const auto rendered_no_thinking = common_chat_templates_apply(tmpls: chat_templates, inputs: dummy_inputs); |
| 177 | dummy_inputs.enable_thinking = true; |
| 178 | const auto rendered_with_thinking = common_chat_templates_apply(tmpls: chat_templates, inputs: dummy_inputs); |
| 179 | return rendered_no_thinking.prompt != rendered_with_thinking.prompt; |
| 180 | } |
| 181 | |
// Parse an OpenAI-compatible "messages" JSON array into common_chat_msg structs.
// Per-message fields handled: role (required), content (string, array of typed
// "text" parts, or null), tool_calls (function calls only), reasoning_content,
// name (tool name), tool_call_id. Throws std::runtime_error with context on
// malformed input; a message must carry content and/or tool_calls.
template <>
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
    std::vector<common_chat_msg> msgs;

    try {

        if (!messages.is_array()) {
            throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
        }

        for (const auto & message : messages) {
            if (!message.is_object()) {
                throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
            }

            common_chat_msg msg;
            if (!message.contains(key: "role" )) {
                throw std::runtime_error("Missing 'role' in message: " + message.dump());
            }
            msg.role = message.at(key: "role" );

            auto has_content = message.contains(key: "content" );
            auto has_tool_calls = message.contains(key: "tool_calls" );
            if (has_content) {
                const auto & content = message.at(key: "content" );
                if (content.is_string()) {
                    // plain string content
                    msg.content = content;
                } else if (content.is_array()) {
                    // typed content parts: only {"type": "text"} parts are accepted
                    for (const auto & part : content) {
                        if (!part.contains(key: "type" )) {
                            throw std::runtime_error("Missing content part type: " + part.dump());
                        }
                        const auto & type = part.at(key: "type" );
                        if (type != "text" ) {
                            throw std::runtime_error("Unsupported content part type: " + type.dump());
                        }
                        common_chat_msg_content_part msg_part;
                        msg_part.type = type;
                        msg_part.text = part.at(key: "text" );
                        msg.content_parts.push_back(x: msg_part);
                    }
                } else if (!content.is_null()) {
                    // null content is allowed (e.g. a tool-calls-only assistant message)
                    throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)" );
                }
            }
            if (has_tool_calls) {
                // only {"type": "function"} tool calls are supported
                for (const auto & tool_call : message.at(key: "tool_calls" )) {
                    common_chat_tool_call tc;
                    if (!tool_call.contains(key: "type" )) {
                        throw std::runtime_error("Missing tool call type: " + tool_call.dump());
                    }
                    const auto & type = tool_call.at(key: "type" );
                    if (type != "function" ) {
                        throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
                    }
                    if (!tool_call.contains(key: "function" )) {
                        throw std::runtime_error("Missing tool call function: " + tool_call.dump());
                    }
                    const auto & fc = tool_call.at(key: "function" );
                    if (!fc.contains(key: "name" )) {
                        throw std::runtime_error("Missing tool call name: " + tool_call.dump());
                    }
                    tc.name = fc.at(key: "name" );
                    tc.arguments = fc.at(key: "arguments" );
                    if (tool_call.contains(key: "id" )) {
                        tc.id = tool_call.at(key: "id" );
                    }
                    msg.tool_calls.push_back(x: tc);
                }
            }
            if (!has_content && !has_tool_calls) {
                throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)" );
            }
            if (message.contains(key: "reasoning_content" )) {
                msg.reasoning_content = message.at(key: "reasoning_content" );
            }
            if (message.contains(key: "name" )) {
                msg.tool_name = message.at(key: "name" );
            }
            if (message.contains(key: "tool_call_id" )) {
                msg.tool_call_id = message.at(key: "tool_call_id" );
            }

            msgs.push_back(x: msg);
        }
    } catch (const std::exception & e) {
        // @ngxson : disable otherwise it's bloating the API response
        // printf("%s\n", std::string("; messages = ") + messages.dump(2));
        throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
    }

    return msgs;
}
| 275 | |
// Serialize messages back to the OpenAI wire format. content_parts are either
// concatenated into a single newline-joined string (concat_typed_text = true) or
// emitted as an array of {"type","text"} objects. Empty string fields are omitted;
// a message with neither content nor content_parts gets "content": null.
// Throws if a message carries both content and content_parts.
template <>
json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text) {
    json messages = json::array();
    for (const auto & msg : msgs) {
        if (!msg.content.empty() && !msg.content_parts.empty()) {
            throw std::runtime_error("Cannot specify both content and content_parts" );
        }
        json jmsg {
            {"role" , msg.role},
        };
        if (!msg.content.empty()) {
            jmsg["content" ] = msg.content;
        } else if (!msg.content_parts.empty()) {
            if (concat_typed_text) {
                // join all text parts with newlines; non-text parts are dropped with a warning
                std::string text;
                for (const auto & part : msg.content_parts) {
                    if (part.type != "text" ) {
                        LOG_WRN("Ignoring content part type: %s\n" , part.type.c_str());
                        continue;
                    }
                    if (!text.empty()) {
                        text += '\n';
                    }
                    text += part.text;
                }
                jmsg["content" ] = text;
            } else {
                auto & parts = jmsg["content" ] = json::array();
                for (const auto & part : msg.content_parts) {
                    parts.push_back(init: {
                        {"type" , part.type},
                        {"text" , part.text},
                    });
                }
            }
        } else {
            jmsg["content" ] = json(); // null
        }
        if (!msg.reasoning_content.empty()) {
            jmsg["reasoning_content" ] = msg.reasoning_content;
        }
        if (!msg.tool_name.empty()) {
            jmsg["name" ] = msg.tool_name;
        }
        if (!msg.tool_call_id.empty()) {
            jmsg["tool_call_id" ] = msg.tool_call_id;
        }
        if (!msg.tool_calls.empty()) {
            auto & tool_calls = jmsg["tool_calls" ] = json::array();
            for (const auto & tool_call : msg.tool_calls) {
                json tc {
                    {"type" , "function" },
                    {"function" , {
                        {"name" , tool_call.name},
                        {"arguments" , tool_call.arguments},
                    }},
                };
                // the id is optional on the wire; only emit it when present
                if (!tool_call.id.empty()) {
                    tc["id" ] = tool_call.id;
                }
                tool_calls.push_back(val: tc);
            }
        }
        messages.push_back(val: jmsg);
    }
    return messages;
}
| 343 | |
| 344 | template <> |
| 345 | std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const std::string & messages) { |
| 346 | return common_chat_msgs_parse_oaicompat(messages: json::parse(i: messages)); |
| 347 | } |
| 348 | |
| 349 | template <> |
| 350 | std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & tools) { |
| 351 | std::vector<common_chat_tool> result; |
| 352 | |
| 353 | try { |
| 354 | if (!tools.is_null()) { |
| 355 | if (!tools.is_array()) { |
| 356 | throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump()); |
| 357 | } |
| 358 | for (const auto & tool : tools) { |
| 359 | if (!tool.contains(key: "type" )) { |
| 360 | throw std::runtime_error("Missing tool type: " + tool.dump()); |
| 361 | } |
| 362 | const auto & type = tool.at(key: "type" ); |
| 363 | if (!type.is_string() || type != "function" ) { |
| 364 | throw std::runtime_error("Unsupported tool type: " + tool.dump()); |
| 365 | } |
| 366 | if (!tool.contains(key: "function" )) { |
| 367 | throw std::runtime_error("Missing tool function: " + tool.dump()); |
| 368 | } |
| 369 | |
| 370 | const auto & function = tool.at(key: "function" ); |
| 371 | result.push_back(x: { |
| 372 | /* .name = */ function.at(key: "name" ), |
| 373 | /* .description = */ function.at(key: "description" ), |
| 374 | /* .parameters = */ function.at(key: "parameters" ).dump(), |
| 375 | }); |
| 376 | } |
| 377 | } |
| 378 | } catch (const std::exception & e) { |
| 379 | throw std::runtime_error("Failed to parse tools: " + std::string(e.what()) + "; tools = " + tools.dump(indent: 2)); |
| 380 | } |
| 381 | |
| 382 | return result; |
| 383 | } |
| 384 | |
| 385 | template <> |
| 386 | std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const std::string & tools) { |
| 387 | return common_chat_tools_parse_oaicompat(tools: json::parse(i: tools)); |
| 388 | } |
| 389 | |
| 390 | template <> |
| 391 | json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools) { |
| 392 | if (tools.empty()) { |
| 393 | return json(); |
| 394 | } |
| 395 | |
| 396 | auto result = json::array(); |
| 397 | for (const auto & tool : tools) { |
| 398 | result.push_back(init: { |
| 399 | {"type" , "function" }, |
| 400 | {"function" , { |
| 401 | {"name" , tool.name}, |
| 402 | {"description" , tool.description}, |
| 403 | {"parameters" , json::parse(i: tool.parameters)}, |
| 404 | }}, |
| 405 | }); |
| 406 | } |
| 407 | return result; |
| 408 | } |
| 409 | |
| 410 | template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) { |
| 411 | json delta = json::object(); |
| 412 | if (!diff.reasoning_content_delta.empty()) { |
| 413 | delta["reasoning_content" ] = diff.reasoning_content_delta; |
| 414 | } |
| 415 | if (!diff.content_delta.empty()) { |
| 416 | delta["content" ] = diff.content_delta; |
| 417 | } |
| 418 | if (diff.tool_call_index != std::string::npos) { |
| 419 | json tool_call; |
| 420 | tool_call["index" ] = diff.tool_call_index; |
| 421 | if (!diff.tool_call_delta.id.empty()) { |
| 422 | tool_call["id" ] = diff.tool_call_delta.id; |
| 423 | tool_call["type" ] = "function" ; |
| 424 | } |
| 425 | json function = json::object(); |
| 426 | if (!diff.tool_call_delta.name.empty()) { |
| 427 | function["name" ] = diff.tool_call_delta.name; |
| 428 | } |
| 429 | function["arguments" ] = diff.tool_call_delta.arguments; |
| 430 | tool_call["function" ] = function; |
| 431 | delta["tool_calls" ] = json::array(init: {tool_call}); |
| 432 | } |
| 433 | return delta; |
| 434 | } |
| 435 | |
| 436 | bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { |
| 437 | if (use_jinja) { |
| 438 | try { |
| 439 | common_chat_msg msg; |
| 440 | msg.role = "user" ; |
| 441 | msg.content = "test" ; |
| 442 | |
| 443 | auto tmpls = common_chat_templates_init(/* model= */ nullptr, chat_template_override: tmpl); |
| 444 | |
| 445 | common_chat_templates_inputs inputs; |
| 446 | inputs.messages = {msg}; |
| 447 | |
| 448 | common_chat_templates_apply(tmpls: tmpls.get(), inputs); |
| 449 | return true; |
| 450 | } catch (const std::exception & e) { |
| 451 | LOG_ERR("%s: failed to apply template: %s\n" , __func__, e.what()); |
| 452 | return false; |
| 453 | } |
| 454 | } |
| 455 | llama_chat_message chat[] = {{.role: "user" , .content: "test" }}; |
| 456 | const int res = llama_chat_apply_template(tmpl: tmpl.c_str(), chat, n_msg: 1, add_ass: true, buf: nullptr, length: 0); |
| 457 | return res >= 0; |
| 458 | } |
| 459 | |
| 460 | std::string common_chat_format_single( |
| 461 | const struct common_chat_templates * tmpls, |
| 462 | const std::vector<common_chat_msg> & past_msg, |
| 463 | const common_chat_msg & new_msg, |
| 464 | bool add_ass, |
| 465 | bool use_jinja) { |
| 466 | |
| 467 | common_chat_templates_inputs inputs; |
| 468 | inputs.use_jinja = use_jinja; |
| 469 | inputs.add_bos = tmpls->add_bos; |
| 470 | inputs.add_eos = tmpls->add_eos; |
| 471 | |
| 472 | std::string fmt_past_msg; |
| 473 | if (!past_msg.empty()) { |
| 474 | inputs.messages = past_msg; |
| 475 | inputs.add_generation_prompt = false; |
| 476 | fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt; |
| 477 | } |
| 478 | std::ostringstream ss; |
| 479 | // if the past_msg ends with a newline, we must preserve it in the formatted version |
| 480 | if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') { |
| 481 | ss << "\n" ; |
| 482 | }; |
| 483 | // format chat with new_msg |
| 484 | inputs.messages.push_back(x: new_msg); |
| 485 | inputs.add_generation_prompt = add_ass; |
| 486 | auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt; |
| 487 | // get the diff part |
| 488 | ss << fmt_new_msg.substr(pos: fmt_past_msg.size(), n: fmt_new_msg.size() - fmt_past_msg.size()); |
| 489 | return ss.str(); |
| 490 | } |
| 491 | |
| 492 | std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) { |
| 493 | common_chat_templates_inputs inputs; |
| 494 | inputs.use_jinja = use_jinja; |
| 495 | inputs.add_bos = tmpls->add_bos; |
| 496 | inputs.add_eos = tmpls->add_eos; |
| 497 | inputs.chat_template_kwargs = chat_template_kwargs; |
| 498 | auto add_simple_msg = [&](auto role, auto content) { |
| 499 | common_chat_msg msg; |
| 500 | msg.role = role; |
| 501 | msg.content = content; |
| 502 | inputs.messages.push_back(x: msg); |
| 503 | }; |
| 504 | add_simple_msg("system" , "You are a helpful assistant" ); |
| 505 | add_simple_msg("user" , "Hello" ); |
| 506 | add_simple_msg("assistant" , "Hi there" ); |
| 507 | add_simple_msg("user" , "How are you?" ); |
| 508 | return common_chat_templates_apply(tmpls, inputs).prompt; |
| 509 | } |
| 510 | |
// Built-in ChatML fallback template, used when the model ships no template
// (or explicitly asks for "chatml") and when parsing the model's template fails.
#define CHATML_TEMPLATE_SRC \
    "{%- for message in messages -%}\n" \
    "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
    "{%- endfor -%}\n" \
    "{%- if add_generation_prompt -%}\n" \
    "  {{- '<|im_start|>assistant\n' -}}\n" \
    "{%- endif -%}"
| 518 | |
// Free a templates bundle created by common_chat_templates_init (null-safe,
// since delete on a null pointer is a no-op).
void common_chat_templates_free(struct common_chat_templates * tmpls) {
    delete tmpls;
}
| 522 | |
// Whether the templates came from the model or an explicit override,
// as opposed to the built-in chatml fallback.
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls) {
    return tmpls->has_explicit_template;
}
| 526 | |
| 527 | const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) { |
| 528 | if (variant != nullptr) { |
| 529 | if (strcmp(s1: variant, s2: "tool_use" ) == 0) { |
| 530 | if (tmpls->template_tool_use) { |
| 531 | return tmpls->template_tool_use->source().c_str(); |
| 532 | } |
| 533 | return nullptr; |
| 534 | } else { |
| 535 | LOG_DBG("%s: unknown template variant: %s\n" , __func__, variant); |
| 536 | } |
| 537 | } |
| 538 | return tmpls->template_default->source().c_str(); |
| 539 | } |
| 540 | |
// Build the templates bundle for a model and/or an explicit template override:
//  - picks the model's default and (optional) "tool_use" templates
//  - falls back to the built-in chatml template when none is available,
//    or when parsing the default template throws
//  - resolves BOS/EOS token strings and add_bos/add_eos flags from the vocab
//    (overridable via bos_token_override / eos_token_override)
common_chat_templates_ptr common_chat_templates_init(
    const struct llama_model * model,
    const std::string & chat_template_override,
    const std::string & bos_token_override,
    const std::string & eos_token_override)
{
    std::string default_template_src;
    std::string template_tool_use_src;

    bool has_explicit_template = !chat_template_override.empty();
    if (chat_template_override.empty()) {
        // no override: read template(s) embedded in the model's metadata
        GGML_ASSERT(model != nullptr);
        const auto * str = llama_model_chat_template(model, /* name */ nullptr);
        if (str) {
            default_template_src = str;
            has_explicit_template = true;
        }
        str = llama_model_chat_template(model, /* name */ "tool_use" );
        if (str) {
            template_tool_use_src = str;
            has_explicit_template = true;
        }
    } else {
        default_template_src = chat_template_override;
    }
    if (default_template_src.empty() || default_template_src == "chatml" ) {
        // prefer the tool_use template over the chatml fallback when it exists
        if (!template_tool_use_src.empty()) {
            default_template_src = template_tool_use_src;
        } else {
            default_template_src = CHATML_TEMPLATE_SRC;
        }
    }

    // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
    // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
    if (default_template_src.find(s: "<|channel|>" ) != std::string::npos
        // search for the error message and patch it
        && default_template_src.find(s: "in message.content or" ) != std::string::npos) {
        string_replace_all(s&: default_template_src,
            search: "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}" ,
            replace: "{%- if false %}" );
    }

    std::string token_bos = bos_token_override;
    std::string token_eos = eos_token_override;
    bool add_bos = false;
    bool add_eos = false;
    if (model) {
        const auto * vocab = llama_model_get_vocab(model);
        // resolve a special token to its text; warn if the template references a
        // token (via its jinja variable) that the vocab does not define
        const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
            if (token == LLAMA_TOKEN_NULL) {
                if (default_template_src.find(s: jinja_variable_name) != std::string::npos
                    || template_tool_use_src.find(s: jinja_variable_name) != std::string::npos) {
                    LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n" , name);
                }
                return std::string();
            }
            return common_token_to_piece(vocab, token, special: true);
        };
        token_bos = get_token(llama_vocab_bos(vocab), "BOS" , "bos_token" );
        token_eos = get_token(llama_vocab_eos(vocab), "EOS" , "eos_token" );
        add_bos = llama_vocab_get_add_bos(vocab);
        add_eos = llama_vocab_get_add_eos(vocab);
    }
    common_chat_templates_ptr tmpls(new common_chat_templates());
    tmpls->has_explicit_template = has_explicit_template;
    tmpls->add_bos = add_bos;
    tmpls->add_eos = add_eos;
    try {
        tmpls->template_default = std::make_unique<minja::chat_template>(args&: default_template_src, args&: token_bos, args&: token_eos);
    } catch (const std::exception & e) {
        // a broken template must not be fatal: fall back to chatml
        LOG_ERR("%s: failed to parse chat template (defaulting to chatml): %s \n" , __func__, e.what());
        tmpls->template_default = std::make_unique<minja::chat_template>(CHATML_TEMPLATE_SRC, args&: token_bos, args&: token_eos);
    }
    if (!template_tool_use_src.empty()) {
        try {
            tmpls->template_tool_use = std::make_unique<minja::chat_template>(args&: template_tool_use_src, args&: token_bos, args&: token_eos);
        } catch (const std::exception & e) {
            // the tool_use variant is optional, so a parse failure is only logged
            LOG_ERR("%s: failed to parse tool use chat template (ignoring it): %s\n" , __func__, e.what());
        }
    }
    return tmpls;
}
| 624 | |
// Human-readable name of a chat format, for logging/diagnostics.
// Throws on an unknown enum value so new formats can't be silently unnamed.
const char * common_chat_format_name(common_chat_format format) {
    switch (format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only" ;
        case COMMON_CHAT_FORMAT_GENERIC: return "Generic" ;
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo" ;
        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral" ;
        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x" ;
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools" ;
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1" ;
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2" ;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2" ;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1" ;
        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1" ;
        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro" ;
        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B" ;
        case COMMON_CHAT_FORMAT_GRANITE: return "Granite" ;
        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS" ;
        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS" ;
        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2" ;
        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus" ;
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools" ;
        default:
            throw std::runtime_error("Unknown chat format" );
    }
}
| 650 | |
// String name of a reasoning format; the inverse of common_reasoning_format_from_name.
// Throws on an unknown enum value.
const char * common_reasoning_format_name(common_reasoning_format format) {
    switch (format) {
        case COMMON_REASONING_FORMAT_NONE: return "none" ;
        case COMMON_REASONING_FORMAT_AUTO: return "auto" ;
        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek" ;
        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy" ;
        default:
            throw std::runtime_error("Unknown reasoning format" );
    }
}
| 661 | |
| 662 | common_reasoning_format common_reasoning_format_from_name(const std::string & format) { |
| 663 | if (format == "none" ) { |
| 664 | return COMMON_REASONING_FORMAT_NONE; |
| 665 | } else if (format == "auto" ) { |
| 666 | return COMMON_REASONING_FORMAT_AUTO; |
| 667 | } else if (format == "deepseek" ) { |
| 668 | return COMMON_REASONING_FORMAT_DEEPSEEK; |
| 669 | } else if (format == "deepseek-legacy" ) { |
| 670 | return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; |
| 671 | } |
| 672 | throw std::runtime_error("Unknown reasoning format: " + format); |
| 673 | } |
| 674 | |
| 675 | static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) { |
| 676 | std::string arguments; |
| 677 | if (builder.is_partial()) { |
| 678 | arguments = (json {{"code" , code + builder.healing_marker()}}).dump(); |
| 679 | auto idx = arguments.find(str: builder.healing_marker()); |
| 680 | if (idx != std::string::npos) { |
| 681 | arguments.resize(n: idx); |
| 682 | } |
| 683 | } else { |
| 684 | arguments = (json {{"code" , code}}).dump(); |
| 685 | } |
| 686 | return arguments; |
| 687 | } |
| 688 | |
| 689 | /** |
| 690 | * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. |
| 691 | * Aggregates the prefix, suffix and in-between text into the content. |
| 692 | */ |
| 693 | static void parse_json_tool_calls( |
| 694 | common_chat_msg_parser & builder, |
| 695 | const std::optional<common_regex> & block_open, |
| 696 | const std::optional<common_regex> & function_regex_start_only, |
| 697 | const std::optional<common_regex> & function_regex, |
| 698 | const common_regex & close_regex, |
| 699 | const std::optional<common_regex> & block_close, |
| 700 | bool allow_raw_python = false, |
| 701 | const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) { |
| 702 | |
| 703 | auto parse_tool_calls = [&]() { |
| 704 | size_t from = std::string::npos; |
| 705 | auto first = true; |
| 706 | while (true) { |
| 707 | auto start_pos = builder.pos(); |
| 708 | auto res = function_regex_start_only && first |
| 709 | ? builder.try_consume_regex(regex: *function_regex_start_only) |
| 710 | : function_regex |
| 711 | ? builder.try_find_regex(regex: *function_regex, from) |
| 712 | : std::nullopt; |
| 713 | |
| 714 | if (res) { |
| 715 | std::string name; |
| 716 | if (get_function_name) { |
| 717 | name = get_function_name(*res); |
| 718 | } else { |
| 719 | GGML_ASSERT(res->groups.size() == 2); |
| 720 | name = builder.str(rng: res->groups[1]); |
| 721 | } |
| 722 | first = false; |
| 723 | if (name.empty()) { |
| 724 | // get_function_name signalled us that we should skip this match and treat it as content. |
| 725 | from = res->groups[0].begin + 1; |
| 726 | continue; |
| 727 | } |
| 728 | from = std::string::npos; |
| 729 | |
| 730 | auto maybe_raw_python = name == "python" && allow_raw_python; |
| 731 | if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) { |
| 732 | if (auto arguments = builder.try_consume_json_with_dumped_args(args_paths: {{}})) { |
| 733 | if (!builder.add_tool_call(name, id: "" , arguments: arguments->value) || arguments->is_partial) { |
| 734 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 735 | } |
| 736 | builder.consume_regex(regex: close_regex); |
| 737 | } |
| 738 | continue; |
| 739 | } |
| 740 | if (maybe_raw_python) { |
| 741 | auto arguments = wrap_code_as_arguments(builder, code: builder.consume_rest()); |
| 742 | if (!builder.add_tool_call(name, id: "" , arguments)) { |
| 743 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 744 | } |
| 745 | return; |
| 746 | } |
| 747 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 748 | } else { |
| 749 | builder.move_to(pos: start_pos); |
| 750 | } |
| 751 | break; |
| 752 | } |
| 753 | if (block_close) { |
| 754 | builder.consume_regex(regex: *block_close); |
| 755 | } |
| 756 | builder.consume_spaces(); |
| 757 | builder.add_content(content: builder.consume_rest()); |
| 758 | }; |
| 759 | if (block_open) { |
| 760 | if (auto res = builder.try_find_regex(regex: *block_open)) { |
| 761 | parse_tool_calls(); |
| 762 | } else { |
| 763 | builder.add_content(content: builder.consume_rest()); |
| 764 | } |
| 765 | } else { |
| 766 | parse_tool_calls(); |
| 767 | } |
| 768 | } |
| 769 | |
| 770 | static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder, const common_regex & prefix, size_t rstrip_prefix = 0) { |
| 771 | static const std::vector<std::vector<std::string>> args_paths = {{"arguments" }}; |
| 772 | if (auto res = builder.try_find_regex(regex: prefix)) { |
| 773 | builder.move_back(n: rstrip_prefix); |
| 774 | auto tool_calls = builder.consume_json_with_dumped_args(args_paths); |
| 775 | if (!builder.add_tool_calls(arr: tool_calls.value) || tool_calls.is_partial) { |
| 776 | throw common_chat_msg_partial_exception("incomplete tool call array" ); |
| 777 | } |
| 778 | } else { |
| 779 | builder.add_content(content: builder.consume_rest()); |
| 780 | } |
| 781 | } |
| 782 | |
| 783 | static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) { |
| 784 | for (const auto & tool : tools) { |
| 785 | if (!tool.contains(key: "type" ) || tool.at(key: "type" ) != "function" || !tool.contains(key: "function" )) { |
| 786 | LOG_INF("Skipping tool without function: %s" , tool.dump(2).c_str()); |
| 787 | continue; |
| 788 | } |
| 789 | fn(tool); |
| 790 | } |
| 791 | } |
| 792 | |
| 793 | static std::string apply( |
| 794 | const common_chat_template & tmpl, |
| 795 | const struct templates_params & inputs, |
| 796 | const std::optional<json> & messages_override = std::nullopt, |
| 797 | const std::optional<json> & tools_override = std::nullopt, |
| 798 | const std::optional<json> & additional_context = std::nullopt) |
| 799 | { |
| 800 | minja::chat_template_inputs tmpl_inputs; |
| 801 | tmpl_inputs.messages = messages_override ? *messages_override : inputs.messages; |
| 802 | if (tools_override) { |
| 803 | tmpl_inputs.tools = *tools_override; |
| 804 | } else { |
| 805 | tmpl_inputs.tools = inputs.tools.empty() ? json() : inputs.tools; |
| 806 | } |
| 807 | tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt; |
| 808 | tmpl_inputs.extra_context = inputs.extra_context; |
| 809 | tmpl_inputs.extra_context["enable_thinking" ] = inputs.enable_thinking; |
| 810 | if (additional_context) { |
| 811 | tmpl_inputs.extra_context.merge_patch(apply_patch: *additional_context); |
| 812 | } |
| 813 | // TODO: add flag to control date/time, if only for testing purposes. |
| 814 | // tmpl_inputs.now = std::chrono::system_clock::now(); |
| 815 | |
| 816 | minja::chat_template_options tmpl_opts; |
| 817 | // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens |
| 818 | // instead of using `chat_template_options.use_bos_token = false`, since these tokens |
| 819 | // may be needed inside the template / between messages too. |
| 820 | auto result = tmpl.apply(inputs: tmpl_inputs, opts: tmpl_opts); |
| 821 | if (inputs.add_bos && string_starts_with(str: result, prefix: tmpl.bos_token())) { |
| 822 | result = result.substr(pos: tmpl.bos_token().size()); |
| 823 | } |
| 824 | if (inputs.add_eos && string_ends_with(str: result, suffix: tmpl.eos_token())) { |
| 825 | result = result.substr(pos: 0, n: result.size() - tmpl.eos_token().size()); |
| 826 | } |
| 827 | return result; |
| 828 | } |
| 829 | |
// Generic tool-call support for templates without a native tool-call syntax:
// constrains the model (via an always-on grammar) to emit one JSON object
// holding either a tool call (`tool_call` / `tool_calls`, depending on
// parallel_tool_calls) or a free-form `response`.
static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // One schema per declared tool: { "name": <const>, "arguments": <tool params> }.
    auto tool_call_schemas = json::array();
    foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
        const auto & function = tool.at(key: "function");
        auto tool_schema = json {
            {"type", "object"},
            {"properties", {
                {"name", {
                    {"type", "string"},
                    {"const", function.at(key: "name")},
                }},
                {"arguments", function.at(key: "parameters")},
            }},
            {"required", json::array(init: {"name", "arguments"})},
        };
        if (function.contains(key: "description")) {
            tool_schema["description"] = function.at(key: "description");
        }
        if (inputs.parallel_tool_calls) {
            // With parallel calls each call carries an id (>= 4 chars) so tool
            // results can be correlated with the call that produced them.
            tool_schema.at(key: "properties")["id"] = {
                {"type", "string"},
                {"minLength", 4},
            };
            tool_schema.at(key: "required").push_back(val: "id");
        }
        tool_call_schemas.emplace_back(args&: tool_schema);
    });
    // Parallel mode wraps calls in a `tool_calls` array (minItems 1); otherwise
    // a single `tool_call` object is expected.
    const auto tool_call =
        inputs.parallel_tool_calls
        ? json {
            {"type", "object"},
            {"properties", {
                {"tool_calls", {
                    {"type", "array"},
                    {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
                        {"anyOf", tool_call_schemas},
                    }},
                    {"minItems", 1},
                }},
            }},
            {"required", json::array(init: {"tool_calls"})},
        }
        : json {
            {"type", "object"},
            {"properties", {
                {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
                    {"anyOf", tool_call_schemas},
                }},
            }},
            {"required", json::array(init: {"tool_call"})},
        };
    // Unless tool choice is "required", additionally allow a `response` object
    // (constrained by the caller's json_schema when one was provided).
    const auto schema =
        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
        ? json {
            {"anyOf", json::array(init: {
                tool_call,
                {
                    {"type", "object"},
                    {"properties", {
                        {"response", inputs.json_schema.is_null()
                            ? json {{"type", "string"}}
                            : inputs.json_schema
                        },
                    }},
                    {"required", json::array(init: {"response"})},
                },
            })}
        }
        : tool_call;

    // Non-lazy: the entire output must conform to the JSON envelope.
    data.grammar_lazy = false;
    data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
        builder.add_schema("root", schema);
    });

    // Tell the model about the expected JSON envelope through the system prompt.
    auto tweaked_messages = common_chat_template::add_system(
        messages: inputs.messages,
        system_prompt: "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");

    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
    data.format = COMMON_CHAT_FORMAT_GENERIC;
    return data;
}
| 915 | static void common_chat_parse_generic(common_chat_msg_parser & builder) { |
| 916 | if (!builder.syntax().parse_tool_calls) { |
| 917 | builder.add_content(content: builder.consume_rest()); |
| 918 | return; |
| 919 | } |
| 920 | static const std::vector<std::vector<std::string>> content_paths = { |
| 921 | {"response" }, |
| 922 | }; |
| 923 | static const std::vector<std::vector<std::string>> args_paths = { |
| 924 | {"tool_call" , "arguments" }, |
| 925 | {"tool_calls" , "arguments" }, |
| 926 | }; |
| 927 | auto data = builder.consume_json_with_dumped_args(args_paths, content_paths); |
| 928 | if (data.value.contains(key: "tool_calls" )) { |
| 929 | if (!builder.add_tool_calls(arr: data.value.at(key: "tool_calls" )) || data.is_partial) { |
| 930 | throw common_chat_msg_partial_exception("incomplete tool calls" ); |
| 931 | } |
| 932 | } else if (data.value.contains(key: "tool_call" )) { |
| 933 | if (!builder.add_tool_call(tool_call: data.value.at(key: "tool_call" )) || data.is_partial) { |
| 934 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 935 | } |
| 936 | } else if (data.value.contains(key: "response" )) { |
| 937 | const auto & response = data.value.at(key: "response" ); |
| 938 | builder.add_content(content: response.is_string() ? response.template get<std::string>() : response.dump(indent: 2)); |
| 939 | if (data.is_partial) { |
| 940 | throw common_chat_msg_partial_exception("incomplete response" ); |
| 941 | } |
| 942 | } else { |
| 943 | throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON" ); |
| 944 | } |
| 945 | } |
| 946 | |
// Mistral Nemo: tool calls are emitted as the `[TOOL_CALLS]` token followed by
// a JSON array of {name, arguments, id} objects. The grammar is lazy
// (triggered by that token) unless a tool call is required.
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
        auto schemas = json::array();
        foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
            const auto & function = tool.at(key: "function");
            schemas.push_back(init: {
                {"type", "object"},
                {"properties", {
                    // Important note: the model is probably trained to take a JSON stringified arguments value.
                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
                    {"name", {
                        {"type", "string"},
                        {"const", function.at(key: "name")},
                    }},
                    {"arguments", function.at(key: "parameters")},
                    {"id", {
                        {"type", "string"},
                        // Nemo's template expects a 9-character alphanumeric ID.
                        {"pattern", "^[a-zA-Z0-9]{9}$"},
                    }},
                }},
                {"required", json::array(init: {"name", "arguments", "id"})},
            });
        });
        auto schema = json {
            {"type", "array"},
            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
            {"minItems", 1},
        };
        if (!inputs.parallel_tool_calls) {
            // Only a single call allowed when parallel tool calls are disabled.
            schema["maxItems"] = 1;
        }
        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
    });
    data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "[TOOL_CALLS]"});
    data.preserved_tokens = {
        "[TOOL_CALLS]",
    };
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
    return data;
}
| 991 | |
| 992 | |
// Case-insensitive find: returns the position of the first occurrence of
// `needle` in `haystack` at or after `pos`, or std::string::npos if absent.
// Comparison is per-byte via std::tolower (ASCII-oriented, current C locale).
static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
    if (pos > haystack.size()) {
        // Guard: haystack.begin() + pos past end() would be undefined behavior.
        return std::string::npos;
    }
    auto it = std::search(
        haystack.begin() + pos, haystack.end(),
        needle.begin(), needle.end(),
        // Take the chars as unsigned char: std::tolower on a negative plain
        // char (non-ASCII byte) is undefined behavior.
        [](unsigned char a, unsigned char b) { return std::tolower(a) == std::tolower(b); }
    );
    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
}
| 1002 | |
// LFM2: the model has no native JSON tool-call syntax but understands the
// tool-list structure. A JSON grammar is only enforced when the client opts in
// by putting a "force json schema." marker in the system prompt.
// NOTE(review): the prose comment below mentions a "Follow json schema." line
// while the marker actually searched for is "force json schema." — confirm
// which wording is intended.
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    const auto is_json_schema_provided = !inputs.json_schema.is_null();
    const auto is_grammar_provided = !inputs.grammar.empty();
    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();

    // the logic requires potentially modifying the messages
    auto tweaked_messages = inputs.messages;

    // Case-insensitively removes the "force json schema." marker from the
    // first (system) message; returns true iff a marker was found and stripped.
    auto replace_json_schema_marker = [](json & messages) -> bool {
        // The newline-suffixed variant is tried first so the trailing newline
        // is removed together with the marker.
        static std::string marker1 = "force json schema.\n";
        static std::string marker2 = "force json schema.";

        if (messages.empty() || messages.at(idx: 0).at(key: "role") != "system") {
            return false;
        }

        std::string content = messages.at(idx: 0).at(key: "content");

        for (const auto & marker : {marker1, marker2}) {
            const auto pos = ifind_string(haystack: content, needle: marker);
            if (pos != std::string::npos) {
                content.replace(pos: pos, n1: marker.length(), s: "");
                // inject modified content back into the messages
                messages.at(idx: 0).at(key: "content") = content;
                return true;
            }
        }

        return false;
    };

    // Lfm2 model does not natively work with json, but can generally understand the tools structure
    //
    // Example of the pytorch dialog structure:
    // <|startoftext|><|im_start|>system
    // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
    // <|im_start|>user
    // What is the current status of candidate ID 12345?<|im_end|>
    // <|im_start|>assistant
    // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
    // <|im_start|>tool
    // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
    // <|im_start|>assistant
    // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
    //
    // For the llama server compatibility with json tools semantic,
    // the client can add "Follow json schema." line into the system message prompt to force the json output.
    //
    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
        // server/utils.hpp prohibits that branch for the custom grammar anyways
        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
        LOG_INF("%s: Using tools to build a grammar\n", __func__);

        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function");
                schemas.push_back(init: {
                    {"type", "object"},
                    {"properties", {
                        {"name", {
                            {"type", "string"},
                            {"const", function.at(key: "name")},
                        }},
                        {"arguments", function.at(key: "parameters")},
                    }},
                    // NOTE(review): "id" is listed as required but no "id"
                    // property is declared above (unlike the Nemo / Magistral
                    // schemas) — confirm this is intentional.
                    {"required", json::array(init: {"name", "arguments", "id"})},
                });
            });
            auto schema = json {
                {"type", "array"},
                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
                {"minItems", 1},
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }

            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
        });
        // model has no concept of tool selection mode choice,
        // if the system prompt rendered correctly it will produce a tool call
        // the grammar goes inside the tool call body
        data.grammar_lazy = true;
        data.grammar_triggers = {{.type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, .value: "\\s*<\\|tool_call_start\\|>\\s*\\["}};
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
        // output those tokens
        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
    } else if (is_json_schema_provided) {
        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
        data.grammar = json_schema_to_grammar(schema: inputs.json_schema);
    } else if (is_grammar_provided) {
        LOG_INF("%s: Using provided grammar\n", __func__);
        data.grammar = inputs.grammar;
    } else {
        LOG_INF("%s: Using content relying on the template\n", __func__);
    }

    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());

    return data;
}
| 1111 | |
// Magistral: reasoning is wrapped in [THINK]...[/THINK]; tool calls use the
// Mistral-style `[TOOL_CALLS]` prefix followed by a JSON array of
// {name, arguments, id} objects (9-char alphanumeric id).
static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
    // Keep the reasoning delimiters as single tokens so the parser can see them.
    data.preserved_tokens = {
        "[THINK]",
        "[/THINK]",
    };

    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        // Lazy grammar: only activates on [TOOL_CALLS], unless a tool call is
        // required (then the grammar applies unconditionally).
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            auto schemas = json::array();
            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function");
                schemas.push_back(init: {
                    {"type", "object"},
                    {"properties", {
                        {"name", {
                            {"type", "string"},
                            {"const", function.at(key: "name")},
                        }},
                        {"arguments", function.at(key: "parameters")},
                        {"id", {
                            {"type", "string"},
                            {"pattern", "^[a-zA-Z0-9]{9}$"},
                        }},
                    }},
                    {"required", json::array(init: {"name", "arguments", "id"})},
                });
            });
            auto schema = json {
                {"type", "array"},
                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
                {"minItems", 1},
            };
            if (!inputs.parallel_tool_calls) {
                schema["maxItems"] = 1;
            }
            builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
        });
        data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "[TOOL_CALLS]"});
        data.preserved_tokens.push_back(x: "[TOOL_CALLS]");
    } else {
        // No tools: fall back to the caller's json_schema or raw grammar, if any.
        data.grammar_lazy = false;
        if (!inputs.json_schema.is_null()) {
            if (!inputs.grammar.empty()) {
                throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
            }
            data.grammar = json_schema_to_grammar(schema: inputs.json_schema);
        } else {
            data.grammar = inputs.grammar;
        }
    }

    return data;
}
| 1169 | |
| 1170 | static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) { |
| 1171 | if (!builder.syntax().parse_tool_calls) { |
| 1172 | builder.add_content(content: builder.consume_rest()); |
| 1173 | return; |
| 1174 | } |
| 1175 | |
| 1176 | static const common_regex prefix(regex_escape(s: "[TOOL_CALLS]" )); |
| 1177 | parse_prefixed_json_tool_call_array(builder, prefix); |
| 1178 | } |
| 1179 | |
| 1180 | static void common_chat_parse_magistral(common_chat_msg_parser & builder) { |
| 1181 | builder.try_parse_reasoning(start_think: "[THINK]" , end_think: "[/THINK]" ); |
| 1182 | |
| 1183 | if (!builder.syntax().parse_tool_calls) { |
| 1184 | builder.add_content(content: builder.consume_rest()); |
| 1185 | return; |
| 1186 | } |
| 1187 | |
| 1188 | static const common_regex prefix(regex_escape(s: "[TOOL_CALLS]" )); |
| 1189 | parse_prefixed_json_tool_call_array(builder, prefix); |
| 1190 | } |
| 1191 | |
// Command R7B: assistant reasoning attached to tool calls is passed to the
// template as `tool_plan`; tool calls are emitted between <|START_ACTION|>
// and <|END_ACTION|> as a JSON array of {tool_call_id, tool_name, parameters}.
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;

    // The template expects reasoning on tool-calling messages under the
    // `tool_plan` key, so rename `reasoning_content` on messages that have both.
    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
        auto has_reasoning_content = msg.contains(key: "reasoning_content") && msg.at(key: "reasoning_content").is_string();
        auto has_tool_calls = msg.contains(key: "tool_calls") && msg.at(key: "tool_calls").is_array();
        if (has_reasoning_content && has_tool_calls) {
            auto adjusted_message = msg;
            adjusted_message["tool_plan"] = msg.at(key: "reasoning_content");
            adjusted_message.erase(key: "reasoning_content");
            adjusted_messages.push_back(val: adjusted_message);
        } else {
            adjusted_messages.push_back(val: msg);
        }
    }
    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
    // If the rendered prompt ends with an open thinking tag, either close it
    // right away (thinking disabled) or remember that it was left open so the
    // grammar/trigger below can account for it.
    if (string_ends_with(str: data.prompt, suffix: "<|START_THINKING|>")) {
        if (!inputs.enable_thinking) {
            data.prompt += "<|END_THINKING|>";
        } else {
            data.thinking_forced_open = true;
        }
    } else if (!inputs.enable_thinking && string_ends_with(str: data.prompt, suffix: "<|CHATBOT_TOKEN|>")) {
        data.prompt += "<|START_THINKING|><|END_THINKING|>";
    }

    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
        auto schemas = json::array();
        foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
            const auto & function = tool.at(key: "function");
            schemas.push_back(init: {
                {"type", "object"},
                {"properties", {
                    {"tool_call_id", {
                        {"type", "string"},
                        // Command-R's template expects an integer string.
                        {"pattern", "^[0-9]{1,10}$"},
                    }},
                    {"tool_name", {
                        {"type", "string"},
                        {"const", function.at(key: "name")},
                    }},
                    {"parameters", function.at(key: "parameters")},
                }},
                {"required", json::array(init: {"tool_call_id", "tool_name", "parameters"})},
            });
        });
        auto schema = json {
            {"type", "array"},
            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
            {"minItems", 1},
        };
        if (!inputs.parallel_tool_calls) {
            schema["maxItems"] = 1;
        }
        // If thinking was left open in the prompt, the grammar optionally
        // consumes the closing tag before the action block.
        builder.add_rule("root",
            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
    });
    data.grammar_triggers.push_back(x: {
        .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
        // If thinking_forced_open, then we capture the </think> tag in the grammar,
        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
        .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
            "(<\\|START_ACTION\\|>)[\\s\\S]*"
    });
    // Keep the structural markers as single tokens so the parser can find them.
    data.preserved_tokens = {
        "<|START_ACTION|>",
        "<|END_ACTION|>",
        "<|START_RESPONSE|>",
        "<|END_RESPONSE|>",
        "<|START_THINKING|>",
        "<|END_THINKING|>",
    };
    return data;
}
| 1271 | |
// Parses Command R7B output: optional <|START_THINKING|> reasoning, followed
// by either a <|START_ACTION|> JSON tool-call array or a <|START_RESPONSE|>
// content block (or bare content).
static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning(start_think: "<|START_THINKING|>", end_think: "<|END_THINKING|>");

    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");

    if (auto res = builder.try_find_regex(regex: start_action_regex)) {
        // If we didn't extract thoughts, prelude includes them.
        auto tool_calls = builder.consume_json_with_dumped_args(args_paths: {{"parameters"}});
        for (const auto & tool_call : tool_calls.value) {
            // Missing fields default to "" (can happen on partial generations).
            std::string name = tool_call.contains(key: "tool_name") ? tool_call.at(key: "tool_name") : "";
            std::string id = tool_call.contains(key: "tool_call_id") ? tool_call.at(key: "tool_call_id") : "";
            std::string arguments = tool_call.contains(key: "parameters") ? tool_call.at(key: "parameters") : "";
            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
        }
        // Also covers a partial array with no complete element yet (loop above
        // did not run).
        if (tool_calls.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
        builder.consume_regex(regex: end_action_regex);
    } else if (auto res = builder.try_find_regex(regex: start_response_regex)) {
        if (!builder.try_find_regex(regex: end_response_regex)) {
            // Response block not closed yet: surface what we have as content
            // and signal a partial parse.
            builder.add_content(content: builder.consume_rest());
            throw common_chat_msg_partial_exception(end_response_regex.str());
        }
    } else {
        builder.add_content(content: builder.consume_rest());
    }
}
| 1304 | |
| 1305 | static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) { |
| 1306 | if (!parameters.is_object() || !parameters.contains(key: "type" ) || parameters.at(key: "type" ) != "object" || !parameters.contains(key: "properties" ) || !parameters.contains(key: "required" )) { |
| 1307 | throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties" ); |
| 1308 | } |
| 1309 | const auto & parameters_properties = parameters.at(key: "properties" ); |
| 1310 | const auto & parameters_required = parameters.at(key: "required" ); |
| 1311 | for (const auto & prop : expected_properties) { |
| 1312 | if (!parameters_properties.contains(key: prop)) { |
| 1313 | throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT |
| 1314 | } |
| 1315 | if (std::find(first: parameters_required.begin(), last: parameters_required.end(), val: json(prop)) == parameters_required.end()) { |
| 1316 | throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT |
| 1317 | } |
| 1318 | } |
| 1319 | if (parameters_properties.size() != expected_properties.size()) { |
| 1320 | throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(values: expected_properties, separator: ", " )); |
| 1321 | } |
| 1322 | } |
| 1323 | |
// Llama 3.x: tool calls are JSON objects {"name": ..., "parameters": ...};
// when `allow_python_tag_builtin_tools` is set, Llama's builtin tools
// (search / python / ...) may instead use the <|python_tag|>name.call(...) form.
static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
    auto builtin_tools = json::array();
    common_chat_params data;
    if (!inputs.tools.is_null()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;

            // Returns true (after adding a grammar rule and recording the tool
            // in builtin_tools) when `name` is a recognized builtin tool with
            // the expected parameter shape; false means treat it as a regular tool.
            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
                    expect_tool_parameters(name, parameters, expected_properties: {"query"});
                } else if (name == "python" || name == "code_interpreter") {
                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
                    expect_tool_parameters(name, parameters, expected_properties: {"code"});
                } else {
                    return false;
                }

                // Builtin call shape: <|python_tag|>name.call(key=value, ...)
                std::vector<std::string> kvs;
                for (const auto & [key, value] : parameters.at(key: "properties").items()) {
                    kvs.push_back(x: "\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
                }

                tool_rules.push_back(
                    x: builder.add_rule(
                        name + "-call",
                        "\"<|python_tag|>" + name + ".call(\" " + string_join(values: kvs, separator: " \", \" ") + " \")\""));
                builtin_tools.push_back(val: name);

                return true;
            };

            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function");
                std::string name = function.at(key: "name");
                auto parameters = function.at(key: "parameters");
                builder.resolve_refs(parameters);

                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
                if (allow_python_tag_builtin_tools) {
                    handle_builtin_tool(name, parameters);
                }
                // Every tool (builtin or not) also gets the JSON-object call rule.
                tool_rules.push_back(
                    x: builder.add_rule(
                        name + "-call",
                        "\"{\" space "
                        "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
                        " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
                        " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
                        "\"}\" space"));
            });
            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
            data.grammar_triggers.push_back(x: {
                .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                .value: "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
            });
            if (!builtin_tools.empty()) {
                data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<|python_tag|>"});
                data.preserved_tokens.push_back(x: "<|python_tag|>");
            }
            // Allow a few empty lines on top of the usual constrained json schema space rule.
            builder.add_rule("root", string_join(values: tool_rules, separator: " | "));
            data.additional_stops.push_back(x: "<|eom_id|>");
        });
        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
            : COMMON_CHAT_FORMAT_LLAMA_3_X;
    } else {
        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    }
    // Extra context variables consumed by the Llama 3.x template.
    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, additional_context: json {
        {"date_string", format_time(now: inputs.now, format: "%d %b %Y")},
        {"tools_in_user_message", false},
        {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
    });
    return data;
}
| 1403 | |
static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // Builds the prompt and (lazy) tool-call grammar for the Nemotron V2 chat
    // format. Tool calls are emitted as a JSON array wrapped in
    // <TOOLCALL>...</TOOLCALL> tags (like CommandR, but without a tool call ID).
    common_chat_params data;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(str: data.prompt, suffix: "<think>\n" )) {
        if (!inputs.enable_thinking) {
            // Thinking disabled: close the tag immediately so the model skips reasoning.
            data.prompt += "</think>" ;
        } else {
            // Prompt ends inside an open <think> block; the parser must expect
            // reasoning content first.
            data.thinking_forced_open = true;
        }
    }

    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = true; // grammar only activates once a trigger pattern matches
        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            // One JSON schema per tool: { "name": <const NAME>, "arguments": <parameters schema> }
            auto schemas = json::array();
            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function" );
                schemas.push_back(init: {
                    { "type" , "object" },
                    { "properties" ,
                      {
                          { "name" ,
                            {
                                { "type" , "string" },
                                { "const" , function.at(key: "name" ) },
                            } },
                          { "arguments" , function.at(key: "parameters" ) },
                      } },
                    { "required" , json::array(init: { "name" , "arguments" }) },
                });
            });
            // The model emits an array of one or more tool calls.
            auto schema = json{
                { "type" , "array" },
                { "items" , schemas.size() == 1 ? schemas[0] : json{ { "anyOf" , schemas } } },
                { "minItems" , 1 },
            };
            if (!inputs.parallel_tool_calls) {
                // Parallel calls disabled: at most one element in the array.
                schema["maxItems" ] = 1;
            }
            builder.add_rule("root" ,
                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "" ) +
                "\"<TOOLCALL>\" " + builder.add_schema("tool_calls" , schema) +
                " \"</TOOLCALL>\"" );
        });
        data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
            // If thinking_forced_open, then we capture the </think> tag in the grammar,
            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
            .value: std::string(data.thinking_forced_open ?
                "[\\s\\S]*?(</think>\\s*)" :
                "(?:<think>[\\s\\S]*?</think>\\s*)?" ) +
                "(<TOOLCALL>)[\\s\\S]*" });
    }
    return data;
}
| 1464 | |
static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // Builds the prompt and (lazy) tool-call grammar for the Apertus chat
    // format: reasoning goes between <|inner_prefix|>/<|inner_suffix|>, tool
    // calls are a JSON array wrapped in <|tools_prefix|>/<|tools_suffix|>.
    common_chat_params data;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_APERTUS;

    // Handle thinking tags appropriately based on inputs.enable_thinking
    if (string_ends_with(str: data.prompt, suffix: "<|inner_prefix|>" )) {
        if (!inputs.enable_thinking) {
            // Thinking disabled: close the reasoning block immediately.
            data.prompt += "<|inner_suffix|>" ;
        } else {
            // Prompt ends inside an open reasoning block; the parser must
            // expect reasoning content first.
            data.thinking_forced_open = true;
        }
    }

    // When tools are present, build grammar for the <|tools_prefix|> format
    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
        data.grammar_lazy = true; // grammar only activates once a trigger pattern matches
        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            // One schema per tool: an object with a single property NAME -> parameters schema.
            auto schemas = json::array();
            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function" );
                schemas.push_back(init: {
                    { "type" , "object" },
                    { "properties" ,
                      {
                          { function.at(key: "name" ), function.at(key: "parameters" ) }
                      } },
                    { "required" , json::array(init: { function.at(key: "name" ) }) },
                });
            });
            // The model emits an array of one or more such calls.
            auto schema = json{
                { "type" , "array" },
                { "items" , schemas.size() == 1 ? schemas[0] : json{ { "anyOf" , schemas } } },
                { "minItems" , 1 },
            };
            if (!inputs.parallel_tool_calls) {
                // Parallel calls disabled: at most one element in the array.
                schema["maxItems" ] = 1;
            }
            builder.add_rule("root" ,
                std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "" ) +
                "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls" , schema) + "\"<|tools_suffix|>\"" );
        });
        data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
            // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
            .value: std::string(data.thinking_forced_open ?
                "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
                "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?" ) +
                "(<\\|tools_prefix\\|>)[\\s\\S]*" });
        // Keep the chat-format special tokens intact through tokenization.
        data.preserved_tokens = {
            "<|system_start|>" ,
            "<|system_end|>" ,
            "<|developer_start|>" ,
            "<|developer_end|>" ,
            "<|user_start|>" ,
            "<|user_end|>" ,
            "<|assistant_start|>" ,
            "<|assistant_end|>" ,
            "<|inner_prefix|>" ,
            "<|inner_suffix|>" ,
            "<|tools_prefix|>" ,
            "<|tools_suffix|>" ,
        };
    }
    return data;
}
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
    // Parses Llama 3.x output: optional <think>...</think> reasoning, then either
    // JSON-style tool calls ({"name": ..., "parameters": ...}) or, when
    // with_builtin_tools is set, <|python_tag|>NAME.call(arg=value, ...) builtin calls.
    builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" );

    if (!builder.syntax().parse_tool_calls) {
        // Tool-call parsing disabled: everything that remains is plain content.
        builder.add_content(content: builder.consume_rest());
        return;
    }

    // JSON function call header: optional {"type":"function", ...} prefix,
    // then the function name, then the "parameters" key.
    static const common_regex function_regex(
        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: " );
    static const common_regex close_regex("\\}\\s*" );

    // Builtin (python-tag) call syntax: NAME.call(arg=value, ...)
    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(" );
    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*" );

    if (with_builtin_tools) {
        static const common_regex builtin_call_regex("<\\|python_tag\\|>" );
        if (auto res = builder.try_find_regex(regex: builtin_call_regex)) {
            auto fun_res = builder.consume_regex(regex: function_name_regex);
            auto function_name = builder.str(rng: fun_res.groups[1]);

            // Collect the comma-separated keyword arguments into a JSON object;
            // the healing marker tracks where a partial JSON value was patched up.
            common_healing_marker healing_marker;
            json args = json::object();
            while (true) {
                if (auto arg_res = builder.try_consume_regex(regex: arg_name_regex)) {
                    auto arg_name = builder.str(rng: arg_res->groups[1]);
                    auto partial = builder.consume_json();
                    args[arg_name] = partial.json;
                    healing_marker.marker = partial.healing_marker.marker;
                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
                    builder.consume_spaces();
                    if (!builder.try_consume_literal(literal: "," )) {
                        break; // no more arguments
                    }
                } else {
                    break;
                }
            }
            builder.consume_literal(literal: ")" );
            builder.consume_spaces();

            auto arguments = args.dump();
            if (!builder.add_tool_call(name: function_name, id: "" , arguments)) {
                throw common_chat_msg_partial_exception("Incomplete tool call" );
            }
            return;
        }
    }
    // Fall back to plain JSON tool calls (no enclosing block tokens; the
    // function header is only matched at the start of the remaining input).
    parse_json_tool_calls(
        builder,
        /* block_open= */ std::nullopt,
        /* function_regex_start_only= */ function_regex,
        /* function_regex= */ std::nullopt,
        close_regex,
        block_close: std::nullopt);

}
| 1590 | |
| 1591 | static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 1592 | common_chat_params data; |
| 1593 | auto prompt = apply(tmpl, inputs); |
| 1594 | |
| 1595 | // Hacks to fix the official (broken) prompt. |
| 1596 | // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead, |
| 1597 | // until the official template is fixed. |
| 1598 | if (tmpl.source().find(s: "{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}" ) != std::string::npos) { |
| 1599 | // Don't leave the chat dangling after tool results |
| 1600 | if (string_ends_with(str: prompt, suffix: "<|tool▁outputs▁end|>" )) { |
| 1601 | prompt += "<|end▁of▁sentence|>" ; |
| 1602 | if (inputs.add_generation_prompt) { |
| 1603 | prompt += "<|Assistant|>" ; |
| 1604 | } |
| 1605 | } |
| 1606 | // Fix up tool call delta example added by Minja |
| 1607 | prompt = std::regex_replace( |
| 1608 | s: prompt, |
| 1609 | e: std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)" ), |
| 1610 | fmt: "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2" ); |
| 1611 | } |
| 1612 | data.prompt = prompt; |
| 1613 | data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; |
| 1614 | if (string_ends_with(str: data.prompt, suffix: "<think>\n" )) { |
| 1615 | if (!inputs.enable_thinking) { |
| 1616 | data.prompt += "</think>" ; |
| 1617 | } else { |
| 1618 | data.thinking_forced_open = true; |
| 1619 | } |
| 1620 | } |
| 1621 | |
| 1622 | if (inputs.tools.is_array() && !inputs.tools.empty()) { |
| 1623 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); |
| 1624 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 1625 | std::vector<std::string> tool_rules; |
| 1626 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 1627 | const auto & function = tool.at(key: "function" ); |
| 1628 | std::string name = function.at(key: "name" ); |
| 1629 | auto parameters = function.at(key: "parameters" ); |
| 1630 | builder.resolve_refs(parameters); |
| 1631 | tool_rules.push_back(x: builder.add_rule(name + "-call" , |
| 1632 | "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n" |
| 1633 | "```json\\n\" " + builder.add_schema(name + "-args" , parameters) + " " |
| 1634 | "\"```<|tool▁call▁end|>\"" )); |
| 1635 | }); |
| 1636 | // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, |
| 1637 | // so we accept common variants (then it's all constrained) |
| 1638 | builder.add_rule("root" , |
| 1639 | std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "" ) + |
| 1640 | "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) " |
| 1641 | "(" + string_join(values: tool_rules, separator: " | " ) + ")" + (inputs.parallel_tool_calls ? "*" : "" ) + " " |
| 1642 | "\"<|tool▁calls▁end|>\"" |
| 1643 | " space" ); |
| 1644 | data.grammar_triggers.push_back(x: { |
| 1645 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, |
| 1646 | // If thinking_forced_open, then we capture the </think> tag in the grammar, |
| 1647 | // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) |
| 1648 | .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?" ) + |
| 1649 | "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*" |
| 1650 | }); |
| 1651 | data.preserved_tokens = { |
| 1652 | "<think>" , |
| 1653 | "</think>" , |
| 1654 | "<|tool▁calls▁begin|>" , |
| 1655 | "<|tool▁call▁begin|>" , |
| 1656 | "<|tool▁sep|>" , |
| 1657 | "<|tool▁call▁end|>" , |
| 1658 | "<|tool▁calls▁end|" , |
| 1659 | }; |
| 1660 | }); |
| 1661 | } |
| 1662 | return data; |
| 1663 | } |
| 1664 | |
static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // Prompt + tool-call grammar for DeepSeek V3.1. Same overall structure as
    // the R1 initializer, but the call body is "NAME<|tool▁sep|>ARGS" (no
    // "function" prefix or ```json fences) and the template takes a "thinking" flag.
    common_chat_params data;

    // Pass thinking context for DeepSeek V3.1 template
    json additional_context = {
        {"thinking" , inputs.enable_thinking},
    };

    auto prompt = apply(tmpl, inputs,
        /* messages_override= */ inputs.messages,
        /* tools_override= */ std::nullopt,
        additional_context);
    data.prompt = prompt;
    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
    // Prompt ending with an open <think> tag: close it (thinking disabled) or
    // flag reasoning as forced-open for the parser.
    if (string_ends_with(str: data.prompt, suffix: "<think>" )) {
        if (!inputs.enable_thinking) {
            data.prompt += "</think>" ;
        } else {
            data.thinking_forced_open = true;
        }
    }
    if (inputs.tools.is_array() && !inputs.tools.empty()) {
        // Lazy (trigger-activated) unless a tool call is required or a JSON schema is imposed.
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
        data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) {
            std::vector<std::string> tool_rules;
            foreach_function(tools: inputs.tools, fn: [&](const json & tool) {
                const auto & function = tool.at(key: "function" );
                std::string name = function.at(key: "name" );
                auto parameters = function.at(key: "parameters" );
                builder.resolve_refs(parameters);
                // One rule per tool: optional begin tag, NAME<|tool▁sep|>ARGS, end tag.
                tool_rules.push_back(x: builder.add_rule(name + "-call" ,
                    "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
                    "\" " + builder.add_schema(name + "-args" , parameters) + " "
                    "\"<|tool▁call▁end|>\"" ));
            });
            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
            // so we accept common variants (then it's all constrained)
            builder.add_rule("root" ,
                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "" ) +
                "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
                "(" + string_join(values: tool_rules, separator: " | " ) + ")" + (inputs.parallel_tool_calls ? "*" : "" ) + " "
                "\"<|tool▁calls▁end|>\""
                " space" );
            data.grammar_triggers.push_back(x: {
                .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                // If thinking_forced_open, then we capture the </think> tag in the grammar,
                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?" ) +
                "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
            });
            // Keep the DeepSeek special tokens intact through tokenization.
            data.preserved_tokens = {
                "<think>" ,
                "</think>" ,
                "<|tool▁calls▁begin|>" ,
                "<|tool▁call▁begin|>" ,
                "<|tool▁sep|>" ,
                "<|tool▁call▁end|>" ,
                "<|tool▁calls▁end|>" ,
            };
        });
    }
    return data;
}
| 1728 | |
| 1729 | static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { |
| 1730 | builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" ); |
| 1731 | if (!builder.syntax().parse_tool_calls) { |
| 1732 | builder.add_content(content: builder.consume_rest()); |
| 1733 | return; |
| 1734 | } |
| 1735 | |
| 1736 | static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)" ); |
| 1737 | static const common_regex tool_calls_end("<|tool▁calls▁end|>" ); |
| 1738 | static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n" ); |
| 1739 | static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>" ); |
| 1740 | |
| 1741 | parse_json_tool_calls( |
| 1742 | builder, |
| 1743 | /* block_open= */ tool_calls_begin, |
| 1744 | /* function_regex_start_only= */ std::nullopt, |
| 1745 | function_regex, |
| 1746 | close_regex, |
| 1747 | block_close: tool_calls_end); |
| 1748 | } |
| 1749 | |
| 1750 | static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) { |
| 1751 | static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)" ); |
| 1752 | |
| 1753 | static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>" ); |
| 1754 | static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)" ); |
| 1755 | static const common_regex tool_calls_end("<|tool▁calls▁end|>" ); |
| 1756 | |
| 1757 | if (!builder.syntax().parse_tool_calls) { |
| 1758 | LOG_DBG("%s: not parse_tool_calls\n" , __func__); |
| 1759 | builder.add_content(content: builder.consume_rest()); |
| 1760 | return; |
| 1761 | } |
| 1762 | |
| 1763 | LOG_DBG("%s: parse_tool_calls\n" , __func__); |
| 1764 | |
| 1765 | parse_json_tool_calls( |
| 1766 | builder, |
| 1767 | /* block_open= */ tool_calls_begin, |
| 1768 | /* function_regex_start_only= */ std::nullopt, |
| 1769 | function_regex, |
| 1770 | close_regex, |
| 1771 | block_close: tool_calls_end); |
| 1772 | } |
| 1773 | |
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
    // First try to parse using the standard reasoning parsing method
    LOG_DBG("%s: thinking_forced_open: %s\n" , __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

    // Peek ahead for a closing </think> without consuming any input.
    auto start_pos = builder.pos();
    auto found_end_think = builder.try_find_literal(literal: "</think>" );
    builder.move_to(pos: start_pos);

    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
        // Complete message with reasoning forced open but never closed:
        // parse the whole thing as regular content / tool calls.
        LOG_DBG("%s: no end_think, not partial, adding content\n" , __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
    } else if (builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" )) {
        // If reasoning was parsed successfully, the remaining content is regular content
        LOG_DBG("%s: parsed reasoning, adding content\n" , __func__);
        // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
        common_chat_parse_deepseek_v3_1_content(builder);
    } else {
        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
            // Caller asked for no reasoning extraction: everything is content.
            LOG_DBG("%s: reasoning_format none, adding content\n" , __func__);
            common_chat_parse_deepseek_v3_1_content(builder);
            return;
        }
        // If no reasoning tags found, check if we should treat everything as reasoning
        if (builder.syntax().thinking_forced_open) {
            // If thinking is forced open but no tags found, treat everything as reasoning
            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n" , __func__);
            builder.add_reasoning_content(reasoning_content: builder.consume_rest());
        } else {
            LOG_DBG("%s: no thinking_forced_open, adding content\n" , __func__);
            // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
            common_chat_parse_deepseek_v3_1_content(builder);
        }
    }
}
| 1809 | |
| 1810 | static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 1811 | common_chat_params data; |
| 1812 | |
| 1813 | // Copy reasoning to the "thinking" field as expected by the gpt-oss template |
| 1814 | auto adjusted_messages = json::array(); |
| 1815 | for (const auto & msg : inputs.messages) { |
| 1816 | auto has_reasoning_content = msg.contains(key: "reasoning_content" ) && msg.at(key: "reasoning_content" ).is_string(); |
| 1817 | auto has_tool_calls = msg.contains(key: "tool_calls" ) && msg.at(key: "tool_calls" ).is_array(); |
| 1818 | |
| 1819 | if (has_reasoning_content && has_tool_calls) { |
| 1820 | auto adjusted_message = msg; |
| 1821 | adjusted_message["thinking" ] = msg.at(key: "reasoning_content" ); |
| 1822 | adjusted_messages.push_back(val: adjusted_message); |
| 1823 | } else { |
| 1824 | adjusted_messages.push_back(val: msg); |
| 1825 | } |
| 1826 | } |
| 1827 | |
| 1828 | auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); |
| 1829 | |
| 1830 | // Check if we need to replace the return token with end token during |
| 1831 | // inference and without generation prompt. For more details see: |
| 1832 | // https://github.com/ggml-org/llama.cpp/issues/15417 |
| 1833 | if (inputs.is_inference && !inputs.add_generation_prompt) { |
| 1834 | static constexpr std::string_view return_token = "<|return|>" ; |
| 1835 | static constexpr std::string_view end_token = "<|end|>" ; |
| 1836 | if (size_t pos = prompt.rfind(svt: return_token); pos != std::string::npos) { |
| 1837 | prompt.replace(pos: pos, n: return_token.length(), svt: end_token); |
| 1838 | } |
| 1839 | } |
| 1840 | |
| 1841 | data.prompt = prompt; |
| 1842 | data.format = COMMON_CHAT_FORMAT_GPT_OSS; |
| 1843 | |
| 1844 | // These special tokens are required to parse properly, so we include them |
| 1845 | // even if parse_tool_calls is false. |
| 1846 | data.preserved_tokens = { |
| 1847 | "<|channel|>" , |
| 1848 | "<|constrain|>" , |
| 1849 | "<|message|>" , |
| 1850 | "<|start|>" , |
| 1851 | "<|end|>" , |
| 1852 | }; |
| 1853 | |
| 1854 | if (!inputs.json_schema.is_null()) { |
| 1855 | data.grammar_lazy = false; |
| 1856 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 1857 | auto schema = inputs.json_schema; |
| 1858 | builder.resolve_refs(schema); |
| 1859 | |
| 1860 | auto not_end = builder.add_rule("not-end" , |
| 1861 | "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]" ); |
| 1862 | auto analysis = builder.add_rule("analysis" , |
| 1863 | "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"" ); |
| 1864 | auto constraint = builder.add_rule("constraint" , "\"<|constrain|>\"? [a-zA-Z0-9_-]+" ); |
| 1865 | auto final = builder.add_rule("final" , |
| 1866 | "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " + |
| 1867 | builder.add_schema("response" , schema) |
| 1868 | ); |
| 1869 | |
| 1870 | builder.add_rule("root" , "( " + analysis + " \"<|start|>assistant\" )? " + final); |
| 1871 | }); |
| 1872 | } |
| 1873 | |
| 1874 | if (inputs.tools.is_array() && !inputs.tools.empty()) { |
| 1875 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 1876 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 1877 | // tool calls can appear in commentary or analysis channels |
| 1878 | auto channel = builder.add_rule("channel" , "\"<|channel|>\" ( \"commentary\" | \"analysis\" )" ); |
| 1879 | |
| 1880 | std::vector<std::string> tool_rules_recipient_in_role; |
| 1881 | std::vector<std::string> tool_rules_recipient_in_channel; |
| 1882 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 1883 | const auto & function = tool.at(key: "function" ); |
| 1884 | std::string name = function.at(key: "name" ); |
| 1885 | auto parameters = function.at(key: "parameters" ); |
| 1886 | builder.resolve_refs(parameters); |
| 1887 | |
| 1888 | tool_rules_recipient_in_role.push_back( |
| 1889 | x: builder.add_rule(name + "-call" , |
| 1890 | "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " + |
| 1891 | builder.add_schema(name + "-args" , parameters) |
| 1892 | ) |
| 1893 | ); |
| 1894 | |
| 1895 | tool_rules_recipient_in_channel.push_back( |
| 1896 | x: builder.add_rule(name + "-call" , |
| 1897 | "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " + |
| 1898 | builder.add_schema(name + "-args" , parameters) |
| 1899 | ) |
| 1900 | ); |
| 1901 | }); |
| 1902 | |
| 1903 | auto recipient_in_channel = builder.add_rule("recipient_in_channel" , |
| 1904 | channel + " \" to=functions.\" ( " + |
| 1905 | string_join(values: tool_rules_recipient_in_channel, separator: " | " ) + " )" |
| 1906 | ); |
| 1907 | |
| 1908 | if (data.grammar_lazy) { |
| 1909 | auto recipient_in_role = builder.add_rule("recipient_in_role" , |
| 1910 | "\"<|start|>assistant\"? \" to=functions.\" ( " + |
| 1911 | string_join(values: tool_rules_recipient_in_role, separator: " | " ) + " )" |
| 1912 | ); |
| 1913 | |
| 1914 | builder.add_rule("root" , recipient_in_role + " | " + recipient_in_channel); |
| 1915 | } else { |
| 1916 | auto not_end = builder.add_rule("not-end" , |
| 1917 | "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]" ); |
| 1918 | auto analysis = builder.add_rule("analysis" , |
| 1919 | "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"" ); |
| 1920 | auto = builder.add_rule("commentary" , |
| 1921 | "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"" ); |
| 1922 | |
| 1923 | auto recipient_in_role = builder.add_rule("recipient_in_role" , |
| 1924 | "\" to=functions.\" ( " + string_join(values: tool_rules_recipient_in_role, separator: " | " ) + " )" |
| 1925 | ); |
| 1926 | |
| 1927 | builder.add_rule("root" , |
| 1928 | "( " + analysis + " \"<|start|>assistant\" )? " + |
| 1929 | "( " + commentary + " \"<|start|>assistant\" )? " + |
| 1930 | "( " + recipient_in_role + " | " + recipient_in_channel + " )" |
| 1931 | ); |
| 1932 | } |
| 1933 | |
| 1934 | // Trigger on tool calls that appear in the commentary channel |
| 1935 | data.grammar_triggers.push_back(x: { |
| 1936 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, |
| 1937 | .value: "<\\|channel\\|>(commentary|analysis) to" |
| 1938 | }); |
| 1939 | |
| 1940 | // Trigger tool calls that appear in the role section, either at the |
| 1941 | // start or in the middle. |
| 1942 | data.grammar_triggers.push_back(x: { |
| 1943 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, |
| 1944 | .value: "^ to" |
| 1945 | }); |
| 1946 | |
| 1947 | data.grammar_triggers.push_back(x: { |
| 1948 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, |
| 1949 | .value: "<\\|start\\|>assistant to" |
| 1950 | }); |
| 1951 | }); |
| 1952 | } |
| 1953 | |
| 1954 | return data; |
| 1955 | } |
| 1956 | static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) { |
| 1957 | static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))" ; |
| 1958 | static const std::string recipient("(?: to=functions\\.([^<\\s]+))" ); |
| 1959 | |
| 1960 | static const common_regex start_regex("<\\|start\\|>assistant" ); |
| 1961 | static const common_regex analysis_regex("<\\|channel\\|>analysis" ); |
| 1962 | static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?" ); |
| 1963 | static const common_regex preamble_regex("<\\|channel\\|>commentary" ); |
| 1964 | static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?" ); |
| 1965 | static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?" ); |
| 1966 | |
| 1967 | auto consume_end = [&](bool include_end = false) { |
| 1968 | if (auto res = builder.try_find_literal(literal: "<|end|>" )) { |
| 1969 | return res->prelude + (include_end ? builder.str(rng: res->groups[0]) : "" ); |
| 1970 | } |
| 1971 | return builder.consume_rest(); |
| 1972 | }; |
| 1973 | |
| 1974 | auto handle_tool_call = [&](const std::string & name) { |
| 1975 | if (auto args = builder.try_consume_json_with_dumped_args(args_paths: {{}})) { |
| 1976 | if (builder.syntax().parse_tool_calls) { |
| 1977 | if (!builder.add_tool_call(name, id: "" , arguments: args->value) || args->is_partial) { |
| 1978 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 1979 | } |
| 1980 | } else if (args->is_partial) { |
| 1981 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 1982 | } |
| 1983 | } |
| 1984 | }; |
| 1985 | |
| 1986 | auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> { |
| 1987 | auto match = regex.search(input, pos: 0, as_match: true); |
| 1988 | if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) { |
| 1989 | return match; |
| 1990 | } |
| 1991 | return std::nullopt; |
| 1992 | }; |
| 1993 | |
| 1994 | do { |
| 1995 | auto = builder.pos(); |
| 1996 | auto content_start = builder.try_find_literal(literal: "<|message|>" ); |
| 1997 | if (!content_start) { |
| 1998 | throw common_chat_msg_partial_exception("incomplete header" ); |
| 1999 | } |
| 2000 | |
| 2001 | auto = content_start->prelude; |
| 2002 | |
| 2003 | if (auto match = regex_match(tool_call1_regex, header)) { |
| 2004 | auto group = match->groups[1]; |
| 2005 | auto name = header.substr(pos: group.begin, n: group.end - group.begin); |
| 2006 | handle_tool_call(name); |
| 2007 | continue; |
| 2008 | } |
| 2009 | |
| 2010 | if (auto match = regex_match(tool_call2_regex, header)) { |
| 2011 | auto group = match->groups[2]; |
| 2012 | auto name = header.substr(pos: group.begin, n: group.end - group.begin); |
| 2013 | handle_tool_call(name); |
| 2014 | continue; |
| 2015 | } |
| 2016 | |
| 2017 | if (regex_match(analysis_regex, header)) { |
| 2018 | builder.move_to(pos: header_start_pos); |
| 2019 | if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) { |
| 2020 | builder.add_content(content: consume_end(true)); |
| 2021 | } else { |
| 2022 | builder.try_parse_reasoning(start_think: "<|channel|>analysis<|message|>" , end_think: "<|end|>" ); |
| 2023 | } |
| 2024 | continue; |
| 2025 | } |
| 2026 | |
| 2027 | if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) { |
| 2028 | builder.add_content(content: consume_end()); |
| 2029 | continue; |
| 2030 | } |
| 2031 | |
| 2032 | // Possibly a malformed message, attempt to recover by rolling |
| 2033 | // back to pick up the next <|start|> |
| 2034 | LOG_DBG("%s: unknown header from message: %s\n" , __func__, header.c_str()); |
| 2035 | builder.move_to(pos: header_start_pos); |
| 2036 | } while (builder.try_find_regex(regex: start_regex, from: std::string::npos, add_prelude_to_content: false)); |
| 2037 | |
| 2038 | auto remaining = builder.consume_rest(); |
| 2039 | if (!remaining.empty()) { |
| 2040 | LOG_DBG("%s: content after last message: %s\n" , __func__, remaining.c_str()); |
| 2041 | } |
| 2042 | } |
| 2043 | |
| 2044 | static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2045 | LOG_DBG("%s\n" , __func__); |
| 2046 | common_chat_params data; |
| 2047 | const std::optional<json> tools_override = json(); |
| 2048 | const std::optional<json> additional_context = json { |
| 2049 | {"datetime" , format_time(now: inputs.now, format: "%b %d %Y %H:%M:%S GMT" )}, |
| 2050 | {"functions" , json(inputs.tools.empty() ? "" : inputs.tools.dump(indent: 2))}, |
| 2051 | }; |
| 2052 | data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context); |
| 2053 | if (inputs.tools.is_array() && !inputs.tools.empty()) { |
| 2054 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2055 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2056 | auto schemas = json::array(); |
| 2057 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 2058 | const auto & function = tool.at(key: "function" ); |
| 2059 | schemas.push_back(init: { |
| 2060 | {"type" , "object" }, |
| 2061 | {"properties" , { |
| 2062 | {"name" , { |
| 2063 | {"type" , "string" }, |
| 2064 | {"const" , function.at(key: "name" )}, |
| 2065 | }}, |
| 2066 | {"arguments" , function.at(key: "parameters" )}, |
| 2067 | }}, |
| 2068 | {"required" , json::array(init: {"name" , "arguments" , "id" })}, |
| 2069 | }); |
| 2070 | }); |
| 2071 | auto schema = json { |
| 2072 | {"type" , "array" }, |
| 2073 | {"items" , schemas.size() == 1 ? schemas[0] : json {{"anyOf" , schemas}}}, |
| 2074 | {"minItems" , 1}, |
| 2075 | }; |
| 2076 | if (!inputs.parallel_tool_calls) { |
| 2077 | schema["maxItems" ] = 1; |
| 2078 | } |
| 2079 | builder.add_rule("root" , "\" functools\"? " + builder.add_schema("tool_calls" , schema)); |
| 2080 | }); |
| 2081 | data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: " functools[" }); |
| 2082 | data.preserved_tokens = { |
| 2083 | " functools[" , |
| 2084 | }; |
| 2085 | data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; |
| 2086 | } else { |
| 2087 | data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; |
| 2088 | } |
| 2089 | return data; |
| 2090 | } |
| 2091 | static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) { |
| 2092 | if (!builder.syntax().parse_tool_calls) { |
| 2093 | builder.add_content(content: builder.consume_rest()); |
| 2094 | return; |
| 2095 | } |
| 2096 | static const common_regex prefix(regex_escape(s: " functools[" )); |
| 2097 | parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1); |
| 2098 | } |
| 2099 | |
| 2100 | static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2101 | // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... |
| 2102 | // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar |
| 2103 | // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code. |
| 2104 | common_chat_params data; |
| 2105 | data.prompt = apply(tmpl, inputs); |
| 2106 | data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; |
| 2107 | if (inputs.tools.is_array() && !inputs.tools.empty()) { |
| 2108 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2109 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2110 | std::vector<std::string> first_tool_rules; |
| 2111 | std::vector<std::string> subsequent_tool_rules; |
| 2112 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 2113 | const auto & function = tool.at(key: "function" ); |
| 2114 | std::string name = function.at(key: "name" ); |
| 2115 | auto parameters = function.at(key: "parameters" ); |
| 2116 | builder.resolve_refs(parameters); |
| 2117 | std::string args_pattern = "[\\s\\S]*" ; |
| 2118 | auto args_rule = builder.add_schema(name + "-args" , parameters); |
| 2119 | if (name == "python" ) { |
| 2120 | args_rule = builder.add_rule(name + "-maybe-raw-args" , args_rule + " | [^{] .*" ); |
| 2121 | } else { |
| 2122 | args_pattern = "\\{" + args_pattern; |
| 2123 | } |
| 2124 | auto call_rule = builder.add_rule(name + "-call" , "\"" + name + "\\n\" " + args_rule); |
| 2125 | first_tool_rules.push_back(x: call_rule); |
| 2126 | if (inputs.parallel_tool_calls) { |
| 2127 | subsequent_tool_rules.push_back(x: builder.add_rule(name + "-call2" , "\">>>\" " + call_rule)); |
| 2128 | } |
| 2129 | data.grammar_triggers.push_back(x: { |
| 2130 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, |
| 2131 | .value: "((?:[\\s\\S]+?>>>)?" + regex_escape(s: name) + "\n)" + args_pattern, |
| 2132 | }); |
| 2133 | }); |
| 2134 | data.preserved_tokens = { |
| 2135 | "<|end_header_id|>" , |
| 2136 | }; |
| 2137 | auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call" , string_join(values: first_tool_rules, separator: " | " )) + " space" ; |
| 2138 | if (inputs.parallel_tool_calls) { |
| 2139 | auto subsequent_rule = builder.add_rule("subsequent_tool_call" , string_join(values: subsequent_tool_rules, separator: " | " )) + " space" ; |
| 2140 | builder.add_rule("root" , first_rule + " (" + subsequent_rule + ")*" ); |
| 2141 | } else { |
| 2142 | builder.add_rule("root" , first_rule); |
| 2143 | } |
| 2144 | |
| 2145 | }); |
| 2146 | } |
| 2147 | return data; |
| 2148 | } |
| 2149 | static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) { |
| 2150 | static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))" ); |
| 2151 | static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))" ); |
| 2152 | static const common_regex close_regex(R"(\s*)" ); |
| 2153 | |
| 2154 | parse_json_tool_calls( |
| 2155 | builder, |
| 2156 | block_open: std::nullopt, |
| 2157 | function_regex_start_only, |
| 2158 | function_regex, |
| 2159 | close_regex, |
| 2160 | block_close: std::nullopt, |
| 2161 | /* allow_raw_python= */ true, |
| 2162 | /* get_function_name= */ [&](const auto & res) -> std::string { |
| 2163 | auto at_start = res.groups[0].begin == 0; |
| 2164 | auto name = builder.str(rng: res.groups[1]); |
| 2165 | if (!name.empty() && name.back() == '{') { |
| 2166 | // Unconsume the opening brace '{' to ensure the JSON parsing goes well. |
| 2167 | builder.move_back(n: 1); |
| 2168 | } |
| 2169 | auto idx = name.find_last_not_of("\n{" ); |
| 2170 | name = name.substr(0, idx + 1); |
| 2171 | if (at_start && name == "all" ) { |
| 2172 | return "" ; |
| 2173 | } |
| 2174 | return name; |
| 2175 | }); |
| 2176 | } |
| 2177 | |
| 2178 | static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2179 | // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt |
| 2180 | common_chat_params data; |
| 2181 | |
| 2182 | if (!inputs.tools.is_null()) { |
| 2183 | std::string python_code_argument_name; |
| 2184 | auto has_raw_python = false; |
| 2185 | |
| 2186 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2187 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2188 | std::vector<std::string> tool_rules; |
| 2189 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 2190 | const auto & function = tool.at(key: "function" ); |
| 2191 | const auto & parameters = function.at(key: "parameters" ); |
| 2192 | std::string name = function.at(key: "name" ); |
| 2193 | if (name == "python" || name == "ipython" ) { |
| 2194 | if (!parameters.contains(key: "type" )) { |
| 2195 | throw std::runtime_error("Missing type in python tool" ); |
| 2196 | } |
| 2197 | has_raw_python = true; |
| 2198 | const auto & type = parameters.at(key: "type" ); |
| 2199 | if (type == "object" ) { |
| 2200 | auto properties = parameters.at(key: "properties" ); |
| 2201 | for (auto it = properties.begin(); it != properties.end(); ++it) { |
| 2202 | if (it.value().at(key: "type" ) == "string" ) { |
| 2203 | if (!python_code_argument_name.empty()) { |
| 2204 | throw std::runtime_error("Multiple string arguments found in python tool" ); |
| 2205 | } |
| 2206 | python_code_argument_name = it.key(); |
| 2207 | } |
| 2208 | } |
| 2209 | if (python_code_argument_name.empty()) { |
| 2210 | throw std::runtime_error("No string argument found in python tool" ); |
| 2211 | } |
| 2212 | } else if (type != "string" ) { |
| 2213 | throw std::runtime_error("Invalid type in python tool: " + type.dump()); |
| 2214 | } |
| 2215 | } |
| 2216 | tool_rules.push_back(x: builder.add_rule(name + "-call" , "\"<function=" + name + ">\" " + builder.add_schema(name + "-args" , parameters) + " \"</function>\" space" )); |
| 2217 | }); |
| 2218 | if (has_raw_python) { |
| 2219 | tool_rules.push_back(x: builder.add_rule("python-call" , "\"<|python_tag|>\" .*" )); |
| 2220 | data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<|python_tag|>" }); |
| 2221 | data.preserved_tokens.push_back(x: "<|python_tag|>" ); |
| 2222 | } |
| 2223 | auto tool_call = builder.add_rule("tool_call" , string_join(values: tool_rules, separator: " | " )) + " space" ; |
| 2224 | builder.add_rule("root" , inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); |
| 2225 | data.grammar_triggers.push_back(x: {.type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<function=" }); |
| 2226 | }); |
| 2227 | data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1; |
| 2228 | } else { |
| 2229 | data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; |
| 2230 | } |
| 2231 | |
| 2232 | data.prompt = apply(tmpl, inputs); |
| 2233 | // TODO: if (has_raw_python) |
| 2234 | return data; |
| 2235 | } |
| 2236 | static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) { |
| 2237 | if (!builder.syntax().parse_tool_calls) { |
| 2238 | builder.add_content(content: builder.consume_rest()); |
| 2239 | return; |
| 2240 | } |
| 2241 | // This version of Functionary still supports the llama 3.1 tool call format for the python tool. |
| 2242 | static const common_regex python_tag_regex(regex_escape(s: "<|python_tag|>" )); |
| 2243 | |
| 2244 | static const common_regex function_regex(R"(<function=(\w+)>)" ); |
| 2245 | static const common_regex close_regex(R"(</function>)" ); |
| 2246 | |
| 2247 | parse_json_tool_calls( |
| 2248 | builder, |
| 2249 | /* block_open= */ std::nullopt, |
| 2250 | /* function_regex_start_only= */ std::nullopt, |
| 2251 | function_regex, |
| 2252 | close_regex, |
| 2253 | block_close: std::nullopt); |
| 2254 | |
| 2255 | if (auto res = builder.try_find_regex(regex: python_tag_regex)) { |
| 2256 | auto arguments = wrap_code_as_arguments(builder, code: builder.consume_rest()); |
| 2257 | builder.add_tool_call(name: "python" , id: "" , arguments); |
| 2258 | return; |
| 2259 | } |
| 2260 | } |
| 2261 | |
| 2262 | static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2263 | common_chat_params data; |
| 2264 | |
| 2265 | json = json { |
| 2266 | {"enable_thinking" , inputs.enable_thinking}, |
| 2267 | }; |
| 2268 | extra_context.update(j: inputs.extra_context); |
| 2269 | |
| 2270 | data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, additional_context: extra_context); |
| 2271 | data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO; |
| 2272 | if (string_ends_with(str: data.prompt, suffix: "<think>\n" )) { |
| 2273 | if (!extra_context["enable_thinking" ]) { |
| 2274 | data.prompt += "</think>" ; |
| 2275 | } else { |
| 2276 | data.thinking_forced_open = true; |
| 2277 | } |
| 2278 | } |
| 2279 | |
| 2280 | if (!inputs.tools.is_null()) { |
| 2281 | // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)* |
| 2282 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2283 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2284 | std::vector<std::string> tool_rules; |
| 2285 | std::vector<std::string> tool_call_alts; |
| 2286 | std::vector<std::string> escaped_names; |
| 2287 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 2288 | const auto & function = tool.at(key: "function" ); |
| 2289 | std::string name = function.at(key: "name" ); |
| 2290 | auto parameters = function.at(key: "parameters" ); |
| 2291 | builder.resolve_refs(parameters); |
| 2292 | tool_rules.push_back(x: builder.add_schema(name + "-call" , { |
| 2293 | {"type" , "object" }, |
| 2294 | {"properties" , json { |
| 2295 | {"name" , json {{"const" , name}}}, |
| 2296 | {"arguments" , parameters}, |
| 2297 | }}, |
| 2298 | {"required" , json::array(init: {"name" , "arguments" })}, |
| 2299 | })); |
| 2300 | tool_call_alts.push_back(x: builder.add_rule( |
| 2301 | name + "-function-tag" , |
| 2302 | "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " + |
| 2303 | builder.add_schema(name + "-args" , parameters) + " " |
| 2304 | "\"</function>\" space" )); |
| 2305 | |
| 2306 | data.grammar_triggers.push_back(x: { |
| 2307 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, |
| 2308 | .value: "<function=" + name + ">" , |
| 2309 | }); |
| 2310 | auto escaped_name = regex_escape(s: name); |
| 2311 | data.grammar_triggers.push_back(x: { |
| 2312 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, |
| 2313 | .value: "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"" , |
| 2314 | }); |
| 2315 | escaped_names.push_back(x: escaped_name); |
| 2316 | }); |
| 2317 | auto any_tool_call = builder.add_rule("any_tool_call" , "( " + string_join(values: tool_rules, separator: " | " ) + " ) space" ); |
| 2318 | std::vector<std::string> alt_tags { |
| 2319 | any_tool_call, |
| 2320 | "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"" , |
| 2321 | // The rest is just to accommodate common "good bad" outputs. |
| 2322 | "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"" , |
| 2323 | "\"<response>\" space " + any_tool_call + " \"</response>\"" , |
| 2324 | "\"<tools>\" space " + any_tool_call + " \"</tools>\"" , |
| 2325 | "\"<json>\" space " + any_tool_call + " \"</json>\"" , |
| 2326 | "\"<xml>\" space " + any_tool_call + " \"</xml>\"" , |
| 2327 | "\"<JSON>\" space " + any_tool_call + " \"</JSON>\"" , |
| 2328 | }; |
| 2329 | auto wrappable_tool_call = builder.add_rule("wrappable_tool_call" , "( " + string_join(values: alt_tags, separator: " | " ) + " ) space" ); |
| 2330 | tool_call_alts.push_back(x: wrappable_tool_call); |
| 2331 | tool_call_alts.push_back( |
| 2332 | x: "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space " ); |
| 2333 | auto tool_call = builder.add_rule("tool_call" , string_join(values: tool_call_alts, separator: " | " )); |
| 2334 | builder.add_rule("root" , |
| 2335 | std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "" ) + |
| 2336 | (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call)); |
| 2337 | // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives) |
| 2338 | data.grammar_triggers.push_back(x: { |
| 2339 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, |
| 2340 | // If thinking_forced_open, then we capture the </think> tag in the grammar, |
| 2341 | // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) |
| 2342 | .value: std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?" ) + ( |
| 2343 | "\\s*(" |
| 2344 | "(?:<tool_call>" |
| 2345 | "|<function" |
| 2346 | "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?" |
| 2347 | "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(values: escaped_names, separator: "|" ) + ")\"" |
| 2348 | ")" |
| 2349 | ")[\\s\\S]*" |
| 2350 | ), |
| 2351 | }); |
| 2352 | data.preserved_tokens = { |
| 2353 | "<think>" , |
| 2354 | "</think>" , |
| 2355 | "<tool_call>" , |
| 2356 | "</tool_call>" , |
| 2357 | "<function" , |
| 2358 | "<tools>" , |
| 2359 | "</tools>" , |
| 2360 | "<response>" , |
| 2361 | "</response>" , |
| 2362 | "<function_call>" , |
| 2363 | "</function_call>" , |
| 2364 | "<json>" , |
| 2365 | "</json>" , |
| 2366 | "<JSON>" , |
| 2367 | "</JSON>" , |
| 2368 | "```" , |
| 2369 | "```json" , |
| 2370 | "```xml" , |
| 2371 | }; |
| 2372 | }); |
| 2373 | } |
| 2374 | |
| 2375 | return data; |
| 2376 | } |
| 2377 | static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) { |
| 2378 | builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" ); |
| 2379 | if (!builder.syntax().parse_tool_calls) { |
| 2380 | builder.add_content(content: builder.consume_rest()); |
| 2381 | return; |
| 2382 | } |
| 2383 | |
| 2384 | static const common_regex open_regex( |
| 2385 | "(?:" |
| 2386 | "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start) |
| 2387 | "(" // match 2 (open_tag) |
| 2388 | "<tool_call>" |
| 2389 | "|<function_call>" |
| 2390 | "|<tool>" |
| 2391 | "|<tools>" |
| 2392 | "|<response>" |
| 2393 | "|<json>" |
| 2394 | "|<xml>" |
| 2395 | "|<JSON>" |
| 2396 | ")?" |
| 2397 | "(\\s*\\{\\s*\"name\")" // match 3 (named tool call) |
| 2398 | ")" |
| 2399 | "|<function=([^>]+)>" // match 4 (function name) |
| 2400 | "|<function name=\"([^\"]+)\">" // match 5 (function name again) |
| 2401 | ); |
| 2402 | |
| 2403 | while (auto res = builder.try_find_regex(regex: open_regex)) { |
| 2404 | const auto & block_start = res->groups[1]; |
| 2405 | std::string block_end = block_start.empty() ? "" : "```" ; |
| 2406 | |
| 2407 | const auto & open_tag = res->groups[2]; |
| 2408 | std::string close_tag; |
| 2409 | |
| 2410 | if (!res->groups[3].empty()) { |
| 2411 | builder.move_to(pos: res->groups[3].begin); |
| 2412 | close_tag = open_tag.empty() ? "" : "</" + builder.str(rng: open_tag).substr(pos: 1); |
| 2413 | |
| 2414 | if (auto tool_call = builder.try_consume_json_with_dumped_args(args_paths: {{"arguments" }})) { |
| 2415 | if (!builder.add_tool_call(tool_call: tool_call->value) || tool_call->is_partial) { |
| 2416 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 2417 | } |
| 2418 | builder.consume_spaces(); |
| 2419 | builder.consume_literal(literal: close_tag); |
| 2420 | builder.consume_spaces(); |
| 2421 | if (!block_end.empty()) { |
| 2422 | builder.consume_literal(literal: block_end); |
| 2423 | builder.consume_spaces(); |
| 2424 | } |
| 2425 | } else { |
| 2426 | throw common_chat_msg_partial_exception("failed to parse tool call" ); |
| 2427 | } |
| 2428 | } else { |
| 2429 | auto function_name = builder.str(rng: res->groups[4]); |
| 2430 | if (function_name.empty()) { |
| 2431 | function_name = builder.str(rng: res->groups[5]); |
| 2432 | } |
| 2433 | GGML_ASSERT(!function_name.empty()); |
| 2434 | |
| 2435 | close_tag = "</function>" ; |
| 2436 | |
| 2437 | if (auto arguments = builder.try_consume_json_with_dumped_args(args_paths: {{}})) { |
| 2438 | if (!builder.add_tool_call(name: function_name, id: "" , arguments: arguments->value) || arguments->is_partial) { |
| 2439 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 2440 | } |
| 2441 | builder.consume_spaces(); |
| 2442 | builder.consume_literal(literal: close_tag); |
| 2443 | builder.consume_spaces(); |
| 2444 | if (!block_end.empty()) { |
| 2445 | builder.consume_literal(literal: block_end); |
| 2446 | builder.consume_spaces(); |
| 2447 | } |
| 2448 | } |
| 2449 | } |
| 2450 | } |
| 2451 | |
| 2452 | builder.add_content(content: builder.consume_rest()); |
| 2453 | } |
| 2454 | |
| 2455 | static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2456 | common_chat_params data; |
| 2457 | |
| 2458 | // Pass thinking context for Granite template |
| 2459 | json additional_context = { |
| 2460 | {"thinking" , inputs.enable_thinking}, |
| 2461 | }; |
| 2462 | |
| 2463 | data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context); |
| 2464 | data.format = COMMON_CHAT_FORMAT_GRANITE; |
| 2465 | |
| 2466 | if (string_ends_with(str: data.prompt, suffix: "<think>\n" ) || string_ends_with(str: data.prompt, suffix: "<think>" )) { |
| 2467 | if (!inputs.enable_thinking) { |
| 2468 | data.prompt += "</think>" ; |
| 2469 | } else { |
| 2470 | data.thinking_forced_open = true; |
| 2471 | } |
| 2472 | } |
| 2473 | |
| 2474 | if (!inputs.tools.is_null()) { |
| 2475 | // Granite uses <|tool_call|> followed by JSON list |
| 2476 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2477 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2478 | std::vector<std::string> tool_rules; |
| 2479 | foreach_function(tools: inputs.tools, fn: [&](const json & tool) { |
| 2480 | const auto & function = tool.at(key: "function" ); |
| 2481 | std::string name = function.at(key: "name" ); |
| 2482 | auto parameters = function.at(key: "parameters" ); |
| 2483 | builder.resolve_refs(parameters); |
| 2484 | tool_rules.push_back(x: builder.add_rule(name + "-call" , builder.add_schema(name + |
| 2485 | "-args" , { |
| 2486 | {"type" , "object" }, |
| 2487 | {"properties" , { |
| 2488 | {"name" , {{"const" , name}}}, |
| 2489 | {"arguments" , parameters}, |
| 2490 | }}, |
| 2491 | {"required" , json::array(init: {"name" , "arguments" })}, |
| 2492 | }))); |
| 2493 | }); |
| 2494 | |
| 2495 | auto tool_call = builder.add_rule("tool_call" , string_join(values: tool_rules, separator: " | " )); |
| 2496 | auto tool_list = builder.add_rule("tool_list" , "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"" ); |
| 2497 | |
| 2498 | if (data.thinking_forced_open) { |
| 2499 | builder.add_rule("root" , "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list); |
| 2500 | } else { |
| 2501 | builder.add_rule("root" , "\"<|tool_call|>\" space " + tool_list); |
| 2502 | } |
| 2503 | |
| 2504 | data.grammar_triggers.push_back(x: { |
| 2505 | .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, |
| 2506 | .value: "<|tool_call|>" |
| 2507 | }); |
| 2508 | |
| 2509 | data.preserved_tokens = { |
| 2510 | "<think>" , |
| 2511 | "</think>" , |
| 2512 | "<response>" , |
| 2513 | "</response>" , |
| 2514 | "<|tool_call|>" , |
| 2515 | }; |
| 2516 | }); |
| 2517 | } else { |
| 2518 | // Handle thinking tags for non-tool responses |
| 2519 | if (data.thinking_forced_open && inputs.enable_thinking) { |
| 2520 | data.grammar_lazy = false; |
| 2521 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2522 | builder.add_rule("root" , "\"</think>\" space \"<response>\" space .* \"</response>\" space" ); |
| 2523 | }); |
| 2524 | data.preserved_tokens = { |
| 2525 | "<think>" , |
| 2526 | "</think>" , |
| 2527 | "<response>" , |
| 2528 | "</response>" , |
| 2529 | }; |
| 2530 | } |
| 2531 | } |
| 2532 | |
| 2533 | return data; |
| 2534 | } |
| 2535 | |
| 2536 | static void common_chat_parse_granite(common_chat_msg_parser & builder) { |
| 2537 | // Parse thinking tags |
| 2538 | static const common_regex start_think_regex(regex_escape(s: "<think>" )); |
| 2539 | static const common_regex end_think_regex(regex_escape(s: "</think>" )); |
| 2540 | // Granite models output partial tokens such as "<" and "<think". |
| 2541 | // By leveraging try_consume_regex()/try_find_regex() throwing |
| 2542 | // common_chat_msg_partial_exception for these partial tokens, |
| 2543 | // processing is interrupted and the tokens are not passed to add_content(). |
| 2544 | if (auto res = builder.try_consume_regex(regex: start_think_regex)) { |
| 2545 | // Restore position for try_parse_reasoning() |
| 2546 | builder.move_to(pos: res->groups[0].begin); |
| 2547 | builder.try_find_regex(regex: end_think_regex, from: std::string::npos, add_prelude_to_content: false); |
| 2548 | // Restore position for try_parse_reasoning() |
| 2549 | builder.move_to(pos: res->groups[0].begin); |
| 2550 | } |
| 2551 | builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" ); |
| 2552 | |
| 2553 | // Parse response tags |
| 2554 | static const common_regex start_response_regex(regex_escape(s: "<response>" )); |
| 2555 | static const common_regex end_response_regex(regex_escape(s: "</response>" )); |
| 2556 | // Granite models output partial tokens such as "<" and "<response". |
| 2557 | // Same hack as reasoning parsing. |
| 2558 | if (builder.try_consume_regex(regex: start_response_regex)) { |
| 2559 | builder.try_find_regex(regex: end_response_regex); |
| 2560 | } |
| 2561 | |
| 2562 | if (!builder.syntax().parse_tool_calls) { |
| 2563 | builder.add_content(content: builder.consume_rest()); |
| 2564 | return; |
| 2565 | } |
| 2566 | |
| 2567 | // Look for tool calls |
| 2568 | static const common_regex tool_call_regex(regex_escape(s: "<|tool_call|>" )); |
| 2569 | if (auto res = builder.try_find_regex(regex: tool_call_regex)) { |
| 2570 | builder.move_to(pos: res->groups[0].end); |
| 2571 | |
| 2572 | // Expect JSON array of tool calls |
| 2573 | if (auto tool_call = builder.try_consume_json_with_dumped_args(args_paths: {{{"arguments" }}})) { |
| 2574 | if (!builder.add_tool_calls(arr: tool_call->value) || tool_call->is_partial) { |
| 2575 | throw common_chat_msg_partial_exception("incomplete tool call" ); |
| 2576 | } |
| 2577 | } |
| 2578 | } else { |
| 2579 | builder.add_content(content: builder.consume_rest()); |
| 2580 | } |
| 2581 | } |
| 2582 | |
| 2583 | static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) { |
| 2584 | // Parse thinking tags |
| 2585 | builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" ); |
| 2586 | if (!builder.syntax().parse_tool_calls) { |
| 2587 | builder.add_content(content: builder.consume_rest()); |
| 2588 | return; |
| 2589 | } |
| 2590 | |
| 2591 | // Look for tool calls |
| 2592 | static const common_regex tool_call_regex(regex_escape(s: "<TOOLCALL>" )); |
| 2593 | if (auto res = builder.try_find_regex(regex: tool_call_regex)) { |
| 2594 | builder.move_to(pos: res->groups[0].end); |
| 2595 | |
| 2596 | // Expect JSON array of tool calls |
| 2597 | auto tool_calls_data = builder.consume_json(); |
| 2598 | if (tool_calls_data.json.is_array()) { |
| 2599 | if (!builder.try_consume_literal(literal: "</TOOLCALL>" )) { |
| 2600 | throw common_chat_msg_partial_exception("Incomplete tool call" ); |
| 2601 | } |
| 2602 | builder.add_tool_calls(arr: tool_calls_data.json); |
| 2603 | } else { |
| 2604 | throw common_chat_msg_partial_exception("Incomplete tool call" ); |
| 2605 | } |
| 2606 | } |
| 2607 | builder.add_content(content: builder.consume_rest()); |
| 2608 | } |
| 2609 | |
| 2610 | static void common_chat_parse_apertus(common_chat_msg_parser & builder) { |
| 2611 | // Parse thinking tags |
| 2612 | builder.try_parse_reasoning(start_think: "<|inner_prefix|>" , end_think: "<|inner_suffix|>" ); |
| 2613 | if (!builder.syntax().parse_tool_calls) { |
| 2614 | builder.add_content(content: builder.consume_rest()); |
| 2615 | return; |
| 2616 | } |
| 2617 | |
| 2618 | // Look for tool calls |
| 2619 | static const common_regex tool_call_regex(regex_escape(s: "<|tools_prefix|>" )); |
| 2620 | if (auto res = builder.try_find_regex(regex: tool_call_regex)) { |
| 2621 | builder.move_to(pos: res->groups[0].end); |
| 2622 | |
| 2623 | auto tool_calls_data = builder.consume_json(); |
| 2624 | if (tool_calls_data.json.is_array()) { |
| 2625 | builder.consume_spaces(); |
| 2626 | if (!builder.try_consume_literal(literal: "<|tools_suffix|>" )) { |
| 2627 | throw common_chat_msg_partial_exception("Incomplete tool call" ); |
| 2628 | } |
| 2629 | for (const auto & value : tool_calls_data.json) { |
| 2630 | if (value.is_object()) { |
| 2631 | builder.add_tool_call_short_form(tool_call: value); |
| 2632 | } |
| 2633 | } |
| 2634 | } else { |
| 2635 | throw common_chat_msg_partial_exception("Incomplete tool call" ); |
| 2636 | } |
| 2637 | } |
| 2638 | builder.add_content(content: builder.consume_rest()); |
| 2639 | } |
| 2640 | |
| 2641 | |
| 2642 | static void common_chat_parse_lfm2(common_chat_msg_parser & builder) { |
| 2643 | if (!builder.syntax().parse_tool_calls) { |
| 2644 | builder.add_content(content: builder.consume_rest()); |
| 2645 | return; |
| 2646 | } |
| 2647 | |
| 2648 | // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|> |
| 2649 | static const common_regex tool_call_start_regex(regex_escape(s: "<|tool_call_start|>" )); |
| 2650 | static const common_regex tool_call_end_regex(regex_escape(s: "<|tool_call_end|>" )); |
| 2651 | |
| 2652 | // Loop through all tool calls |
| 2653 | while (auto res = builder.try_find_regex(regex: tool_call_start_regex, from: std::string::npos, /* add_prelude_to_content= */ true)) { |
| 2654 | builder.move_to(pos: res->groups[0].end); |
| 2655 | |
| 2656 | // Parse JSON array format: [{"name": "...", "arguments": {...}}] |
| 2657 | auto tool_calls_data = builder.consume_json(); |
| 2658 | |
| 2659 | // Consume end marker |
| 2660 | builder.consume_spaces(); |
| 2661 | if (!builder.try_consume_regex(regex: tool_call_end_regex)) { |
| 2662 | throw common_chat_msg_partial_exception("Expected <|tool_call_end|>" ); |
| 2663 | } |
| 2664 | |
| 2665 | // Process each tool call in the array |
| 2666 | if (tool_calls_data.json.is_array()) { |
| 2667 | for (const auto & tool_call : tool_calls_data.json) { |
| 2668 | if (!tool_call.is_object()) { |
| 2669 | throw common_chat_msg_partial_exception("Tool call must be an object" ); |
| 2670 | } |
| 2671 | |
| 2672 | if (!tool_call.contains(key: "name" )) { |
| 2673 | throw common_chat_msg_partial_exception("Tool call missing 'name' field" ); |
| 2674 | } |
| 2675 | |
| 2676 | std::string function_name = tool_call.at(key: "name" ); |
| 2677 | std::string arguments = "{}" ; |
| 2678 | |
| 2679 | if (tool_call.contains(key: "arguments" )) { |
| 2680 | if (tool_call.at(key: "arguments" ).is_object()) { |
| 2681 | arguments = tool_call.at(key: "arguments" ).dump(); |
| 2682 | } else if (tool_call.at(key: "arguments" ).is_string()) { |
| 2683 | arguments = tool_call.at(key: "arguments" ); |
| 2684 | } |
| 2685 | } |
| 2686 | |
| 2687 | if (!builder.add_tool_call(name: function_name, id: "" , arguments)) { |
| 2688 | throw common_chat_msg_partial_exception("Incomplete tool call" ); |
| 2689 | } |
| 2690 | } |
| 2691 | } else { |
| 2692 | throw common_chat_msg_partial_exception("Expected JSON array for tool calls" ); |
| 2693 | } |
| 2694 | |
| 2695 | // Consume any trailing whitespace after this tool call |
| 2696 | builder.consume_spaces(); |
| 2697 | } |
| 2698 | |
| 2699 | // Consume any remaining content after all tool calls |
| 2700 | auto remaining = builder.consume_rest(); |
| 2701 | if (!string_strip(str: remaining).empty()) { |
| 2702 | builder.add_content(content: remaining); |
| 2703 | } |
| 2704 | } |
| 2705 | |
// Parses Seed-OSS model output: optional <seed:think>...</seed:think> reasoning,
// followed by zero or more tool calls of the form
//   <seed:tool_call><function=NAME><parameter=KEY>VALUE</parameter>...</function></seed:tool_call>
// Any non-tool-call remainder is added as plain content. Throws
// common_chat_msg_partial_exception on truncated/incomplete structures so the
// caller can treat the input as a partial generation.
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
    // Parse thinking tags first - this handles the main reasoning content
    builder.try_parse_reasoning(start_think: "<seed:think>" , end_think: "</seed:think>" );

    // Tool-call parsing disabled: everything after reasoning is plain content.
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(content: builder.consume_rest());
        return;
    }

    // Parse tool calls - Seed-OSS uses <seed:tool_call> format
    static const common_regex tool_call_begin_regex("<seed:tool_call>" );
    static const common_regex tool_call_end_regex("</seed:tool_call>" );
    static const common_regex function_regex("<function=([^>]+)>" );
    static const common_regex param_regex("<parameter=([^>]+)>" );

    while (auto tool_res = builder.try_find_regex(regex: tool_call_begin_regex)) {
        builder.consume_spaces(); // Consume whitespace after <seed:tool_call>

        // Look for function call inside tool call, ignore any content before it
        if (auto func_res = builder.try_find_regex(regex: function_regex, from: std::string::npos, add_prelude_to_content: false)) {
            // groups[1] is the captured function name from <function=NAME>.
            auto function_name = builder.str(rng: func_res->groups[1]);

            // Parse Seed-OSS parameters <parameter=name>value</parameter>
            json args = json::object();
            // Parse all parameters
            // NOTE(review): the loop variable below shadows the outer `param_res`
            // declared in this while-condition on the inner try_consume_json() —
            // intentional-looking but worth renaming for clarity.
            while (auto param_res = builder.try_find_regex(regex: param_regex, from: std::string::npos, add_prelude_to_content: false)) {
                // again, ignore noise around parameters
                auto param_name = builder.str(rng: param_res->groups[1]);
                builder.move_to(pos: param_res->groups[0].end);
                builder.consume_spaces(); // Consume whitespace after parameter
                // Remember the cursor so we can re-scan the value as JSON after
                // first locating the closing tag (which yields the raw prelude text).
                auto savedPos = builder.pos();
                if (auto param_parse = builder.try_find_literal(literal: "</parameter>" )) {
                    // Raw text between <parameter=...> and </parameter>.
                    auto param = param_parse->prelude;
                    builder.move_to(pos: savedPos);
                    // Prefer a typed JSON value; fall back to the raw string when the
                    // value is not valid/complete JSON.
                    try {
                        if (auto param_res = builder.try_consume_json()) {
                            args[param_name] = param_res->json;
                        } else {
                            args[param_name] = param;
                        }
                    } catch (json::exception &) {
                        args[param_name] = param;
                    }
                } else {
                    // No closing </parameter> yet: generation is mid-parameter.
                    throw common_chat_msg_partial_exception("Incomplete tool parameter" );
                }
            }
            // Look for closing function tag
            auto end_func = builder.try_find_literal(literal: "</function>" );
            if (end_func) {
                builder.move_to(pos: end_func->groups[0].end);
                builder.consume_spaces(); // Consume whitespace after </function>

                // Add the tool call with parsed arguments, but only if we REALLY got the literal
                // NOTE(review): substr's second argument is a count, not an end index —
                // passing groups[0].end over-reads past the match, but the prefix
                // comparison below still checks only the first `funlen` chars.
                auto eaten_fragment = builder.input().substr(pos: end_func->groups[0].begin, n: end_func->groups[0].end);
                auto funlen = std::string("</function>" ).length();
                if (eaten_fragment.length() >= funlen && eaten_fragment.substr(pos: 0, n: funlen) == std::string("</function>" )) {
                    if (!builder.add_tool_call(name: function_name, id: "" , arguments: args.dump())) {
                        throw common_chat_msg_partial_exception("Incomplete tool call" );
                    }
                } else {
                    throw common_chat_msg_partial_exception("Incomplete tool call" );
                }
            } else {
                throw common_chat_msg_partial_exception("Incomplete tool call" );
            }
            // Look for closing tool call tag
            if (auto end_tool = builder.try_find_regex(regex: tool_call_end_regex, from: std::string::npos, add_prelude_to_content: false)) {
                builder.move_to(pos: end_tool->groups[0].end);
                builder.consume_spaces(); // Consume trailing whitespace after tool call
            } else {
                throw common_chat_msg_partial_exception("Incomplete tool call" );
            }
        } else {
            // No function found - don't consume content here, let it be handled at the end
            break;
        }
    }

    // Consume any remaining whitespace after all tool call processing
    builder.consume_spaces();
    auto remaining = builder.consume_rest();
    // If there's any non-whitespace content remaining, add it as content
    if (!string_strip(str: remaining).empty()) {
        builder.add_content(content: remaining);
    }
}
| 2793 | |
| 2794 | static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 2795 | common_chat_params data; |
| 2796 | data.prompt = apply(tmpl, inputs); |
| 2797 | data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; |
| 2798 | data.grammar_lazy = false; |
| 2799 | if (!inputs.json_schema.is_null()) { |
| 2800 | if (!inputs.grammar.empty()) { |
| 2801 | throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both" ); |
| 2802 | } |
| 2803 | data.grammar = json_schema_to_grammar(schema: inputs.json_schema); |
| 2804 | } else { |
| 2805 | data.grammar = inputs.grammar; |
| 2806 | } |
| 2807 | return data; |
| 2808 | } |
| 2809 | |
| 2810 | static common_chat_params common_chat_params_init_seed_oss( |
| 2811 | const common_chat_template & tmpl, |
| 2812 | templates_params & params, |
| 2813 | const common_chat_templates_inputs & inputs) |
| 2814 | { |
| 2815 | common_chat_params data; |
| 2816 | data.prompt = apply(tmpl, inputs: params); |
| 2817 | data.format = COMMON_CHAT_FORMAT_SEED_OSS; |
| 2818 | if (string_ends_with(str: data.prompt, suffix: "<seed:think>" )) { |
| 2819 | if (!inputs.enable_thinking) { |
| 2820 | data.prompt += "</seed:think>" ; |
| 2821 | } else { |
| 2822 | data.thinking_forced_open = true; |
| 2823 | } |
| 2824 | } |
| 2825 | |
| 2826 | if (params.tools.is_array() && !params.tools.empty()) { |
| 2827 | data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; |
| 2828 | data.grammar = build_grammar(cb: [&](const common_grammar_builder & builder) { |
| 2829 | std::vector<std::string> tool_rules; |
| 2830 | foreach_function(tools: params.tools, fn: [&](const json & tool) { |
| 2831 | const auto & function = tool.at(key: "function" ); |
| 2832 | std::string name = function.at(key: "name" ); |
| 2833 | auto parameters = function.at(key: "parameters" ); |
| 2834 | builder.resolve_refs(parameters); |
| 2835 | |
| 2836 | // Create rule for Seed-OSS function call format |
| 2837 | std::string param_rules; |
| 2838 | if (parameters.contains(key: "properties" )) { |
| 2839 | for (const auto & [key, value] : parameters.at(key: "properties" ).items()) { |
| 2840 | param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) + |
| 2841 | "\"</parameter>\"" ; |
| 2842 | } |
| 2843 | } |
| 2844 | |
| 2845 | tool_rules.push_back(x: builder.add_rule(name + "-call" , |
| 2846 | "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " + |
| 2847 | param_rules + |
| 2848 | " \"</function>\" space \"</seed:tool_call>\"" )); |
| 2849 | }); |
| 2850 | |
| 2851 | data.grammar_triggers.push_back(x: { .type: COMMON_GRAMMAR_TRIGGER_TYPE_WORD, .value: "<seed:tool_call>" }); |
| 2852 | |
| 2853 | data.preserved_tokens = { |
| 2854 | "<seed:think>" , "</seed:think>" , "<seed:tool_call>" , "</seed:tool_call>" , |
| 2855 | "<function=" , "</function>" , "<parameter=" , "</parameter>" , |
| 2856 | }; |
| 2857 | |
| 2858 | builder.add_rule("root" , string_join(values: tool_rules, separator: " | " )); |
| 2859 | }); |
| 2860 | } |
| 2861 | return data; |
| 2862 | } |
| 2863 | |
// Applies a Jinja chat template and selects the format-specific handler by
// sniffing characteristic markers in the template source. The ORDER of the
// detection checks below is significant: more specific patterns must be tested
// before more generic ones (e.g. DeepSeek V3.1 before R1, Seed-OSS before the
// generic fallbacks), so do not reorder them casually.
static common_chat_params common_chat_templates_apply_jinja(
    const struct common_chat_templates * tmpls,
    const struct common_chat_templates_inputs & inputs)
{
    templates_params params;
    params.tools = common_chat_tools_to_json_oaicompat<json>(tools: inputs.tools);
    // Prefer the dedicated tool-use template when tools are present and one exists.
    const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
        ? *tmpls->template_tool_use
        : *tmpls->template_default;
    const auto & src = tmpl.source();
    const auto & caps = tmpl.original_caps();
    params.messages = common_chat_msgs_to_json_oaicompat<json>(msgs: inputs.messages, /* concat_text= */ concat_typed_text: !tmpl.original_caps().requires_typed_content);
    params.add_generation_prompt = inputs.add_generation_prompt;
    params.tool_choice = inputs.tool_choice;
    params.enable_thinking = inputs.enable_thinking;
    params.grammar = inputs.grammar;
    params.now = inputs.now;
    params.add_bos = tmpls->add_bos;
    params.add_eos = tmpls->add_eos;

    // Extra user-supplied template variables, each parsed as a JSON value.
    params.extra_context = json::object();
    for (auto el : inputs.chat_template_kwargs) {
        params.extra_context[el.first] = json::parse(i&: el.second);
    }

    if (!inputs.json_schema.empty()) {
        params.json_schema = json::parse(i: inputs.json_schema);
    }

    // Honor parallel tool calls only when the template supports them.
    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n" );
        params.parallel_tool_calls = false;
    } else {
        params.parallel_tool_calls = inputs.parallel_tool_calls;
    }

    if (params.tools.is_array()) {
        // Tools constrain the output via their own grammar; a user grammar would conflict.
        if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
            throw std::runtime_error("Cannot specify grammar with tools" );
        }
        if (caps.supports_tool_calls && !caps.supports_tools) {
            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n" );
        }
    }

    // DeepSeek V3.1: detect based on specific patterns in the template
    if (src.find(s: "message['prefix'] is defined and message['prefix'] and thinking" ) != std::string::npos &&
        params.json_schema.is_null()) {
        return common_chat_params_init_deepseek_v3_1(tmpl, inputs: params);
    }

    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
    if (src.find(s: "<|tool▁calls▁begin|>" ) != std::string::npos && params.json_schema.is_null()) {
        return common_chat_params_init_deepseek_r1(tmpl, inputs: params);
    }

    // Command R7B: : use handler in all cases except json schema (thinking / tools).
    if (src.find(s: "<|END_THINKING|><|START_ACTION|>" ) != std::string::npos && params.json_schema.is_null()) {
        return common_chat_params_init_command_r7b(tmpl, inputs: params);
    }

    // Granite (IBM) - detects thinking / tools support
    if (src.find(s: "elif thinking" ) != std::string::npos && src.find(s: "<|tool_call|>" ) != std::string::npos) {
        return common_chat_params_init_granite(tmpl, inputs: params);
    }

    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
    if (src.find(s: "<tool_call>" ) != std::string::npos && params.json_schema.is_null()) {
        return common_chat_params_init_hermes_2_pro(tmpl, inputs: params);
    }

    // GPT-OSS
    if (src.find(s: "<|channel|>" ) != std::string::npos) {
        return common_chat_params_init_gpt_oss(tmpl, inputs: params);
    }

    // Seed-OSS
    if (src.find(s: "<seed:think>" ) != std::string::npos) {
        return common_chat_params_init_seed_oss(tmpl, params, inputs);
    }

    // Nemotron v2
    if (src.find(s: "<SPECIAL_10>" ) != std::string::npos) {
        return common_chat_params_init_nemotron_v2(tmpl, inputs: params);
    }

    // Apertus format detection
    if (src.find(s: "<|system_start|>" ) != std::string::npos && src.find(s: "<|tools_prefix|>" ) != std::string::npos) {
        return common_chat_params_init_apertus(tmpl, inputs: params);
    }

    // LFM2 (w/ tools)
    if (src.find(s: "List of tools: <|tool_list_start|>[" ) != std::string::npos &&
        src.find(s: "]<|tool_list_end|>" ) != std::string::npos) {
        return common_chat_params_init_lfm2(tmpl, inputs: params);
    }

    // Use generic handler when mixing tools + JSON schema.
    // TODO: support that mix in handlers below.
    if ((params.tools.is_array() && params.json_schema.is_object())) {
        return common_chat_params_init_generic(tmpl, inputs: params);
    }

    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
    if (src.find(s: ">>>all" ) != std::string::npos) {
        return common_chat_params_init_functionary_v3_2(tmpl, inputs: params);
    }

    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
    if (src.find(s: " functools[" ) != std::string::npos) {
        return common_chat_params_init_firefunction_v2(tmpl, inputs: params);
    }

    // Functionary v3.1 (w/ tools)
    if (src.find(s: "<|start_header_id|>" ) != std::string::npos
        && src.find(s: "<function=" ) != std::string::npos) {
        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs: params);
    }

    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
    if (src.find(s: "<|start_header_id|>ipython<|end_header_id|>" ) != std::string::npos) {
        auto allow_python_tag_builtin_tools = src.find(s: "<|python_tag|>" ) != std::string::npos;
        return common_chat_params_init_llama_3_x(tmpl, inputs: params, allow_python_tag_builtin_tools);
    }

    if (src.find(s: "[THINK]" ) != std::string::npos && src.find(s: "[/THINK]" ) != std::string::npos) {
        return common_chat_params_init_magistral(tmpl, inputs: params);
    }

    // Plain handler (no tools)
    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
        return common_chat_params_init_without_tools(tmpl, inputs: params);
    }

    // Mistral Nemo (w/ tools)
    if (src.find(s: "[TOOL_CALLS]" ) != std::string::npos) {
        return common_chat_params_init_mistral_nemo(tmpl, inputs: params);
    }

    // Generic fallback
    return common_chat_params_init_generic(tmpl, inputs: params);
}
| 3006 | |
| 3007 | // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template. |
| 3008 | static common_chat_params common_chat_templates_apply_legacy( |
| 3009 | const struct common_chat_templates * tmpls, |
| 3010 | const struct common_chat_templates_inputs & inputs) |
| 3011 | { |
| 3012 | int alloc_size = 0; |
| 3013 | std::vector<llama_chat_message> chat; |
| 3014 | std::vector<std::string> contents; |
| 3015 | |
| 3016 | for (const auto & msg : inputs.messages) { |
| 3017 | auto content = msg.content; |
| 3018 | for (const auto & part : msg.content_parts) { |
| 3019 | if (part.type != "text" ) { |
| 3020 | LOG_WRN("Ignoring non-text content part: %s\n" , part.type.c_str()); |
| 3021 | continue; |
| 3022 | } |
| 3023 | if (!content.empty()) { |
| 3024 | content += "\n" ;; |
| 3025 | } |
| 3026 | content += part.text; |
| 3027 | } |
| 3028 | contents.emplace_back(args: std::move(content)); |
| 3029 | } |
| 3030 | for (size_t i = 0; i < contents.size(); ++i) { |
| 3031 | const auto & msg = inputs.messages[i]; |
| 3032 | const auto & content = contents[i]; |
| 3033 | chat.push_back(x: {.role: msg.role.c_str(), .content: content.c_str()}); |
| 3034 | alloc_size += (msg.role.size() + content.size()) * 1.25; |
| 3035 | } |
| 3036 | |
| 3037 | std::vector<char> buf(alloc_size); |
| 3038 | |
| 3039 | // run the first time to get the total output length |
| 3040 | const auto & src = tmpls->template_default->source(); |
| 3041 | int32_t res = llama_chat_apply_template(tmpl: src.c_str(), chat: chat.data(), n_msg: chat.size(), add_ass: inputs.add_generation_prompt, buf: buf.data(), length: buf.size()); |
| 3042 | |
| 3043 | // error: chat template is not supported |
| 3044 | if (res < 0) { |
| 3045 | // if the custom "tmpl" is not supported, we throw an error |
| 3046 | // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() |
| 3047 | throw std::runtime_error("this custom template is not supported, try using --jinja" ); |
| 3048 | } |
| 3049 | |
| 3050 | // if it turns out that our buffer is too small, we resize it |
| 3051 | if ((size_t) res > buf.size()) { |
| 3052 | buf.resize(new_size: res); |
| 3053 | res = llama_chat_apply_template(tmpl: src.c_str(), chat: chat.data(), n_msg: chat.size(), add_ass: inputs.add_generation_prompt, buf: buf.data(), length: buf.size()); |
| 3054 | } |
| 3055 | |
| 3056 | common_chat_params params; |
| 3057 | params.prompt = std::string(buf.data(), res); |
| 3058 | if (!inputs.json_schema.empty()) { |
| 3059 | params.grammar = json_schema_to_grammar(schema: json::parse(i: inputs.json_schema)); |
| 3060 | } else { |
| 3061 | params.grammar = inputs.grammar; |
| 3062 | } |
| 3063 | return params; |
| 3064 | } |
| 3065 | |
| 3066 | common_chat_params common_chat_templates_apply( |
| 3067 | const struct common_chat_templates * tmpls, |
| 3068 | const struct common_chat_templates_inputs & inputs) |
| 3069 | { |
| 3070 | GGML_ASSERT(tmpls != nullptr); |
| 3071 | return inputs.use_jinja |
| 3072 | ? common_chat_templates_apply_jinja(tmpls, inputs) |
| 3073 | : common_chat_templates_apply_legacy(tmpls, inputs); |
| 3074 | } |
| 3075 | |
// Fallback parser: extracts an optional <think>...</think> reasoning block,
// then treats everything else as plain content (no tool-call parsing).
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning(start_think: "<think>" , end_think: "</think>" );
    builder.add_content(content: builder.consume_rest());
}
| 3080 | |
// Dispatches to the format-specific parser selected by builder.syntax().format,
// then finalizes the builder (which validates completeness for non-partial
// input). Throws std::runtime_error for formats with no parser.
static void common_chat_parse(common_chat_msg_parser & builder) {
    LOG_DBG("Parsing input with format %s: %s\n" , common_chat_format_name(builder.syntax().format), builder.input().c_str());

    switch (builder.syntax().format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
            common_chat_parse_content_only(builder);
            break;
        case COMMON_CHAT_FORMAT_GENERIC:
            common_chat_parse_generic(builder);
            break;
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
            common_chat_parse_mistral_nemo(builder);
            break;
        case COMMON_CHAT_FORMAT_MAGISTRAL:
            common_chat_parse_magistral(builder);
            break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X:
            common_chat_parse_llama_3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
            // Same parser as LLAMA_3_X but with <|python_tag|> builtin-tool handling.
            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
            break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
            common_chat_parse_deepseek_r1(builder);
            break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
            common_chat_parse_deepseek_v3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
            common_chat_parse_functionary_v3_2(builder);
            break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
            common_chat_parse_functionary_v3_1_llama_3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
            common_chat_parse_hermes_2_pro(builder);
            break;
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
            common_chat_parse_firefunction_v2(builder);
            break;
        case COMMON_CHAT_FORMAT_COMMAND_R7B:
            common_chat_parse_command_r7b(builder);
            break;
        case COMMON_CHAT_FORMAT_GRANITE:
            common_chat_parse_granite(builder);
            break;
        case COMMON_CHAT_FORMAT_GPT_OSS:
            common_chat_parse_gpt_oss(builder);
            break;
        case COMMON_CHAT_FORMAT_SEED_OSS:
            common_chat_parse_seed_oss(builder);
            break;
        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
            common_chat_parse_nemotron_v2(builder);
            break;
        case COMMON_CHAT_FORMAT_APERTUS:
            common_chat_parse_apertus(builder);
            break;
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
            common_chat_parse_lfm2(builder);
            break;
        default:
            throw std::runtime_error(std::string("Unsupported format: " ) + common_chat_format_name(format: builder.syntax().format));
    }
    // Finalize: flushes pending state and validates the parse result.
    builder.finish();
}
| 3147 | |
| 3148 | common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) { |
| 3149 | common_chat_msg_parser builder(input, is_partial, syntax); |
| 3150 | try { |
| 3151 | common_chat_parse(builder); |
| 3152 | } catch (const common_chat_msg_partial_exception & ex) { |
| 3153 | LOG_DBG("Partial parse: %s\n" , ex.what()); |
| 3154 | if (!is_partial) { |
| 3155 | builder.clear_tools(); |
| 3156 | builder.move_to(pos: 0); |
| 3157 | common_chat_parse_content_only(builder); |
| 3158 | } |
| 3159 | } |
| 3160 | auto msg = builder.result(); |
| 3161 | if (!is_partial) { |
| 3162 | LOG_DBG("Parsed message: %s\n" , common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str()); |
| 3163 | } |
| 3164 | return msg; |
| 3165 | } |
| 3166 | |