// Tests the chat message parser (common_chat_msg_parser and common_chat_parse): reasoning extraction,
// regex/literal consumption, healing and dumping of partial JSON arguments, cursor positioning,
// and DeepSeek V3.1 tool-call parsing.
//
// NOTE(review): this header previously described a CLI mode that prints a Markdown summary of Jinja
// template formats (`./build/bin/test-chat ../minja/build/tests/*.jinja`). main() in this file takes
// no arguments and only runs the tests, so that usage does not apply here - confirm and drop this note.
//
8#include <exception>
9#include <iostream>
10#include <string>
11
12#include "chat-parser.h"
13#include "common.h"
14#include "log.h"
15#include "regex-partial.h"
16
// Compares `expected` and `actual` with operator!=. On a mismatch, writes `label` and both values
// to stderr and throws std::runtime_error("Test failed"); otherwise does nothing.
template <class T>
static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
    if (!(expected != actual)) {
        return;
    }
    std::cerr << label << '\n'
              << "Expected: " << expected << '\n'
              << "Actual: " << actual << '\n'
              << std::flush;
    throw std::runtime_error("Test failed");
}

// Unlabelled variant: same check as above with an empty label.
template <class T>
static void assert_equals(const T & expected, const T & actual) {
    assert_equals<T>("", expected, actual);
}

// Convenience overload so a string literal can be compared against a std::string.
static void assert_equals(const char * expected, const std::string & actual) {
    const std::string expected_str(expected);
    assert_equals<std::string>(expected_str, actual);
}
35
// Runs `fn` and verifies that it throws.
//
// - With an empty `expected_exception_pattern`, any thrown exception is accepted.
// - Otherwise the exception must derive from std::exception and its what() text must contain a
//   match for the pattern (std::regex_search).
//
// Throws std::runtime_error when `fn` returns normally, when the message does not match the
// pattern, or when a pattern was given but the exception does not derive from std::exception.
static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
    try {
        fn();
    } catch (const std::exception & e) {
        if (expected_exception_pattern.empty()) {
            return;
        }
        const std::regex expected_exception_regex(expected_exception_pattern);
        const std::string actual_message = e.what();
        if (std::regex_search(actual_message, expected_exception_regex)) {
            return;
        }
        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
    } catch (...) {
        // Not a std::exception: there is no message to match, so it only satisfies the
        // pattern-less form of the assertion.
        if (expected_exception_pattern.empty()) {
            return;
        }
        throw std::runtime_error("Exception of unexpected type (pattern: " + expected_exception_pattern + ")");
    }
    throw std::runtime_error("Exception was expected but not thrown");
}
53
54static void test_reasoning() {
55 //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
56 {
57 common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
58 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
59 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
60 /* .reasoning_in_content = */ false,
61 /* .thinking_forced_open = */ false,
62 });
63 assert_equals(expected: false, actual: builder.try_parse_reasoning(start_think: "<tnk>", end_think: "</tnk>"));
64 assert_equals(expected: "<tnk>Cogito</tnk>Ergo sum", actual: builder.consume_rest());
65 }
66 {
67 common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
68 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
69 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
70 /* .reasoning_in_content = */ false,
71 /* .thinking_forced_open = */ false,
72 });
73 assert_equals(expected: true, actual: builder.try_parse_reasoning(start_think: "<tnk>", end_think: "</tnk>"));
74 assert_equals(expected: std::string("Cogito"), actual: builder.result().reasoning_content);
75 assert_equals(expected: "Ergo sum", actual: builder.consume_rest());
76 }
77 {
78 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
79 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
80 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
81 /* .reasoning_in_content = */ false,
82 /* .thinking_forced_open = */ false,
83 });
84 assert_equals(expected: false, actual: builder.try_parse_reasoning(start_think: "<tnk>", end_think: "</tnk>"));
85 assert_equals(expected: "Cogito</tnk>Ergo sum", actual: builder.consume_rest());
86 }
87 {
88 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
89 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
90 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
91 /* .reasoning_in_content = */ false,
92 /* .thinking_forced_open = */ true,
93 });
94 assert_equals(expected: true, actual: builder.try_parse_reasoning(start_think: "<tnk>", end_think: "</tnk>"));
95 assert_equals(expected: std::string("Cogito"), actual: builder.result().reasoning_content);
96 assert_equals(expected: "Ergo sum", actual: builder.consume_rest());
97 }
98 {
99 common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
100 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
101 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
102 /* .reasoning_in_content = */ true,
103 /* .thinking_forced_open = */ true,
104 });
105 assert_equals(expected: true, actual: builder.try_parse_reasoning(start_think: "<tnk>", end_think: "</tnk>"));
106 assert_equals(expected: "<think>Cogito</think>", actual: builder.result().content);
107 assert_equals(expected: "Ergo sum", actual: builder.consume_rest());
108 }
109 {
110 const std::string variant("content_only_inline_think");
111 common_chat_syntax syntax = {
112 /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
113 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
114 /* .reasoning_in_content = */ false,
115 /* .thinking_forced_open = */ false,
116 /* .parse_tool_calls = */ false,
117 };
118 const std::string input = "<think>Pense</think>Bonjour";
119 auto msg = common_chat_parse(input, is_partial: false, syntax);
120 assert_equals(label: variant, expected: std::string("Pense"), actual: msg.reasoning_content);
121 assert_equals(label: variant, expected: std::string("Bonjour"), actual: msg.content);
122 }
123 {
124 const std::string variant("llama_3_inline_think");
125 common_chat_syntax syntax = {
126 /* .format = */ COMMON_CHAT_FORMAT_LLAMA_3_X,
127 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
128 /* .reasoning_in_content = */ false,
129 /* .thinking_forced_open = */ false,
130 /* .parse_tool_calls = */ false,
131 };
132 const std::string input = "<think>Plan</think>Réponse";
133 auto msg = common_chat_parse(input, is_partial: false, syntax);
134 assert_equals(label: variant, expected: std::string("Plan"), actual: msg.reasoning_content);
135 assert_equals(label: variant, expected: std::string("Réponse"), actual: msg.content);
136 }
137 // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
138 {
139 common_chat_syntax syntax = {
140 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
141 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
142 /* .reasoning_in_content = */ false,
143 /* .thinking_forced_open = */ true,
144 /* .parse_tool_calls = */ true,
145 };
146 const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
147 common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
148 assert_equals(label: variant, expected: true, actual: builder.try_parse_reasoning(start_think: "<think>", end_think: "</think>"));
149 assert_equals(label: variant, expected: std::string("REASONING"), actual: builder.result().reasoning_content);
150 assert_equals(label: variant, expected: std::string("ok"), actual: builder.consume_rest());
151 }
152 // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
153 {
154 common_chat_syntax syntax = {
155 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
156 /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
157 /* .reasoning_in_content = */ false,
158 /* .thinking_forced_open = */ true,
159 /* .parse_tool_calls = */ true,
160 };
161 const std::string variant("deepseek_v3_1_reasoning_format_none");
162 const std::string input = "REASONING</think>ok";
163 auto msg = common_chat_parse(input, is_partial: false, syntax);
164 assert_equals(label: variant, expected: std::string("REASONING</think>ok"), actual: msg.content);
165 assert_equals(label: variant, expected: std::string(""), actual: msg.reasoning_content);
166 }
167}
168
169static void test_regex() {
170 auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
171 common_chat_msg_parser builder(input, /* is_partial= */ false, {});
172 assert_throws(fn: [&]() { builder.consume_regex(regex: common_regex(regex)); }, expected_exception_pattern);
173 };
174
175 test_throws("Hello, world!", "abc", "^abc$");
176 test_throws("Hello, world!", "e", "^e$");
177
178 {
179 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
180 builder.consume_regex(regex: common_regex("Hello"));
181 assert_equals(expected: ", world!", actual: builder.consume_rest());
182 }
183
184 {
185 // When in non partial mode, we can say whether the regex was consumed or not.
186 common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
187 assert_equals(expected: false, actual: builder.try_consume_regex(regex: common_regex("Hello, world!")).has_value());
188 }
189 {
190 common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
191 auto res = builder.try_consume_regex(regex: common_regex("H(el)l(?:o, world!)?"));
192 assert_equals(expected: true, actual: res.has_value());
193 // Verify captures
194 assert_equals<size_t>(expected: 2, actual: res->groups.size());
195 assert_equals(expected: "Hell", actual: builder.str(rng: res->groups[0]));
196 assert_equals(expected: "el", actual: builder.str(rng: res->groups[1]));
197 // Verify position is after the match
198 assert_equals<size_t>(expected: 4, actual: builder.pos());
199 assert_equals(expected: "o,", actual: builder.consume_rest());
200 }
201 {
202 // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
203 common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
204 assert_throws(fn: [&]() {
205 builder.try_consume_regex(regex: common_regex("Hello, world!"));
206 }, expected_exception_pattern: "^Hello, world!$");
207 }
208
209 // Now regardless of the mode, we can tell these aren't a match.
210 for (const auto is_partial : {false, true}) {
211 common_chat_msg_parser builder("Hello,", is_partial, {});
212 assert_equals(expected: false, actual: builder.try_consume_regex(regex: common_regex("a(b|c)(d|e)f")).has_value());
213 }
214 for (const auto is_partial : {false, true}) {
215 common_chat_msg_parser builder("Hello,", is_partial, {});
216 assert_equals(expected: false, actual: builder.try_consume_literal(literal: "Oh"));
217 }
218}
219
// Truncated JSON object prefixes fed to the partial-JSON tests below. Each one is cut off inside
// or right after the first key or the first string value, including cuts directly after a
// backslash and after characters that are JSON punctuation outside of strings.
const std::vector<std::string> barely_healable_jsons = {
    // Cut inside or right after the key / at the start of the value.
    R"({)",
    R"({")",
    R"({"\)",
    R"({"n)",
    R"({"name")",
    R"({"name":)",
    R"({"name":")",
    R"({"name":"\)",
    R"({"name":"python)",
    R"({"name":"python\)",
    // Key text that starts with a punctuation character or a digit.
    R"({",)",
    R"({":)",
    R"({"[)",
    R"({"])",
    R"({"{)",
    R"({"})",
    R"({"1)",
    // Value text that starts with a punctuation character or a digit.
    R"({"name":",)",
    R"({"name":":)",
    R"({"name":"[)",
    R"({"name":"])",
    R"({"name":"{)",
    R"({"name":"})",
    R"({"name":"1)",
};
246
247static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
248 common_chat_msg_parser builder(input, is_partial, {});
249 auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
250 assert_equals(expected: true, actual: js.has_value());
251 assert_equals(expected: is_partial, actual: js->is_partial);
252 assert_equals(expected, actual: args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
253}
254
255static void test_deepseek_v3_1_tool_calls() {
256 //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
257 // variant: happy path for when it works as the model card says it should
258 const std::string variant("simple");
259 common_chat_syntax syntax = {
260 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
261 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
262 /* .reasoning_in_content = */ false,
263 /* .thinking_forced_open = */ false,
264 /* .parse_tool_calls = */ true,
265 };
266 const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
267 auto msg = common_chat_parse(input, is_partial: false, syntax);
268 assert_equals<std::size_t>(label: variant, expected: 1, actual: msg.tool_calls.size());
269 assert_equals(label: variant, expected: std::string("get_time"), actual: msg.tool_calls[0].name);
270 // JSON arguments are dumped without spaces
271 assert_equals(label: variant, expected: std::string("{\"city\":\"Tokyo\"}"), actual: msg.tool_calls[0].arguments);
272 assert_equals(label: variant, expected: std::string(""), actual: msg.content);
273 assert_equals(label: variant, expected: std::string(""), actual: msg.reasoning_content);
274
275 // variant: simple + thinking open
276 {
277 common_chat_syntax syntax = {
278 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
279 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
280 /* .reasoning_in_content = */ false,
281 /* .thinking_forced_open = */ true,
282 /* .parse_tool_calls = */ true,
283 };
284 const std::string variant("simple_thinking");
285 const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
286 auto m = common_chat_parse(input: in, is_partial: false, syntax);
287 assert_equals<std::size_t>(label: variant, expected: 1, actual: m.tool_calls.size());
288 assert_equals(label: variant, expected: std::string("get_time"), actual: m.tool_calls[0].name);
289 assert_equals(label: variant, expected: std::string("{\"city\":\"Tokyo\"}"), actual: m.tool_calls[0].arguments);
290 assert_equals(label: variant, expected: std::string(""), actual: m.content);
291 assert_equals(label: variant, expected: std::string("REASONING"), actual: m.reasoning_content);
292 }
293 // variant: simple + multiple tool calls
294 {
295 common_chat_syntax syntax = {
296 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
297 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
298 /* .reasoning_in_content = */ false,
299 /* .thinking_forced_open = */ false,
300 /* .parse_tool_calls = */ true,
301 };
302 const std::string variant("simple_multiple_tool_calls");
303 const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
304 auto m = common_chat_parse(input: in, is_partial: false, syntax);
305 assert_equals<std::size_t>(label: variant, expected: 2, actual: m.tool_calls.size());
306 assert_equals(label: variant, expected: std::string("get_time"), actual: m.tool_calls[0].name);
307 assert_equals(label: variant, expected: std::string("{\"city\":\"Paris\"}"), actual: m.tool_calls[0].arguments);
308 assert_equals(label: variant, expected: std::string("get_weather"), actual: m.tool_calls[1].name);
309 assert_equals(label: variant, expected: std::string("{\"city\":\"Paris\"}"), actual: m.tool_calls[1].arguments);
310 assert_equals(label: variant, expected: std::string("CONTENT"), actual: m.content);
311 assert_equals(label: variant, expected: std::string(""), actual: m.reasoning_content);
312 }
313
314
315 // variant: thinking forced open + tool call in reasoning content
316 {
317 common_chat_syntax syntax = {
318 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
319 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
320 /* .reasoning_in_content = */ false,
321 /* .thinking_forced_open = */ true,
322 /* .parse_tool_calls = */ true,
323 };
324 const std::string variant("thinking_forced_open_tool_call_in_reasoning");
325 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
326 auto m = common_chat_parse(input: in, is_partial: false, syntax);
327 assert_equals<std::size_t>(label: variant, expected: 1, actual: m.tool_calls.size());
328 assert_equals(label: variant, expected: std::string("get_time"), actual: m.tool_calls[0].name);
329 assert_equals(label: variant, expected: std::string("{\"city\":\"Tokyo\"}"), actual: m.tool_calls[0].arguments);
330 assert_equals(label: variant, expected: std::string(""), actual: m.content);
331 assert_equals(label: variant, expected: std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), actual: m.reasoning_content);
332 }
333
334 // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
335 // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
336 // to make tool calls in reasoning content according to the model card, but it does sometimes, so
337 // add the reasoning content as regular content and parse the tool calls.
338 {
339 common_chat_syntax syntax = {
340 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
341 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
342 /* .reasoning_in_content = */ false,
343 /* .thinking_forced_open = */ true,
344 /* .parse_tool_calls = */ true,
345 };
346 const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
347 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
348 auto m = common_chat_parse(input: in, is_partial: false, syntax);
349 assert_equals(label: variant, expected: std::string("REASONING"), actual: m.content);
350 assert_equals(label: variant, expected: std::string(""), actual: m.reasoning_content);
351 assert_equals<std::size_t>(label: variant, expected: 1, actual: m.tool_calls.size());
352 assert_equals(label: variant, expected: std::string("get_time"), actual: m.tool_calls[0].name);
353 assert_equals(label: variant, expected: std::string("{\"city\":\"Tokyo\"}"), actual: m.tool_calls[0].arguments);
354 }
355
356 // variant: thinking forced open + tool call in reasoning content + no closing think + partial
357 {
358 common_chat_syntax syntax = {
359 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
360 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
361 /* .reasoning_in_content = */ false,
362 /* .thinking_forced_open = */ true,
363 /* .parse_tool_calls = */ true,
364 };
365 const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
366 const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
367 auto m = common_chat_parse(input: in, /* is_partial= */ true, syntax);
368 assert_equals(label: variant, expected: std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), actual: m.reasoning_content);
369 assert_equals(label: variant, expected: std::string(""), actual: m.content);
370 assert_equals<std::size_t>(label: variant, expected: 0, actual: m.tool_calls.size());
371 }
372
373 // variant: thinking not forced open + reasoning + regular content + no tool calls
374 {
375 common_chat_syntax syntax = {
376 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
377 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
378 /* .reasoning_in_content = */ false,
379 /* .thinking_forced_open = */ true,
380 /* .parse_tool_calls = */ true,
381 };
382 const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
383 const std::string in = "REASONING</think>CONTENT";
384 auto m = common_chat_parse(input: in, is_partial: false, syntax);
385 assert_equals<std::size_t>(label: variant, expected: 0, actual: m.tool_calls.size());
386 assert_equals(label: variant, expected: std::string("CONTENT"), actual: m.content);
387 assert_equals(label: variant, expected: std::string("REASONING"), actual: m.reasoning_content);
388 }
389 // variant: thinking not forced open + missing reasoning + no tool calls
390 {
391 common_chat_syntax syntax = {
392 /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
393 /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
394 /* .reasoning_in_content = */ false,
395 /* .thinking_forced_open = */ false,
396 /* .parse_tool_calls = */ true,
397 };
398 const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
399 const std::string in = "CONTENT";
400 auto m = common_chat_parse(input: in, is_partial: false, syntax);
401 assert_equals<std::size_t>(label: variant, expected: 0, actual: m.tool_calls.size());
402 assert_equals(label: variant, expected: std::string("CONTENT"), actual: m.content);
403 assert_equals(label: variant, expected: std::string(""), actual: m.reasoning_content);
404 }
405}
406
407static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
408 common_chat_msg_parser builder(input, parse_as_partial, {});
409 auto js = builder.try_consume_json_with_dumped_args(args_paths: {{"args"}}, content_paths: {});
410 assert_equals(expected: true, actual: js.has_value());
411 assert_equals(expected: is_partial, actual: js->is_partial);
412 assert_equals(expected, actual: js->value.dump());
413}
414
415static void test_json_with_dumped_args_no_args() {
416 // Normal JSON, nothing to heal, nothing to dump
417 test(input: "{\"name\": \"python\"}", is_partial: false, args_paths: {}, content_paths: {}, expected: "{\"name\":\"python\"}");
418 // Full json is args
419 test(input: "{\"name\": \"python\"}", is_partial: false, args_paths: {{}}, content_paths: {}, expected: "{\"name\":\"python\"}");
420
421 // If the arguments are further down, don't heal partial content.
422 for (const auto & src : barely_healable_jsons) {
423 test(input: src, is_partial: true, args_paths: {{"arguments"}}, content_paths: {}, expected: "{}");
424 }
425 // But heal content that isn't partial.
426 test(input: "{\"name\": \"python\"", is_partial: true, args_paths: {{"arguments"}}, content_paths: {}, expected: "{\"name\":\"python\"}");
427}
428
429static void test_json_with_dumped_args() {
430
431 // Partial content.
432 test(input: "{\"content\": \"t", is_partial: true, args_paths: {}, content_paths: {{"content"}}, expected: "{\"content\":\"t\"}");
433 test(input: "{\"content\": \"", is_partial: true, args_paths: {}, content_paths: {{"content"}}, expected: "{\"content\":\"\"}");
434 test(input: "{\"content\": ", is_partial: true, args_paths: {}, content_paths: {{"content"}}, expected: "{}");
435
436 // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
437 test(input: "{\"name\": \"python", is_partial: true, args_paths: {{}}, content_paths: {}, expected: "{\"name\":\"python");
438 for (const auto & src : barely_healable_jsons) {
439 test(input: src, is_partial: true, args_paths: {{}}, content_paths: {}, expected: src);
440 }
441
442 // Full JSON w/ args
443 for (auto parse_as_partial : {true, false}) {
444 test_with_args(
445 input: R"({"name": "python", "args": {"arg1": 1}})",
446 expected: R"({"name":"python","args":"{\"arg1\":1}"})",
447 parse_as_partial,
448 /* is_partial= */ false
449 );
450 }
451
452 // Partial JSON w/ partial args
453 test_with_args(
454 input: R"({"foo": "bar", "args": {")",
455 expected: R"({"foo":"bar","args":"{\""})"
456 );
457 // Partial args broken in object key
458 test_with_args(
459 input: R"({"foo": "bar", "args": {"ar)",
460 expected: R"({"foo":"bar","args":"{\"ar"})"
461 );
462 // Partial args broken after object key
463 test_with_args(
464 input: R"({"foo": "bar", "args": {"arg1")",
465 expected: R"({"foo":"bar","args":"{\"arg1\""})"
466 );
467 // Partial args broken before object value
468 test_with_args(
469 input: R"({"foo": "bar", "args": {"arg1":)",
470 expected: R"({"foo":"bar","args":"{\"arg1\":"})"
471 );
472 // Partial args broken before object value (space)
473 test_with_args(
474 input: R"({"foo": "bar", "args": {"arg1": )",
475 expected: R"({"foo":"bar","args":"{\"arg1\":"})"
476 );
477 // Partial args broken in object value that may not be complete (int)
478 test_with_args(
479 input: R"({"foo": "bar", "args": {"arg1": 1)",
480 expected: R"({"foo":"bar","args":"{\"arg1\":"})"
481 );
482 // Partial args broken in object value that is complete (int)
483 test_with_args(
484 input: R"({"foo": "bar", "args": {"arg1": 1 )",
485 expected: R"({"foo":"bar","args":"{\"arg1\":1"})"
486 );
487 // Partial args broken in object value that is incomplete (string)
488 test_with_args(
489 input: R"({"foo": "bar", "args": {"arg1": ")",
490 expected: R"({"foo":"bar","args":"{\"arg1\":\""})"
491 );
492 // Partial args broken in object value that is complete (string)
493 test_with_args(
494 input: R"({"foo": "bar", "args": {"arg1": "1")",
495 expected: R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
496 );
497 // Partial args broken on array opening
498 test_with_args(
499 input: R"({"foo": "bar", "args": [)",
500 expected: R"({"foo":"bar","args":"["})"
501 );
502 // Partial args broken on array value that is incomplete (int)
503 test_with_args(
504 input: R"({"foo": "bar", "args": [1)",
505 expected: R"({"foo":"bar","args":"["})"
506 );
507 // Partial args broken on array value that is complete (int)
508 test_with_args(
509 input: R"({"foo": "bar", "args": [1 )",
510 expected: R"({"foo":"bar","args":"[1"})"
511 );
512 // Partial args broken on array value that is complete (string)
513 test_with_args(
514 input: R"({"foo": "bar", "args": ["1")",
515 expected: R"({"foo":"bar","args":"[\"1\""})"
516 );
517 // Partial args broken after array value
518 test_with_args(
519 input: R"({"foo": "bar", "args": [1,)",
520 expected: R"({"foo":"bar","args":"[1,"})"
521 );
522 // Partial args broken on nested array
523 test_with_args(
524 input: R"({"foo": "bar", "args": {"arg1": [)",
525 expected: R"({"foo":"bar","args":"{\"arg1\":["})"
526 );
527
528 // Unicode tests
529 test_with_args(
530 input: R"({"foo": "bar", "args": {"arg1": "\u)",
531 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
532 );
533 test_with_args(
534 input: R"({"foo": "bar", "args": {"arg1": "\u0)",
535 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
536 );
537 test_with_args(
538 input: R"({"foo": "bar", "args": {"arg1": "\u00)",
539 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
540 );
541 test_with_args(
542 input: R"({"foo": "bar", "args": {"arg1": "\u000)",
543 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
544 );
545 test_with_args(
546 input: R"({"foo": "bar", "args": {"arg1": "\u0000)",
547 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
548 );
549 test_with_args(
550 input: R"({"foo": "bar", "args": {"arg1": "\ud8)",
551 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
552 );
553 test_with_args(
554 input: R"({"foo": "bar", "args": {"arg1": "\ud80)",
555 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
556 );
557 test_with_args(
558 input: R"({"foo": "bar", "args": {"arg1": "\ud800)",
559 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
560 );
561 test_with_args(
562 input: R"({"foo": "bar", "args": {"arg1": "\ud800\)",
563 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
564 );
565 test_with_args(
566 input: R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
567 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
568 );
569 test_with_args(
570 input: R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
571 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
572 );
573 test_with_args(
574 input: R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
575 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
576 );
577 test_with_args(
578 input: R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
579 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
580 );
581 test_with_args(
582 input: R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
583 expected: R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
584 );
585}
586
587static void test_positions() {
588 {
589 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
590 assert_equals<size_t>(expected: 0, actual: builder.pos());
591 assert_throws(fn: [&]() { builder.move_to(pos: 100); });
592 assert_equals<size_t>(expected: 0, actual: builder.pos());
593 assert_throws(fn: [&]() { builder.move_back(n: 1); });
594 assert_equals<size_t>(expected: 0, actual: builder.pos());
595
596 builder.move_to(pos: 8);
597 assert_equals<size_t>(expected: 8, actual: builder.pos());
598 builder.move_back(n: 1);
599 assert_equals<size_t>(expected: 7, actual: builder.pos());
600 assert_equals(expected: "world!", actual: builder.consume_rest());
601
602 builder.move_to(pos: 0);
603 assert_equals<size_t>(expected: 0, actual: builder.pos());
604
605 assert_throws(fn: [&]() { builder.finish(); });
606 assert_equals<size_t>(expected: 0, actual: builder.pos());
607
608 builder.move_to(pos: builder.input().size());
609 builder.finish();
610 }
611 {
612 common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
613
614 builder.move_to(pos: builder.input().size());
615 assert_equals<size_t>(expected: builder.input().size(), actual: builder.pos());
616 builder.finish();
617 }
618}
619
620int main() {
621 test_positions();
622 test_json_with_dumped_args_no_args();
623 test_json_with_dumped_args();
624 test_reasoning();
625 test_regex();
626 test_deepseek_v3_1_tool_calls();
627 std::cout << "All tests passed!\n";
628 return 0;
629}
630