1#ifdef NDEBUG
2#undef NDEBUG
3#endif
4
5#include "json-schema-to-grammar.h"
6
7#include "../src/unicode.h"
8#include "../src/llama-grammar.h"
9
10#include <nlohmann/json.hpp>
11
12#include <cassert>
13#include <string>
14#include <vector>
15
16using json = nlohmann::ordered_json;
17
18static llama_grammar * build_grammar(const std::string & grammar_str) {
19 return llama_grammar_init_impl(vocab: nullptr, grammar_str: grammar_str.c_str(), grammar_root: "root", lazy: false, trigger_patterns: nullptr, num_trigger_patterns: 0, trigger_tokens: nullptr, num_trigger_tokens: 0);
20}
21
22static bool test_build_grammar_fails(const std::string & grammar_str) {
23 fprintf(stderr, format: "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
24 bool grammar_fails = false;
25 llama_grammar * grammar = build_grammar(grammar_str);
26 if (grammar != nullptr) {
27 fprintf(stderr, format: " ❌ Expected build failure, but succeeded\n");
28 } else {
29 grammar_fails = true;
30 fprintf(stdout, format: " ✅︎\n");
31 }
32 return grammar_fails;
33}
34
35static bool match_string(const std::string & input, llama_grammar * grammar) {
36 const auto cpts = unicode_cpts_from_utf8(utf8: input);
37
38 auto & stacks_cur = llama_grammar_get_stacks(grammar);
39
40 for (const auto & cpt : cpts) {
41 llama_grammar_accept(grammar, chr: cpt);
42
43 if (stacks_cur.empty()) {
44 // no stacks means that the grammar failed to match at this point
45 return false;
46 }
47 }
48
49 for (const auto & stack : stacks_cur) {
50 if (stack.empty()) {
51 // An empty stack means that the grammar has been completed
52 return true;
53 }
54 }
55
56 return false;
57}
58
59static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
60 fprintf(stderr, format: "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
61 fflush(stderr);
62
63 auto * grammar = build_grammar(grammar_str);
64
65 // Save the original grammar stacks so that we can reset after every new string we want to test
66 const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
67
68 llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
69
70 fprintf(stderr, format: " 🔵 Valid strings:\n");
71
72 // Passing strings
73 for (const auto & test_string : passing_strings) {
74 fprintf(stderr, format: " \"%s\" ", test_string.c_str());
75 fflush(stderr);
76
77 bool matched = match_string(input: test_string, grammar);
78
79 if (!matched) {
80 fprintf(stderr, format: "❌ (failed to match)\n");
81
82 // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
83 // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
84 FILE* grammar_file = fopen(filename: "test-grammar-integration.grammar.gbnf", modes: "w");
85 if (grammar_file) {
86 fprintf(stream: grammar_file, format: "%s", grammar_str.c_str());
87 fclose(stream: grammar_file);
88 }
89
90 // DEBUG: Write the test string to test-grammar-integration.string.txt
91 FILE* string_file = fopen(filename: "test-grammar-integration.string.txt", modes: "w");
92 if (string_file) {
93 fprintf(stream: string_file, format: "%s", test_string.c_str());
94 fclose(stream: string_file);
95 }
96
97 fprintf(stderr, format: "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
98 } else {
99 fprintf(stdout, format: "✅︎\n");
100 }
101
102 assert(matched);
103
104 // Reset the grammar stacks
105 stacks_cur = stacks_org;
106 }
107
108 fprintf(stderr, format: " 🟠 Invalid strings:\n");
109
110 // Failing strings
111 for (const auto & test_string : failing_strings) {
112 fprintf(stderr, format: " \"%s\" ", test_string.c_str());
113 fflush(stderr);
114
115 bool matched = match_string(input: test_string, grammar);
116
117 if (matched) {
118 fprintf(stderr, format: "❌ (incorrectly matched)\n");
119 } else {
120 fprintf(stdout, format: "✅︎\n");
121 }
122 assert(!matched);
123
124 // Reset the grammar stacks
125 stacks_cur = stacks_org;
126 }
127
128 // Clean up allocated memory
129 llama_grammar_free_impl(grammar);
130}
131static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
132 test(test_desc: test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
133}
134static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
135 test(test_desc: test_desc + ". Schema: " + schema_str, grammar_str: json_schema_to_grammar(schema: json::parse(i: schema_str), force_gbnf: true), passing_strings, failing_strings);
136}
137
138static void test_simple_grammar() {
139 test_schema(
140 test_desc: "min 0",
141 schema_str: R"""({
142 "type": "integer",
143 "minimum": 0
144 })""",
145 // Passing strings
146 passing_strings: {
147 "0",
148 "10",
149 "12",
150 "10000",
151 },
152 // Failing strings
153 failing_strings: {
154 "-1",
155 "-10",
156 "-10000",
157 "-100000000000000000000000000000000",
158 "100000000000000000000000000000000",
159 "00",
160 "01",
161 "-0",
162 }
163 );
164 test_schema(
165 test_desc: "min 2",
166 // Schema
167 schema_str: R"""({
168 "type": "integer",
169 "minimum": 2
170 })""",
171 // Passing strings
172 passing_strings: {
173 "2",
174 "3",
175 "4",
176 "10",
177 "20",
178 "1234567890000000",
179 },
180 // Failing strings
181 failing_strings: {
182 "0",
183 "1",
184 "-1",
185 "-100",
186 "0",
187 "1",
188 "01",
189 "02",
190 "12345678900000000",
191 }
192 );
193 test_schema(
194 test_desc: "min 456",
195 schema_str: R"""({
196 "type": "integer",
197 "minimum": 456
198 })""",
199 // Passing strings
200 passing_strings: {
201 "456",
202 "4560",
203 "457",
204 "460",
205 "500",
206 },
207 // Failing strings
208 failing_strings: {
209 "455",
210 "356",
211 "50",
212 "050",
213 "-1",
214 "-456",
215 }
216 );
217 test_schema(
218 test_desc: "min -123",
219 schema_str: R"""({
220 "type": "integer",
221 "minimum": -123
222 })""",
223 // Passing strings
224 passing_strings: {
225 "-123",
226 "-122",
227 "-11",
228 "-1",
229 "0",
230 "1",
231 "123",
232 "1234",
233 "2345",
234 },
235 // Failing strings
236 failing_strings: {
237 "-1234",
238 "-124",
239 }
240 );
241
242 test_schema(
243 test_desc: "max 9999",
244 // Schema
245 schema_str: R"""({
246 "type": "integer",
247 "maximum": 9999
248 })""",
249 // Passing strings
250 passing_strings: {
251 "-99999",
252 "0",
253 "9999",
254 },
255 // Failing strings
256 failing_strings: {
257 "10000",
258 "99991",
259 }
260 );
261 test_schema(
262 test_desc: "max -9999",
263 // Schema
264 schema_str: R"""({
265 "type": "integer",
266 "maximum": -9999
267 })""",
268 // Passing strings
269 passing_strings: {
270 "-10000",
271 "-9999",
272 },
273 // Failing strings
274 failing_strings: {
275 "-9998",
276 "0",
277 "9999",
278 }
279 );
280 test_schema(
281 test_desc: "min 5 max 30",
282 // Schema
283 schema_str: R"""({
284 "type": "integer",
285 "minimum": 5,
286 "maximum": 30
287 })""",
288 // Passing strings
289 passing_strings: {
290 "5",
291 "10",
292 "30",
293 },
294 // Failing strings
295 failing_strings: {
296 "05",
297 "4",
298 "-1",
299 "31",
300 "123",
301 "0123",
302 }
303 );
304 test_schema(
305 test_desc: "min 1 max 900719925474091",
306 // Schema
307 schema_str: R"""({
308 "type": "integer",
309 "exclusiveMinimum": 0,
310 "maximum": 900719925474091
311 })""",
312 // Passing strings
313 passing_strings: {
314 "1",
315 "2",
316 "10",
317 "900719925474090",
318 "900719925474091",
319 },
320 // Failing strings
321 failing_strings: {
322 "0",
323 "01",
324 "900719925474092",
325 "9007199254740910",
326 }
327 );
328 test_schema(
329 test_desc: "min -1 max 1",
330 schema_str: R"""({
331 "type": "integer",
332 "minimum": -1,
333 "maximum": 1
334 })""",
335 // Passing strings
336 passing_strings: {
337 "-1",
338 "0",
339 "1",
340 },
341 // Failing strings
342 failing_strings: {
343 "-11",
344 "-10",
345 "-2",
346 "2",
347 "10",
348 "11",
349 }
350 );
351 test_schema(
352 test_desc: "min -123 max 42",
353 schema_str: R"""({
354 "type": "integer",
355 "minimum": -123,
356 "maximum": 42
357 })""",
358 // Passing strings
359 passing_strings: {
360 "-123",
361 "-122",
362 "-13",
363 "-11",
364 "-2",
365 "-1",
366 "0",
367 "1",
368 "5",
369 "10",
370 "39",
371 "40",
372 "42",
373 },
374 // Failing strings
375 failing_strings: {
376 "-0123",
377 "-124",
378 "-1123",
379 "-200",
380 "43",
381 "123",
382 "0123",
383 }
384 );
385 test_schema(
386 test_desc: "exclusive min / max",
387 // Schema
388 schema_str: R"""({
389 "type": "integer",
390 "exclusiveMinimum": 0,
391 "exclusiveMaximum": 10000
392 })""",
393 // Passing strings
394 passing_strings: {
395 "1",
396 "9999",
397 },
398 // Failing strings
399 failing_strings: {
400 "0",
401 "01",
402 "10000",
403 "99999",
404 }
405 );
406
407 // Test case for a simple grammar
408 test_grammar(
409 test_desc: "simple grammar",
410 grammar_str: R"""(
411 root ::= expr
412 expr ::= term ("+" term)*
413 term ::= number
414 number ::= [0-9]+)""",
415 // Passing strings
416 passing_strings: {
417 "42",
418 "1+2+3+4+5",
419 "123+456",
420 },
421 // Failing strings
422 failing_strings: {
423 "+",
424 "/ 3",
425 "1+2+3+4+5+",
426 "12a45",
427 }
428 );
429}
430
431static void test_complex_grammar() {
432 // Test case for a more complex grammar, with both failure strings and success strings
433 test_grammar(
434 test_desc: "medium complexity grammar",
435 // Grammar
436 grammar_str: R"""(
437 root ::= expression
438 expression ::= term ws (("+"|"-") ws term)*
439 term ::= factor ws (("*"|"/") ws factor)*
440 factor ::= number | variable | "(" expression ")" | function-call
441 number ::= [0-9]+
442 variable ::= [a-zA-Z_][a-zA-Z0-9_]*
443 function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
444 ws ::= [ \t\n\r]?)""",
445 // Passing strings
446 passing_strings: {
447 "42",
448 "1*2*3*4*5",
449 "x",
450 "x+10",
451 "x1+y2",
452 "(a+b)*(c-d)",
453 "func()",
454 "func(x,y+2)",
455 "a*(b+c)-d/e",
456 "f(g(x),h(y,z))",
457 "x + 10",
458 "x1 + y2",
459 "(a + b) * (c - d)",
460 "func()",
461 "func(x, y + 2)",
462 "a * (b + c) - d / e",
463 "f(g(x), h(y, z))",
464 "123+456",
465 "123*456*789-123/456+789*123",
466 "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
467 },
468 // Failing strings
469 failing_strings: {
470 "+",
471 "/ 3x",
472 "x + + y",
473 "a * / b",
474 "func(,)",
475 "func(x y)",
476 "(a + b",
477 "x + y)",
478 "a + b * (c - d",
479 "42 +",
480 "x +",
481 "x + 10 +",
482 "(a + b) * (c - d",
483 "func(",
484 "func(x, y + 2",
485 "a * (b + c) - d /",
486 "f(g(x), h(y, z)",
487 "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
488 }
489 );
490}
491
492static void test_special_chars() {
493 // A collection of tests to exercise special characters such as "."
494 test_grammar(
495 test_desc: "special characters",
496 // Grammar
497 grammar_str: R"""(
498 root ::= ... "abc" ...
499 )""",
500 // Passing strings
501 passing_strings: {
502 "abcabcabc",
503 "aaaabcccc",
504 // NOTE: Also ensures that multi-byte characters still count as a single character
505 "🔵🟠✅abc❌🟠🔵"
506 },
507 // Failing strings
508 failing_strings: {
509 "aaabcccc",
510 "aaaaabcccc",
511 "aaaabccc",
512 "aaaabccccc",
513 "🔵🟠✅❌abc❌✅🟠🔵",
514 "🔵🟠abc🟠🔵"
515 }
516 );
517}
518
519static void test_quantifiers() {
520 // A collection of tests to exercise * + and ? quantifiers
521
522 test_grammar(
523 test_desc: "* quantifier",
524 // Grammar
525 grammar_str: R"""(root ::= "a"*)""",
526 // Passing strings
527 passing_strings: {
528 "",
529 "a",
530 "aaaaa",
531 "aaaaaaaaaaaaaaaaaa",
532 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
533 },
534 // Failing strings
535 failing_strings: {
536 "b",
537 "ab",
538 "aab",
539 "ba",
540 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
541 }
542 );
543 test_grammar(
544 test_desc: "+ quantifier",
545 // Grammar
546 grammar_str: R"""(root ::= "a"+)""",
547 // Passing strings
548 passing_strings: {
549 "a",
550 "aaaaa",
551 "aaaaaaaaaaaaaaaaaa",
552 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
553 },
554 // Failing strings
555 failing_strings: {
556 "",
557 "b",
558 "ab",
559 "aab",
560 "ba",
561 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
562 }
563 );
564 test_grammar(
565 test_desc: "? quantifier",
566 // Grammar
567 grammar_str: R"""(root ::= "a"?)""",
568 // Passing strings
569 passing_strings: {
570 "",
571 "a"
572 },
573 // Failing strings
574 failing_strings: {
575 "b",
576 "ab",
577 "aa",
578 "ba",
579 }
580 );
581 test_grammar(
582 test_desc: "mixed quantifiers",
583 // Grammar
584 grammar_str: R"""(
585 root ::= cons+ vowel* cons? (vowel cons)*
586 vowel ::= [aeiouy]
587 cons ::= [bcdfghjklmnpqrstvwxyz]
588 )""",
589 // Passing strings
590 passing_strings: {
591 "yes",
592 "no",
593 "noyes",
594 "crwth",
595 "four",
596 "bryyyy",
597 },
598 // Failing strings
599 failing_strings: {
600 "yess",
601 "yesno",
602 "forty",
603 "catyyy",
604 }
605 );
606 test_grammar(
607 test_desc: "simple exact repetition",
608 // Grammar
609 grammar_str: R"""(
610 root ::= [ab]{4}
611 )""",
612 // Passing strings
613 passing_strings: {
614 "aaaa",
615 "bbbb",
616 "abab",
617 },
618 // Failing strings
619 failing_strings: {
620 "a",
621 "b",
622 "aaaaa",
623 }
624 );
625 test_grammar(
626 test_desc: "simple min repetition",
627 // Grammar
628 grammar_str: R"""(
629 root ::= [ab]{4,}
630 )""",
631 // Passing strings
632 passing_strings: {
633 "aaaa",
634 "aaaaab",
635 "bbbb",
636 "ababab",
637 },
638 // Failing strings
639 failing_strings: {
640 "",
641 "aba",
642 }
643 );
644 test_grammar(
645 test_desc: "simple max repetition",
646 // Grammar
647 grammar_str: R"""(
648 root ::= [ab]{0,4}
649 )""",
650 // Passing strings
651 passing_strings: {
652 "",
653 "a",
654 "aa",
655 "aaa",
656 "aaab",
657 },
658 // Failing strings
659 failing_strings: {
660 "aaaaa",
661 }
662 );
663 test_grammar(
664 test_desc: "min / max repetition",
665 // Grammar
666 grammar_str: R"""(
667 root ::= ("0x" [A-F0-9]{2} " "?){3,5}
668 )""",
669 // Passing strings
670 passing_strings: {
671 "0xFF 0x12 0xAB",
672 "0xFF 0x12 0xAB 0x00 0x00",
673 },
674 // Failing strings
675 failing_strings: {
676 "",
677 "0xFF",
678 "0xFF 0x12",
679 "0xFF 0x12 0xAB 0x00 0x00 0x00",
680 }
681 );
682}
683
684static void test_failure_missing_root() {
685 fprintf(stderr, format: "⚫ Testing missing root node:\n");
686 // Test case for a grammar that is missing a root rule
687 const std::string grammar_str = R"""(
688 rot ::= expr
689 expr ::= term ("+" term)*
690 term ::= number
691 number ::= [0-9]+)""";
692
693 llama_grammar_parser parsed_grammar;
694 parsed_grammar.parse(src: grammar_str.c_str());
695
696 // Ensure we parsed correctly
697 assert(!parsed_grammar.rules.empty());
698
699 // Ensure we do NOT have a root node
700 assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
701 fprintf(stderr, format: " ✅︎ Passed\n");
702}
703
704static void test_failure_missing_reference() {
705 fprintf(stderr, format: "⚫ Testing missing reference node:\n");
706
707 // Test case for a grammar that is missing a referenced rule
708 const std::string grammar_str =
709 R"""(root ::= expr
710 expr ::= term ("+" term)*
711 term ::= numero
712 number ::= [0-9]+)""";
713
714 fprintf(stderr, format: " Expected error: ");
715
716 llama_grammar_parser parsed_grammar;
717 parsed_grammar.parse(src: grammar_str.c_str());
718
719 // Ensure we did NOT parsed correctly
720 assert(parsed_grammar.rules.empty());
721
722 fprintf(stderr, format: " End of expected error.\n");
723 fprintf(stderr, format: " ✅︎ Passed\n");
724}
725
726static void test_failure_left_recursion() {
727 fprintf(stderr, format: "⚫ Testing left recursion detection:\n");
728
729 // Test simple left recursion detection
730 const std::string simple_str = R"""(root ::= "a" | root "a")""";
731 assert(test_build_grammar_fails(simple_str));
732
733 // Test more complicated left recursion detection
734 const std::string medium_str = R"""(
735 root ::= asdf
736 asdf ::= "a" | asdf "a"
737 )""";
738 assert(test_build_grammar_fails(medium_str));
739
740 // Test even more complicated left recursion detection
741 const std::string hard_str = R"""(
742 root ::= asdf
743 asdf ::= "a" | foo "b"
744 foo ::= "c" | asdf "d" | "e")""";
745 assert(test_build_grammar_fails(hard_str));
746
747 // Test yet even more complicated left recursion detection
748 const std::string hardest_str = R"""(
749 root ::= asdf
750 asdf ::= "a" | foo "b"
751 foo ::= "c" | empty asdf "d" | "e"
752 empty ::= "blah" | )""";
753 assert(test_build_grammar_fails(hardest_str));
754
755 fprintf(stderr, format: " ✅︎ Passed\n");
756}
757
758static void test_json_schema() {
759 // Note that this is similar to the regular grammar tests,
760 // but we convert each json schema to a grammar before parsing.
761 // Otherwise, this test structure is the same.
762
763 test_schema(
764 test_desc: "empty schema (object)",
765 // Schema
766 schema_str: R"""(
767 {}
768 )""",
769 // Passing strings
770 passing_strings: {
771 R"""({})""",
772 R"""({"foo": "bar"})""",
773 },
774 // Failing strings
775 failing_strings: {
776 "",
777 "[]",
778 "null",
779 R"""("")""",
780 "true",
781 }
782 );
783
784 test_schema(
785 test_desc: "exotic formats (list)",
786 // Schema
787 schema_str: R"""({
788 "items": [
789 { "format": "date" },
790 { "format": "uuid" },
791 { "format": "time" },
792 { "format": "date-time" }
793 ]
794 })""",
795 // Passing strings
796 passing_strings: {
797 // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
798 // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
799 R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
800 //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
801 //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
802 },
803 // Failing strings
804 failing_strings: {
805 R"""(["foo", "bar"])""",
806 R"""(["12345678-1234-1234-1234-1234567890ab"])""",
807 }
808 );
809
810 test_schema(
811 test_desc: "string",
812 // Schema
813 schema_str: R"""({
814 "type": "string"
815 })""",
816 // Passing strings
817 passing_strings: {
818 R"""("foo")""",
819 R"""("bar")""",
820 R"""("")""",
821 },
822 // Failing strings
823 failing_strings: {
824 R"""({})""",
825 R"""("foo": "bar")""",
826 }
827 );
828
829 test_schema(
830 test_desc: "string w/ min length 1",
831 // Schema
832 schema_str: R"""({
833 "type": "string",
834 "minLength": 1
835 })""",
836 // Passing strings
837 passing_strings: {
838 R"""("foo")""",
839 R"""("bar")""",
840 },
841 // Failing strings
842 failing_strings: {
843 R"""("")""",
844 R"""({})""",
845 R"""("foo": "bar")""",
846 }
847 );
848
849 test_schema(
850 test_desc: "string w/ min length 3",
851 // Schema
852 schema_str: R"""({
853 "type": "string",
854 "minLength": 3
855 })""",
856 // Passing strings
857 passing_strings: {
858 R"""("foo")""",
859 R"""("bar")""",
860 R"""("foobar")""",
861 },
862 // Failing strings
863 failing_strings: {
864 R"""("")""",
865 R"""("f")""",
866 R"""("fo")""",
867 }
868 );
869
870 test_schema(
871 test_desc: "string w/ max length",
872 // Schema
873 schema_str: R"""({
874 "type": "string",
875 "maxLength": 3
876 })""",
877 // Passing strings
878 passing_strings: {
879 R"""("foo")""",
880 R"""("bar")""",
881 R"""("")""",
882 R"""("f")""",
883 R"""("fo")""",
884 },
885 // Failing strings
886 failing_strings: {
887 R"""("foobar")""",
888 }
889 );
890
891 test_schema(
892 test_desc: "string w/ min & max length",
893 // Schema
894 schema_str: R"""({
895 "type": "string",
896 "minLength": 1,
897 "maxLength": 4
898 })""",
899 // Passing strings
900 passing_strings: {
901 R"""("foo")""",
902 R"""("bar")""",
903 R"""("f")""",
904 R"""("barf")""",
905 },
906 // Failing strings
907 failing_strings: {
908 R"""("")""",
909 R"""("barfo")""",
910 R"""("foobar")""",
911 }
912 );
913
914 test_schema(
915 test_desc: "boolean",
916 // Schema
917 schema_str: R"""({
918 "type": "boolean"
919 })""",
920 // Passing strings
921 passing_strings: {
922 "true",
923 "false",
924 },
925 // Failing strings
926 failing_strings: {
927 R"""("")""",
928 R"""("true")""",
929 R"""(True)""",
930 R"""(FALSE)""",
931 }
932 );
933
934 test_schema(
935 test_desc: "integer",
936 // Schema
937 schema_str: R"""({
938 "type": "integer"
939 })""",
940 // Passing strings
941 passing_strings: {
942 R"""(0)""",
943 R"""(12345)""",
944 R"""(1234567890123456)""",
945 },
946 // Failing strings
947 failing_strings: {
948 R"""()""",
949 R"""(01)""",
950 R"""(007)""",
951 R"""(12345678901234567 )""",
952 }
953 );
954
955 test_schema(
956 test_desc: "string const",
957 // Schema
958 schema_str: R"""({
959 "const": "foo"
960 })""",
961 // Passing strings
962 passing_strings: {
963 R"""("foo")""",
964 },
965 // Failing strings
966 failing_strings: {
967 R"""(foo)""",
968 R"""("bar")""",
969 }
970 );
971
972 test_schema(
973 test_desc: "non-string const",
974 // Schema
975 schema_str: R"""({
976 "const": true
977 })""",
978 // Passing strings
979 passing_strings: {
980 R"""(true)""",
981 },
982 // Failing strings
983 failing_strings: {
984 R"""()""",
985 R"""(foo)""",
986 R"""("true")""",
987 }
988 );
989
990 test_schema(
991 test_desc: "non-string const",
992 // Schema
993 schema_str: R"""({
994 "enum": ["red", "amber", "green", null, 42, ["foo"]]
995 })""",
996 // Passing strings
997 passing_strings: {
998 R"""("red")""",
999 R"""(null)""",
1000 R"""(42)""",
1001 R"""(["foo"])""",
1002 },
1003 // Failing strings
1004 failing_strings: {
1005 R"""()""",
1006 R"""(420)""",
1007 R"""(true)""",
1008 R"""(foo)""",
1009 }
1010 );
1011
1012 test_schema(
1013 test_desc: "simple pattern",
1014 // Schema
1015 schema_str: R"""({
1016 "pattern": "^[a-zA-Z0-9_-]*$"
1017 })""",
1018 // Passing strings
1019 passing_strings: {
1020 R"""("")""",
1021 R"""("He_llo-12")""",
1022 },
1023 // Failing strings
1024 failing_strings: {
1025 R"""("!")""",
1026 R"""("Hello World")""",
1027 }
1028 );
1029
1030 test_schema(
1031 test_desc: "pattern with escapes",
1032 // Schema
1033 schema_str: R"""({
1034 "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
1035 })""",
1036 // Passing strings
1037 passing_strings: {
1038 R"""("a^$.[]()|{}*+?b")""",
1039 },
1040 // Failing strings
1041 failing_strings: {
1042 R"""("ab")""",
1043 }
1044 );
1045
1046 test_schema(
1047 test_desc: "",
1048 // Schema
1049 schema_str: R"""(
1050 {
1051 "type": ["array", "null"],
1052 "items": { "type": "string" }
1053 }
1054 )""",
1055 // Passing strings
1056 passing_strings: {
1057 "null",
1058 "[]",
1059 "[\"123\"]",
1060 "[\"foo\", \"bar\"]",
1061 },
1062 // Failing strings
1063 failing_strings: {
1064 "",
1065 "[123]",
1066 "\"foo\"",
1067 "[\"foo\", 42]",
1068 }
1069 );
1070
1071 test_schema(
1072 test_desc: "min+max items",
1073 // Schema
1074 schema_str: R"""({
1075 "items": {
1076 "type": ["number", "integer"]
1077 },
1078 "minItems": 3,
1079 "maxItems": 5
1080 })""",
1081 // Passing strings
1082 passing_strings: {
1083 R"""([1, 2, 3])""",
1084 R"""([1, 2, 3, 4])""",
1085 R"""([1, 2, 3, 4, 5])""",
1086 },
1087 // Failing strings
1088 failing_strings: {
1089 R"""([1, 2])""",
1090 R"""([1, 2, 3, 4, 5, 6])""",
1091 R"""(1)""",
1092 }
1093 );
1094
1095 // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1096 test_schema(
1097 test_desc: "object properties",
1098 // Schema
1099 schema_str: R"""({
1100 "type": "object",
1101 "properties": {
1102 "number": { "type": "number" },
1103 "street_name": { "type": "string" },
1104 "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1105 }
1106 })""",
1107 // Passing strings
1108 passing_strings: {
1109 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1110 // "By default, leaving out properties is valid"
1111 R"""({ "street_name": "Pennsylvania" })""",
1112 R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1113 // "By extension, even an empty object is valid"
1114 R"""({})""",
1115 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1116 },
1117 // Failing strings
1118 failing_strings: {
1119 // Change datatype from number to string
1120 R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1121 // Reorder properties
1122 R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
1123 // Reorder properties
1124 R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1125 // "Additional properties default to false for generation, even though the spec says true.
1126 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1127
1128 }
1129 );
1130
1131 test_schema(
1132 test_desc: "additional properties can't override other properties",
1133 schema_str: R"""({
1134 "properties": {
1135 "a": {"type": "integer"},
1136 "b": {"type": "integer"}
1137 },
1138 "additionalProperties": true
1139 })""",
1140 // Passing strings
1141 passing_strings: {
1142 R"""({"a": 42})""",
1143 R"""({"c": ""})""",
1144 R"""({"a": 42, "c": ""})""",
1145 R"""({"a_": ""})""",
1146 },
1147 // Failing strings
1148 failing_strings: {
1149 R"""()""",
1150 R"""({"a": ""})""",
1151 R"""({"a": "", "b": ""})""",
1152 }
1153 );
1154
1155 // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1156 test_schema(
1157 test_desc: "object properties, additionalProperties: true",
1158 // Schema
1159 schema_str: R"""({
1160 "type": "object",
1161 "properties": {
1162 "number": { "type": "number" },
1163 "street_name": { "type": "string" },
1164 "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1165 },
1166 "additionalProperties": true
1167 })""",
1168 // Passing strings
1169 passing_strings: {
1170 // "By extension, even an empty object is valid"
1171 R"""({})""",
1172 R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
1173 // "By default, leaving out properties is valid"
1174 R"""({ "street_name": "Pennsylvania" })""",
1175 R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1176 // "By default, providing additional properties is valid"
1177 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1178 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1179 },
1180 // Failing strings
1181 failing_strings: {
1182 // Change datatype from number to string
1183 R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1184 // Reorder properties
1185 R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
1186 }
1187 );
1188
1189 // Additional properties: false
1190 test_schema(
1191 test_desc: "required + optional props each in original order",
1192 // Schema
1193 schema_str: R"""({
1194 "type": "object",
1195 "properties": {
1196 "number": { "type": "number" },
1197 "street_name": { "type": "string" },
1198 "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1199 },
1200 "additionalProperties": false
1201 })""",
1202 // Passing strings
1203 passing_strings: {
1204 R"""({ "street_name": "Pennsylvania" })""",
1205 R"""({ "number": 1600, "street_type":"Avenue"})""",
1206 R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1207 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1208 // Spaces are permitted around enum values
1209 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1210 },
1211 // Failing strings
1212 failing_strings: {
1213 // Reorder properties
1214 R"""({ "street_type": "Avenue", "number": 1600 })""",
1215 // Add "direction"
1216 R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
1217 }
1218 );
1219
1220 test_schema(
1221 test_desc: "required + optional props each in original order",
1222 // Schema
1223 schema_str: R"""({
1224 "properties": {
1225 "b": {"type": "string"},
1226 "a": {"type": "string"},
1227 "d": {"type": "string"},
1228 "c": {"type": "string"}
1229 },
1230 "required": ["a", "b"],
1231 "additionalProperties": false
1232 })""",
1233 // Passing strings
1234 passing_strings: {
1235 R"""({"b": "foo", "a": "bar"})""",
1236 R"""({"b":"foo","a":"bar","d":"qux"})""",
1237 R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
1238 },
1239 // Failing strings
1240 failing_strings: {
1241 R"""({"a": "foo", "b": "bar"})""",
1242 R"""({"b": "bar"})""",
1243 R"""({"a": "foo", "c": "baz"})""",
1244 R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
1245 }
1246 );
1247
1248 // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
1249 test_schema(
1250 test_desc: "required props",
1251 // Schema
1252 schema_str: R"""({
1253 "$schema": "https://json-schema.org/draft/2020-12/schema",
1254 "$id": "https://example.com/product.schema.json",
1255 "title": "Product",
1256 "description": "A product from Acme's catalog",
1257 "type": "object",
1258 "properties": {
1259 "productId": {
1260 "description": "The unique identifier for a product",
1261 "type": "integer"
1262 },
1263 "productName": {
1264 "description": "Name of the product",
1265 "type": "string"
1266 },
1267 "price": {
1268 "description": "The price of the product",
1269 "type": "number",
1270 "exclusiveMinimum": 0
1271 },
1272 "tags": {
1273 "description": "Tags for the product",
1274 "type": "array",
1275 "items": {
1276 "type": "string"
1277 },
1278 "minItems": 1,
1279 "uniqueItems": true
1280 },
1281 "dimensions": {
1282 "type": "object",
1283 "properties": {
1284 "length": {
1285 "type": "number"
1286 },
1287 "width": {
1288 "type": "number"
1289 },
1290 "height": {
1291 "type": "number"
1292 }
1293 },
1294 "required": [ "length", "width", "height" ]
1295 }
1296 },
1297 "required": [ "productId", "productName", "price" ]
1298 })""",
1299 // Passing strings
1300 passing_strings: {
1301 R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
1302 R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
1303 R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
1304 },
1305 // Failing strings
1306 failing_strings: {
1307 R"""({})""", // Missing all required properties
1308 R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
1309 // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
1310 // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
1311 // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
1312 R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
1313 R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
1314 R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
1315 R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
1316 // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
1317 // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
1318 }
1319 );
1320}
1321
1322int main() {
1323 fprintf(stdout, format: "Running grammar integration tests...\n");
1324 test_simple_grammar();
1325 test_complex_grammar();
1326 test_special_chars();
1327 test_quantifiers();
1328 test_failure_missing_root();
1329 test_failure_missing_reference();
1330 test_failure_left_recursion();
1331 test_json_schema();
1332 fprintf(stdout, format: "All tests passed.\n");
1333 return 0;
1334}
1335