| 1 | #include "common.h" |
| 2 | #include "json-partial.h" |
| 3 | #include <exception> |
| 4 | #include <iostream> |
| 5 | #include <stdexcept> |
| 6 | |
// Test helper: verifies that `actual` equals `expected`.
//
// T must support operator!= and streaming to std::ostream via operator<<.
// On mismatch, both values are written to stderr and std::runtime_error("Test failed")
// is thrown; on match, the function returns without any output.
template <class T> static void assert_equals(const T & expected, const T & actual) {
    if (expected != actual) {
        std::cerr << "Expected: " << expected << '\n';
        std::cerr << "Actual: " << actual << '\n';
        // One explicit flush so the diagnostics are out before the exception propagates.
        std::cerr << std::flush;
        throw std::runtime_error("Test failed");
    }
}
| 15 | |
| 16 | static void test_json_healing() { |
| 17 | auto parse = [](const std::string & str) { |
| 18 | std::cerr << "# Parsing: " << str << '\n'; |
| 19 | std::string::const_iterator it = str.begin(); |
| 20 | const auto end = str.end(); |
| 21 | common_json out; |
| 22 | std::string healing_marker = "$llama.cpp.json$" ; |
| 23 | if (common_json_parse(it, end, healing_marker, out)) { |
| 24 | auto dump = out.json.dump(); |
| 25 | std::cerr << "Parsed: " << dump << '\n'; |
| 26 | std::cerr << "Magic: " << out.healing_marker.json_dump_marker << '\n'; |
| 27 | std::string result; |
| 28 | if (!out.healing_marker.json_dump_marker.empty()) { |
| 29 | auto i = dump.find(str: out.healing_marker.json_dump_marker); |
| 30 | if (i == std::string::npos) { |
| 31 | throw std::runtime_error("Failed to find magic in dump " + dump + " (magic: " + out.healing_marker.json_dump_marker + ")" ); |
| 32 | } |
| 33 | result = dump.substr(pos: 0, n: i); |
| 34 | } else { |
| 35 | result = dump; |
| 36 | } |
| 37 | std::cerr << "Result: " << result << '\n'; |
| 38 | if (string_starts_with(str, prefix: result)) { |
| 39 | std::cerr << "Failure!\n" ; |
| 40 | } |
| 41 | // return dump; |
| 42 | } else { |
| 43 | throw std::runtime_error("Failed to parse: " + str); |
| 44 | } |
| 45 | |
| 46 | }; |
| 47 | auto parse_all = [&](const std::string & str) { |
| 48 | for (size_t i = 1; i < str.size(); i++) { |
| 49 | parse(str.substr(pos: 0, n: i)); |
| 50 | } |
| 51 | }; |
| 52 | parse_all("{\"a\": \"b\"}" ); |
| 53 | parse_all("{\"hey\": 1, \"ho\\\"ha\": [1]}" ); |
| 54 | |
| 55 | parse_all("[{\"a\": \"b\"}]" ); |
| 56 | |
| 57 | auto test = [&](const std::vector<std::string> & inputs, const std::string & expected, const std::string & expected_marker) { |
| 58 | for (const auto & input : inputs) { |
| 59 | common_json out; |
| 60 | assert_equals(expected: true, actual: common_json_parse(input, healing_marker: "$foo" , out)); |
| 61 | assert_equals<std::string>(expected, actual: out.json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true)); |
| 62 | assert_equals<std::string>(expected: expected_marker, actual: out.healing_marker.json_dump_marker); |
| 63 | } |
| 64 | }; |
| 65 | // No healing needed: |
| 66 | test( |
| 67 | { |
| 68 | R"([{"a":"b"}, "y"])" , |
| 69 | }, |
| 70 | R"([{"a":"b"},"y"])" , |
| 71 | "" |
| 72 | ); |
| 73 | // Partial literals can't be healed: |
| 74 | test( |
| 75 | { |
| 76 | R"([1)" , |
| 77 | R"([tru)" , |
| 78 | R"([n)" , |
| 79 | R"([nul)" , |
| 80 | R"([23.2)" , |
| 81 | }, |
| 82 | R"(["$foo"])" , |
| 83 | R"("$foo)" |
| 84 | ); |
| 85 | test( |
| 86 | { |
| 87 | R"({"a": 1)" , |
| 88 | R"({"a": tru)" , |
| 89 | R"({"a": n)" , |
| 90 | R"({"a": nul)" , |
| 91 | R"({"a": 23.2)" , |
| 92 | }, |
| 93 | R"({"a":"$foo"})" , |
| 94 | R"("$foo)" |
| 95 | ); |
| 96 | test( |
| 97 | { |
| 98 | R"({)" , |
| 99 | }, |
| 100 | R"({"$foo":1})" , |
| 101 | R"("$foo)" |
| 102 | ); |
| 103 | test( |
| 104 | { |
| 105 | R"([)" , |
| 106 | }, |
| 107 | R"(["$foo"])" , |
| 108 | R"("$foo)" |
| 109 | ); |
| 110 | // Healing right after a full literal |
| 111 | test( |
| 112 | { |
| 113 | R"(1 )" , |
| 114 | }, |
| 115 | R"(1)" , |
| 116 | "" |
| 117 | ); |
| 118 | test( |
| 119 | { |
| 120 | R"(true)" , |
| 121 | R"(true )" , |
| 122 | }, |
| 123 | R"(true)" , |
| 124 | "" |
| 125 | ); |
| 126 | test( |
| 127 | { |
| 128 | R"(null)" , |
| 129 | R"(null )" , |
| 130 | }, |
| 131 | R"(null)" , |
| 132 | "" |
| 133 | ); |
| 134 | test( |
| 135 | { |
| 136 | R"([1 )" , |
| 137 | }, |
| 138 | R"([1,"$foo"])" , |
| 139 | R"(,"$foo)" |
| 140 | ); |
| 141 | test( |
| 142 | { |
| 143 | R"([{})" , |
| 144 | R"([{} )" , |
| 145 | }, |
| 146 | R"([{},"$foo"])" , |
| 147 | R"(,"$foo)" |
| 148 | ); |
| 149 | test( |
| 150 | { |
| 151 | R"([true)" , |
| 152 | }, |
| 153 | // TODO: detect the true/false/null literal was complete |
| 154 | R"(["$foo"])" , |
| 155 | R"("$foo)" |
| 156 | ); |
| 157 | test( |
| 158 | { |
| 159 | R"([true )" , |
| 160 | }, |
| 161 | R"([true,"$foo"])" , |
| 162 | R"(,"$foo)" |
| 163 | ); |
| 164 | test( |
| 165 | { |
| 166 | R"([true,)" , |
| 167 | }, |
| 168 | R"([true,"$foo"])" , |
| 169 | R"("$foo)" |
| 170 | ); |
| 171 | // Test nesting |
| 172 | test( |
| 173 | { |
| 174 | R"([{"a": [{"b": [{)" , |
| 175 | }, |
| 176 | R"([{"a":[{"b":[{"$foo":1}]}]}])" , |
| 177 | R"("$foo)" |
| 178 | ); |
| 179 | test( |
| 180 | { |
| 181 | R"([{"a": [{"b": [)" , |
| 182 | }, |
| 183 | R"([{"a":[{"b":["$foo"]}]}])" , |
| 184 | R"("$foo)" |
| 185 | ); |
| 186 | |
| 187 | test( |
| 188 | { |
| 189 | R"([{"a": "b"})" , |
| 190 | R"([{"a": "b"} )" , |
| 191 | }, |
| 192 | R"([{"a":"b"},"$foo"])" , |
| 193 | R"(,"$foo)" |
| 194 | ); |
| 195 | test( |
| 196 | { |
| 197 | R"([{"a": "b"},)" , |
| 198 | R"([{"a": "b"}, )" , |
| 199 | }, |
| 200 | R"([{"a":"b"},"$foo"])" , |
| 201 | R"("$foo)" |
| 202 | ); |
| 203 | test( |
| 204 | { |
| 205 | R"({ "code)" , |
| 206 | }, |
| 207 | R"({"code$foo":1})" , |
| 208 | R"($foo)" |
| 209 | ); |
| 210 | test( |
| 211 | { |
| 212 | R"({ "code\)" , |
| 213 | }, |
| 214 | R"({"code\\$foo":1})" , |
| 215 | R"(\$foo)" |
| 216 | ); |
| 217 | test( |
| 218 | { |
| 219 | R"({ "code")" , |
| 220 | }, |
| 221 | R"({"code":"$foo"})" , |
| 222 | R"(:"$foo)" |
| 223 | ); |
| 224 | test( |
| 225 | { |
| 226 | R"({ "key")" , |
| 227 | }, |
| 228 | R"({"key":"$foo"})" , |
| 229 | R"(:"$foo)" |
| 230 | ); |
| 231 | // Test unicode escape sequences |
| 232 | test( |
| 233 | { |
| 234 | R"({"a":"\u)" , |
| 235 | }, |
| 236 | R"({"a":"\u0000$foo"})" , |
| 237 | R"(0000$foo)" |
| 238 | ); |
| 239 | test( |
| 240 | { |
| 241 | R"({"a":"\u00)" , |
| 242 | }, |
| 243 | R"({"a":"\u0000$foo"})" , |
| 244 | R"(00$foo)" |
| 245 | ); |
| 246 | test( |
| 247 | { |
| 248 | R"({"a":"\ud300)" , |
| 249 | }, |
| 250 | R"({"a":"\ud300$foo"})" , |
| 251 | R"($foo)" |
| 252 | ); |
| 253 | test( |
| 254 | { |
| 255 | R"({"a":"\ud800)" , |
| 256 | }, |
| 257 | R"({"a":"\ud800\udc00$foo"})" , |
| 258 | R"(\udc00$foo)" |
| 259 | ); |
| 260 | test( |
| 261 | { |
| 262 | R"({"a":"\ud800\)" , |
| 263 | }, |
| 264 | R"({"a":"\ud800\udc00$foo"})" , |
| 265 | R"(udc00$foo)" |
| 266 | ); |
| 267 | test( |
| 268 | { |
| 269 | R"({"a":"\ud800\u)" , |
| 270 | }, |
| 271 | R"({"a":"\ud800\udc00$foo"})" , |
| 272 | R"(dc00$foo)" |
| 273 | ); |
| 274 | test( |
| 275 | { |
| 276 | R"({"a":"\ud800\udc00)" , |
| 277 | }, |
| 278 | R"({"a":"\ud800\udc00$foo"})" , |
| 279 | R"($foo)" |
| 280 | ); |
| 281 | } |
| 282 | |
| 283 | int main() { |
| 284 | test_json_healing(); |
| 285 | std::cerr << "All tests passed.\n" ; |
| 286 | return 0; |
| 287 | } |
| 288 | |