llama-grammar.cpp source code [llama.cpp/src/llama-grammar.cpp]

1	#include "llama-grammar.h"
2
3	#include "llama-impl.h"
4	#include "llama-vocab.h"
5	#include "llama-sampling.h"
6
7	#include <cmath>
8	#include <algorithm>
9	#include <stdexcept>
10
11	//
12	// helpers
13	//
14
// Decodes the single UTF-8 sequence that starts at `src`.
//
// Returns the decoded code point together with a pointer to the first byte after the bytes
// that were consumed.
//
// NOTE: assumes valid utf8 (but checks for overrun): the sequence length is derived from the
// high nibble of the first byte only, and decoding stops early if a NUL byte is reached
// before that many bytes have been read.
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
    // sequence length in bytes, indexed by the high 4 bits of the first byte
    static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
    uint8_t      first_byte = static_cast<uint8_t>(*src);
    uint8_t      highbits   = first_byte >> 4;
    int          len        = lookup[highbits];
    // keep only the payload bits of the lead byte
    uint8_t      mask       = (1 << (8 - len)) - 1;
    uint32_t     value      = first_byte & mask;
    const char * end        = src + len; // may overrun!
    const char * pos        = src + 1;
    for ( ; pos < end && *pos; pos++) {
        // each following byte contributes its low 6 bits
        value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
    }
    return std::make_pair(value, pos);
}
30
31	static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32	const std::string & src,
33	llama_partial_utf8 partial_start) {
34	static const int lookup[] = { `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `0`, `0`, `0`, `0`, `2`, `2`, `3`, `4` };
35	const char * pos = src.c_str();
36	std::vector<uint32_t> code_points;
37
38	// common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39	code_points.reserve(n: src.size() + `1`);
40	uint32_t value = partial_start.value;
41	int n_remain = partial_start.n_remain;
42
43	// continue previous decode, if applicable
44	while (*pos != `0` && n_remain > `0`) {
45	uint8_t next_byte = static_cast<uint8_t>(*pos);
46	if ((next_byte >> `6`) != `2`) {
47	// invalid sequence, abort
48	code_points.push_back(x: `0`);
49	return std::make_pair(x: std::move(code_points), y: llama_partial_utf8{ .value: `0`, .n_remain: -`1` });
50	}
51	value = (value << `6`) + (next_byte & `0x3F`);
52	++pos;
53	--n_remain;
54	}
55
56	if (partial_start.n_remain > `0` && n_remain == `0`) {
57	code_points.push_back(x: value);
58	}
59
60	// decode any subsequent utf-8 sequences, which may end in an incomplete one
61	while (*pos != `0`) {
62	uint8_t first_byte = static_cast<uint8_t>(*pos);
63	uint8_t highbits = first_byte >> `4`;
64	n_remain = lookup[highbits] - `1`;
65
66	if (n_remain < `0`) {
67	// invalid sequence, abort
68	code_points.clear();
69	code_points.push_back(x: `0`);
70	return std::make_pair(x: std::move(code_points), y: llama_partial_utf8{ .value: `0`, .n_remain: n_remain });
71	}
72
73	uint8_t mask = (`1` << (`7` - n_remain)) - `1`;
74	value = first_byte & mask;
75
76	++pos;
77	while (*pos != `0` && n_remain > `0`) {
78	value = (value << `6`) + (static_cast<uint8_t>(*pos) & `0x3F`);
79	++pos;
80	--n_remain;
81	}
82	if (n_remain == `0`) {
83	code_points.push_back(x: value);
84	}
85	}
86	code_points.push_back(x: `0`);
87
88	return std::make_pair(x: std::move(code_points), y: llama_partial_utf8{ .value: value, .n_remain: n_remain });
89	}
90
// Returns true iff `c` is an ASCII decimal digit ('0'..'9').
static bool is_digit_char(char c) {
    return '0' <= c && c <= '9';
}
94
95	static bool is_word_char(char c) {
96	return (`'a'` <= c && c <= `'z'`) \|\| (`'A'` <= c && c <= `'Z'`) \|\| c == `'-'` \|\| is_digit_char(c);
97	}
98
// Parses exactly `size` hexadecimal digits starting at `src`.
//
// Returns the parsed value and a pointer just past the last digit.
// Throws std::runtime_error if fewer than `size` hex digits are available.
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
    const char * pos   = src;
    const char * end   = src + size;
    uint32_t     value = 0;
    for ( ; pos < end && *pos; pos++) {
        value <<= 4;
        char c = *pos;
        if ('a' <= c && c <= 'f') {
            value += c - 'a' + 10;
        } else if ('A' <= c && c <= 'F') {
            value += c - 'A' + 10;
        } else if ('0' <= c && c <= '9') {
            value += c - '0';
        } else {
            break;
        }
    }
    // stopping before `end` means a non-hex character or the end of input was hit
    if (pos != end) {
        throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
    }
    return std::make_pair(value, pos);
}
121
// Skips whitespace and '#' comments starting at `src`.
//
// Spaces and tabs are always skipped; '\r' and '\n' are skipped only when `newline_ok` is
// true. A '#' starts a comment that runs up to (but not including) the next '\r', '\n' or
// the end of input.
//
// Returns a pointer to the first character that was not skipped.
static const char * parse_space(const char * src, bool newline_ok) {
    const char * pos = src;
    while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
            (newline_ok && (*pos == '\r' || *pos == '\n'))) {
        if (*pos == '#') {
            // consume the comment body; the line terminator is handled by the outer loop
            while (*pos && *pos != '\r' && *pos != '\n') {
                pos++;
            }
        } else {
            pos++;
        }
    }
    return pos;
}
136
137	static const char * parse_name(const char * src) {
138	const char * pos = src;
139	while (is_word_char(c: *pos)) {
140	pos++;
141	}
142	if (pos == src) {
143	throw std::runtime_error (std::string ("expecting name at ") + src);
144	}
145	return pos;
146	}
147
148	static const char * parse_int(const char * src) {
149	const char * pos = src;
150	while (is_digit_char(c: *pos)) {
151	pos++;
152	}
153	if (pos == src) {
154	throw std::runtime_error (std::string ("expecting integer at ") + src);
155	}
156	return pos;
157	}
158
159	static std::pair<uint32_t, const char > parse_char(const* char * src) {
160	if (*src == `'\\'`) {
161	switch (src[`1`]) {
162	case `'x'`: return parse_hex(src: src + `2`, size: `2`);
163	case `'u'`: return parse_hex(src: src + `2`, size: `4`);
164	case `'U'`: return parse_hex(src: src + `2`, size: `8`);
165	case `'t'`: return std::make_pair(x: `'\t'`, y: src + `2`);
166	case `'r'`: return std::make_pair(x: `'\r'`, y: src + `2`);
167	case `'n'`: return std::make_pair(x: `'\n'`, y: src + `2`);
168	case `'\\'`:
169	case `'"'`:
170	case `'['`:
171	case `']'`:
172	return std::make_pair(x: src[`1`], y: src + `2`);
173	default:
174	throw std::runtime_error (std::string ("unknown escape at ") + src);
175	}
176	} else if (*src) {
177	return decode_utf8(src);
178	}
179	throw std::runtime_error ("unexpected end of input");
180	}
181
// Writes code point `c` to `file`: values in [0x20, 0x7f] are written as a single character,
// everything else as "<U+XXXX>" with at least four uppercase hex digits.
static void print_grammar_char(FILE * file, uint32_t c) {
    if (0x20 <= c && c <= 0x7f) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}
190
191	static bool is_char_element(llama_grammar_element elem) {
192	switch (elem.type) {
193	case LLAMA_GRETYPE_CHAR: return true;
194	case LLAMA_GRETYPE_CHAR_NOT: return true;
195	case LLAMA_GRETYPE_CHAR_ALT: return true;
196	case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197	case LLAMA_GRETYPE_CHAR_ANY: return true;
198	default: return false;
199	}
200	}
201
202	static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203	for (auto elem : rule) {
204	switch (elem.type) {
205	case LLAMA_GRETYPE_END: fprintf(stream: file, format: "END"); break;
206	case LLAMA_GRETYPE_ALT: fprintf(stream: file, format: "ALT"); break;
207	case LLAMA_GRETYPE_RULE_REF: fprintf(stream: file, format: "RULE_REF"); break;
208	case LLAMA_GRETYPE_CHAR: fprintf(stream: file, format: "CHAR"); break;
209	case LLAMA_GRETYPE_CHAR_NOT: fprintf(stream: file, format: "CHAR_NOT"); break;
210	case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(stream: file, format: "CHAR_RNG_UPPER"); break;
211	case LLAMA_GRETYPE_CHAR_ALT: fprintf(stream: file, format: "CHAR_ALT"); break;
212	case LLAMA_GRETYPE_CHAR_ANY: fprintf(stream: file, format: "CHAR_ANY"); break;
213	}
214	switch (elem.type) {
215	case LLAMA_GRETYPE_END:
216	case LLAMA_GRETYPE_ALT:
217	case LLAMA_GRETYPE_RULE_REF:
218	fprintf(stream: file, format: "(%u) ", elem.value);
219	break;
220	case LLAMA_GRETYPE_CHAR:
221	case LLAMA_GRETYPE_CHAR_NOT:
222	case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223	case LLAMA_GRETYPE_CHAR_ALT:
224	case LLAMA_GRETYPE_CHAR_ANY:
225	fprintf(stream: file, format: "(\"");
226	print_grammar_char(file, c: elem.value);
227	fprintf(stream: file, format: "\") ");
228	break;
229	}
230	}
231	fprintf(stream: file, format: "\n");
232	}
233
234	static void print_rule(
235	FILE * file,
236	uint32_t rule_id,
237	const llama_grammar_rule & rule,
238	const std::map<uint32_t, std::string> & symbol_id_names) {
239	if (rule.empty() \|\| rule.back().type != LLAMA_GRETYPE_END) {
240	throw std::runtime_error (
241	"malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(val: rule_id));
242	}
243	fprintf(stream: file, format: "%s ::= ", symbol_id_names.at(k: rule_id).c_str());
244	for (size_t i = `0`, end = rule.size() - `1`; i < end; i++) {
245	llama_grammar_element elem = rule [i];
246	switch (elem.type) {
247	case LLAMA_GRETYPE_END:
248	throw std::runtime_error (
249	"unexpected end of rule: " + std::to_string(val: rule_id) + "," +
250	std::to_string(val: i));
251	case LLAMA_GRETYPE_ALT:
252	fprintf(stream: file, format: "\| ");
253	break;
254	case LLAMA_GRETYPE_RULE_REF:
255	fprintf(stream: file, format: "%s ", symbol_id_names.at(k: elem.value).c_str());
256	break;
257	case LLAMA_GRETYPE_CHAR:
258	fprintf(stream: file, format: "[");
259	print_grammar_char(file, c: elem.value);
260	break;
261	case LLAMA_GRETYPE_CHAR_NOT:
262	fprintf(stream: file, format: "[^");
263	print_grammar_char(file, c: elem.value);
264	break;
265	case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266	if (i == `0` \|\| !is_char_element(elem: rule [i - `1`])) {
267	throw std::runtime_error (
268	"LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269	std::to_string(val: rule_id) + "," + std::to_string(val: i));
270	}
271	fprintf(stream: file, format: "-");
272	print_grammar_char(file, c: elem.value);
273	break;
274	case LLAMA_GRETYPE_CHAR_ALT:
275	if (i == `0` \|\| !is_char_element(elem: rule [i - `1`])) {
276	throw std::runtime_error (
277	"LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278	std::to_string(val: rule_id) + "," + std::to_string(val: i));
279	}
280	print_grammar_char(file, c: elem.value);
281	break;
282	case LLAMA_GRETYPE_CHAR_ANY:
283	fprintf(stream: file, format: ".");
284	break;
285	}
286	if (is_char_element(elem)) {
287	switch (rule [i + `1`].type) {
288	case LLAMA_GRETYPE_CHAR_ALT:
289	case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290	case LLAMA_GRETYPE_CHAR_ANY:
291	break;
292	default:
293	fprintf(stream: file, format: "] ");
294	}
295	}
296	}
297	fprintf(stream: file, format: "\n");
298	}
299
300	//
301	// implementation
302	//
303
304	uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305	uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306	auto result = symbol_ids.emplace(args: std::string (src, len), args&: next_id);
307	return result.first ->second;
308	}
309
310	uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311	uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312	symbol_ids [base_name + `'_'` + std::to_string(val: next_id)] = next_id;
313	return next_id;
314	}
315
316	void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317	if (rules.size() <= rule_id) {
318	rules.resize(new_size: rule_id + `1`);
319	}
320	rules [rule_id] = rule;
321	}
322
323	const char * llama_grammar_parser::parse_alternates(
324	const char * src,
325	const std::string & rule_name,
326	uint32_t rule_id,
327	bool is_nested) {
328	llama_grammar_rule rule;
329	const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330	while (*pos == `'\|'`) {
331	rule.push_back(x: {.type: LLAMA_GRETYPE_ALT, .value: `0`});
332	pos = parse_space(src: pos + `1`, newline_ok: true);
333	pos = parse_sequence(src: pos, rule_name, rule, is_nested);
334	}
335	rule.push_back(x: {.type: LLAMA_GRETYPE_END, .value: `0`});
336	add_rule(rule_id, rule);
337	return pos;
338	}
339
340	const char * llama_grammar_parser::parse_sequence(
341	const char * src,
342	const std::string & rule_name,
343	llama_grammar_rule & rule,
344	bool is_nested) {
345	size_t last_sym_start = rule.size();
346	const char * pos = src;
347
348	auto handle_repetitions = [&](int min_times, int max_times) {
349
350	if (last_sym_start == rule.size()) {
351	throw std::runtime_error (std::string ("expecting preceding item to */+/?/{ at ") + pos);
352	}
353
354	// apply transformation to previous symbol (last_sym_start to end) according to
355	// the following rewrite rules:
356	// S{m,n} --> S S S (m times) S'(n-m)
357	// S'(x) ::= S S'(x-1) \|
358	// (... n-m definitions of these S' rules ...)
359	// S'(1) ::= S \|
360	// S{m,} --> S S S (m times) S'
361	// S' ::= S S' \|
362	// S --> S{0,}*
363	// --> S' ::= S S' \|
364	// S+ --> S{1,}
365	// --> S S'
366	// S' ::= S S' \|
367	// S? --> S{0,1}
368	// --> S'
369	// S' ::= S \|
370
371	llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372	if (min_times == `0`) {
373	rule.resize(new_size: last_sym_start);
374	} else {
375	// Repeat the previous elements (min_times - 1) times
376	for (int i = `1`; i < min_times; i++) {
377	rule.insert(position: rule.end(), first: prev_rule.begin(), last: prev_rule.end());
378	}
379	}
380
381	uint32_t last_rec_rule_id = `0`;
382	auto n_opt = max_times < `0` ? `1` : max_times - min_times;
383
384	llama_grammar_rule rec_rule(prev_rule);
385	for (int i = `0`; i < n_opt; i++) {
386	rec_rule.resize(new_size: prev_rule.size());
387	uint32_t rec_rule_id = generate_symbol_id( base_name: rule_name);
388	if (i > `0` \|\| max_times < `0`) {
389	rec_rule.push_back(x: {.type: LLAMA_GRETYPE_RULE_REF, .value: max_times < `0` ? rec_rule_id : last_rec_rule_id});
390	}
391	rec_rule.push_back(x: {.type: LLAMA_GRETYPE_ALT, .value: `0`});
392	rec_rule.push_back(x: {.type: LLAMA_GRETYPE_END, .value: `0`});
393	add_rule( rule_id: rec_rule_id, rule: rec_rule);
394	last_rec_rule_id = rec_rule_id;
395	}
396	if (n_opt > `0`) {
397	rule.push_back(x: {.type: LLAMA_GRETYPE_RULE_REF, .value: last_rec_rule_id});
398	}
399	};
400
401	while (*pos) {
402	if (pos == `'"'`) { // literal string*
403	pos++;
404	last_sym_start = rule.size();
405	while (*pos != `'"'`) {
406	if (!*pos) {
407	throw std::runtime_error ("unexpected end of input");
408	}
409	auto char_pair = parse_char(src: pos);
410	pos = char_pair.second;
411	rule.push_back(x: {.type: LLAMA_GRETYPE_CHAR, .value: char_pair.first});
412	}
413	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
414	} else if (pos == `'['`) { // char range(s)*
415	pos++;
416	enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417	if (*pos == `'^'`) {
418	pos++;
419	start_type = LLAMA_GRETYPE_CHAR_NOT;
420	}
421	last_sym_start = rule.size();
422	while (*pos != `']'`) {
423	if (!*pos) {
424	throw std::runtime_error ("unexpected end of input");
425	}
426	auto char_pair = parse_char(src: pos);
427	pos = char_pair.second;
428	enum llama_gretype type = last_sym_start < rule.size()
429	? LLAMA_GRETYPE_CHAR_ALT
430	: start_type;
431
432	rule.push_back(x: {.type: type, .value: char_pair.first});
433	if (pos[`0`] == `'-'` && pos[`1`] != `']'`) {
434	if (!pos[`1`]) {
435	throw std::runtime_error ("unexpected end of input");
436	}
437	auto endchar_pair = parse_char(src: pos + `1`);
438	pos = endchar_pair.second;
439	rule.push_back(x: {.type: LLAMA_GRETYPE_CHAR_RNG_UPPER, .value: endchar_pair.first});
440	}
441	}
442	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
443	} else if (is_word_char(c: pos)) { // rule reference*
444	const char * name_end = parse_name(src: pos);
445	uint32_t ref_rule_id = get_symbol_id(src: pos, len: name_end - pos);
446	pos = parse_space(src: name_end, newline_ok: is_nested);
447	last_sym_start = rule.size();
448	rule.push_back(x: {.type: LLAMA_GRETYPE_RULE_REF, .value: ref_rule_id});
449	} else if (pos == `'('`) { // grouping*
450	// parse nested alternates into synthesized rule
451	pos = parse_space(src: pos + `1`, newline_ok: true);
452	uint32_t sub_rule_id = generate_symbol_id(base_name: rule_name);
453	pos = parse_alternates(src: pos, rule_name, rule_id: sub_rule_id, is_nested: true);
454	last_sym_start = rule.size();
455	// output reference to synthesized rule
456	rule.push_back(x: {.type: LLAMA_GRETYPE_RULE_REF, .value: sub_rule_id});
457	if (*pos != `')'`) {
458	throw std::runtime_error (std::string ("expecting ')' at ") + pos);
459	}
460	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
461	} else if (pos == `'.'`) { // any char*
462	last_sym_start = rule.size();
463	rule.push_back(x: {.type: LLAMA_GRETYPE_CHAR_ANY, .value: `0`});
464	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
465	} else if (pos == `''`) {
466	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
467	handle_repetitions (`0`, -`1`);
468	} else if (*pos == `'+'`) {
469	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
470	handle_repetitions (`1`, -`1`);
471	} else if (*pos == `'?'`) {
472	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
473	handle_repetitions (`0`, `1`);
474	} else if (*pos == `'{'`) {
475	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
476
477	if (!is_digit_char(c: *pos)) {
478	throw std::runtime_error (std::string ("expecting an int at ") + pos);
479	}
480	const char * int_end = parse_int(src: pos);
481	int min_times = std::stoul(str: std::string (pos, int_end - pos));
482	pos = parse_space(src: int_end, newline_ok: is_nested);
483
484	int max_times = -`1`;
485
486	if (*pos == `'}'`) {
487	max_times = min_times;
488	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
489	} else if (*pos == `','`) {
490	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
491
492	if (is_digit_char(c: *pos)) {
493	const char * int_end = parse_int(src: pos);
494	max_times = std::stoul(str: std::string (pos, int_end - pos));
495	pos = parse_space(src: int_end, newline_ok: is_nested);
496	}
497
498	if (*pos != `'}'`) {
499	throw std::runtime_error (std::string ("expecting '}' at ") + pos);
500	}
501	pos = parse_space(src: pos + `1`, newline_ok: is_nested);
502	} else {
503	throw std::runtime_error (std::string ("expecting ',' at ") + pos);
504	}
505	handle_repetitions (min_times, max_times);
506	} else {
507	break;
508	}
509	}
510	return pos;
511	}
512
513	const char * llama_grammar_parser::parse_rule(const char * src) {
514	const char * name_end = parse_name(src);
515	const char * pos = parse_space(src: name_end, newline_ok: false);
516	size_t name_len = name_end - src;
517	uint32_t rule_id = get_symbol_id(src, len: name_len);
518	const std::string name(src, name_len);
519
520	if (!(pos[`0`] == `':'` && pos[`1`] == `':'` && pos[`2`] == `'='`)) {
521	throw std::runtime_error (std::string ("expecting ::= at ") + pos);
522	}
523	pos = parse_space(src: pos + `3`, newline_ok: true);
524
525	pos = parse_alternates(src: pos, rule_name: name, rule_id, is_nested: false);
526
527	if (*pos == `'\r'`) {
528	pos += pos[`1`] == `'\n'` ? `2` : `1`;
529	} else if (*pos == `'\n'`) {
530	pos++;
531	} else if (*pos) {
532	throw std::runtime_error (std::string ("expecting newline or end at ") + pos);
533	}
534	return parse_space(src: pos, newline_ok: true);
535	}
536
537	bool llama_grammar_parser::parse(const char * src) {
538	try {
539	const char * pos = parse_space(src, newline_ok: true);
540	while (*pos) {
541	pos = parse_rule(src: pos);
542	}
543	// Validate the state to ensure that all rules are defined
544	for (const auto & rule : rules) {
545	if (rule.empty()) {
546	throw std::runtime_error ("Undefined rule");
547	}
548	for (const auto & elem : rule) {
549	if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550	// Ensure that the rule at that location exists
551	if (elem.value >= rules.size() \|\| rules [elem.value].empty()) {
552	// Get the name of the rule that is missing
553	for (const auto & kv : symbol_ids) {
554	if (kv.second == elem.value) {
555	throw std::runtime_error ("Undefined rule identifier '" + kv.first + "'");
556	}
557	}
558	}
559	}
560	}
561	}
562	} catch (const std::exception & err) {
563	fprintf(stderr, format: "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
564	rules.clear();
565	return false;
566	}
567
568	return true;
569	}
570
571	void llama_grammar_parser::print(FILE * file) {
572	try {
573	std::map<uint32_t, std::string> symbol_id_names;
574	for (const auto & kv : symbol_ids) {
575	symbol_id_names [kv.second] = kv.first;
576	}
577	for (size_t i = `0`, end = rules.size(); i < end; i++) {
578	// fprintf(file, "%zu: ", i);
579	// print_rule_binary(file, rules[i]);
580	print_rule(file, rule_id: uint32_t(i), rule: rules [i], symbol_id_names);
581	// fprintf(file, "\n");
582	}
583	} catch (const std::exception & err) {
584	fprintf(stderr, format: "\n%s: error printing grammar: %s\n", __func__, err.what());
585	}
586	}
587
588	llama_grammar_stack llama_grammar_parser::c_rules() const {
589	llama_grammar_stack ret;
590	ret.reserve(n: rules.size());
591	for (const auto & rule : rules) {
592	ret.push_back(x: rule.data());
593	}
594	return ret;
595	}
596
597	// returns true iff pos points to the end of one of the definitions of a rule
598	static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599	switch (pos->type) {
600	case LLAMA_GRETYPE_END: return true; // NOLINT
601	case LLAMA_GRETYPE_ALT: return true; // NOLINT
602	default: return false;
603	}
604	}
605
606	// returns true iff chr satisfies the char range at pos (regular or inverse range)
607	// asserts that pos is pointing to a char range element
608	static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609	const llama_grammar_element * pos,
610	const uint32_t chr) {
611	bool found = false;
612	bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR \|\| pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
614	GGML_ASSERT(is_positive_char \|\| pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
616	do {
617	if (pos[`1`].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618	// inclusive range, e.g. [a-z]
619	found = found \|\| (pos->value <= chr && chr <= pos[`1`].value);
620	pos += `2`;
621	} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622	// Any character matches "."
623	found = true;
624	pos += `1`;
625	} else {
626	// exact char match, e.g. [a] or "a"
627	found = found \|\| pos->value == chr;
628	pos += `1`;
629	}
630	} while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
632	return std::make_pair(x: found == is_positive_char, y&: pos);
633	}
634
635	// returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636	// range at pos (regular or inverse range)
637	// asserts that pos is pointing to a char range element
638	static bool llama_grammar_match_partial_char(
639	const llama_grammar_element * pos,
640	const llama_partial_utf8 partial_utf8) {
641	bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR \|\| pos->type == LLAMA_GRETYPE_CHAR_ANY;
642	GGML_ASSERT(is_positive_char \|\| pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
644	uint32_t partial_value = partial_utf8.value;
645	int n_remain = partial_utf8.n_remain;
646
647	// invalid sequence or 7-bit char split across 2 bytes (overlong)
648	if (n_remain < `0` \|\| (n_remain == `1` && partial_value < `2`)) {
649	return false;
650	}
651
652	// range of possible code points this partial UTF-8 sequence could complete to
653	uint32_t low = partial_value << (n_remain * `6`);
654	uint32_t high = low \| ((`1` << (n_remain * `6`)) - `1`);
655
656	if (low == `0`) {
657	if (n_remain == `2`) {
658	low = `1` << `11`;
659	} else if (n_remain == `3`) {
660	low = `1` << `16`;
661	}
662	}
663
664	do {
665	if (pos[`1`].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666	// inclusive range, e.g. [a-z]
667	if (pos->value <= high && low <= pos[`1`].value) {
668	return is_positive_char;
669	}
670	pos += `2`;
671	} else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672	// Any character matches "."
673	return true;
674	} else {
675	// exact char match, e.g. [a] or "a"
676	if (low <= pos->value && pos->value <= high) {
677	return is_positive_char;
678	}
679	pos += `1`;
680	}
681	} while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
683	return !is_positive_char;
684	}
685
686	// transforms a grammar pushdown stack into N possible stacks, all ending
687	// at a character range (terminal element)
688	static void llama_grammar_advance_stack(
689	const llama_grammar_rules & rules,
690	const llama_grammar_stack & stack,
691	llama_grammar_stacks & new_stacks) {
692	if (stack.empty()) {
693	if (std::find(first: new_stacks.begin(), last: new_stacks.end(), val: stack) == new_stacks.end()) {
694	new_stacks.emplace_back(args: stack);
695	}
696	return;
697	}
698
699	const llama_grammar_element * pos = stack.back();
700
701	switch (pos->type) {
702	case LLAMA_GRETYPE_RULE_REF: {
703	const size_t rule_id = static_cast<size_t>(pos->value);
704	const llama_grammar_element * subpos = rules [rule_id].data();
705	do {
706	// init new stack without the top (pos)
707	llama_grammar_stack new_stack(stack.begin(), stack.end() - `1`);
708	if (!llama_grammar_is_end_of_sequence(pos: pos + `1`)) {
709	// if this rule ref is followed by another element, add that to stack
710	new_stack.push_back(x: pos + `1`);
711	}
712	if (!llama_grammar_is_end_of_sequence(pos: subpos)) {
713	// if alternate is nonempty, add to stack
714	new_stack.push_back(x: subpos);
715	}
716	llama_grammar_advance_stack(rules, stack: new_stack, new_stacks);
717	while (!llama_grammar_is_end_of_sequence(pos: subpos)) {
718	// scan to end of alternate def
719	subpos++;
720	}
721	if (subpos->type == LLAMA_GRETYPE_ALT) {
722	// there's another alternate def of this rule to process
723	subpos++;
724	} else {
725	break;
726	}
727	} while (true);
728	break;
729	}
730	case LLAMA_GRETYPE_CHAR:
731	case LLAMA_GRETYPE_CHAR_NOT:
732	case LLAMA_GRETYPE_CHAR_ANY:
733	if (std::find(first: new_stacks.begin(), last: new_stacks.end(), val: stack) == new_stacks.end()) {
734	// only add the stack if it's not a duplicate of one we already have
735	new_stacks.emplace_back(args: stack);
736	}
737	break;
738	default:
739	// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740	// (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741	// those
742	GGML_ABORT("fatal error");
743	}
744	}
745
746	static llama_grammar_candidates llama_grammar_reject_candidates(
747	const llama_grammar_rules & rules,
748	const llama_grammar_stacks & stacks,
749	const llama_grammar_candidates & candidates) {
750	GGML_ASSERT(!stacks.empty()); // REVIEW
751
752	if (candidates.empty()) {
753	return {};
754	}
755
756	auto rejects = llama_grammar_reject_candidates_for_stack(rules, stack: stacks.front(), candidates);
757
758	for (size_t i = `1`, size = stacks.size(); i < size; ++i) {
759	rejects = llama_grammar_reject_candidates_for_stack(rules, stack: stacks [i], candidates: rejects);
760	}
761
762	return rejects;
763	}
764
765	static bool llama_grammar_detect_left_recursion(
766	const llama_grammar_rules & rules,
767	size_t rule_index,
768	std::vector<bool> * rules_visited,
769	std::vector<bool> * rules_in_progress,
770	std::vector<bool> * rules_may_be_empty) {
771	if ((*rules_in_progress)[rule_index]) {
772	return true;
773	}
774
775	(rules_in_progress)[rule_index] = true*;
776
777	const llama_grammar_rule & rule = rules [rule_index];
778
779	// First check if the rule might produce the empty string. This could be done combined with the second
780	// step but it's more readable as two steps.
781	bool at_rule_start = true;
782	for (size_t i = `0`; i < rule.size(); i++) {
783	if (llama_grammar_is_end_of_sequence(pos: &rule [i])) {
784	if (at_rule_start) {
785	(rules_may_be_empty)[rule_index] = true*;
786	break;
787	}
788	at_rule_start = true;
789	} else {
790	at_rule_start = false;
791	}
792	}
793
794	// Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795	// be empty)
796	bool recurse_into_nonterminal = true;
797	for (size_t i = `0`; i < rule.size(); i++) {
798	if (rule [i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799	if (llama_grammar_detect_left_recursion(rules, rule_index: (size_t)rule [i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800	return true;
801	}
802	if (!((*rules_may_be_empty)[(size_t)rule [i].value])) {
803	recurse_into_nonterminal = false;
804	}
805	} else if (llama_grammar_is_end_of_sequence(pos: &rule [i])) {
806	recurse_into_nonterminal = true;
807	} else {
808	recurse_into_nonterminal = false;
809	}
810	}
811
812	(rules_in_progress)[rule_index] = false*;
813	(rules_visited)[rule_index] = true*;
814
815	return false;
816	}
817
818	const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819	return grammar->rules;
820	}
821
822	llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823	return grammar->stacks;
824	}
825
826	void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827	llama_grammar_stacks stacks_new;
828	stacks_new.reserve(n: grammar->stacks.size());
829
830	for (const auto & stack : grammar->stacks) {
831	if (stack.empty()) {
832	continue;
833	}
834
835	auto match = llama_grammar_match_char(pos: stack.back(), chr);
836	if (match.first) {
837	const llama_grammar_element * pos = match.second;
838
839	// update top of stack to next element, if any
840	llama_grammar_stack new_stack(stack.begin(), stack.end() - `1`);
841	if (!llama_grammar_is_end_of_sequence(pos)) {
842	new_stack.push_back(x: pos);
843	}
844	llama_grammar_advance_stack(rules: grammar->rules, stack: new_stack, new_stacks&: stacks_new);
845	}
846	}
847
848	grammar->stacks = std::move(stacks_new);
849	}
850
851	llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852	const llama_grammar_rules & rules,
853	const llama_grammar_stack & stack,
854	const llama_grammar_candidates & candidates) {
855
856	llama_grammar_candidates rejects;
857	rejects.reserve(n: candidates.size());
858
859	if (stack.empty()) {
860	for (const auto & tok : candidates) {
861	if (*tok.code_points != `0` \|\| tok.partial_utf8.n_remain != `0`) {
862	rejects.push_back(x: tok);
863	}
864	}
865	return rejects;
866	}
867
868	const llama_grammar_element * stack_pos = stack.back();
869
870	llama_grammar_candidates next_candidates;
871	next_candidates.reserve(n: candidates.size());
872
873	for (const auto & tok : candidates) {
874	if (*tok.code_points == `0`) {
875	// reached end of full codepoints in token, reject iff it ended in a partial sequence
876	// that cannot satisfy this position in grammar
877	if (tok.partial_utf8.n_remain != `0` &&
878	!llama_grammar_match_partial_char(pos: stack_pos, partial_utf8: tok.partial_utf8)) {
879	rejects.push_back(x: tok);
880	}
881	} else if (llama_grammar_match_char(pos: stack_pos, chr: *tok.code_points).first) {
882	next_candidates.push_back(x: { .index: tok.index, .code_points: tok.code_points + `1`, .partial_utf8: tok.partial_utf8 });
883	} else {
884	rejects.push_back(x: tok);
885	}
886	}
887
888	const auto * stack_pos_after = llama_grammar_match_char(pos: stack_pos, chr: `0`).second;
889
890	// update top of stack to next element, if any
891	llama_grammar_stack stack_after(stack.begin(), stack.end() - `1`);
892	if (!llama_grammar_is_end_of_sequence(pos: stack_pos_after)) {
893	stack_after.push_back(x: stack_pos_after);
894	}
895	llama_grammar_stacks next_stacks;
896	llama_grammar_advance_stack(rules, stack: stack_after, new_stacks&: next_stacks);
897
898	auto next_rejects = llama_grammar_reject_candidates(rules, stacks: next_stacks, candidates: next_candidates);
899	for (const auto & tok : next_rejects) {
900	rejects.push_back(x: { .index: tok.index, .code_points: tok.code_points - `1`, .partial_utf8: tok.partial_utf8 });
901	}
902
903	return rejects;
904	}
905
906	////////////////////
907
908	struct llama_grammar * llama_grammar_init_impl(
909	const struct llama_vocab * vocab,
910	const llama_grammar_element ** rules,
911	size_t n_rules,
912	size_t start_rule_index) {
913	const llama_grammar_element * pos;
914
915	// copy rule definitions into vectors
916	llama_grammar_rules vec_rules(n_rules);
917	for (size_t i = `0`; i < n_rules; i++) {
918	for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919	vec_rules [i].push_back(x: *pos);
920	}
921	vec_rules [i].push_back(x: {.type: LLAMA_GRETYPE_END, .value: `0`});
922	}
923
924	// Check for left recursion
925	std::vector<bool> rules_visited(n_rules);
926	std::vector<bool> rules_in_progress(n_rules);
927	std::vector<bool> rules_may_be_empty(n_rules);
928	for (size_t i = `0`; i < n_rules; i++) {
929	if (rules_visited [i]) {
930	continue;
931	}
932	if (llama_grammar_detect_left_recursion(rules: vec_rules, rule_index: i, rules_visited: &rules_visited, rules_in_progress: &rules_in_progress, rules_may_be_empty: &rules_may_be_empty)) {
933	LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934	return nullptr;
935	}
936	}
937
938	// loop over alternates of start rule to build initial stacks
939	llama_grammar_stacks stacks;
940	pos = vec_rules [start_rule_index].data();
941	do {
942	llama_grammar_stack stack;
943	if (!llama_grammar_is_end_of_sequence(pos)) {
944	// if alternate is nonempty, add to stack
945	stack.push_back(x: pos);
946	}
947	llama_grammar_advance_stack(rules: vec_rules, stack, new_stacks&: stacks);
948	while (!llama_grammar_is_end_of_sequence(pos)) {
949	// scan to end of alternate def
950	pos++;
951	}
952	if (pos->type == LLAMA_GRETYPE_ALT) {
953	// there's another alternate def of this rule to process
954	pos++;
955	} else {
956	break;
957	}
958	} while (true);
959
960	// Important: vec_rules has to be moved here, not copied, because stacks contains
961	// pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962	// then the pointers would be invalidated when the local vec_rules goes out of scope.
963	return new llama_grammar {
964	.vocab: vocab,
965	.rules: std::move(vec_rules),
966	.stacks: std::move(stacks),
967	/ .partial_utf8 = / {},
968	/ .lazy =/ false,
969	/ .awaiting_trigger = / false,
970	/ .trigger_buffer = / "",
971	/ .trigger_tokens = / {},
972	/ .trigger_patterns = / {},
973	};
974	}
975
976	struct llama_grammar * llama_grammar_init_impl(
977	const struct llama_vocab * vocab,
978	const char * grammar_str,
979	const char * grammar_root,
980	bool lazy,
981	const char ** trigger_patterns,
982	size_t num_trigger_patterns,
983	const llama_token * trigger_tokens,
984	size_t num_trigger_tokens) {
985	llama_grammar_parser parser;
986
987	// if there is a grammar, parse it
988	// rules will be empty (default) if there are parse errors
989	if (!parser.parse(src: grammar_str) \|\| parser.rules.empty()) {
990	fprintf(stderr, format: "%s: failed to parse grammar\n", __func__);
991	return nullptr;
992	}
993
994	// Ensure that there is a "root" node.
995	if (parser.symbol_ids.find(x: "root") == parser.symbol_ids.end()) {
996	fprintf(stderr, format: "%s: grammar does not contain a 'root' symbol\n", __func__);
997	return nullptr;
998	}
999
1000	std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
1001
1002	const size_t n_rules = grammar_rules.size();
1003	const size_t start_rule_index = parser.symbol_ids.at(k: grammar_root);
1004
1005	const llama_grammar_element * pos;
1006
1007	// copy rule definitions into vectors
1008	llama_grammar_rules vec_rules(n_rules);
1009	for (size_t i = `0`; i < n_rules; i++) {
1010	for (pos = grammar_rules [i]; pos->type != LLAMA_GRETYPE_END; pos++) {
1011	vec_rules [i].push_back(x: *pos);
1012	}
1013	vec_rules [i].push_back(x: {.type: LLAMA_GRETYPE_END, .value: `0`});
1014	}
1015
1016	// Check for left recursion
1017	std::vector<bool> rules_visited(n_rules);
1018	std::vector<bool> rules_in_progress(n_rules);
1019	std::vector<bool> rules_may_be_empty(n_rules);
1020	for (size_t i = `0`; i < n_rules; i++) {
1021	if (rules_visited [i]) {
1022	continue;
1023	}
1024	if (llama_grammar_detect_left_recursion(rules: vec_rules, rule_index: i, rules_visited: &rules_visited, rules_in_progress: &rules_in_progress, rules_may_be_empty: &rules_may_be_empty)) {
1025	LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1026	return nullptr;
1027	}
1028	}
1029
1030	// loop over alternates of start rule to build initial stacks
1031	llama_grammar_stacks stacks;
1032	pos = vec_rules [start_rule_index].data();
1033	do {
1034	llama_grammar_stack stack;
1035	if (!llama_grammar_is_end_of_sequence(pos)) {
1036	// if alternate is nonempty, add to stack
1037	stack.push_back(x: pos);
1038	}
1039	llama_grammar_advance_stack(rules: vec_rules, stack, new_stacks&: stacks);
1040	while (!llama_grammar_is_end_of_sequence(pos)) {
1041	// scan to end of alternate def
1042	pos++;
1043	}
1044	if (pos->type == LLAMA_GRETYPE_ALT) {
1045	// there's another alternate def of this rule to process
1046	pos++;
1047	} else {
1048	break;
1049	}
1050	} while (true);
1051
1052	std::vector<llama_token> vec_trigger_tokens;
1053	std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
1054	for (size_t i = `0`; i < num_trigger_tokens; i++) {
1055	GGML_ASSERT(trigger_tokens != nullptr);
1056	vec_trigger_tokens.push_back(x: trigger_tokens[i]);
1057	}
1058	for (size_t i = `0`; i < num_trigger_patterns; i++) {
1059	GGML_ASSERT(trigger_patterns != nullptr);
1060	auto & trigger = vec_trigger_patterns.emplace_back();
1061	trigger.pattern = trigger_patterns[i];
1062	trigger.regex = std::regex (trigger.pattern);
1063	}
1064
1065	// Important: vec_rules has to be moved here, not copied, because stacks contains
1066	// pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1067	// then the pointers would be invalidated when the local vec_rules goes out of scope.
1068	return new llama_grammar {
1069	.vocab: vocab,
1070	.rules: std::move(vec_rules),
1071	.stacks: std::move(stacks),
1072	/ .partial_utf8 = / {},
1073	/ .lazy = / lazy,
1074	/ .awaiting_trigger = / lazy,
1075	/ .trigger_buffer = / "",
1076	.trigger_tokens: std::move(vec_trigger_tokens),
1077	.trigger_patterns: std::move(vec_trigger_patterns),
1078	};
1079	}
1080
// Destroys a grammar previously allocated with `new`; a null pointer is accepted and ignored.
void llama_grammar_free_impl(struct llama_grammar * grammar) {
    // `delete` on a null pointer is a no-op, so no explicit null test is required
    delete grammar;
}
1088
1089	struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1090	auto * result = new llama_grammar {
1091	.vocab: grammar.vocab,
1092	.rules: grammar.rules,
1093	.stacks: grammar.stacks,
1094	.partial_utf8: grammar.partial_utf8,
1095	.lazy: grammar.lazy,
1096	.awaiting_trigger: grammar.awaiting_trigger,
1097	.trigger_buffer: grammar.trigger_buffer,
1098	.trigger_tokens: grammar.trigger_tokens,
1099	.trigger_patterns: grammar.trigger_patterns,
1100	};
1101
1102	// redirect elements in stacks to point to new rules
1103	for (size_t is = `0`; is < result->stacks.size(); is++) {
1104	for (size_t ie = `0`; ie < result->stacks [is].size(); ie++) {
1105	for (size_t ir0 = `0`; ir0 < grammar.rules.size(); ir0++) {
1106	for (size_t ir1 = `0`; ir1 < grammar.rules [ir0].size(); ir1++) {
1107	if (grammar.stacks [is][ie] == &grammar.rules [ir0][ir1]) {
1108	result->stacks [is][ie] = &result->rules [ir0][ir1];
1109	}
1110	}
1111	}
1112	}
1113	}
1114
1115	return result;
1116	}
1117
1118	void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1119	GGML_ASSERT(grammar.vocab != nullptr);
1120
1121	if (grammar.awaiting_trigger) {
1122	return;
1123	}
1124
1125	bool allow_eog = false;
1126	for (const auto & stack : grammar.stacks) {
1127	if (stack.empty()) {
1128	allow_eog = true;
1129	break;
1130	}
1131	}
1132
1133	std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1134	candidates_decoded.reserve(n: cur_p->size);
1135
1136	llama_grammar_candidates candidates_grammar;
1137	candidates_grammar.reserve(n: cur_p->size);
1138
1139	for (size_t i = `0`; i < cur_p->size; ++i) {
1140	const llama_token id = cur_p->data[i].id;
1141	const std::string & piece = grammar.vocab->token_to_piece(token: id);
1142
1143	if (grammar.vocab->is_eog(id)) {
1144	if (!allow_eog) {
1145	cur_p->data[i].logit = -INFINITY;
1146	}
1147	} else if (piece.empty() \|\| piece [`0`] == `0`) {
1148	cur_p->data[i].logit = -INFINITY;
1149	} else {
1150	candidates_decoded.push_back(x: decode_utf8(src: piece, partial_start: grammar.partial_utf8));
1151	candidates_grammar.push_back(x: { .index: i, .code_points: candidates_decoded.back().first.data(), .partial_utf8: candidates_decoded.back().second });
1152	}
1153	}
1154
1155	const auto rejects = llama_grammar_reject_candidates(rules: grammar.rules, stacks: grammar.stacks, candidates: candidates_grammar);
1156	for (const auto & reject : rejects) {
1157	cur_p->data[reject.index].logit = -INFINITY;
1158	}
1159	}
1160
1161	void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1162	GGML_ASSERT(grammar.vocab != nullptr);
1163
1164	const auto & piece = grammar.vocab->token_to_piece(token);
1165
1166	if (grammar.awaiting_trigger) {
1167	if (std::find(first: grammar.trigger_tokens.begin(), last: grammar.trigger_tokens.end(), val: token) != grammar.trigger_tokens.end()) {
1168	grammar.awaiting_trigger = false;
1169	grammar.trigger_buffer.clear();
1170	llama_grammar_accept_str(grammar, piece);
1171	LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
1172	return;
1173	} else {
1174	grammar.trigger_buffer += piece;
1175
1176	std::smatch match;
1177	for (const auto & trigger_pattern : grammar.trigger_patterns) {
1178	if (std::regex_match(s: grammar.trigger_buffer, m&: match, re: trigger_pattern.regex)) {
1179	grammar.awaiting_trigger = false;
1180	// get from the first matched capturing group to the end of the string
1181	size_t start = std::string::npos;
1182	for (auto i = `1u`; i < match.size(); i++) {
1183	if (match.length(sub: i) > `0`) {
1184	start = match.position(sub: i);
1185	break;
1186	}
1187	}
1188	if (start == std::string::npos) {
1189	start = match.position(sub: `0`);
1190	}
1191	auto constrained_str = grammar.trigger_buffer.substr(pos: start);
1192	// std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
1193	grammar.trigger_buffer.clear();
1194	llama_grammar_accept_str(grammar, piece: constrained_str);
1195	LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
1196	return;
1197	}
1198	}
1199	LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
1200	return;
1201	}
1202	}
1203
1204	if (grammar.vocab->is_eog(id: token)) {
1205	for (const auto & stack : grammar.stacks) {
1206	if (stack.empty()) {
1207	return;
1208	}
1209	}
1210	GGML_ABORT("fatal error");
1211	}
1212
1213	llama_grammar_accept_str(grammar, piece);
1214	}
1215
1216	void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
1217	// Note terminating 0 in decoded string
1218	const auto decoded = decode_utf8(src: piece, partial_start: grammar.partial_utf8);
1219	const auto & code_points = decoded.first;
1220
1221	for (auto it = code_points.begin(), end = code_points.end() - `1`; it != end; ++it) {
1222	llama_grammar_accept(grammar: &grammar, chr: *it);
1223	}
1224
1225	grammar.partial_utf8 = decoded.second;
1226	if (grammar.stacks.empty()) {
1227	throw std::runtime_error ("Unexpected empty grammar stack after accepting piece: " + piece);
1228	}
1229	}
1230

// Source: llama.cpp/src/llama-grammar.cpp