1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
3*/
4
5#include <stddef.h>
6
7#ifdef EXPAT_WIN32
8#include "winconfig.h"
9#else
10#ifdef HAVE_EXPAT_CONFIG_H
11#include "expat_config.h"
12#endif
13#endif /* ndef EXPAT_WIN32 */
14
15#include "Poco/XML/expat_external.h"
16#include "internal.h"
17#include "xmlrole.h"
18#include "ascii.h"
19
/* The role handlers in this file do not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
26
/* Keyword tables.  Each keyword is spelled out character by character
   with the ASCII_* constants and terminated with '\0'; the handlers
   below pass them to XmlNameMatchesAscii() to recognise declaration
   keywords.  KW_IGNORE and KW_INCLUDE exist only in XML_DTD builds. */
static const char KW_ANY[] = {
    ASCII_A, ASCII_N, ASCII_Y, '\0' };
static const char KW_ATTLIST[] = {
    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
static const char KW_CDATA[] = {
    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_DOCTYPE[] = {
    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
static const char KW_ELEMENT[] = {
    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
static const char KW_EMPTY[] = {
    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
static const char KW_ENTITIES[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
    '\0' };
static const char KW_ENTITY[] = {
    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
static const char KW_FIXED[] = {
    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
static const char KW_ID[] = {
    ASCII_I, ASCII_D, '\0' };
static const char KW_IDREF[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
static const char KW_IDREFS[] = {
    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
#ifdef XML_DTD
/* Used only by the conditional-section handler (condSect0). */
static const char KW_IGNORE[] = {
    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
#endif
static const char KW_IMPLIED[] = {
    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
#ifdef XML_DTD
/* Used only by the conditional-section handler (condSect0). */
static const char KW_INCLUDE[] = {
    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
#endif
static const char KW_NDATA[] = {
    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_NMTOKEN[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
static const char KW_NMTOKENS[] = {
    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
    '\0' };
static const char KW_NOTATION[] =
    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
      '\0' };
static const char KW_PCDATA[] = {
    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
static const char KW_PUBLIC[] = {
    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
static const char KW_REQUIRED[] = {
    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
    '\0' };
static const char KW_SYSTEM[] = {
    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
81
/* Default definition: read the minBytesPerChar member of the encoding.
   A definition supplied before this point takes precedence.  The
   handlers use it to step past the leading punctuation of a token
   before comparing the keyword that follows. */
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
85
/* setTopLevel(state): install the handler that processes top-level
   declarations once a declaration has been closed.  In XML_DTD builds
   this is internalSubset when state->documentEntity is non-zero and
   externalSubset1 otherwise; without XML_DTD it is always
   internalSubset. */
#ifdef XML_DTD
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity \
                       ? internalSubset \
                       : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
94
/* Function type shared by every role handler in this file.  A handler
   receives the current state, a token code and the token's byte range
   [ptr, end) in encoding enc; it returns an XML_ROLE_* code and may
   store a different handler in state->handler. */
typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
                                   int tok,
                                   const char *ptr,
                                   const char *end,
                                   const ENCODING *enc);

/* Forward declarations of all handlers, needed because the handlers
   refer to one another when switching state. */
static PROLOG_HANDLER
  prolog0, prolog1, prolog2,
  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
  internalSubset,
  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
  entity7, entity8, entity9, entity10,
  notation0, notation1, notation2, notation3, notation4,
  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
  attlist7, attlist8, attlist9,
  element0, element1, element2, element3, element4, element5, element6,
  element7,
#ifdef XML_DTD
  externalSubset0, externalSubset1,
  condSect0, condSect1, condSect2,
#endif /* XML_DTD */
  declClose,
  error;

/* Fallback used by every handler for tokens it does not accept. */
static int FASTCALL common(PROLOG_STATE *state, int tok);
120
121static int PTRCALL
122prolog0(PROLOG_STATE *state,
123 int tok,
124 const char *ptr,
125 const char *end,
126 const ENCODING *enc)
127{
128 switch (tok) {
129 case XML_TOK_PROLOG_S:
130 state->handler = prolog1;
131 return XML_ROLE_NONE;
132 case XML_TOK_XML_DECL:
133 state->handler = prolog1;
134 return XML_ROLE_XML_DECL;
135 case XML_TOK_PI:
136 state->handler = prolog1;
137 return XML_ROLE_PI;
138 case XML_TOK_COMMENT:
139 state->handler = prolog1;
140 return XML_ROLE_COMMENT;
141 case XML_TOK_BOM:
142 return XML_ROLE_NONE;
143 case XML_TOK_DECL_OPEN:
144 if (!XmlNameMatchesAscii(enc,
145 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
146 end,
147 KW_DOCTYPE))
148 break;
149 state->handler = doctype0;
150 return XML_ROLE_DOCTYPE_NONE;
151 case XML_TOK_INSTANCE_START:
152 state->handler = error;
153 return XML_ROLE_INSTANCE_START;
154 }
155 return common(state, tok);
156}
157
158static int PTRCALL
159prolog1(PROLOG_STATE *state,
160 int tok,
161 const char *ptr,
162 const char *end,
163 const ENCODING *enc)
164{
165 switch (tok) {
166 case XML_TOK_PROLOG_S:
167 return XML_ROLE_NONE;
168 case XML_TOK_PI:
169 return XML_ROLE_PI;
170 case XML_TOK_COMMENT:
171 return XML_ROLE_COMMENT;
172 case XML_TOK_BOM:
173 /* This case can never arise. To reach this role function, the
174 * parse must have passed through prolog0 and therefore have had
175 * some form of input, even if only a space. At that point, a
176 * byte order mark is no longer a valid character (though
177 * technically it should be interpreted as a non-breaking space),
178 * so will be rejected by the tokenizing stages.
179 */
180 return XML_ROLE_NONE; /* LCOV_EXCL_LINE */
181 case XML_TOK_DECL_OPEN:
182 if (!XmlNameMatchesAscii(enc,
183 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
184 end,
185 KW_DOCTYPE))
186 break;
187 state->handler = doctype0;
188 return XML_ROLE_DOCTYPE_NONE;
189 case XML_TOK_INSTANCE_START:
190 state->handler = error;
191 return XML_ROLE_INSTANCE_START;
192 }
193 return common(state, tok);
194}
195
196static int PTRCALL
197prolog2(PROLOG_STATE *state,
198 int tok,
199 const char *UNUSED_P(ptr),
200 const char *UNUSED_P(end),
201 const ENCODING *UNUSED_P(enc))
202{
203 switch (tok) {
204 case XML_TOK_PROLOG_S:
205 return XML_ROLE_NONE;
206 case XML_TOK_PI:
207 return XML_ROLE_PI;
208 case XML_TOK_COMMENT:
209 return XML_ROLE_COMMENT;
210 case XML_TOK_INSTANCE_START:
211 state->handler = error;
212 return XML_ROLE_INSTANCE_START;
213 }
214 return common(state, tok);
215}
216
217static int PTRCALL
218doctype0(PROLOG_STATE *state,
219 int tok,
220 const char *UNUSED_P(ptr),
221 const char *UNUSED_P(end),
222 const ENCODING *UNUSED_P(enc))
223{
224 switch (tok) {
225 case XML_TOK_PROLOG_S:
226 return XML_ROLE_DOCTYPE_NONE;
227 case XML_TOK_NAME:
228 case XML_TOK_PREFIXED_NAME:
229 state->handler = doctype1;
230 return XML_ROLE_DOCTYPE_NAME;
231 }
232 return common(state, tok);
233}
234
235static int PTRCALL
236doctype1(PROLOG_STATE *state,
237 int tok,
238 const char *ptr,
239 const char *end,
240 const ENCODING *enc)
241{
242 switch (tok) {
243 case XML_TOK_PROLOG_S:
244 return XML_ROLE_DOCTYPE_NONE;
245 case XML_TOK_OPEN_BRACKET:
246 state->handler = internalSubset;
247 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
248 case XML_TOK_DECL_CLOSE:
249 state->handler = prolog2;
250 return XML_ROLE_DOCTYPE_CLOSE;
251 case XML_TOK_NAME:
252 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
253 state->handler = doctype3;
254 return XML_ROLE_DOCTYPE_NONE;
255 }
256 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
257 state->handler = doctype2;
258 return XML_ROLE_DOCTYPE_NONE;
259 }
260 break;
261 }
262 return common(state, tok);
263}
264
265static int PTRCALL
266doctype2(PROLOG_STATE *state,
267 int tok,
268 const char *UNUSED_P(ptr),
269 const char *UNUSED_P(end),
270 const ENCODING *UNUSED_P(enc))
271{
272 switch (tok) {
273 case XML_TOK_PROLOG_S:
274 return XML_ROLE_DOCTYPE_NONE;
275 case XML_TOK_LITERAL:
276 state->handler = doctype3;
277 return XML_ROLE_DOCTYPE_PUBLIC_ID;
278 }
279 return common(state, tok);
280}
281
282static int PTRCALL
283doctype3(PROLOG_STATE *state,
284 int tok,
285 const char *UNUSED_P(ptr),
286 const char *UNUSED_P(end),
287 const ENCODING *UNUSED_P(enc))
288{
289 switch (tok) {
290 case XML_TOK_PROLOG_S:
291 return XML_ROLE_DOCTYPE_NONE;
292 case XML_TOK_LITERAL:
293 state->handler = doctype4;
294 return XML_ROLE_DOCTYPE_SYSTEM_ID;
295 }
296 return common(state, tok);
297}
298
299static int PTRCALL
300doctype4(PROLOG_STATE *state,
301 int tok,
302 const char *UNUSED_P(ptr),
303 const char *UNUSED_P(end),
304 const ENCODING *UNUSED_P(enc))
305{
306 switch (tok) {
307 case XML_TOK_PROLOG_S:
308 return XML_ROLE_DOCTYPE_NONE;
309 case XML_TOK_OPEN_BRACKET:
310 state->handler = internalSubset;
311 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
312 case XML_TOK_DECL_CLOSE:
313 state->handler = prolog2;
314 return XML_ROLE_DOCTYPE_CLOSE;
315 }
316 return common(state, tok);
317}
318
319static int PTRCALL
320doctype5(PROLOG_STATE *state,
321 int tok,
322 const char *UNUSED_P(ptr),
323 const char *UNUSED_P(end),
324 const ENCODING *UNUSED_P(enc))
325{
326 switch (tok) {
327 case XML_TOK_PROLOG_S:
328 return XML_ROLE_DOCTYPE_NONE;
329 case XML_TOK_DECL_CLOSE:
330 state->handler = prolog2;
331 return XML_ROLE_DOCTYPE_CLOSE;
332 }
333 return common(state, tok);
334}
335
336static int PTRCALL
337internalSubset(PROLOG_STATE *state,
338 int tok,
339 const char *ptr,
340 const char *end,
341 const ENCODING *enc)
342{
343 switch (tok) {
344 case XML_TOK_PROLOG_S:
345 return XML_ROLE_NONE;
346 case XML_TOK_DECL_OPEN:
347 if (XmlNameMatchesAscii(enc,
348 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
349 end,
350 KW_ENTITY)) {
351 state->handler = entity0;
352 return XML_ROLE_ENTITY_NONE;
353 }
354 if (XmlNameMatchesAscii(enc,
355 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
356 end,
357 KW_ATTLIST)) {
358 state->handler = attlist0;
359 return XML_ROLE_ATTLIST_NONE;
360 }
361 if (XmlNameMatchesAscii(enc,
362 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
363 end,
364 KW_ELEMENT)) {
365 state->handler = element0;
366 return XML_ROLE_ELEMENT_NONE;
367 }
368 if (XmlNameMatchesAscii(enc,
369 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
370 end,
371 KW_NOTATION)) {
372 state->handler = notation0;
373 return XML_ROLE_NOTATION_NONE;
374 }
375 break;
376 case XML_TOK_PI:
377 return XML_ROLE_PI;
378 case XML_TOK_COMMENT:
379 return XML_ROLE_COMMENT;
380 case XML_TOK_PARAM_ENTITY_REF:
381 return XML_ROLE_PARAM_ENTITY_REF;
382 case XML_TOK_CLOSE_BRACKET:
383 state->handler = doctype5;
384 return XML_ROLE_DOCTYPE_NONE;
385 case XML_TOK_NONE:
386 return XML_ROLE_NONE;
387 }
388 return common(state, tok);
389}
390
391#ifdef XML_DTD
392
393static int PTRCALL
394externalSubset0(PROLOG_STATE *state,
395 int tok,
396 const char *ptr,
397 const char *end,
398 const ENCODING *enc)
399{
400 state->handler = externalSubset1;
401 if (tok == XML_TOK_XML_DECL)
402 return XML_ROLE_TEXT_DECL;
403 return externalSubset1(state, tok, ptr, end, enc);
404}
405
406static int PTRCALL
407externalSubset1(PROLOG_STATE *state,
408 int tok,
409 const char *ptr,
410 const char *end,
411 const ENCODING *enc)
412{
413 switch (tok) {
414 case XML_TOK_COND_SECT_OPEN:
415 state->handler = condSect0;
416 return XML_ROLE_NONE;
417 case XML_TOK_COND_SECT_CLOSE:
418 if (state->includeLevel == 0)
419 break;
420 state->includeLevel -= 1;
421 return XML_ROLE_NONE;
422 case XML_TOK_PROLOG_S:
423 return XML_ROLE_NONE;
424 case XML_TOK_CLOSE_BRACKET:
425 break;
426 case XML_TOK_NONE:
427 if (state->includeLevel)
428 break;
429 return XML_ROLE_NONE;
430 default:
431 return internalSubset(state, tok, ptr, end, enc);
432 }
433 return common(state, tok);
434}
435
436#endif /* XML_DTD */
437
438static int PTRCALL
439entity0(PROLOG_STATE *state,
440 int tok,
441 const char *UNUSED_P(ptr),
442 const char *UNUSED_P(end),
443 const ENCODING *UNUSED_P(enc))
444{
445 switch (tok) {
446 case XML_TOK_PROLOG_S:
447 return XML_ROLE_ENTITY_NONE;
448 case XML_TOK_PERCENT:
449 state->handler = entity1;
450 return XML_ROLE_ENTITY_NONE;
451 case XML_TOK_NAME:
452 state->handler = entity2;
453 return XML_ROLE_GENERAL_ENTITY_NAME;
454 }
455 return common(state, tok);
456}
457
458static int PTRCALL
459entity1(PROLOG_STATE *state,
460 int tok,
461 const char *UNUSED_P(ptr),
462 const char *UNUSED_P(end),
463 const ENCODING *UNUSED_P(enc))
464{
465 switch (tok) {
466 case XML_TOK_PROLOG_S:
467 return XML_ROLE_ENTITY_NONE;
468 case XML_TOK_NAME:
469 state->handler = entity7;
470 return XML_ROLE_PARAM_ENTITY_NAME;
471 }
472 return common(state, tok);
473}
474
475static int PTRCALL
476entity2(PROLOG_STATE *state,
477 int tok,
478 const char *ptr,
479 const char *end,
480 const ENCODING *enc)
481{
482 switch (tok) {
483 case XML_TOK_PROLOG_S:
484 return XML_ROLE_ENTITY_NONE;
485 case XML_TOK_NAME:
486 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
487 state->handler = entity4;
488 return XML_ROLE_ENTITY_NONE;
489 }
490 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
491 state->handler = entity3;
492 return XML_ROLE_ENTITY_NONE;
493 }
494 break;
495 case XML_TOK_LITERAL:
496 state->handler = declClose;
497 state->role_none = XML_ROLE_ENTITY_NONE;
498 return XML_ROLE_ENTITY_VALUE;
499 }
500 return common(state, tok);
501}
502
503static int PTRCALL
504entity3(PROLOG_STATE *state,
505 int tok,
506 const char *UNUSED_P(ptr),
507 const char *UNUSED_P(end),
508 const ENCODING *UNUSED_P(enc))
509{
510 switch (tok) {
511 case XML_TOK_PROLOG_S:
512 return XML_ROLE_ENTITY_NONE;
513 case XML_TOK_LITERAL:
514 state->handler = entity4;
515 return XML_ROLE_ENTITY_PUBLIC_ID;
516 }
517 return common(state, tok);
518}
519
520static int PTRCALL
521entity4(PROLOG_STATE *state,
522 int tok,
523 const char *UNUSED_P(ptr),
524 const char *UNUSED_P(end),
525 const ENCODING *UNUSED_P(enc))
526{
527 switch (tok) {
528 case XML_TOK_PROLOG_S:
529 return XML_ROLE_ENTITY_NONE;
530 case XML_TOK_LITERAL:
531 state->handler = entity5;
532 return XML_ROLE_ENTITY_SYSTEM_ID;
533 }
534 return common(state, tok);
535}
536
537static int PTRCALL
538entity5(PROLOG_STATE *state,
539 int tok,
540 const char *ptr,
541 const char *end,
542 const ENCODING *enc)
543{
544 switch (tok) {
545 case XML_TOK_PROLOG_S:
546 return XML_ROLE_ENTITY_NONE;
547 case XML_TOK_DECL_CLOSE:
548 setTopLevel(state);
549 return XML_ROLE_ENTITY_COMPLETE;
550 case XML_TOK_NAME:
551 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
552 state->handler = entity6;
553 return XML_ROLE_ENTITY_NONE;
554 }
555 break;
556 }
557 return common(state, tok);
558}
559
560static int PTRCALL
561entity6(PROLOG_STATE *state,
562 int tok,
563 const char *UNUSED_P(ptr),
564 const char *UNUSED_P(end),
565 const ENCODING *UNUSED_P(enc))
566{
567 switch (tok) {
568 case XML_TOK_PROLOG_S:
569 return XML_ROLE_ENTITY_NONE;
570 case XML_TOK_NAME:
571 state->handler = declClose;
572 state->role_none = XML_ROLE_ENTITY_NONE;
573 return XML_ROLE_ENTITY_NOTATION_NAME;
574 }
575 return common(state, tok);
576}
577
578static int PTRCALL
579entity7(PROLOG_STATE *state,
580 int tok,
581 const char *ptr,
582 const char *end,
583 const ENCODING *enc)
584{
585 switch (tok) {
586 case XML_TOK_PROLOG_S:
587 return XML_ROLE_ENTITY_NONE;
588 case XML_TOK_NAME:
589 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
590 state->handler = entity9;
591 return XML_ROLE_ENTITY_NONE;
592 }
593 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
594 state->handler = entity8;
595 return XML_ROLE_ENTITY_NONE;
596 }
597 break;
598 case XML_TOK_LITERAL:
599 state->handler = declClose;
600 state->role_none = XML_ROLE_ENTITY_NONE;
601 return XML_ROLE_ENTITY_VALUE;
602 }
603 return common(state, tok);
604}
605
606static int PTRCALL
607entity8(PROLOG_STATE *state,
608 int tok,
609 const char *UNUSED_P(ptr),
610 const char *UNUSED_P(end),
611 const ENCODING *UNUSED_P(enc))
612{
613 switch (tok) {
614 case XML_TOK_PROLOG_S:
615 return XML_ROLE_ENTITY_NONE;
616 case XML_TOK_LITERAL:
617 state->handler = entity9;
618 return XML_ROLE_ENTITY_PUBLIC_ID;
619 }
620 return common(state, tok);
621}
622
623static int PTRCALL
624entity9(PROLOG_STATE *state,
625 int tok,
626 const char *UNUSED_P(ptr),
627 const char *UNUSED_P(end),
628 const ENCODING *UNUSED_P(enc))
629{
630 switch (tok) {
631 case XML_TOK_PROLOG_S:
632 return XML_ROLE_ENTITY_NONE;
633 case XML_TOK_LITERAL:
634 state->handler = entity10;
635 return XML_ROLE_ENTITY_SYSTEM_ID;
636 }
637 return common(state, tok);
638}
639
640static int PTRCALL
641entity10(PROLOG_STATE *state,
642 int tok,
643 const char *UNUSED_P(ptr),
644 const char *UNUSED_P(end),
645 const ENCODING *UNUSED_P(enc))
646{
647 switch (tok) {
648 case XML_TOK_PROLOG_S:
649 return XML_ROLE_ENTITY_NONE;
650 case XML_TOK_DECL_CLOSE:
651 setTopLevel(state);
652 return XML_ROLE_ENTITY_COMPLETE;
653 }
654 return common(state, tok);
655}
656
657static int PTRCALL
658notation0(PROLOG_STATE *state,
659 int tok,
660 const char *UNUSED_P(ptr),
661 const char *UNUSED_P(end),
662 const ENCODING *UNUSED_P(enc))
663{
664 switch (tok) {
665 case XML_TOK_PROLOG_S:
666 return XML_ROLE_NOTATION_NONE;
667 case XML_TOK_NAME:
668 state->handler = notation1;
669 return XML_ROLE_NOTATION_NAME;
670 }
671 return common(state, tok);
672}
673
674static int PTRCALL
675notation1(PROLOG_STATE *state,
676 int tok,
677 const char *ptr,
678 const char *end,
679 const ENCODING *enc)
680{
681 switch (tok) {
682 case XML_TOK_PROLOG_S:
683 return XML_ROLE_NOTATION_NONE;
684 case XML_TOK_NAME:
685 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
686 state->handler = notation3;
687 return XML_ROLE_NOTATION_NONE;
688 }
689 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
690 state->handler = notation2;
691 return XML_ROLE_NOTATION_NONE;
692 }
693 break;
694 }
695 return common(state, tok);
696}
697
698static int PTRCALL
699notation2(PROLOG_STATE *state,
700 int tok,
701 const char *UNUSED_P(ptr),
702 const char *UNUSED_P(end),
703 const ENCODING *UNUSED_P(enc))
704{
705 switch (tok) {
706 case XML_TOK_PROLOG_S:
707 return XML_ROLE_NOTATION_NONE;
708 case XML_TOK_LITERAL:
709 state->handler = notation4;
710 return XML_ROLE_NOTATION_PUBLIC_ID;
711 }
712 return common(state, tok);
713}
714
715static int PTRCALL
716notation3(PROLOG_STATE *state,
717 int tok,
718 const char *UNUSED_P(ptr),
719 const char *UNUSED_P(end),
720 const ENCODING *UNUSED_P(enc))
721{
722 switch (tok) {
723 case XML_TOK_PROLOG_S:
724 return XML_ROLE_NOTATION_NONE;
725 case XML_TOK_LITERAL:
726 state->handler = declClose;
727 state->role_none = XML_ROLE_NOTATION_NONE;
728 return XML_ROLE_NOTATION_SYSTEM_ID;
729 }
730 return common(state, tok);
731}
732
733static int PTRCALL
734notation4(PROLOG_STATE *state,
735 int tok,
736 const char *UNUSED_P(ptr),
737 const char *UNUSED_P(end),
738 const ENCODING *UNUSED_P(enc))
739{
740 switch (tok) {
741 case XML_TOK_PROLOG_S:
742 return XML_ROLE_NOTATION_NONE;
743 case XML_TOK_LITERAL:
744 state->handler = declClose;
745 state->role_none = XML_ROLE_NOTATION_NONE;
746 return XML_ROLE_NOTATION_SYSTEM_ID;
747 case XML_TOK_DECL_CLOSE:
748 setTopLevel(state);
749 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
750 }
751 return common(state, tok);
752}
753
754static int PTRCALL
755attlist0(PROLOG_STATE *state,
756 int tok,
757 const char *UNUSED_P(ptr),
758 const char *UNUSED_P(end),
759 const ENCODING *UNUSED_P(enc))
760{
761 switch (tok) {
762 case XML_TOK_PROLOG_S:
763 return XML_ROLE_ATTLIST_NONE;
764 case XML_TOK_NAME:
765 case XML_TOK_PREFIXED_NAME:
766 state->handler = attlist1;
767 return XML_ROLE_ATTLIST_ELEMENT_NAME;
768 }
769 return common(state, tok);
770}
771
772static int PTRCALL
773attlist1(PROLOG_STATE *state,
774 int tok,
775 const char *UNUSED_P(ptr),
776 const char *UNUSED_P(end),
777 const ENCODING *UNUSED_P(enc))
778{
779 switch (tok) {
780 case XML_TOK_PROLOG_S:
781 return XML_ROLE_ATTLIST_NONE;
782 case XML_TOK_DECL_CLOSE:
783 setTopLevel(state);
784 return XML_ROLE_ATTLIST_NONE;
785 case XML_TOK_NAME:
786 case XML_TOK_PREFIXED_NAME:
787 state->handler = attlist2;
788 return XML_ROLE_ATTRIBUTE_NAME;
789 }
790 return common(state, tok);
791}
792
793static int PTRCALL
794attlist2(PROLOG_STATE *state,
795 int tok,
796 const char *ptr,
797 const char *end,
798 const ENCODING *enc)
799{
800 switch (tok) {
801 case XML_TOK_PROLOG_S:
802 return XML_ROLE_ATTLIST_NONE;
803 case XML_TOK_NAME:
804 {
805 static const char * const types[] = {
806 KW_CDATA,
807 KW_ID,
808 KW_IDREF,
809 KW_IDREFS,
810 KW_ENTITY,
811 KW_ENTITIES,
812 KW_NMTOKEN,
813 KW_NMTOKENS,
814 };
815 int i;
816 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
817 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
818 state->handler = attlist8;
819 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
820 }
821 }
822 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
823 state->handler = attlist5;
824 return XML_ROLE_ATTLIST_NONE;
825 }
826 break;
827 case XML_TOK_OPEN_PAREN:
828 state->handler = attlist3;
829 return XML_ROLE_ATTLIST_NONE;
830 }
831 return common(state, tok);
832}
833
834static int PTRCALL
835attlist3(PROLOG_STATE *state,
836 int tok,
837 const char *UNUSED_P(ptr),
838 const char *UNUSED_P(end),
839 const ENCODING *UNUSED_P(enc))
840{
841 switch (tok) {
842 case XML_TOK_PROLOG_S:
843 return XML_ROLE_ATTLIST_NONE;
844 case XML_TOK_NMTOKEN:
845 case XML_TOK_NAME:
846 case XML_TOK_PREFIXED_NAME:
847 state->handler = attlist4;
848 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
849 }
850 return common(state, tok);
851}
852
853static int PTRCALL
854attlist4(PROLOG_STATE *state,
855 int tok,
856 const char *UNUSED_P(ptr),
857 const char *UNUSED_P(end),
858 const ENCODING *UNUSED_P(enc))
859{
860 switch (tok) {
861 case XML_TOK_PROLOG_S:
862 return XML_ROLE_ATTLIST_NONE;
863 case XML_TOK_CLOSE_PAREN:
864 state->handler = attlist8;
865 return XML_ROLE_ATTLIST_NONE;
866 case XML_TOK_OR:
867 state->handler = attlist3;
868 return XML_ROLE_ATTLIST_NONE;
869 }
870 return common(state, tok);
871}
872
873static int PTRCALL
874attlist5(PROLOG_STATE *state,
875 int tok,
876 const char *UNUSED_P(ptr),
877 const char *UNUSED_P(end),
878 const ENCODING *UNUSED_P(enc))
879{
880 switch (tok) {
881 case XML_TOK_PROLOG_S:
882 return XML_ROLE_ATTLIST_NONE;
883 case XML_TOK_OPEN_PAREN:
884 state->handler = attlist6;
885 return XML_ROLE_ATTLIST_NONE;
886 }
887 return common(state, tok);
888}
889
890static int PTRCALL
891attlist6(PROLOG_STATE *state,
892 int tok,
893 const char *UNUSED_P(ptr),
894 const char *UNUSED_P(end),
895 const ENCODING *UNUSED_P(enc))
896{
897 switch (tok) {
898 case XML_TOK_PROLOG_S:
899 return XML_ROLE_ATTLIST_NONE;
900 case XML_TOK_NAME:
901 state->handler = attlist7;
902 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
903 }
904 return common(state, tok);
905}
906
907static int PTRCALL
908attlist7(PROLOG_STATE *state,
909 int tok,
910 const char *UNUSED_P(ptr),
911 const char *UNUSED_P(end),
912 const ENCODING *UNUSED_P(enc))
913{
914 switch (tok) {
915 case XML_TOK_PROLOG_S:
916 return XML_ROLE_ATTLIST_NONE;
917 case XML_TOK_CLOSE_PAREN:
918 state->handler = attlist8;
919 return XML_ROLE_ATTLIST_NONE;
920 case XML_TOK_OR:
921 state->handler = attlist6;
922 return XML_ROLE_ATTLIST_NONE;
923 }
924 return common(state, tok);
925}
926
927/* default value */
928static int PTRCALL
929attlist8(PROLOG_STATE *state,
930 int tok,
931 const char *ptr,
932 const char *end,
933 const ENCODING *enc)
934{
935 switch (tok) {
936 case XML_TOK_PROLOG_S:
937 return XML_ROLE_ATTLIST_NONE;
938 case XML_TOK_POUND_NAME:
939 if (XmlNameMatchesAscii(enc,
940 ptr + MIN_BYTES_PER_CHAR(enc),
941 end,
942 KW_IMPLIED)) {
943 state->handler = attlist1;
944 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
945 }
946 if (XmlNameMatchesAscii(enc,
947 ptr + MIN_BYTES_PER_CHAR(enc),
948 end,
949 KW_REQUIRED)) {
950 state->handler = attlist1;
951 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
952 }
953 if (XmlNameMatchesAscii(enc,
954 ptr + MIN_BYTES_PER_CHAR(enc),
955 end,
956 KW_FIXED)) {
957 state->handler = attlist9;
958 return XML_ROLE_ATTLIST_NONE;
959 }
960 break;
961 case XML_TOK_LITERAL:
962 state->handler = attlist1;
963 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
964 }
965 return common(state, tok);
966}
967
968static int PTRCALL
969attlist9(PROLOG_STATE *state,
970 int tok,
971 const char *UNUSED_P(ptr),
972 const char *UNUSED_P(end),
973 const ENCODING *UNUSED_P(enc))
974{
975 switch (tok) {
976 case XML_TOK_PROLOG_S:
977 return XML_ROLE_ATTLIST_NONE;
978 case XML_TOK_LITERAL:
979 state->handler = attlist1;
980 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
981 }
982 return common(state, tok);
983}
984
985static int PTRCALL
986element0(PROLOG_STATE *state,
987 int tok,
988 const char *UNUSED_P(ptr),
989 const char *UNUSED_P(end),
990 const ENCODING *UNUSED_P(enc))
991{
992 switch (tok) {
993 case XML_TOK_PROLOG_S:
994 return XML_ROLE_ELEMENT_NONE;
995 case XML_TOK_NAME:
996 case XML_TOK_PREFIXED_NAME:
997 state->handler = element1;
998 return XML_ROLE_ELEMENT_NAME;
999 }
1000 return common(state, tok);
1001}
1002
1003static int PTRCALL
1004element1(PROLOG_STATE *state,
1005 int tok,
1006 const char *ptr,
1007 const char *end,
1008 const ENCODING *enc)
1009{
1010 switch (tok) {
1011 case XML_TOK_PROLOG_S:
1012 return XML_ROLE_ELEMENT_NONE;
1013 case XML_TOK_NAME:
1014 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1015 state->handler = declClose;
1016 state->role_none = XML_ROLE_ELEMENT_NONE;
1017 return XML_ROLE_CONTENT_EMPTY;
1018 }
1019 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1020 state->handler = declClose;
1021 state->role_none = XML_ROLE_ELEMENT_NONE;
1022 return XML_ROLE_CONTENT_ANY;
1023 }
1024 break;
1025 case XML_TOK_OPEN_PAREN:
1026 state->handler = element2;
1027 state->level = 1;
1028 return XML_ROLE_GROUP_OPEN;
1029 }
1030 return common(state, tok);
1031}
1032
1033static int PTRCALL
1034element2(PROLOG_STATE *state,
1035 int tok,
1036 const char *ptr,
1037 const char *end,
1038 const ENCODING *enc)
1039{
1040 switch (tok) {
1041 case XML_TOK_PROLOG_S:
1042 return XML_ROLE_ELEMENT_NONE;
1043 case XML_TOK_POUND_NAME:
1044 if (XmlNameMatchesAscii(enc,
1045 ptr + MIN_BYTES_PER_CHAR(enc),
1046 end,
1047 KW_PCDATA)) {
1048 state->handler = element3;
1049 return XML_ROLE_CONTENT_PCDATA;
1050 }
1051 break;
1052 case XML_TOK_OPEN_PAREN:
1053 state->level = 2;
1054 state->handler = element6;
1055 return XML_ROLE_GROUP_OPEN;
1056 case XML_TOK_NAME:
1057 case XML_TOK_PREFIXED_NAME:
1058 state->handler = element7;
1059 return XML_ROLE_CONTENT_ELEMENT;
1060 case XML_TOK_NAME_QUESTION:
1061 state->handler = element7;
1062 return XML_ROLE_CONTENT_ELEMENT_OPT;
1063 case XML_TOK_NAME_ASTERISK:
1064 state->handler = element7;
1065 return XML_ROLE_CONTENT_ELEMENT_REP;
1066 case XML_TOK_NAME_PLUS:
1067 state->handler = element7;
1068 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1069 }
1070 return common(state, tok);
1071}
1072
1073static int PTRCALL
1074element3(PROLOG_STATE *state,
1075 int tok,
1076 const char *UNUSED_P(ptr),
1077 const char *UNUSED_P(end),
1078 const ENCODING *UNUSED_P(enc))
1079{
1080 switch (tok) {
1081 case XML_TOK_PROLOG_S:
1082 return XML_ROLE_ELEMENT_NONE;
1083 case XML_TOK_CLOSE_PAREN:
1084 state->handler = declClose;
1085 state->role_none = XML_ROLE_ELEMENT_NONE;
1086 return XML_ROLE_GROUP_CLOSE;
1087 case XML_TOK_CLOSE_PAREN_ASTERISK:
1088 state->handler = declClose;
1089 state->role_none = XML_ROLE_ELEMENT_NONE;
1090 return XML_ROLE_GROUP_CLOSE_REP;
1091 case XML_TOK_OR:
1092 state->handler = element4;
1093 return XML_ROLE_ELEMENT_NONE;
1094 }
1095 return common(state, tok);
1096}
1097
1098static int PTRCALL
1099element4(PROLOG_STATE *state,
1100 int tok,
1101 const char *UNUSED_P(ptr),
1102 const char *UNUSED_P(end),
1103 const ENCODING *UNUSED_P(enc))
1104{
1105 switch (tok) {
1106 case XML_TOK_PROLOG_S:
1107 return XML_ROLE_ELEMENT_NONE;
1108 case XML_TOK_NAME:
1109 case XML_TOK_PREFIXED_NAME:
1110 state->handler = element5;
1111 return XML_ROLE_CONTENT_ELEMENT;
1112 }
1113 return common(state, tok);
1114}
1115
1116static int PTRCALL
1117element5(PROLOG_STATE *state,
1118 int tok,
1119 const char *UNUSED_P(ptr),
1120 const char *UNUSED_P(end),
1121 const ENCODING *UNUSED_P(enc))
1122{
1123 switch (tok) {
1124 case XML_TOK_PROLOG_S:
1125 return XML_ROLE_ELEMENT_NONE;
1126 case XML_TOK_CLOSE_PAREN_ASTERISK:
1127 state->handler = declClose;
1128 state->role_none = XML_ROLE_ELEMENT_NONE;
1129 return XML_ROLE_GROUP_CLOSE_REP;
1130 case XML_TOK_OR:
1131 state->handler = element4;
1132 return XML_ROLE_ELEMENT_NONE;
1133 }
1134 return common(state, tok);
1135}
1136
1137static int PTRCALL
1138element6(PROLOG_STATE *state,
1139 int tok,
1140 const char *UNUSED_P(ptr),
1141 const char *UNUSED_P(end),
1142 const ENCODING *UNUSED_P(enc))
1143{
1144 switch (tok) {
1145 case XML_TOK_PROLOG_S:
1146 return XML_ROLE_ELEMENT_NONE;
1147 case XML_TOK_OPEN_PAREN:
1148 state->level += 1;
1149 return XML_ROLE_GROUP_OPEN;
1150 case XML_TOK_NAME:
1151 case XML_TOK_PREFIXED_NAME:
1152 state->handler = element7;
1153 return XML_ROLE_CONTENT_ELEMENT;
1154 case XML_TOK_NAME_QUESTION:
1155 state->handler = element7;
1156 return XML_ROLE_CONTENT_ELEMENT_OPT;
1157 case XML_TOK_NAME_ASTERISK:
1158 state->handler = element7;
1159 return XML_ROLE_CONTENT_ELEMENT_REP;
1160 case XML_TOK_NAME_PLUS:
1161 state->handler = element7;
1162 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1163 }
1164 return common(state, tok);
1165}
1166
1167static int PTRCALL
1168element7(PROLOG_STATE *state,
1169 int tok,
1170 const char *UNUSED_P(ptr),
1171 const char *UNUSED_P(end),
1172 const ENCODING *UNUSED_P(enc))
1173{
1174 switch (tok) {
1175 case XML_TOK_PROLOG_S:
1176 return XML_ROLE_ELEMENT_NONE;
1177 case XML_TOK_CLOSE_PAREN:
1178 state->level -= 1;
1179 if (state->level == 0) {
1180 state->handler = declClose;
1181 state->role_none = XML_ROLE_ELEMENT_NONE;
1182 }
1183 return XML_ROLE_GROUP_CLOSE;
1184 case XML_TOK_CLOSE_PAREN_ASTERISK:
1185 state->level -= 1;
1186 if (state->level == 0) {
1187 state->handler = declClose;
1188 state->role_none = XML_ROLE_ELEMENT_NONE;
1189 }
1190 return XML_ROLE_GROUP_CLOSE_REP;
1191 case XML_TOK_CLOSE_PAREN_QUESTION:
1192 state->level -= 1;
1193 if (state->level == 0) {
1194 state->handler = declClose;
1195 state->role_none = XML_ROLE_ELEMENT_NONE;
1196 }
1197 return XML_ROLE_GROUP_CLOSE_OPT;
1198 case XML_TOK_CLOSE_PAREN_PLUS:
1199 state->level -= 1;
1200 if (state->level == 0) {
1201 state->handler = declClose;
1202 state->role_none = XML_ROLE_ELEMENT_NONE;
1203 }
1204 return XML_ROLE_GROUP_CLOSE_PLUS;
1205 case XML_TOK_COMMA:
1206 state->handler = element6;
1207 return XML_ROLE_GROUP_SEQUENCE;
1208 case XML_TOK_OR:
1209 state->handler = element6;
1210 return XML_ROLE_GROUP_CHOICE;
1211 }
1212 return common(state, tok);
1213}
1214
1215#ifdef XML_DTD
1216
1217static int PTRCALL
1218condSect0(PROLOG_STATE *state,
1219 int tok,
1220 const char *ptr,
1221 const char *end,
1222 const ENCODING *enc)
1223{
1224 switch (tok) {
1225 case XML_TOK_PROLOG_S:
1226 return XML_ROLE_NONE;
1227 case XML_TOK_NAME:
1228 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1229 state->handler = condSect1;
1230 return XML_ROLE_NONE;
1231 }
1232 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1233 state->handler = condSect2;
1234 return XML_ROLE_NONE;
1235 }
1236 break;
1237 }
1238 return common(state, tok);
1239}
1240
1241static int PTRCALL
1242condSect1(PROLOG_STATE *state,
1243 int tok,
1244 const char *UNUSED_P(ptr),
1245 const char *UNUSED_P(end),
1246 const ENCODING *UNUSED_P(enc))
1247{
1248 switch (tok) {
1249 case XML_TOK_PROLOG_S:
1250 return XML_ROLE_NONE;
1251 case XML_TOK_OPEN_BRACKET:
1252 state->handler = externalSubset1;
1253 state->includeLevel += 1;
1254 return XML_ROLE_NONE;
1255 }
1256 return common(state, tok);
1257}
1258
1259static int PTRCALL
1260condSect2(PROLOG_STATE *state,
1261 int tok,
1262 const char *UNUSED_P(ptr),
1263 const char *UNUSED_P(end),
1264 const ENCODING *UNUSED_P(enc))
1265{
1266 switch (tok) {
1267 case XML_TOK_PROLOG_S:
1268 return XML_ROLE_NONE;
1269 case XML_TOK_OPEN_BRACKET:
1270 state->handler = externalSubset1;
1271 return XML_ROLE_IGNORE_SECT;
1272 }
1273 return common(state, tok);
1274}
1275
1276#endif /* XML_DTD */
1277
1278static int PTRCALL
1279declClose(PROLOG_STATE *state,
1280 int tok,
1281 const char *UNUSED_P(ptr),
1282 const char *UNUSED_P(end),
1283 const ENCODING *UNUSED_P(enc))
1284{
1285 switch (tok) {
1286 case XML_TOK_PROLOG_S:
1287 return state->role_none;
1288 case XML_TOK_DECL_CLOSE:
1289 setTopLevel(state);
1290 return state->role_none;
1291 }
1292 return common(state, tok);
1293}
1294
1295/* This function will only be invoked if the internal logic of the
1296 * parser has broken down. It is used in two cases:
1297 *
1298 * 1: When the XML prolog has been finished. At this point the
1299 * processor (the parser level above these role handlers) should
1300 * switch from prologProcessor to contentProcessor and reinitialise
1301 * the handler function.
1302 *
1303 * 2: When an error has been detected (via common() below). At this
1304 * point again the processor should be switched to errorProcessor,
1305 * which will never call a handler.
1306 *
1307 * The result of this is that error() can only be called if the
1308 * processor switch failed to happen, which is an internal error and
1309 * therefore we shouldn't be able to provoke it simply by using the
1310 * library. It is a necessary backstop, however, so we merely exclude
1311 * it from the coverage statistics.
1312 *
1313 * LCOV_EXCL_START
1314 */
static int PTRCALL
error(PROLOG_STATE *UNUSED_P(state),
      int UNUSED_P(tok),
      const char *UNUSED_P(ptr),
      const char *UNUSED_P(end),
      const ENCODING *UNUSED_P(enc))
{
  /* Backstop handler: ignores every argument, leaves the state
     untouched and always reports XML_ROLE_NONE. */
  return XML_ROLE_NONE;
}
1324/* LCOV_EXCL_STOP */
1325
1326static int FASTCALL
1327common(PROLOG_STATE *state, int tok)
1328{
1329#ifdef XML_DTD
1330 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1331 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1332#endif
1333 state->handler = error;
1334 return XML_ROLE_ERROR;
1335}
1336
1337void
1338XmlPrologStateInit(PROLOG_STATE *state)
1339{
1340 state->handler = prolog0;
1341#ifdef XML_DTD
1342 state->documentEntity = 1;
1343 state->includeLevel = 0;
1344 state->inEntityValue = 0;
1345#endif /* XML_DTD */
1346}
1347
1348#ifdef XML_DTD
1349
1350void
1351XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1352{
1353 state->handler = externalSubset0;
1354 state->documentEntity = 0;
1355 state->includeLevel = 0;
1356}
1357
1358#endif /* XML_DTD */
1359