1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As far as possible, each function is associated with the production
 * of the XML specification it implements. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
38#define IN_LIBXML
39#include "libxml.h"
40
41#if defined(_WIN32) && !defined (__CYGWIN__)
42#define XML_DIR_SEP '\\'
43#else
44#define XML_DIR_SEP '/'
45#endif
46
47#include <stdlib.h>
48#include <limits.h>
49#include <string.h>
50#include <stdarg.h>
51#include <stddef.h>
52#include <libxml/xmlmemory.h>
53#include <libxml/threads.h>
54#include <libxml/globals.h>
55#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
64#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
67#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
71#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
86
87#include "buf.h"
88#include "enc.h"
89
/*
 * Forward declaration of the generic fatal error reporter; it is defined
 * further down in this file but is already needed by xmlParserEntityCheck().
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

/*
 * Forward declaration; the definition is not part of this section.
 * NOTE(review): from the name and signature this presumably creates a
 * parser context for an external entity, optionally derived from @pctx --
 * confirm against the definition.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

/* Forward declaration; the definition is not part of this section. */
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
/*
 * XML_PARSER_BIG_ENTITY is the entity size under which
 * xmlParserEntityCheck() accepts an entity without looking further.
 */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): XML_PARSER_LOT_ENTITY is not referenced in this section. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128{
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
148 ++ctxt->depth;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
151 --ctxt->depth;
152 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153 ent->content[0] = 0;
154 }
155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
204 size = ent->checked / 2;
205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
223 * strange we got no data for checking
224 */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
229 }
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; it is initialized to 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
243
244
245
/*
 * Miscellaneous parser constants.
 * NOTE(review): none of these four macros is referenced in this section;
 * the buffer sizes are presumably initial sizes of temporary buffers --
 * confirm against their users.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, that is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
272
/*
 * Forward declarations of static routines whose definitions are not part
 * of this section of the file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* only declared when legacy support is compiled in */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
299/************************************************************************
300 * *
301 * Some factorized error routines *
302 * *
303 ************************************************************************/
304
305/**
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322
323 if (prefix == NULL)
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
328 else
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351{
352 const char *errmsg;
353
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
360 break;
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
363 break;
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
366 break;
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
369 break;
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
372 break;
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
375 break;
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
378 break;
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
381 break;
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
384 break;
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
387 break;
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
390 break;
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
393 break;
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
399 break;
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
402 break;
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
405 break;
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
408 break;
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
411 break;
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
414 break;
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
417 break;
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
420 break;
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
423 break;
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
426 break;
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
429 break;
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
432 break;
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
435 break;
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
438 break;
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
441 break;
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
444 break;
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
447 break;
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
450 break;
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
453 break;
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
456 break;
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
459 break;
460 case XML_ERR_PEREF_IN_INT_SUBSET:
461 errmsg =
462 "PEReference: forbidden within markup decl in internal subset";
463 break;
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
466 break;
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
469 break;
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
475 "conditional section INCLUDE or IGNORE keyword expected";
476 break;
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
479 break;
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
482 break;
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
485 break;
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
488 break;
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
491 break;
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
494 break;
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
497 break;
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
500 break;
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
503 break;
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
506 break;
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
509 break;
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
512 break;
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
515 break;
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
518 break;
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
521 break;
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
524 break;
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
527 break;
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
530 break;
531#if 0
532 case:
533 errmsg = "";
534 break;
535#endif
536 default:
537 errmsg = "Unregistered error message";
538 }
539 if (ctxt != NULL)
540 ctxt->errNo = error;
541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void LIBXML_ATTR_FORMAT(3,0)
566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
568{
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
572 if (ctxt != NULL)
573 ctxt->errNo = error;
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
581}
582
583/**
584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
593static void LIBXML_ATTR_FORMAT(3,0)
594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
597 xmlStructuredErrorFunc schannel = NULL;
598
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
629 * Handle a validity error.
630 */
631static void LIBXML_ATTR_FORMAT(3,0)
632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
634{
635 xmlStructuredErrorFunc schannel = NULL;
636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 ctxt->valid = 0;
653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
659 }
660}
661
662/**
663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
671static void LIBXML_ATTR_FORMAT(3,0)
672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
674{
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
678 if (ctxt != NULL)
679 ctxt->errNo = error;
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
688}
689
690/**
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
701static void LIBXML_ATTR_FORMAT(3,0)
702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
704 const xmlChar *str2)
705{
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
709 if (ctxt != NULL)
710 ctxt->errNo = error;
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
720}
721
722/**
723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
731static void LIBXML_ATTR_FORMAT(3,0)
732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
734{
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749}
750
751/**
752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
760static void LIBXML_ATTR_FORMAT(3,0)
761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
785static void LIBXML_ATTR_FORMAT(3,0)
786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
790{
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
794 if (ctxt != NULL)
795 ctxt->errNo = error;
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
802}
803
804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a namespace warning error
813 */
814static void LIBXML_ATTR_FORMAT(3,0)
815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
829/************************************************************************
830 * *
831 * Library wide options *
832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
849 case XML_WITH_THREAD:
850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
855 case XML_WITH_TREE:
856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
861 case XML_WITH_OUTPUT:
862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
867 case XML_WITH_PUSH:
868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
873 case XML_WITH_READER:
874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
879 case XML_WITH_PATTERN:
880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
885 case XML_WITH_WRITER:
886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
891 case XML_WITH_SAX1:
892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
897 case XML_WITH_FTP:
898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
903 case XML_WITH_HTTP:
904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
909 case XML_WITH_VALID:
910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
915 case XML_WITH_HTML:
916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
921 case XML_WITH_LEGACY:
922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
927 case XML_WITH_C14N:
928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
933 case XML_WITH_CATALOG:
934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
939 case XML_WITH_XPATH:
940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
945 case XML_WITH_XPTR:
946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
951 case XML_WITH_XINCLUDE:
952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
957 case XML_WITH_ICONV:
958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
963 case XML_WITH_ISO8859X:
964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
969 case XML_WITH_UNICODE:
970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
975 case XML_WITH_REGEXP:
976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
981 case XML_WITH_AUTOMATA:
982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
987 case XML_WITH_EXPR:
988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
993 case XML_WITH_SCHEMAS:
994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
999 case XML_WITH_SCHEMATRON:
1000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
1005 case XML_WITH_MODULES:
1006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
1011 case XML_WITH_DEBUG:
1012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
1017 case XML_WITH_DEBUG_MEM:
1018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
1023 case XML_WITH_DEBUG_RUN:
1024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
1028#endif
1029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
1035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
1041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
1047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
1055 * SAX2 defaulted attributes handling *
1056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068#ifdef LIBXML_SAX1_ENABLED
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
1075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
1082 }
1083}
1084
/*
 * xmlDefAttrs:
 *
 * Table of the defaulted attributes of one element: a counted header
 * followed by an array of string pointers.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    /*
     * Pre-C99 fallback with a fixed declared size.
     * NOTE(review): presumably the structure is allocated with extra room
     * and indexed past the declared bound -- confirm against the allocator.
     */
    const xmlChar *values[5];
#endif
};
1097
1098/**
1099 * xmlAttrNormalizeSpace:
1100 * @src: the source string
1101 * @dst: the target string
1102 *
1103 * Normalize the space in non CDATA attribute values:
1104 * If the attribute type is not CDATA, then the XML processor MUST further
1105 * process the normalized attribute value by discarding any leading and
1106 * trailing space (#x20) characters, and by replacing sequences of space
1107 * (#x20) characters by a single space (#x20) character.
1108 * Note that the size of dst need to be at least src, and if one doesn't need
1109 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110 * passing src as dst is just fine.
1111 *
1112 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113 * is needed.
1114 */
1115static xmlChar *
1116xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117{
1118 if ((src == NULL) || (dst == NULL))
1119 return(NULL);
1120
1121 while (*src == 0x20) src++;
1122 while (*src != 0) {
1123 if (*src == 0x20) {
1124 while (*src == 0x20) src++;
1125 if (*src != 0)
1126 *dst++ = 0x20;
1127 } else {
1128 *dst++ = *src++;
1129 }
1130 }
1131 *dst = 0;
1132 if (dst == src)
1133 return(NULL);
1134 return(dst);
1135}
1136
1137/**
1138 * xmlAttrNormalizeSpace2:
1139 * @src: the source string
1140 *
1141 * Normalize the space in non CDATA attribute values, a slightly more complex
1142 * front end to avoid allocation problems when running on attribute values
1143 * coming from the input.
1144 *
1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146 * is needed.
1147 */
1148static const xmlChar *
1149xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1150{
1151 int i;
1152 int remove_head = 0;
1153 int need_realloc = 0;
1154 const xmlChar *cur;
1155
1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157 return(NULL);
1158 i = *len;
1159 if (i <= 0)
1160 return(NULL);
1161
1162 cur = src;
1163 while (*cur == 0x20) {
1164 cur++;
1165 remove_head++;
1166 }
1167 while (*cur != 0) {
1168 if (*cur == 0x20) {
1169 cur++;
1170 if ((*cur == 0x20) || (*cur == 0)) {
1171 need_realloc = 1;
1172 break;
1173 }
1174 } else
1175 cur++;
1176 }
1177 if (need_realloc) {
1178 xmlChar *ret;
1179
1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 if (ret == NULL) {
1182 xmlErrMemory(ctxt, NULL);
1183 return(NULL);
1184 }
1185 xmlAttrNormalizeSpace(ret, ret);
1186 *len = (int) strlen((const char *)ret);
1187 return(ret);
1188 } else if (remove_head) {
1189 *len -= remove_head;
1190 memmove(src, src + remove_head, 1 + *len);
1191 return(src);
1192 }
1193 return(NULL);
1194}
1195
1196/**
1197 * xmlAddDefAttrs:
1198 * @ctxt: an XML parser context
1199 * @fullname: the element fullname
1200 * @fullattr: the attribute fullname
1201 * @value: the attribute value
1202 *
1203 * Add a defaulted attribute for an element
1204 */
1205static void
1206xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207 const xmlChar *fullname,
1208 const xmlChar *fullattr,
1209 const xmlChar *value) {
1210 xmlDefAttrsPtr defaults;
1211 int len;
1212 const xmlChar *name;
1213 const xmlChar *prefix;
1214
1215 /*
1216 * Allows to detect attribute redefinitions
1217 */
1218 if (ctxt->attsSpecial != NULL) {
1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 return;
1221 }
1222
1223 if (ctxt->attsDefault == NULL) {
1224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225 if (ctxt->attsDefault == NULL)
1226 goto mem_error;
1227 }
1228
1229 /*
1230 * split the element name into prefix:localname , the string found
1231 * are within the DTD and then not associated to namespace names.
1232 */
1233 name = xmlSplitQName3(fullname, &len);
1234 if (name == NULL) {
1235 name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 prefix = NULL;
1237 } else {
1238 name = xmlDictLookup(ctxt->dict, name, -1);
1239 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240 }
1241
1242 /*
1243 * make sure there is some storage
1244 */
1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246 if (defaults == NULL) {
1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1248 (4 * 5) * sizeof(const xmlChar *));
1249 if (defaults == NULL)
1250 goto mem_error;
1251 defaults->nbAttrs = 0;
1252 defaults->maxAttrs = 4;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1259 xmlDefAttrsPtr temp;
1260
1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263 if (temp == NULL)
1264 goto mem_error;
1265 defaults = temp;
1266 defaults->maxAttrs *= 2;
1267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 defaults, NULL) < 0) {
1269 xmlFree(defaults);
1270 goto mem_error;
1271 }
1272 }
1273
1274 /*
1275 * Split the element name into prefix:localname , the string found
1276 * are within the DTD and hen not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullattr, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285 }
1286
1287 defaults->values[5 * defaults->nbAttrs] = name;
1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289 /* intern the string and precompute the end */
1290 len = xmlStrlen(value);
1291 value = xmlDictLookup(ctxt->dict, value, len);
1292 defaults->values[5 * defaults->nbAttrs + 2] = value;
1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294 if (ctxt->external)
1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296 else
1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298 defaults->nbAttrs++;
1299
1300 return;
1301
1302mem_error:
1303 xmlErrMemory(ctxt, NULL);
1304 return;
1305}
1306
1307/**
1308 * xmlAddSpecialAttr:
1309 * @ctxt: an XML parser context
1310 * @fullname: the element fullname
1311 * @fullattr: the attribute fullname
1312 * @type: the attribute type
1313 *
1314 * Register this attribute type
1315 */
1316static void
1317xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 const xmlChar *fullname,
1319 const xmlChar *fullattr,
1320 int type)
1321{
1322 if (ctxt->attsSpecial == NULL) {
1323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324 if (ctxt->attsSpecial == NULL)
1325 goto mem_error;
1326 }
1327
1328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329 return;
1330
1331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332 (void *) (ptrdiff_t) type);
1333 return;
1334
1335mem_error:
1336 xmlErrMemory(ctxt, NULL);
1337 return;
1338}
1339
1340/**
1341 * xmlCleanSpecialAttrCallback:
1342 *
1343 * Removes CDATA attributes from the special attribute table
1344 */
1345static void
1346xmlCleanSpecialAttrCallback(void *payload, void *data,
1347 const xmlChar *fullname, const xmlChar *fullattr,
1348 const xmlChar *unused ATTRIBUTE_UNUSED) {
1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350
1351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 }
1354}
1355
1356/**
1357 * xmlCleanSpecialAttr:
1358 * @ctxt: an XML parser context
1359 *
1360 * Trim the list of attributes defined to remove all those of type
1361 * CDATA as they are not special. This call should be done when finishing
1362 * to parse the DTD and before starting to parse the document root.
1363 */
1364static void
1365xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366{
1367 if (ctxt->attsSpecial == NULL)
1368 return;
1369
1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371
1372 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373 xmlHashFree(ctxt->attsSpecial, NULL);
1374 ctxt->attsSpecial = NULL;
1375 }
1376 return;
1377}
1378
1379/**
1380 * xmlCheckLanguageID:
1381 * @lang: pointer to the string value
1382 *
1383 * Checks that the value conforms to the LanguageID production:
1384 *
1385 * NOTE: this is somewhat deprecated, those productions were removed from
1386 * the XML Second edition.
1387 *
1388 * [33] LanguageID ::= Langcode ('-' Subcode)*
1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393 * [38] Subcode ::= ([a-z] | [A-Z])+
1394 *
1395 * The current REC reference the sucessors of RFC 1766, currently 5646
1396 *
1397 * http://www.rfc-editor.org/rfc/rfc5646.txt
1398 * langtag = language
1399 * ["-" script]
1400 * ["-" region]
1401 * *("-" variant)
1402 * *("-" extension)
1403 * ["-" privateuse]
1404 * language = 2*3ALPHA ; shortest ISO 639 code
1405 * ["-" extlang] ; sometimes followed by
1406 * ; extended language subtags
1407 * / 4ALPHA ; or reserved for future use
1408 * / 5*8ALPHA ; or registered language subtag
1409 *
1410 * extlang = 3ALPHA ; selected ISO 639 codes
1411 * *2("-" 3ALPHA) ; permanently reserved
1412 *
1413 * script = 4ALPHA ; ISO 15924 code
1414 *
1415 * region = 2ALPHA ; ISO 3166-1 code
1416 * / 3DIGIT ; UN M.49 code
1417 *
1418 * variant = 5*8alphanum ; registered variants
1419 * / (DIGIT 3alphanum)
1420 *
1421 * extension = singleton 1*("-" (2*8alphanum))
1422 *
1423 * ; Single alphanumerics
1424 * ; "x" reserved for private use
1425 * singleton = DIGIT ; 0 - 9
1426 * / %x41-57 ; A - W
1427 * / %x59-5A ; Y - Z
1428 * / %x61-77 ; a - w
1429 * / %x79-7A ; y - z
1430 *
1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432 * The parser below doesn't try to cope with extension or privateuse
1433 * that could be added but that's not interoperable anyway
1434 *
1435 * Returns 1 if correct 0 otherwise
1436 **/
1437int
1438xmlCheckLanguageID(const xmlChar * lang)
1439{
1440 const xmlChar *cur = lang, *nxt;
1441
1442 if (cur == NULL)
1443 return (0);
1444 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1445 ((cur[0] == 'I') && (cur[1] == '-')) ||
1446 ((cur[0] == 'x') && (cur[1] == '-')) ||
1447 ((cur[0] == 'X') && (cur[1] == '-'))) {
1448 /*
1449 * Still allow IANA code and user code which were coming
1450 * from the previous version of the XML-1.0 specification
1451 * it's deprecated but we should not fail
1452 */
1453 cur += 2;
1454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1455 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456 cur++;
1457 return(cur[0] == 0);
1458 }
1459 nxt = cur;
1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462 nxt++;
1463 if (nxt - cur >= 4) {
1464 /*
1465 * Reserved
1466 */
1467 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 return(0);
1469 return(1);
1470 }
1471 if (nxt - cur < 2)
1472 return(0);
1473 /* we got an ISO 639 code */
1474 if (nxt[0] == 0)
1475 return(1);
1476 if (nxt[0] != '-')
1477 return(0);
1478
1479 nxt++;
1480 cur = nxt;
1481 /* now we can have extlang or script or region or variant */
1482 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483 goto region_m49;
1484
1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487 nxt++;
1488 if (nxt - cur == 4)
1489 goto script;
1490 if (nxt - cur == 2)
1491 goto region;
1492 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493 goto variant;
1494 if (nxt - cur != 3)
1495 return(0);
1496 /* we parsed an extlang */
1497 if (nxt[0] == 0)
1498 return(1);
1499 if (nxt[0] != '-')
1500 return(0);
1501
1502 nxt++;
1503 cur = nxt;
1504 /* now we can have script or region or variant */
1505 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506 goto region_m49;
1507
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511 if (nxt - cur == 2)
1512 goto region;
1513 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514 goto variant;
1515 if (nxt - cur != 4)
1516 return(0);
1517 /* we parsed a script */
1518script:
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533
1534 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535 goto variant;
1536 if (nxt - cur != 2)
1537 return(0);
1538 /* we parsed a region */
1539region:
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can just have a variant */
1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550 nxt++;
1551
1552 if ((nxt - cur < 5) || (nxt - cur > 8))
1553 return(0);
1554
1555 /* we parsed a variant */
1556variant:
1557 if (nxt[0] == 0)
1558 return(1);
1559 if (nxt[0] != '-')
1560 return(0);
1561 /* extensions and private use subtags not checked */
1562 return (1);
1563
1564region_m49:
1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567 nxt += 3;
1568 goto region;
1569 }
1570 return(0);
1571}
1572
1573/************************************************************************
1574 * *
1575 * Parser stacks related functions and macros *
1576 * *
1577 ************************************************************************/
1578
1579static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580 const xmlChar ** str);
1581
1582#ifdef SAX2
1583/**
1584 * nsPush:
1585 * @ctxt: an XML parser context
1586 * @prefix: the namespace prefix or NULL
1587 * @URL: the namespace name
1588 *
1589 * Pushes a new parser namespace on top of the ns stack
1590 *
1591 * Returns -1 in case of error, -2 if the namespace should be discarded
1592 * and the index in the stack otherwise.
1593 */
1594static int
1595nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596{
1597 if (ctxt->options & XML_PARSE_NSCLEAN) {
1598 int i;
1599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600 if (ctxt->nsTab[i] == prefix) {
1601 /* in scope */
1602 if (ctxt->nsTab[i + 1] == URL)
1603 return(-2);
1604 /* out of scope keep it */
1605 break;
1606 }
1607 }
1608 }
1609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 ctxt->nsMax = 10;
1611 ctxt->nsNr = 0;
1612 ctxt->nsTab = (const xmlChar **)
1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 if (ctxt->nsTab == NULL) {
1615 xmlErrMemory(ctxt, NULL);
1616 ctxt->nsMax = 0;
1617 return (-1);
1618 }
1619 } else if (ctxt->nsNr >= ctxt->nsMax) {
1620 const xmlChar ** tmp;
1621 ctxt->nsMax *= 2;
1622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624 if (tmp == NULL) {
1625 xmlErrMemory(ctxt, NULL);
1626 ctxt->nsMax /= 2;
1627 return (-1);
1628 }
1629 ctxt->nsTab = tmp;
1630 }
1631 ctxt->nsTab[ctxt->nsNr++] = prefix;
1632 ctxt->nsTab[ctxt->nsNr++] = URL;
1633 return (ctxt->nsNr);
1634}
1635/**
1636 * nsPop:
1637 * @ctxt: an XML parser context
1638 * @nr: the number to pop
1639 *
1640 * Pops the top @nr parser prefix/namespace from the ns stack
1641 *
1642 * Returns the number of namespaces removed
1643 */
1644static int
1645nsPop(xmlParserCtxtPtr ctxt, int nr)
1646{
1647 int i;
1648
1649 if (ctxt->nsTab == NULL) return(0);
1650 if (ctxt->nsNr < nr) {
1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652 nr = ctxt->nsNr;
1653 }
1654 if (ctxt->nsNr <= 0)
1655 return (0);
1656
1657 for (i = 0;i < nr;i++) {
1658 ctxt->nsNr--;
1659 ctxt->nsTab[ctxt->nsNr] = NULL;
1660 }
1661 return(nr);
1662}
1663#endif
1664
1665static int
1666xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667 const xmlChar **atts;
1668 int *attallocs;
1669 int maxatts;
1670
1671 if (ctxt->atts == NULL) {
1672 maxatts = 55; /* allow for 10 attrs by default */
1673 atts = (const xmlChar **)
1674 xmlMalloc(maxatts * sizeof(xmlChar *));
1675 if (atts == NULL) goto mem_error;
1676 ctxt->atts = atts;
1677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 if (attallocs == NULL) goto mem_error;
1679 ctxt->attallocs = attallocs;
1680 ctxt->maxatts = maxatts;
1681 } else if (nr + 5 > ctxt->maxatts) {
1682 maxatts = (nr + 5) * 2;
1683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 maxatts * sizeof(const xmlChar *));
1685 if (atts == NULL) goto mem_error;
1686 ctxt->atts = atts;
1687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 (maxatts / 5) * sizeof(int));
1689 if (attallocs == NULL) goto mem_error;
1690 ctxt->attallocs = attallocs;
1691 ctxt->maxatts = maxatts;
1692 }
1693 return(ctxt->maxatts);
1694mem_error:
1695 xmlErrMemory(ctxt, NULL);
1696 return(-1);
1697}
1698
1699/**
1700 * inputPush:
1701 * @ctxt: an XML parser context
1702 * @value: the parser input
1703 *
1704 * Pushes a new parser input on top of the input stack
1705 *
1706 * Returns -1 in case of error, the index in the stack otherwise
1707 */
1708int
1709inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710{
1711 if ((ctxt == NULL) || (value == NULL))
1712 return(-1);
1713 if (ctxt->inputNr >= ctxt->inputMax) {
1714 ctxt->inputMax *= 2;
1715 ctxt->inputTab =
1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717 ctxt->inputMax *
1718 sizeof(ctxt->inputTab[0]));
1719 if (ctxt->inputTab == NULL) {
1720 xmlErrMemory(ctxt, NULL);
1721 xmlFreeInputStream(value);
1722 ctxt->inputMax /= 2;
1723 value = NULL;
1724 return (-1);
1725 }
1726 }
1727 ctxt->inputTab[ctxt->inputNr] = value;
1728 ctxt->input = value;
1729 return (ctxt->inputNr++);
1730}
1731/**
1732 * inputPop:
1733 * @ctxt: an XML parser context
1734 *
1735 * Pops the top parser input from the input stack
1736 *
1737 * Returns the input just removed
1738 */
1739xmlParserInputPtr
1740inputPop(xmlParserCtxtPtr ctxt)
1741{
1742 xmlParserInputPtr ret;
1743
1744 if (ctxt == NULL)
1745 return(NULL);
1746 if (ctxt->inputNr <= 0)
1747 return (NULL);
1748 ctxt->inputNr--;
1749 if (ctxt->inputNr > 0)
1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751 else
1752 ctxt->input = NULL;
1753 ret = ctxt->inputTab[ctxt->inputNr];
1754 ctxt->inputTab[ctxt->inputNr] = NULL;
1755 return (ret);
1756}
1757/**
1758 * nodePush:
1759 * @ctxt: an XML parser context
1760 * @value: the element node
1761 *
1762 * Pushes a new element node on top of the node stack
1763 *
1764 * Returns -1 in case of error, the index in the stack otherwise
1765 */
1766int
1767nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768{
1769 if (ctxt == NULL) return(0);
1770 if (ctxt->nodeNr >= ctxt->nodeMax) {
1771 xmlNodePtr *tmp;
1772
1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774 ctxt->nodeMax * 2 *
1775 sizeof(ctxt->nodeTab[0]));
1776 if (tmp == NULL) {
1777 xmlErrMemory(ctxt, NULL);
1778 return (-1);
1779 }
1780 ctxt->nodeTab = tmp;
1781 ctxt->nodeMax *= 2;
1782 }
1783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1787 xmlParserMaxDepth);
1788 xmlHaltParser(ctxt);
1789 return(-1);
1790 }
1791 ctxt->nodeTab[ctxt->nodeNr] = value;
1792 ctxt->node = value;
1793 return (ctxt->nodeNr++);
1794}
1795
1796/**
1797 * nodePop:
1798 * @ctxt: an XML parser context
1799 *
1800 * Pops the top element node from the node stack
1801 *
1802 * Returns the node just removed
1803 */
1804xmlNodePtr
1805nodePop(xmlParserCtxtPtr ctxt)
1806{
1807 xmlNodePtr ret;
1808
1809 if (ctxt == NULL) return(NULL);
1810 if (ctxt->nodeNr <= 0)
1811 return (NULL);
1812 ctxt->nodeNr--;
1813 if (ctxt->nodeNr > 0)
1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815 else
1816 ctxt->node = NULL;
1817 ret = ctxt->nodeTab[ctxt->nodeNr];
1818 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819 return (ret);
1820}
1821
1822#ifdef LIBXML_PUSH_ENABLED
1823/**
1824 * nameNsPush:
1825 * @ctxt: an XML parser context
1826 * @value: the element name
1827 * @prefix: the element prefix
1828 * @URI: the element namespace name
1829 *
1830 * Pushes a new element name/prefix/URL on top of the name stack
1831 *
1832 * Returns -1 in case of error, the index in the stack otherwise
1833 */
1834static int
1835nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837{
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 void **tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1848 }
1849 ctxt->nameTab = tmp;
1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax * 3 *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->pushTab = tmp2;
1858 }
1859 ctxt->nameTab[ctxt->nameNr] = value;
1860 ctxt->name = value;
1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864 return (ctxt->nameNr++);
1865mem_error:
1866 xmlErrMemory(ctxt, NULL);
1867 return (-1);
1868}
1869/**
1870 * nameNsPop:
1871 * @ctxt: an XML parser context
1872 *
1873 * Pops the top element/prefix/URI name from the name stack
1874 *
1875 * Returns the name just removed
1876 */
1877static const xmlChar *
1878nameNsPop(xmlParserCtxtPtr ctxt)
1879{
1880 const xmlChar *ret;
1881
1882 if (ctxt->nameNr <= 0)
1883 return (NULL);
1884 ctxt->nameNr--;
1885 if (ctxt->nameNr > 0)
1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887 else
1888 ctxt->name = NULL;
1889 ret = ctxt->nameTab[ctxt->nameNr];
1890 ctxt->nameTab[ctxt->nameNr] = NULL;
1891 return (ret);
1892}
1893#endif /* LIBXML_PUSH_ENABLED */
1894
1895/**
1896 * namePush:
1897 * @ctxt: an XML parser context
1898 * @value: the element name
1899 *
1900 * Pushes a new element name on top of the name stack
1901 *
1902 * Returns -1 in case of error, the index in the stack otherwise
1903 */
1904int
1905namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1906{
1907 if (ctxt == NULL) return (-1);
1908
1909 if (ctxt->nameNr >= ctxt->nameMax) {
1910 const xmlChar * *tmp;
1911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912 ctxt->nameMax * 2 *
1913 sizeof(ctxt->nameTab[0]));
1914 if (tmp == NULL) {
1915 goto mem_error;
1916 }
1917 ctxt->nameTab = tmp;
1918 ctxt->nameMax *= 2;
1919 }
1920 ctxt->nameTab[ctxt->nameNr] = value;
1921 ctxt->name = value;
1922 return (ctxt->nameNr++);
1923mem_error:
1924 xmlErrMemory(ctxt, NULL);
1925 return (-1);
1926}
1927/**
1928 * namePop:
1929 * @ctxt: an XML parser context
1930 *
1931 * Pops the top element name from the name stack
1932 *
1933 * Returns the name just removed
1934 */
1935const xmlChar *
1936namePop(xmlParserCtxtPtr ctxt)
1937{
1938 const xmlChar *ret;
1939
1940 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941 return (NULL);
1942 ctxt->nameNr--;
1943 if (ctxt->nameNr > 0)
1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945 else
1946 ctxt->name = NULL;
1947 ret = ctxt->nameTab[ctxt->nameNr];
1948 ctxt->nameTab[ctxt->nameNr] = NULL;
1949 return (ret);
1950}
1951
1952static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1953 if (ctxt->spaceNr >= ctxt->spaceMax) {
1954 int *tmp;
1955
1956 ctxt->spaceMax *= 2;
1957 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1958 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1959 if (tmp == NULL) {
1960 xmlErrMemory(ctxt, NULL);
1961 ctxt->spaceMax /=2;
1962 return(-1);
1963 }
1964 ctxt->spaceTab = tmp;
1965 }
1966 ctxt->spaceTab[ctxt->spaceNr] = val;
1967 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1968 return(ctxt->spaceNr++);
1969}
1970
1971static int spacePop(xmlParserCtxtPtr ctxt) {
1972 int ret;
1973 if (ctxt->spaceNr <= 0) return(0);
1974 ctxt->spaceNr--;
1975 if (ctxt->spaceNr > 0)
1976 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1977 else
1978 ctxt->space = &ctxt->spaceTab[0];
1979 ret = ctxt->spaceTab[ctxt->spaceNr];
1980 ctxt->spaceTab[ctxt->spaceNr] = -1;
1981 return(ret);
1982}
1983
1984/*
1985 * Macros for accessing the content. Those should be used only by the parser,
1986 * and not exported.
1987 *
1988 * Dirty macros, i.e. one often need to make assumption on the context to
1989 * use them
1990 *
1991 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1992 * To be used with extreme caution since operations consuming
1993 * characters may move the input buffer to a different location !
1994 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1995 * This should be used internally by the parser
1996 * only to compare to ASCII values otherwise it would break when
1997 * running with UTF-8 encoding.
1998 * RAW same as CUR but in the input buffer, bypass any token
1999 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2006 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2007 *
2008 * NEXT Skip to the next character, this does the proper decoding
2009 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2010 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2011 * CUR_CHAR(l) returns the current unicode character (int), set l
2012 * to the number of xmlChars used for the encoding [0-5].
2013 * CUR_SCHAR same but operate on a string instead of the context
2014 * COPY_BUF copy the current unicode char to the target buffer, increment
2015 * the index
2016 * GROW, SHRINK handling of input buffers
2017 */
2018
/*
 * Direct accessors to the current input. All of them expect a variable
 * named ctxt to be in scope at the point of use.
 * RAW and CUR expand to the same expression, the byte under the cursor.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* byte found val positions after the cursor, no bound checking is done */
#define NXT(val) ctxt->input->cur[(val)]
/* the cursor itself and the start of the current input buffer */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as unsigned
 * char, are equal to c1 ... cn. Each macro builds on the previous one, the
 * comparison stops at the first mismatch, and s is expanded once per byte
 * compared so it should be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
2042
/*
 * SKIP(val): move the cursor val bytes forward, adding val to the column
 * and to ctxt->nbChars; the line counter is not touched. More input is
 * requested when the cursor lands on a 0 byte.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): move the cursor val bytes forward one byte at a time so that
 * line and column stay accurate across newlines. More input is requested
 * when the cursor lands on a 0 byte.
 * val is parenthesized in the loop test so that any expression can be
 * passed; it is still evaluated on every iteration and should be free of
 * side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061
2062#define SHRINK if ((ctxt->progressive == 0) && \
2063 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2065 xmlSHRINK (ctxt);
2066
2067static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068 xmlParserInputShrink(ctxt->input);
2069 if (*ctxt->input->cur == 0)
2070 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2071}
2072
2073#define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2075 xmlGROW (ctxt);
2076
2077static void xmlGROW (xmlParserCtxtPtr ctxt) {
2078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2083 ((ctxt->input->buf) &&
2084 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2087 xmlHaltParser(ctxt);
2088 return;
2089 }
2090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2091 if ((ctxt->input->cur > ctxt->input->end) ||
2092 (ctxt->input->cur < ctxt->input->base)) {
2093 xmlHaltParser(ctxt);
2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 return;
2096 }
2097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2099}
2100
/* skip the blanks at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: move the cursor one byte forward, counting one column and one
 * char, and request more input when the cursor lands on a 0 byte. The line
 * counter is not touched.
 * The expansion is a plain brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): move the cursor l bytes forward, counting a new line if the
 * byte under the cursor is '\n' and one column otherwise. Neither
 * ctxt->nbChars nor the amount of buffered input is updated here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/*
 * Decode the current character from the parser input or from the string s;
 * l is passed by address to the underlying function.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i.
 * When l is 1 a single xmlChar is stored, otherwise the character goes
 * through xmlCopyCharMultiByte(); i is advanced in both cases.
 * The expansion is an unbraced if/else with no final ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2126
2127/**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137int
2138xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2139 int res = 0;
2140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
2145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
2147 /*
2148 * if we are in the document content, go really fast
2149 */
2150 cur = ctxt->input->cur;
2151 while (IS_BLANK_CH(*cur)) {
2152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
2154 } else {
2155 ctxt->input->col++;
2156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
2167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171 NEXT;
2172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
2186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
2195 }
2196 }
2197 return(res);
2198}
2199
2200/************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206/**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215xmlChar
2216xmlPopInput(xmlParserCtxtPtr ctxt) {
2217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
2221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
2225 xmlFreeInputStream(inputPop(ctxt));
2226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2228 return(CUR);
2229}
2230
2231/**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
2238 * Returns -1 in case of error or the index in the input stack
2239 */
2240int
2241xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242 int ret;
2243 if (input == NULL) return(-1);
2244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
2253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254 (ctxt->inputNr > 1024)) {
2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256 while (ctxt->inputNr > 1)
2257 xmlFreeInputStream(inputPop(ctxt));
2258 return(-1);
2259 }
2260 ret = inputPush(ctxt, input);
2261 if (ctxt->instate == XML_PARSER_EOF)
2262 return(-1);
2263 GROW;
2264 return(ret);
2265}
2266
2267/**
2268 * xmlParseCharRef:
2269 * @ctxt: an XML parser context
2270 *
2271 * parse Reference declarations
2272 *
2273 * [66] CharRef ::= '&#' [0-9]+ ';' |
2274 * '&#x' [0-9a-fA-F]+ ';'
2275 *
2276 * [ WFC: Legal Character ]
2277 * Characters referred to using character references must match the
2278 * production for Char.
2279 *
2280 * Returns the value parsed (as an int), 0 in case of error
2281 */
2282int
2283xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2284 unsigned int val = 0;
2285 int count = 0;
2286 unsigned int outofrange = 0;
2287
2288 /*
2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290 */
2291 if ((RAW == '&') && (NXT(1) == '#') &&
2292 (NXT(2) == 'x')) {
2293 SKIP(3);
2294 GROW;
2295 while (RAW != ';') { /* loop blocked by count */
2296 if (count++ > 20) {
2297 count = 0;
2298 GROW;
2299 if (ctxt->instate == XML_PARSER_EOF)
2300 return(0);
2301 }
2302 if ((RAW >= '0') && (RAW <= '9'))
2303 val = val * 16 + (CUR - '0');
2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 val = val * 16 + (CUR - 'a') + 10;
2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 val = val * 16 + (CUR - 'A') + 10;
2308 else {
2309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2310 val = 0;
2311 break;
2312 }
2313 if (val > 0x10FFFF)
2314 outofrange = val;
2315
2316 NEXT;
2317 count++;
2318 }
2319 if (RAW == ';') {
2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2321 ctxt->input->col++;
2322 ctxt->nbChars ++;
2323 ctxt->input->cur++;
2324 }
2325 } else if ((RAW == '&') && (NXT(1) == '#')) {
2326 SKIP(2);
2327 GROW;
2328 while (RAW != ';') { /* loop blocked by count */
2329 if (count++ > 20) {
2330 count = 0;
2331 GROW;
2332 if (ctxt->instate == XML_PARSER_EOF)
2333 return(0);
2334 }
2335 if ((RAW >= '0') && (RAW <= '9'))
2336 val = val * 10 + (CUR - '0');
2337 else {
2338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2339 val = 0;
2340 break;
2341 }
2342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
2345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 ctxt->input->col++;
2351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else {
2355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2356 }
2357
2358 /*
2359 * [ WFC: Legal Character ]
2360 * Characters referred to using character references must match the
2361 * production for Char.
2362 */
2363 if ((IS_CHAR(val) && (outofrange == 0))) {
2364 return(val);
2365 } else {
2366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367 "xmlParseCharRef: invalid xmlChar value %d\n",
2368 val);
2369 }
2370 return(0);
2371}
2372
2373/**
2374 * xmlParseStringCharRef:
2375 * @ctxt: an XML parser context
2376 * @str: a pointer to an index in the string
2377 *
2378 * parse Reference declarations, variant parsing from a string rather
2379 * than an an input flow.
2380 *
2381 * [66] CharRef ::= '&#' [0-9]+ ';' |
2382 * '&#x' [0-9a-fA-F]+ ';'
2383 *
2384 * [ WFC: Legal Character ]
2385 * Characters referred to using character references must match the
2386 * production for Char.
2387 *
2388 * Returns the value parsed (as an int), 0 in case of error, str will be
2389 * updated to the current value of the index
2390 */
2391static int
2392xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393 const xmlChar *ptr;
2394 xmlChar cur;
2395 unsigned int val = 0;
2396 unsigned int outofrange = 0;
2397
2398 if ((str == NULL) || (*str == NULL)) return(0);
2399 ptr = *str;
2400 cur = *ptr;
2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 ptr += 3;
2403 cur = *ptr;
2404 while (cur != ';') { /* Non input consuming loop */
2405 if ((cur >= '0') && (cur <= '9'))
2406 val = val * 16 + (cur - '0');
2407 else if ((cur >= 'a') && (cur <= 'f'))
2408 val = val * 16 + (cur - 'a') + 10;
2409 else if ((cur >= 'A') && (cur <= 'F'))
2410 val = val * 16 + (cur - 'A') + 10;
2411 else {
2412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2413 val = 0;
2414 break;
2415 }
2416 if (val > 0x10FFFF)
2417 outofrange = val;
2418
2419 ptr++;
2420 cur = *ptr;
2421 }
2422 if (cur == ';')
2423 ptr++;
2424 } else if ((cur == '&') && (ptr[1] == '#')){
2425 ptr += 2;
2426 cur = *ptr;
2427 while (cur != ';') { /* Non input consuming loops */
2428 if ((cur >= '0') && (cur <= '9'))
2429 val = val * 10 + (cur - '0');
2430 else {
2431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2432 val = 0;
2433 break;
2434 }
2435 if (val > 0x10FFFF)
2436 outofrange = val;
2437
2438 ptr++;
2439 cur = *ptr;
2440 }
2441 if (cur == ';')
2442 ptr++;
2443 } else {
2444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2445 return(0);
2446 }
2447 *str = ptr;
2448
2449 /*
2450 * [ WFC: Legal Character ]
2451 * Characters referred to using character references must match the
2452 * production for Char.
2453 */
2454 if ((IS_CHAR(val) && (outofrange == 0))) {
2455 return(val);
2456 } else {
2457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 val);
2460 }
2461 return(0);
2462}
2463
2464/**
2465 * xmlParserHandlePEReference:
2466 * @ctxt: the parser context
2467 *
2468 * [69] PEReference ::= '%' Name ';'
2469 *
2470 * [ WFC: No Recursion ]
2471 * A parsed entity must not contain a recursive
2472 * reference to itself, either directly or indirectly.
2473 *
2474 * [ WFC: Entity Declared ]
2475 * In a document without any DTD, a document with only an internal DTD
2476 * subset which contains no parameter entity references, or a document
2477 * with "standalone='yes'", ... ... The declaration of a parameter
2478 * entity must precede any reference to it...
2479 *
2480 * [ VC: Entity Declared ]
2481 * In a document with an external subset or external parameter entities
2482 * with "standalone='no'", ... ... The declaration of a parameter entity
2483 * must precede any reference to it...
2484 *
2485 * [ WFC: In DTD ]
2486 * Parameter-entity references may only appear in the DTD.
2487 * NOTE: misleading but this is handled.
2488 *
2489 * A PEReference may have been detected in the current input stream
2490 * the handling is done accordingly to
2491 * http://www.w3.org/TR/REC-xml#entproc
2492 * i.e.
2493 * - Included in literal in entity values
2494 * - Included as Parameter Entity reference within DTDs
2495 */
2496void
2497xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2498 switch(ctxt->instate) {
2499 case XML_PARSER_CDATA_SECTION:
2500 return;
2501 case XML_PARSER_COMMENT:
2502 return;
2503 case XML_PARSER_START_TAG:
2504 return;
2505 case XML_PARSER_END_TAG:
2506 return;
2507 case XML_PARSER_EOF:
2508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2509 return;
2510 case XML_PARSER_PROLOG:
2511 case XML_PARSER_START:
2512 case XML_PARSER_MISC:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2514 return;
2515 case XML_PARSER_ENTITY_DECL:
2516 case XML_PARSER_CONTENT:
2517 case XML_PARSER_ATTRIBUTE_VALUE:
2518 case XML_PARSER_PI:
2519 case XML_PARSER_SYSTEM_LITERAL:
2520 case XML_PARSER_PUBLIC_LITERAL:
2521 /* we just ignore it there */
2522 return;
2523 case XML_PARSER_EPILOG:
2524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2525 return;
2526 case XML_PARSER_ENTITY_VALUE:
2527 /*
2528 * NOTE: in the case of entity values, we don't do the
2529 * substitution here since we need the literal
2530 * entity value to be able to save the internal
2531 * subset of the document.
2532 * This will be handled by xmlStringDecodeEntities
2533 */
2534 return;
2535 case XML_PARSER_DTD:
2536 /*
2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 * In the internal DTD subset, parameter-entity references
2539 * can occur only where markup declarations can occur, not
2540 * within markup declarations.
2541 * In that case this is handled in xmlParseMarkupDecl
2542 */
2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 return;
2545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546 return;
2547 break;
2548 case XML_PARSER_IGNORE:
2549 return;
2550 }
2551
2552 xmlParsePEReference(ctxt);
2553}
2554
/*
 * Macro used to grow the current buffer.
 * buffer:  name of an xmlChar * variable; a companion variable named
 *          buffer##_size (expected to be a size_t) holds its capacity
 * n:  number of extra bytes wanted on top of doubling the current size;
 *     it is parenthesized in the expansion so any expression can be used
 * mem_error:  label expected in the calling function, jumped to when the
 *             new size overflows or the reallocation fails (the original
 *             buffer is left untouched in both cases)
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2569
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length, in bytes (decoding stops at @str + @len)
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Character references are always replaced; general entity
 * references only if XML_SUBSTITUTE_REF is set in @what and parameter
 * entity references only if XML_SUBSTITUTE_PEREF is set. Replacement text
 * is itself decoded recursively, ctxt->depth tracking the nesting.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned if @ctxt or @str is NULL,
 *      @len is negative, the nesting limit is exceeded, memory runs out
 *      or a reference cannot be expanded.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;     /* output being built */
    size_t buffer_size = 0;     /* allocated size of buffer */
    size_t nbchars = 0;         /* bytes written to buffer so far */

    xmlChar *current = NULL;
    xmlChar *rep = NULL;        /* decoded replacement text of an entity */
    const xmlChar *last;        /* first byte past the input */
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /*
     * Refuse to go deeper than 40 nested expansions (1024 with
     * XML_PARSE_HUGE): reported as an entity loop.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     * NOTE(review): CUR_SCHAR is defined outside this section; presumably
     * it decodes the character at str and stores its byte length in l.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3)) {

	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /* Character reference: the parser moves str past it */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    /* COPY_BUF is defined elsewhere; presumably appends val to buffer */
	    COPY_BUF(0,buffer,nbchars,val);
	    /* keep XML_PARSER_BUFFER_SIZE bytes of headroom at all times */
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    /* General entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    /* the result of this first check is not looked at here */
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only the first byte of content is used */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* decode the replacement text one level deeper, then append */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* a non-zero result from the check aborts the decoding */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: write the reference back as "&name;" */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	    /* ent == NULL: nothing is written for this reference */
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    /* Parameter entity reference */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEreferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		/*
		 * NOTE(review): if ent->content is still NULL at this point,
		 * xmlStringDecodeEntities returns NULL and the whole decoding
		 * fails through int_error - confirm this is intended.
		 */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	/* fetch the next character, 0 once the end of the input is reached */
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

mem_error:
    /* allocation failure: report it, then share the cleanup below */
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2760
2761/**
2762 * xmlStringDecodeEntities:
2763 * @ctxt: the parser context
2764 * @str: the input string
2765 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766 * @end: an end marker xmlChar, 0 if none
2767 * @end2: an end marker xmlChar, 0 if none
2768 * @end3: an end marker xmlChar, 0 if none
2769 *
2770 * Takes a entity string content and process to do the adequate substitutions.
2771 *
2772 * [67] Reference ::= EntityRef | CharRef
2773 *
2774 * [69] PEReference ::= '%' Name ';'
2775 *
2776 * Returns A newly allocated string with the substitution done. The caller
2777 * must deallocate it !
2778 */
2779xmlChar *
2780xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 xmlChar end, xmlChar end2, xmlChar end3) {
2782 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784 end, end2, end3));
2785}
2786
2787/************************************************************************
2788 * *
2789 * Commodity functions, cleanup needed ? *
2790 * *
2791 ************************************************************************/
2792
2793/**
2794 * areBlanks:
2795 * @ctxt: an XML parser context
2796 * @str: a xmlChar *
2797 * @len: the size of @str
2798 * @blank_chars: we know the chars are blanks
2799 *
2800 * Is this a sequence of blank chars that one can ignore ?
2801 *
2802 * Returns 1 if ignorable 0 otherwise.
2803 */
2804
2805static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int blank_chars) {
2807 int i, ret;
2808 xmlNodePtr lastChild;
2809
2810 /*
2811 * Don't spend time trying to differentiate them, the same callback is
2812 * used !
2813 */
2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815 return(0);
2816
2817 /*
2818 * Check for xml:space value.
2819 */
2820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821 (*(ctxt->space) == -2))
2822 return(0);
2823
2824 /*
2825 * Check that the string is made of blanks
2826 */
2827 if (blank_chars == 0) {
2828 for (i = 0;i < len;i++)
2829 if (!(IS_BLANK_CH(str[i]))) return(0);
2830 }
2831
2832 /*
2833 * Look if the element is mixed content in the DTD if available
2834 */
2835 if (ctxt->node == NULL) return(0);
2836 if (ctxt->myDoc != NULL) {
2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838 if (ret == 0) return(1);
2839 if (ret == 1) return(0);
2840 }
2841
2842 /*
2843 * Otherwise, heuristic :-\
2844 */
2845 if ((RAW != '<') && (RAW != 0xD)) return(0);
2846 if ((ctxt->node->children == NULL) &&
2847 (RAW == '<') && (NXT(1) == '/')) return(0);
2848
2849 lastChild = xmlGetLastChild(ctxt->node);
2850 if (lastChild == NULL) {
2851 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852 (ctxt->node->content != NULL)) return(0);
2853 } else if (xmlNodeIsText(lastChild))
2854 return(0);
2855 else if ((ctxt->node->children != NULL) &&
2856 (xmlNodeIsText(ctxt->node->children)))
2857 return(0);
2858 return(1);
2859}
2860
2861/************************************************************************
2862 * *
2863 * Extra stuff for namespace support *
2864 * Relates to http://www.w3.org/TR/WD-xml-names *
2865 * *
2866 ************************************************************************/
2867
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split
 * @prefix:  a xmlChar **, receives the prefix (or NULL)
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The name is returned whole, with *@prefix left NULL, when it has no
 * colon, starts with a colon, ends right after its first colon or (unless
 * XML_XML_NAMESPACE is defined) starts with "xml:".
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. Both are newly allocated strings. NULL is
 *   returned if @prefix or @name is NULL or if an allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];  /* on-stack storage for short parts */
    xmlChar *buffer = NULL;            /* heap storage once buf is full */
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /*
     * First part: everything up to the first ':' or the end of the name.
     * c always holds the next byte, which has not been stored yet.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    /* double the allocation before it gets within 10 bytes of full */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* "prefix:" with nothing after the colon: hand back the whole name */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    /* ret is the first part; it is the result unless a colon follows */
    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* what was collected so far is the prefix, now read the local part */
	c = *cur;
        *prefix = ret;
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    /* only reported: the split carries on regardless */
	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	/* same short/long buffer scheme as for the first part */
	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        /* NOTE(review): *prefix stays set on this path - confirm callers free it */
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}
3038
3039/************************************************************************
3040 * *
3041 * The parser itself *
3042 * Relates to http://www.w3.org/TR/REC-xml *
3043 * *
3044 ************************************************************************/
3045
3046/************************************************************************
3047 * *
3048 * Routines to parse Name, NCName and NmToken *
3049 * *
3050 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when DEBUG
 * is defined. The ones used in this part of the file are incremented at
 * the top of the routine they are named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3059
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML 1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore under the new rules.
 * Compatibility with the pre-revision-5 parsing semantics is still kept
 * when the XML_PARSE_OLD10 option is given to the parser.
 */
3070static int
3071xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073 /*
3074 * Use the new checks of production [4] [4a] amd [5] of the
3075 * Update 5 of XML-1.0
3076 */
3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 (((c >= 'a') && (c <= 'z')) ||
3079 ((c >= 'A') && (c <= 'Z')) ||
3080 (c == '_') || (c == ':') ||
3081 ((c >= 0xC0) && (c <= 0xD6)) ||
3082 ((c >= 0xD8) && (c <= 0xF6)) ||
3083 ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 ((c >= 0x370) && (c <= 0x37D)) ||
3085 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 ((c >= 0x200C) && (c <= 0x200D)) ||
3087 ((c >= 0x2070) && (c <= 0x218F)) ||
3088 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 return(1);
3094 } else {
3095 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 return(1);
3097 }
3098 return(0);
3099}
3100
3101static int
3102xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104 /*
3105 * Use the new checks of production [4] [4a] amd [5] of the
3106 * Update 5 of XML-1.0
3107 */
3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 (((c >= 'a') && (c <= 'z')) ||
3110 ((c >= 'A') && (c <= 'Z')) ||
3111 ((c >= '0') && (c <= '9')) || /* !start */
3112 (c == '_') || (c == ':') ||
3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 ((c >= 0xC0) && (c <= 0xD6)) ||
3115 ((c >= 0xD8) && (c <= 0xF6)) ||
3116 ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 ((c >= 0x370) && (c <= 0x37D)) ||
3119 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 ((c >= 0x200C) && (c <= 0x200D)) ||
3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 ((c >= 0x2070) && (c <= 0x218F)) ||
3123 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 return(1);
3129 } else {
3130 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
3132 (c == '_') || (c == ':') ||
3133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))
3135 return(1);
3136 }
3137 return(0);
3138}
3139
/*
 * Forward declaration; the definition is not part of this section of the
 * file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3142
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * General purpose Name parser, called by xmlParseName() when its ASCII-only
 * shortcut does not apply. The input is read one character at a time
 * through CUR_CHAR/NEXTL (macros defined outside this section) and the
 * name is then looked up in the dictionary from the bytes just consumed.
 *
 * Returns the Name as a dictionary string, or NULL if the input does not
 * start with a name start character, the parser reached EOF state, the name
 * is longer than XML_MAX_NAME_LENGTH (without XML_PARSE_HUGE) or the input
 * buffer changed unexpectedly. The result of xmlDictLookup() is returned
 * without being checked.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: size of current char */
    int c;
    int count = 0;      /* characters read since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
	 * Use the new checks of production [4] [4a] and [5] of the
	 * Update 5 of XML-1.0
	 */
	/* the first character has to be a name start character */
	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
	    (!(((c >= 'a') && (c <= 'z')) ||
	       ((c >= 'A') && (c <= 'Z')) ||
	       (c == '_') || (c == ':') ||
	       ((c >= 0xC0) && (c <= 0xD6)) ||
	       ((c >= 0xD8) && (c <= 0xF6)) ||
	       ((c >= 0xF8) && (c <= 0x2FF)) ||
	       ((c >= 0x370) && (c <= 0x37D)) ||
	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
	       ((c >= 0x200C) && (c <= 0x200D)) ||
	       ((c >= 0x2070) && (c <= 0x218F)) ||
	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
	    return(NULL);
	}
	len += l;
	NEXTL(l);
	c = CUR_CHAR(l);
	/* then consume name characters for as long as they last */
	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
	       (((c >= 'a') && (c <= 'z')) ||
	        ((c >= 'A') && (c <= 'Z')) ||
	        ((c >= '0') && (c <= '9')) || /* !start */
	        (c == '_') || (c == ':') ||
	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
	        ((c >= 0xC0) && (c <= 0xD6)) ||
	        ((c >= 0xD8) && (c <= 0xF6)) ||
	        ((c >= 0xF8) && (c <= 0x2FF)) ||
	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
	        ((c >= 0x370) && (c <= 0x37D)) ||
	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
	        ((c >= 0x200C) && (c <= 0x200D)) ||
	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
	        ((c >= 0x2070) && (c <= 0x218F)) ||
	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
	        ((c >= 0x10000) && (c <= 0xEFFFF))
		)) {
	    /* refill the input every XML_PARSER_CHUNK_SIZE characters */
	    if (count++ > XML_PARSER_CHUNK_SIZE) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
	    }
	    len += l;
	    NEXTL(l);
	    c = CUR_CHAR(l);
	}
    } else {
	/* XML_PARSE_OLD10: pre-revision-5 character classes */
	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
	    (!IS_LETTER(c) && (c != '_') &&
	     (c != ':'))) {
	    return(NULL);
	}
	len += l;
	NEXTL(l);
	c = CUR_CHAR(l);

	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
		(c == '.') || (c == '-') ||
		(c == '_') || (c == ':') ||
		(IS_COMBINING(c)) ||
		(IS_EXTENDER(c)))) {
	    if (count++ > XML_PARSER_CHUNK_SIZE) {
		count = 0;
		GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
	    }
	    len += l;
	    NEXTL(l);
	    c = CUR_CHAR(l);
	}
    }
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
        return(NULL);
    }
    if (ctxt->input->cur - ctxt->input->base < len) {
        /*
         * There were a couple of bugs where PERefs lead to a change
         * of the buffer. Check the buffer size to avoid passing an invalid
         * pointer to xmlDictLookup.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "unexpected change of input buffer");
        return (NULL);
    }
    /*
     * The name is the len bytes ending at the current position.
     * NOTE(review): the first lookup starts one byte earlier when the
     * current byte is '\n' preceded by '\r'; presumably this compensates
     * for how CUR_CHAR handles a CR LF pair - confirm.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
3263
3264/**
3265 * xmlParseName:
3266 * @ctxt: an XML parser context
3267 *
3268 * parse an XML name.
3269 *
3270 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271 * CombiningChar | Extender
3272 *
3273 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274 *
3275 * [6] Names ::= Name (#x20 Name)*
3276 *
3277 * Returns the Name parsed or NULL
3278 */
3279
3280const xmlChar *
3281xmlParseName(xmlParserCtxtPtr ctxt) {
3282 const xmlChar *in;
3283 const xmlChar *ret;
3284 int count = 0;
3285
3286 GROW;
3287
3288#ifdef DEBUG
3289 nbParseName++;
3290#endif
3291
3292 /*
3293 * Accelerator for simple ASCII names
3294 */
3295 in = ctxt->input->cur;
3296 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 ((*in >= 0x41) && (*in <= 0x5A)) ||
3298 (*in == '_') || (*in == ':')) {
3299 in++;
3300 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 ((*in >= 0x30) && (*in <= 0x39)) ||
3303 (*in == '_') || (*in == '-') ||
3304 (*in == ':') || (*in == '.'))
3305 in++;
3306 if ((*in > 0) && (*in < 0x80)) {
3307 count = in - ctxt->input->cur;
3308 if ((count > XML_MAX_NAME_LENGTH) &&
3309 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311 return(NULL);
3312 }
3313 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3314 ctxt->input->cur = in;
3315 ctxt->nbChars += count;
3316 ctxt->input->col += count;
3317 if (ret == NULL)
3318 xmlErrMemory(ctxt, NULL);
3319 return(ret);
3320 }
3321 }
3322 /* accelerator for special cases */
3323 return(xmlParseNameComplex(ctxt));
3324}
3325
/*
 * xmlParseNCNameComplex:
 * @ctxt:  an XML parser context
 *
 * Parse a name without colon one character at a time, using
 * xmlIsNameStartChar()/xmlIsNameChar() with ':' excluded. The offset of
 * the name from the start of the input buffer is recorded up front and
 * the name is looked up in the dictionary from that offset at the end.
 *
 * Returns the name as a dictionary string, or NULL if the input does not
 * start with a valid first character, the parser reached EOF state or the
 * name is longer than XML_MAX_NAME_LENGTH (without XML_PARSE_HUGE). The
 * result of xmlDictLookup() is returned without being checked.
 */
static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: size of current char */
    int c;
    int count = 0;      /* characters read since the last GROW */
    size_t startPosition = 0;   /* offset of the name from BASE_PTR */

#ifdef DEBUG
    nbParseNCNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    /*
     * NOTE(review): the lookup at the end assumes this offset stays valid
     * across the later GROW calls, and unlike xmlParseNameComplex() there
     * is no check for an unexpected change of the input buffer - confirm.
     */
    startPosition = CUR_PTR - BASE_PTR;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
	return(NULL);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
	/* every XML_PARSER_CHUNK_SIZE characters: length check and refill */
	if (count++ > XML_PARSER_CHUNK_SIZE) {
            if ((len > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
                return(NULL);
            }
	    count = 0;
	    GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
	}
	len += l;
	NEXTL(l);
	c = CUR_CHAR(l);
	if (c == 0) {
	    /* a 0 may just mean the buffered data ran out: refill and retry */
	    count = 0;
	    /*
	     * when shrinking to extend the buffer we really need to preserve
	     * the part of the name we already parsed. Hence rolling back
	     * by current length.
	     */
	    ctxt->input->cur -= l;
	    GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
	    ctxt->input->cur += l;
	    c = CUR_CHAR(l);
	}
    }
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
        return(NULL);
    }
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
}
3386
3387/**
3388 * xmlParseNCName:
3389 * @ctxt: an XML parser context
3390 * @len: length of the string parsed
3391 *
3392 * parse an XML name.
3393 *
3394 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395 * CombiningChar | Extender
3396 *
3397 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398 *
3399 * Returns the Name parsed or NULL
3400 */
3401
3402static const xmlChar *
3403xmlParseNCName(xmlParserCtxtPtr ctxt) {
3404 const xmlChar *in, *e;
3405 const xmlChar *ret;
3406 int count = 0;
3407
3408#ifdef DEBUG
3409 nbParseNCName++;
3410#endif
3411
3412 /*
3413 * Accelerator for simple ASCII names
3414 */
3415 in = ctxt->input->cur;
3416 e = ctxt->input->end;
3417 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 (*in == '_')) && (in < e)) {
3420 in++;
3421 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 ((*in >= 0x30) && (*in <= 0x39)) ||
3424 (*in == '_') || (*in == '-') ||
3425 (*in == '.')) && (in < e))
3426 in++;
3427 if (in >= e)
3428 goto complex;
3429 if ((*in > 0) && (*in < 0x80)) {
3430 count = in - ctxt->input->cur;
3431 if ((count > XML_MAX_NAME_LENGTH) &&
3432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434 return(NULL);
3435 }
3436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 ctxt->input->cur = in;
3438 ctxt->nbChars += count;
3439 ctxt->input->col += count;
3440 if (ret == NULL) {
3441 xmlErrMemory(ctxt, NULL);
3442 }
3443 return(ret);
3444 }
3445 }
3446complex:
3447 return(xmlParseNCNameComplex(ctxt));
3448}
3449
3450/**
3451 * xmlParseNameAndCompare:
3452 * @ctxt: an XML parser context
3453 *
3454 * parse an XML name and compares for match
3455 * (specialized for endtag parsing)
3456 *
3457 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458 * and the name for mismatch
3459 */
3460
3461static const xmlChar *
3462xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3463 register const xmlChar *cmp = other;
3464 register const xmlChar *in;
3465 const xmlChar *ret;
3466
3467 GROW;
3468 if (ctxt->instate == XML_PARSER_EOF)
3469 return(NULL);
3470
3471 in = ctxt->input->cur;
3472 while (*in != 0 && *in == *cmp) {
3473 ++in;
3474 ++cmp;
3475 ctxt->input->col++;
3476 }
3477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3478 /* success */
3479 ctxt->input->cur = in;
3480 return (const xmlChar*) 1;
3481 }
3482 /* failure (or end of input buffer), check with full function */
3483 ret = xmlParseName (ctxt);
3484 /* strings coming from the dictionary direct compare possible */
3485 if (ret == other) {
3486 return (const xmlChar*) 1;
3487 }
3488 return ret;
3489}
3490
3491/**
3492 * xmlParseStringName:
3493 * @ctxt: an XML parser context
3494 * @str: a pointer to the string pointer (IN/OUT)
3495 *
3496 * parse an XML name.
3497 *
3498 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499 * CombiningChar | Extender
3500 *
3501 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502 *
3503 * [6] Names ::= Name (#x20 Name)*
3504 *
3505 * Returns the Name parsed or NULL. The @str pointer
3506 * is updated to the current location in the string.
3507 */
3508
3509static xmlChar *
3510xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511 xmlChar buf[XML_MAX_NAMELEN + 5];
3512 const xmlChar *cur = *str;
3513 int len = 0, l;
3514 int c;
3515
3516#ifdef DEBUG
3517 nbParseStringName++;
3518#endif
3519
3520 c = CUR_SCHAR(cur, l);
3521 if (!xmlIsNameStartChar(ctxt, c)) {
3522 return(NULL);
3523 }
3524
3525 COPY_BUF(l,buf,len,c);
3526 cur += l;
3527 c = CUR_SCHAR(cur, l);
3528 while (xmlIsNameChar(ctxt, c)) {
3529 COPY_BUF(l,buf,len,c);
3530 cur += l;
3531 c = CUR_SCHAR(cur, l);
3532 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 /*
3534 * Okay someone managed to make a huge name, so he's ready to pay
3535 * for the processing speed.
3536 */
3537 xmlChar *buffer;
3538 int max = len * 2;
3539
3540 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3541 if (buffer == NULL) {
3542 xmlErrMemory(ctxt, NULL);
3543 return(NULL);
3544 }
3545 memcpy(buffer, buf, len);
3546 while (xmlIsNameChar(ctxt, c)) {
3547 if (len + 10 > max) {
3548 xmlChar *tmp;
3549
3550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 xmlFree(buffer);
3554 return(NULL);
3555 }
3556 max *= 2;
3557 tmp = (xmlChar *) xmlRealloc(buffer,
3558 max * sizeof(xmlChar));
3559 if (tmp == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 xmlFree(buffer);
3562 return(NULL);
3563 }
3564 buffer = tmp;
3565 }
3566 COPY_BUF(l,buffer,len,c);
3567 cur += l;
3568 c = CUR_SCHAR(cur, l);
3569 }
3570 buffer[len] = 0;
3571 *str = cur;
3572 return(buffer);
3573 }
3574 }
3575 if ((len > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
3580 *str = cur;
3581 return(xmlStrndup(buf, len));
3582}
3583
3584/**
3585 * xmlParseNmtoken:
3586 * @ctxt: an XML parser context
3587 *
3588 * parse an XML Nmtoken.
3589 *
3590 * [7] Nmtoken ::= (NameChar)+
3591 *
3592 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3593 *
3594 * Returns the Nmtoken parsed or NULL
3595 */
3596
3597xmlChar *
3598xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599 xmlChar buf[XML_MAX_NAMELEN + 5];
3600 int len = 0, l;
3601 int c;
3602 int count = 0;
3603
3604#ifdef DEBUG
3605 nbParseNmToken++;
3606#endif
3607
3608 GROW;
3609 if (ctxt->instate == XML_PARSER_EOF)
3610 return(NULL);
3611 c = CUR_CHAR(l);
3612
3613 while (xmlIsNameChar(ctxt, c)) {
3614 if (count++ > XML_PARSER_CHUNK_SIZE) {
3615 count = 0;
3616 GROW;
3617 }
3618 COPY_BUF(l,buf,len,c);
3619 NEXTL(l);
3620 c = CUR_CHAR(l);
3621 if (c == 0) {
3622 count = 0;
3623 GROW;
3624 if (ctxt->instate == XML_PARSER_EOF)
3625 return(NULL);
3626 c = CUR_CHAR(l);
3627 }
3628 if (len >= XML_MAX_NAMELEN) {
3629 /*
3630 * Okay someone managed to make a huge token, so he's ready to pay
3631 * for the processing speed.
3632 */
3633 xmlChar *buffer;
3634 int max = len * 2;
3635
3636 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3637 if (buffer == NULL) {
3638 xmlErrMemory(ctxt, NULL);
3639 return(NULL);
3640 }
3641 memcpy(buffer, buf, len);
3642 while (xmlIsNameChar(ctxt, c)) {
3643 if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 count = 0;
3645 GROW;
3646 if (ctxt->instate == XML_PARSER_EOF) {
3647 xmlFree(buffer);
3648 return(NULL);
3649 }
3650 }
3651 if (len + 10 > max) {
3652 xmlChar *tmp;
3653
3654 if ((max > XML_MAX_NAME_LENGTH) &&
3655 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657 xmlFree(buffer);
3658 return(NULL);
3659 }
3660 max *= 2;
3661 tmp = (xmlChar *) xmlRealloc(buffer,
3662 max * sizeof(xmlChar));
3663 if (tmp == NULL) {
3664 xmlErrMemory(ctxt, NULL);
3665 xmlFree(buffer);
3666 return(NULL);
3667 }
3668 buffer = tmp;
3669 }
3670 COPY_BUF(l,buffer,len,c);
3671 NEXTL(l);
3672 c = CUR_CHAR(l);
3673 }
3674 buffer[len] = 0;
3675 return(buffer);
3676 }
3677 }
3678 if (len == 0)
3679 return(NULL);
3680 if ((len > XML_MAX_NAME_LENGTH) &&
3681 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683 return(NULL);
3684 }
3685 return(xmlStrndup(buf, len));
3686}
3687
3688/**
3689 * xmlParseEntityValue:
3690 * @ctxt: an XML parser context
3691 * @orig: if non-NULL store a copy of the original entity value
3692 *
3693 * parse a value for ENTITY declarations
3694 *
3695 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696 * "'" ([^%&'] | PEReference | Reference)* "'"
3697 *
3698 * Returns the EntityValue parsed with reference substituted or NULL
3699 */
3700
3701xmlChar *
3702xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703 xmlChar *buf = NULL;
3704 int len = 0;
3705 int size = XML_PARSER_BUFFER_SIZE;
3706 int c, l;
3707 xmlChar stop;
3708 xmlChar *ret = NULL;
3709 const xmlChar *cur = NULL;
3710 xmlParserInputPtr input;
3711
3712 if (RAW == '"') stop = '"';
3713 else if (RAW == '\'') stop = '\'';
3714 else {
3715 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3716 return(NULL);
3717 }
3718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3719 if (buf == NULL) {
3720 xmlErrMemory(ctxt, NULL);
3721 return(NULL);
3722 }
3723
3724 /*
3725 * The content of the entity definition is copied in a buffer.
3726 */
3727
3728 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729 input = ctxt->input;
3730 GROW;
3731 if (ctxt->instate == XML_PARSER_EOF)
3732 goto error;
3733 NEXT;
3734 c = CUR_CHAR(l);
3735 /*
3736 * NOTE: 4.4.5 Included in Literal
3737 * When a parameter entity reference appears in a literal entity
3738 * value, ... a single or double quote character in the replacement
3739 * text is always treated as a normal data character and will not
3740 * terminate the literal.
3741 * In practice it means we stop the loop only when back at parsing
3742 * the initial entity and the quote is found
3743 */
3744 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3746 if (len + 5 >= size) {
3747 xmlChar *tmp;
3748
3749 size *= 2;
3750 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 if (tmp == NULL) {
3752 xmlErrMemory(ctxt, NULL);
3753 goto error;
3754 }
3755 buf = tmp;
3756 }
3757 COPY_BUF(l,buf,len,c);
3758 NEXTL(l);
3759
3760 GROW;
3761 c = CUR_CHAR(l);
3762 if (c == 0) {
3763 GROW;
3764 c = CUR_CHAR(l);
3765 }
3766 }
3767 buf[len] = 0;
3768 if (ctxt->instate == XML_PARSER_EOF)
3769 goto error;
3770 if (c != stop) {
3771 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772 goto error;
3773 }
3774 NEXT;
3775
3776 /*
3777 * Raise problem w.r.t. '&' and '%' being used in non-entities
3778 * reference constructs. Note Charref will be handled in
3779 * xmlStringDecodeEntities()
3780 */
3781 cur = buf;
3782 while (*cur != 0) { /* non input consuming */
3783 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 xmlChar *name;
3785 xmlChar tmp = *cur;
3786 int nameOk = 0;
3787
3788 cur++;
3789 name = xmlParseStringName(ctxt, &cur);
3790 if (name != NULL) {
3791 nameOk = 1;
3792 xmlFree(name);
3793 }
3794 if ((nameOk == 0) || (*cur != ';')) {
3795 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3796 "EntityValue: '%c' forbidden except for entities references\n",
3797 tmp);
3798 goto error;
3799 }
3800 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 (ctxt->inputNr == 1)) {
3802 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3803 goto error;
3804 }
3805 if (*cur == 0)
3806 break;
3807 }
3808 cur++;
3809 }
3810
3811 /*
3812 * Then PEReference entities are substituted.
3813 *
3814 * NOTE: 4.4.7 Bypassed
3815 * When a general entity reference appears in the EntityValue in
3816 * an entity declaration, it is bypassed and left as is.
3817 * so XML_SUBSTITUTE_REF is not set here.
3818 */
3819 ++ctxt->depth;
3820 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821 0, 0, 0);
3822 --ctxt->depth;
3823 if (orig != NULL) {
3824 *orig = buf;
3825 buf = NULL;
3826 }
3827
3828error:
3829 if (buf != NULL)
3830 xmlFree(buf);
3831 return(ret);
3832}
3833
3834/**
3835 * xmlParseAttValueComplex:
3836 * @ctxt: an XML parser context
3837 * @len: the resulting attribute len
3838 * @normalize: wether to apply the inner normalization
3839 *
3840 * parse a value for an attribute, this is the fallback function
3841 * of xmlParseAttValue() when the attribute parsing requires handling
3842 * of non-ASCII characters, or normalization compaction.
3843 *
3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845 */
3846static xmlChar *
3847xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3848 xmlChar limit = 0;
3849 xmlChar *buf = NULL;
3850 xmlChar *rep = NULL;
3851 size_t len = 0;
3852 size_t buf_size = 0;
3853 int c, l, in_space = 0;
3854 xmlChar *current = NULL;
3855 xmlEntityPtr ent;
3856
3857 if (NXT(0) == '"') {
3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 limit = '"';
3860 NEXT;
3861 } else if (NXT(0) == '\'') {
3862 limit = '\'';
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 NEXT;
3865 } else {
3866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3867 return(NULL);
3868 }
3869
3870 /*
3871 * allocate a translation buffer.
3872 */
3873 buf_size = XML_PARSER_BUFFER_SIZE;
3874 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3875 if (buf == NULL) goto mem_error;
3876
3877 /*
3878 * OK loop until we reach one of the ending char or a size limit.
3879 */
3880 c = CUR_CHAR(l);
3881 while (((NXT(0) != limit) && /* checked */
3882 (IS_CHAR(c)) && (c != '<')) &&
3883 (ctxt->instate != XML_PARSER_EOF)) {
3884 /*
3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886 * special option is given
3887 */
3888 if ((len > XML_MAX_TEXT_LENGTH) &&
3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3891 "AttValue length too long\n");
3892 goto mem_error;
3893 }
3894 if (c == 0) break;
3895 if (c == '&') {
3896 in_space = 0;
3897 if (NXT(1) == '#') {
3898 int val = xmlParseCharRef(ctxt);
3899
3900 if (val == '&') {
3901 if (ctxt->replaceEntities) {
3902 if (len + 10 > buf_size) {
3903 growBuffer(buf, 10);
3904 }
3905 buf[len++] = '&';
3906 } else {
3907 /*
3908 * The reparsing will be done in xmlStringGetNodeList()
3909 * called by the attribute() function in SAX.c
3910 */
3911 if (len + 10 > buf_size) {
3912 growBuffer(buf, 10);
3913 }
3914 buf[len++] = '&';
3915 buf[len++] = '#';
3916 buf[len++] = '3';
3917 buf[len++] = '8';
3918 buf[len++] = ';';
3919 }
3920 } else if (val != 0) {
3921 if (len + 10 > buf_size) {
3922 growBuffer(buf, 10);
3923 }
3924 len += xmlCopyChar(0, &buf[len], val);
3925 }
3926 } else {
3927 ent = xmlParseEntityRef(ctxt);
3928 ctxt->nbentities++;
3929 if (ent != NULL)
3930 ctxt->nbentities += ent->owner;
3931 if ((ent != NULL) &&
3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3933 if (len + 10 > buf_size) {
3934 growBuffer(buf, 10);
3935 }
3936 if ((ctxt->replaceEntities == 0) &&
3937 (ent->content[0] == '&')) {
3938 buf[len++] = '&';
3939 buf[len++] = '#';
3940 buf[len++] = '3';
3941 buf[len++] = '8';
3942 buf[len++] = ';';
3943 } else {
3944 buf[len++] = ent->content[0];
3945 }
3946 } else if ((ent != NULL) &&
3947 (ctxt->replaceEntities != 0)) {
3948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3949 ++ctxt->depth;
3950 rep = xmlStringDecodeEntities(ctxt, ent->content,
3951 XML_SUBSTITUTE_REF,
3952 0, 0, 0);
3953 --ctxt->depth;
3954 if (rep != NULL) {
3955 current = rep;
3956 while (*current != 0) { /* non input consuming */
3957 if ((*current == 0xD) || (*current == 0xA) ||
3958 (*current == 0x9)) {
3959 buf[len++] = 0x20;
3960 current++;
3961 } else
3962 buf[len++] = *current++;
3963 if (len + 10 > buf_size) {
3964 growBuffer(buf, 10);
3965 }
3966 }
3967 xmlFree(rep);
3968 rep = NULL;
3969 }
3970 } else {
3971 if (len + 10 > buf_size) {
3972 growBuffer(buf, 10);
3973 }
3974 if (ent->content != NULL)
3975 buf[len++] = ent->content[0];
3976 }
3977 } else if (ent != NULL) {
3978 int i = xmlStrlen(ent->name);
3979 const xmlChar *cur = ent->name;
3980
3981 /*
3982 * This may look absurd but is needed to detect
3983 * entities problems
3984 */
3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3986 (ent->content != NULL) && (ent->checked == 0)) {
3987 unsigned long oldnbent = ctxt->nbentities;
3988
3989 ++ctxt->depth;
3990 rep = xmlStringDecodeEntities(ctxt, ent->content,
3991 XML_SUBSTITUTE_REF, 0, 0, 0);
3992 --ctxt->depth;
3993
3994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
3995 if (rep != NULL) {
3996 if (xmlStrchr(rep, '<'))
3997 ent->checked |= 1;
3998 xmlFree(rep);
3999 rep = NULL;
4000 } else {
4001 ent->content[0] = 0;
4002 }
4003 }
4004
4005 /*
4006 * Just output the reference
4007 */
4008 buf[len++] = '&';
4009 while (len + i + 10 > buf_size) {
4010 growBuffer(buf, i + 10);
4011 }
4012 for (;i > 0;i--)
4013 buf[len++] = *cur++;
4014 buf[len++] = ';';
4015 }
4016 }
4017 } else {
4018 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4019 if ((len != 0) || (!normalize)) {
4020 if ((!normalize) || (!in_space)) {
4021 COPY_BUF(l,buf,len,0x20);
4022 while (len + 10 > buf_size) {
4023 growBuffer(buf, 10);
4024 }
4025 }
4026 in_space = 1;
4027 }
4028 } else {
4029 in_space = 0;
4030 COPY_BUF(l,buf,len,c);
4031 if (len + 10 > buf_size) {
4032 growBuffer(buf, 10);
4033 }
4034 }
4035 NEXTL(l);
4036 }
4037 GROW;
4038 c = CUR_CHAR(l);
4039 }
4040 if (ctxt->instate == XML_PARSER_EOF)
4041 goto error;
4042
4043 if ((in_space) && (normalize)) {
4044 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4045 }
4046 buf[len] = 0;
4047 if (RAW == '<') {
4048 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4049 } else if (RAW != limit) {
4050 if ((c != 0) && (!IS_CHAR(c))) {
4051 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052 "invalid character in attribute value\n");
4053 } else {
4054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055 "AttValue: ' expected\n");
4056 }
4057 } else
4058 NEXT;
4059
4060 /*
4061 * There we potentially risk an overflow, don't allow attribute value of
4062 * length more than INT_MAX it is a very reasonnable assumption !
4063 */
4064 if (len >= INT_MAX) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066 "AttValue length too long\n");
4067 goto mem_error;
4068 }
4069
4070 if (attlen != NULL) *attlen = (int) len;
4071 return(buf);
4072
4073mem_error:
4074 xmlErrMemory(ctxt, NULL);
4075error:
4076 if (buf != NULL)
4077 xmlFree(buf);
4078 if (rep != NULL)
4079 xmlFree(rep);
4080 return(NULL);
4081}
4082
4083/**
4084 * xmlParseAttValue:
4085 * @ctxt: an XML parser context
4086 *
4087 * parse a value for an attribute
4088 * Note: the parser won't do substitution of entities here, this
4089 * will be handled later in xmlStringGetNodeList
4090 *
4091 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092 * "'" ([^<&'] | Reference)* "'"
4093 *
4094 * 3.3.3 Attribute-Value Normalization:
4095 * Before the value of an attribute is passed to the application or
4096 * checked for validity, the XML processor must normalize it as follows:
4097 * - a character reference is processed by appending the referenced
4098 * character to the attribute value
4099 * - an entity reference is processed by recursively processing the
4100 * replacement text of the entity
4101 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102 * appending #x20 to the normalized value, except that only a single
4103 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4104 * parsed entity or the literal entity value of an internal parsed entity
4105 * - other characters are processed by appending them to the normalized value
4106 * If the declared value is not CDATA, then the XML processor must further
4107 * process the normalized attribute value by discarding any leading and
4108 * trailing space (#x20) characters, and by replacing sequences of space
4109 * (#x20) characters by a single space (#x20) character.
4110 * All attributes for which no declaration has been read should be treated
4111 * by a non-validating parser as if declared CDATA.
4112 *
4113 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114 */
4115
4116
4117xmlChar *
4118xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4119 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4120 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4121}
4122
4123/**
4124 * xmlParseSystemLiteral:
4125 * @ctxt: an XML parser context
4126 *
4127 * parse an XML Literal
4128 *
4129 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130 *
4131 * Returns the SystemLiteral parsed or NULL
4132 */
4133
4134xmlChar *
4135xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136 xmlChar *buf = NULL;
4137 int len = 0;
4138 int size = XML_PARSER_BUFFER_SIZE;
4139 int cur, l;
4140 xmlChar stop;
4141 int state = ctxt->instate;
4142 int count = 0;
4143
4144 SHRINK;
4145 if (RAW == '"') {
4146 NEXT;
4147 stop = '"';
4148 } else if (RAW == '\'') {
4149 NEXT;
4150 stop = '\'';
4151 } else {
4152 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4153 return(NULL);
4154 }
4155
4156 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157 if (buf == NULL) {
4158 xmlErrMemory(ctxt, NULL);
4159 return(NULL);
4160 }
4161 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162 cur = CUR_CHAR(l);
4163 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4164 if (len + 5 >= size) {
4165 xmlChar *tmp;
4166
4167 if ((size > XML_MAX_NAME_LENGTH) &&
4168 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170 xmlFree(buf);
4171 ctxt->instate = (xmlParserInputState) state;
4172 return(NULL);
4173 }
4174 size *= 2;
4175 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 if (tmp == NULL) {
4177 xmlFree(buf);
4178 xmlErrMemory(ctxt, NULL);
4179 ctxt->instate = (xmlParserInputState) state;
4180 return(NULL);
4181 }
4182 buf = tmp;
4183 }
4184 count++;
4185 if (count > 50) {
4186 GROW;
4187 count = 0;
4188 if (ctxt->instate == XML_PARSER_EOF) {
4189 xmlFree(buf);
4190 return(NULL);
4191 }
4192 }
4193 COPY_BUF(l,buf,len,cur);
4194 NEXTL(l);
4195 cur = CUR_CHAR(l);
4196 if (cur == 0) {
4197 GROW;
4198 SHRINK;
4199 cur = CUR_CHAR(l);
4200 }
4201 }
4202 buf[len] = 0;
4203 ctxt->instate = (xmlParserInputState) state;
4204 if (!IS_CHAR(cur)) {
4205 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4206 } else {
4207 NEXT;
4208 }
4209 return(buf);
4210}
4211
4212/**
4213 * xmlParsePubidLiteral:
4214 * @ctxt: an XML parser context
4215 *
4216 * parse an XML public literal
4217 *
4218 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219 *
4220 * Returns the PubidLiteral parsed or NULL.
4221 */
4222
4223xmlChar *
4224xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225 xmlChar *buf = NULL;
4226 int len = 0;
4227 int size = XML_PARSER_BUFFER_SIZE;
4228 xmlChar cur;
4229 xmlChar stop;
4230 int count = 0;
4231 xmlParserInputState oldstate = ctxt->instate;
4232
4233 SHRINK;
4234 if (RAW == '"') {
4235 NEXT;
4236 stop = '"';
4237 } else if (RAW == '\'') {
4238 NEXT;
4239 stop = '\'';
4240 } else {
4241 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4242 return(NULL);
4243 }
4244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245 if (buf == NULL) {
4246 xmlErrMemory(ctxt, NULL);
4247 return(NULL);
4248 }
4249 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4250 cur = CUR;
4251 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4252 if (len + 1 >= size) {
4253 xmlChar *tmp;
4254
4255 if ((size > XML_MAX_NAME_LENGTH) &&
4256 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258 xmlFree(buf);
4259 return(NULL);
4260 }
4261 size *= 2;
4262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 if (tmp == NULL) {
4264 xmlErrMemory(ctxt, NULL);
4265 xmlFree(buf);
4266 return(NULL);
4267 }
4268 buf = tmp;
4269 }
4270 buf[len++] = cur;
4271 count++;
4272 if (count > 50) {
4273 GROW;
4274 count = 0;
4275 if (ctxt->instate == XML_PARSER_EOF) {
4276 xmlFree(buf);
4277 return(NULL);
4278 }
4279 }
4280 NEXT;
4281 cur = CUR;
4282 if (cur == 0) {
4283 GROW;
4284 SHRINK;
4285 cur = CUR;
4286 }
4287 }
4288 buf[len] = 0;
4289 if (cur != stop) {
4290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4291 } else {
4292 NEXT;
4293 }
4294 ctxt->instate = oldstate;
4295 return(buf);
4296}
4297
/* defined after xmlParseCharData(), which falls back to it */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by a byte value. An entry is non-zero (it holds the
 * byte itself) when the fast loop of xmlParseCharData() may step over that
 * byte, and zero when the loop must stop on it: control characters other
 * than 0x09 (so 0x0A and 0x0D stop the loop), '&', '<', ']' and every byte
 * from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero, runs of plain ASCII are scanned directly in the
 * input buffer and passed to the SAX callbacks without being copied.
 * As soon as a byte outside that fast path is met (or when @cdata is
 * non-zero) the work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col hold the position matching ctxt->input->cur: they are
     * refreshed after each delivery and written back before the fallback.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* first try to recognize a segment made only of 0x20 and 0xA */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* the whole segment up to the next tag is spaces/newlines */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged as plain data in test_char_data[] */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    /*
                     * NOTE(review): cur jumps past the first ']' and the
                     * bytes scanned before it are not handed to any SAX
                     * callback on this path -- confirm this is intended.
                     */
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned so far, straight from the buffer */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: cur is moved onto the LF, so the CR is not
                     * part of the next delivered segment.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back, the loop condition below rejects it */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* reload from cur after SHRINK/GROW */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* restore the position matching cur before the generic parser runs */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4495
4496/**
4497 * xmlParseCharDataComplex:
4498 * @ctxt: an XML parser context
4499 * @cdata: int indicating whether we are within a CDATA section
4500 *
4501 * parse a CharData section.this is the fallback function
4502 * of xmlParseCharData() when the parsing requires handling
4503 * of non-ASCII characters.
4504 */
4505static void
4506xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4507 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508 int nbchar = 0;
4509 int cur, l;
4510 int count = 0;
4511
4512 SHRINK;
4513 GROW;
4514 cur = CUR_CHAR(l);
4515 while ((cur != '<') && /* checked */
4516 (cur != '&') &&
4517 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4518 if ((cur == ']') && (NXT(1) == ']') &&
4519 (NXT(2) == '>')) {
4520 if (cdata) break;
4521 else {
4522 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4523 }
4524 }
4525 COPY_BUF(l,buf,nbchar,cur);
4526 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4527 buf[nbchar] = 0;
4528
4529 /*
4530 * OK the segment is to be consumed as chars.
4531 */
4532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4533 if (areBlanks(ctxt, buf, nbchar, 0)) {
4534 if (ctxt->sax->ignorableWhitespace != NULL)
4535 ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 buf, nbchar);
4537 } else {
4538 if (ctxt->sax->characters != NULL)
4539 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4540 if ((ctxt->sax->characters !=
4541 ctxt->sax->ignorableWhitespace) &&
4542 (*ctxt->space == -1))
4543 *ctxt->space = -2;
4544 }
4545 }
4546 nbchar = 0;
4547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
4550 }
4551 count++;
4552 if (count > 50) {
4553 GROW;
4554 count = 0;
4555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
4557 }
4558 NEXTL(l);
4559 cur = CUR_CHAR(l);
4560 }
4561 if (nbchar != 0) {
4562 buf[nbchar] = 0;
4563 /*
4564 * OK the segment is to be consumed as chars.
4565 */
4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567 if (areBlanks(ctxt, buf, nbchar, 0)) {
4568 if (ctxt->sax->ignorableWhitespace != NULL)
4569 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 } else {
4571 if (ctxt->sax->characters != NULL)
4572 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4573 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 (*ctxt->space == -1))
4575 *ctxt->space = -2;
4576 }
4577 }
4578 }
4579 if ((cur != 0) && (!IS_CHAR(cur))) {
4580 /* Generate the error and skip the offending character */
4581 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582 "PCDATA invalid Char value %d\n",
4583 cur);
4584 NEXTL(l);
4585 }
4586}
4587
4588/**
4589 * xmlParseExternalID:
4590 * @ctxt: an XML parser context
4591 * @publicID: a xmlChar** receiving PubidLiteral
4592 * @strict: indicate whether we should restrict parsing to only
4593 * production [75], see NOTE below
4594 *
4595 * Parse an External ID or a Public ID
4596 *
4597 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4598 * 'PUBLIC' S PubidLiteral S SystemLiteral
4599 *
4600 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4602 *
4603 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604 *
4605 * Returns the function returns SystemLiteral and in the second
4606 * case publicID receives PubidLiteral, is strict is off
4607 * it is possible to return NULL and have publicID set.
4608 */
4609
4610xmlChar *
4611xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612 xmlChar *URI = NULL;
4613
4614 SHRINK;
4615
4616 *publicID = NULL;
4617 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4618 SKIP(6);
4619 if (SKIP_BLANKS == 0) {
4620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 "Space required after 'SYSTEM'\n");
4622 }
4623 URI = xmlParseSystemLiteral(ctxt);
4624 if (URI == NULL) {
4625 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4626 }
4627 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4628 SKIP(6);
4629 if (SKIP_BLANKS == 0) {
4630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 "Space required after 'PUBLIC'\n");
4632 }
4633 *publicID = xmlParsePubidLiteral(ctxt);
4634 if (*publicID == NULL) {
4635 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4636 }
4637 if (strict) {
4638 /*
4639 * We don't handle [83] so "S SystemLiteral" is required.
4640 */
4641 if (SKIP_BLANKS == 0) {
4642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643 "Space required after the Public Identifier\n");
4644 }
4645 } else {
4646 /*
4647 * We handle [83] so we return immediately, if
4648 * "S SystemLiteral" is not detected. We skip blanks if no
4649 * system literal was found, but this is harmless since we must
4650 * be at the end of a NotationDecl.
4651 */
4652 if (SKIP_BLANKS == 0) return(NULL);
4653 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4654 }
4655 URI = xmlParseSystemLiteral(ctxt);
4656 if (URI == NULL) {
4657 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4658 }
4659 }
4660 return(URI);
4661}
4662
4663/**
4664 * xmlParseCommentComplex:
4665 * @ctxt: an XML parser context
4666 * @buf: the already parsed part of the buffer
4667 * @len: number of bytes filles in the buffer
4668 * @size: allocated size of the buffer
4669 *
4670 * Skip an XML (SGML) comment <!-- .... -->
4671 * The spec says that "For compatibility, the string "--" (double-hyphen)
4672 * must not occur within comments. "
4673 * This is the slow routine in case the accelerator for ascii didn't work
4674 *
4675 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676 */
4677static void
4678xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679 size_t len, size_t size) {
4680 int q, ql;
4681 int r, rl;
4682 int cur, l;
4683 size_t count = 0;
4684 int inputid;
4685
4686 inputid = ctxt->input->id;
4687
4688 if (buf == NULL) {
4689 len = 0;
4690 size = XML_PARSER_BUFFER_SIZE;
4691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 if (buf == NULL) {
4693 xmlErrMemory(ctxt, NULL);
4694 return;
4695 }
4696 }
4697 GROW; /* Assure there's enough input data */
4698 q = CUR_CHAR(ql);
4699 if (q == 0)
4700 goto not_terminated;
4701 if (!IS_CHAR(q)) {
4702 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703 "xmlParseComment: invalid xmlChar value %d\n",
4704 q);
4705 xmlFree (buf);
4706 return;
4707 }
4708 NEXTL(ql);
4709 r = CUR_CHAR(rl);
4710 if (r == 0)
4711 goto not_terminated;
4712 if (!IS_CHAR(r)) {
4713 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714 "xmlParseComment: invalid xmlChar value %d\n",
4715 q);
4716 xmlFree (buf);
4717 return;
4718 }
4719 NEXTL(rl);
4720 cur = CUR_CHAR(l);
4721 if (cur == 0)
4722 goto not_terminated;
4723 while (IS_CHAR(cur) && /* checked */
4724 ((cur != '>') ||
4725 (r != '-') || (q != '-'))) {
4726 if ((r == '-') && (q == '-')) {
4727 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4728 }
4729 if ((len > XML_MAX_TEXT_LENGTH) &&
4730 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732 "Comment too big found", NULL);
4733 xmlFree (buf);
4734 return;
4735 }
4736 if (len + 5 >= size) {
4737 xmlChar *new_buf;
4738 size_t new_size;
4739
4740 new_size = size * 2;
4741 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4742 if (new_buf == NULL) {
4743 xmlFree (buf);
4744 xmlErrMemory(ctxt, NULL);
4745 return;
4746 }
4747 buf = new_buf;
4748 size = new_size;
4749 }
4750 COPY_BUF(ql,buf,len,q);
4751 q = r;
4752 ql = rl;
4753 r = cur;
4754 rl = l;
4755
4756 count++;
4757 if (count > 50) {
4758 GROW;
4759 count = 0;
4760 if (ctxt->instate == XML_PARSER_EOF) {
4761 xmlFree(buf);
4762 return;
4763 }
4764 }
4765 NEXTL(l);
4766 cur = CUR_CHAR(l);
4767 if (cur == 0) {
4768 SHRINK;
4769 GROW;
4770 cur = CUR_CHAR(l);
4771 }
4772 }
4773 buf[len] = 0;
4774 if (cur == 0) {
4775 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4776 "Comment not terminated \n<!--%.50s\n", buf);
4777 } else if (!IS_CHAR(cur)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 cur);
4781 } else {
4782 if (inputid != ctxt->input->id) {
4783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4784 "Comment doesn't start and stop in the same"
4785 " entity\n");
4786 }
4787 NEXT;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 (!ctxt->disableSAX))
4790 ctxt->sax->comment(ctxt->userData, buf);
4791 }
4792 xmlFree(buf);
4793 return;
4794not_terminated:
4795 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 "Comment not terminated\n", NULL);
4797 xmlFree(buf);
4798 return;
4799}
4800
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * Nothing is done unless the input starts with "<!--". The body is
 * first scanned by a byte oriented fast path handling tab, line feed,
 * CR LF pairs and the bytes 0x20 to 0x7F; on any other byte the work
 * is handed over to xmlParseCommentComplex() together with the text
 * gathered so far. The text is only gathered when a SAX comment
 * handler is installed.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* Remember the state to restore once the comment has been parsed. */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        /* Leading line feeds only update the line/column counters. */
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        ccol = ctxt->input->col;
        /* Run over tab and the bytes 0x20 to 0x7F, except '-'. */
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        /* Number of bytes scanned since the last committed position. */
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First run: when "--" follows, only this run plus
                     * the terminator is allocated (presumably because
                     * the comment is expected to end right there).
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    /* Not enough room for the run and its terminator. */
                    xmlChar *new_buf;
                    size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /*
         * NOTE(review): unlike the allocation failures above, this exit
         * leaves ctxt->instate at XML_PARSER_COMMENT; confirm whether
         * that is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                              "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        /* Commit the scanned run. */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: the committed position is moved onto the LF, so
                 * the CR is left out of the next run that gets copied.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* jumps to the do/while condition below */
            }
            in--;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        /* Reload the pointer after SHRINK/GROW. */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->": the comment is complete. */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                       "comment doesn't start and stop in the"
                                       " same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* Nothing gathered: report an empty comment. */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    if (ctxt->instate != XML_PARSER_EOF)
                        ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>': report it and keep scanning. */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                in++;
                ctxt->input->col++;
            }
            /* Step over the hyphen; it becomes part of the next run. */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /*
     * A byte outside of the fast path range was met: let the slow
     * routine finish the job, it releases buf itself.
     */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4970
4971
4972/**
4973 * xmlParsePITarget:
4974 * @ctxt: an XML parser context
4975 *
4976 * parse the name of a PI
4977 *
4978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979 *
4980 * Returns the PITarget name or NULL
4981 */
4982
4983const xmlChar *
4984xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4985 const xmlChar *name;
4986
4987 name = xmlParseName(ctxt);
4988 if ((name != NULL) &&
4989 ((name[0] == 'x') || (name[0] == 'X')) &&
4990 ((name[1] == 'm') || (name[1] == 'M')) &&
4991 ((name[2] == 'l') || (name[2] == 'L'))) {
4992 int i;
4993 if ((name[0] == 'x') && (name[1] == 'm') &&
4994 (name[2] == 'l') && (name[3] == 0)) {
4995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4996 "XML declaration allowed only at the start of the document\n");
4997 return(name);
4998 } else if (name[3] == 0) {
4999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5000 return(name);
5001 }
5002 for (i = 0;;i++) {
5003 if (xmlW3CPIs[i] == NULL) break;
5004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 return(name);
5006 }
5007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 NULL, NULL);
5010 }
5011 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5012 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5013 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5014 }
5015 return(name);
5016}
5017
5018#ifdef LIBXML_CATALOG_ENABLED
5019/**
5020 * xmlParseCatalogPI:
5021 * @ctxt: an XML parser context
5022 * @catalog: the PI value string
5023 *
5024 * parse an XML Catalog Processing Instruction.
5025 *
5026 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5027 *
5028 * Occurs only if allowed by the user and if happening in the Misc
5029 * part of the document before any doctype informations
5030 * This will add the given catalog to the parsing context in order
5031 * to be used if there is a resolution need further down in the document
5032 */
5033
5034static void
5035xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5036 xmlChar *URL = NULL;
5037 const xmlChar *tmp, *base;
5038 xmlChar marker;
5039
5040 tmp = catalog;
5041 while (IS_BLANK_CH(*tmp)) tmp++;
5042 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5043 goto error;
5044 tmp += 7;
5045 while (IS_BLANK_CH(*tmp)) tmp++;
5046 if (*tmp != '=') {
5047 return;
5048 }
5049 tmp++;
5050 while (IS_BLANK_CH(*tmp)) tmp++;
5051 marker = *tmp;
5052 if ((marker != '\'') && (marker != '"'))
5053 goto error;
5054 tmp++;
5055 base = tmp;
5056 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5057 if (*tmp == 0)
5058 goto error;
5059 URL = xmlStrndup(base, tmp - base);
5060 tmp++;
5061 while (IS_BLANK_CH(*tmp)) tmp++;
5062 if (*tmp != 0)
5063 goto error;
5064
5065 if (URL != NULL) {
5066 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5067 xmlFree(URL);
5068 }
5069 return;
5070
5071error:
5072 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5073 "Catalog PI syntax error: %s\n",
5074 catalog, NULL);
5075 if (URL != NULL)
5076 xmlFree(URL);
5077}
5078#endif
5079
5080/**
5081 * xmlParsePI:
5082 * @ctxt: an XML parser context
5083 *
5084 * parse an XML Processing Instruction.
5085 *
5086 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087 *
5088 * The processing is transfered to SAX once parsed.
5089 */
5090
5091void
5092xmlParsePI(xmlParserCtxtPtr ctxt) {
5093 xmlChar *buf = NULL;
5094 size_t len = 0;
5095 size_t size = XML_PARSER_BUFFER_SIZE;
5096 int cur, l;
5097 const xmlChar *target;
5098 xmlParserInputState state;
5099 int count = 0;
5100
5101 if ((RAW == '<') && (NXT(1) == '?')) {
5102 int inputid = ctxt->input->id;
5103 state = ctxt->instate;
5104 ctxt->instate = XML_PARSER_PI;
5105 /*
5106 * this is a Processing Instruction.
5107 */
5108 SKIP(2);
5109 SHRINK;
5110
5111 /*
5112 * Parse the target name and check for special support like
5113 * namespace.
5114 */
5115 target = xmlParsePITarget(ctxt);
5116 if (target != NULL) {
5117 if ((RAW == '?') && (NXT(1) == '>')) {
5118 if (inputid != ctxt->input->id) {
5119 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5120 "PI declaration doesn't start and stop in"
5121 " the same entity\n");
5122 }
5123 SKIP(2);
5124
5125 /*
5126 * SAX: PI detected.
5127 */
5128 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 (ctxt->sax->processingInstruction != NULL))
5130 ctxt->sax->processingInstruction(ctxt->userData,
5131 target, NULL);
5132 if (ctxt->instate != XML_PARSER_EOF)
5133 ctxt->instate = state;
5134 return;
5135 }
5136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5137 if (buf == NULL) {
5138 xmlErrMemory(ctxt, NULL);
5139 ctxt->instate = state;
5140 return;
5141 }
5142 if (SKIP_BLANKS == 0) {
5143 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "ParsePI: PI %s space expected\n", target);
5145 }
5146 cur = CUR_CHAR(l);
5147 while (IS_CHAR(cur) && /* checked */
5148 ((cur != '?') || (NXT(1) != '>'))) {
5149 if (len + 5 >= size) {
5150 xmlChar *tmp;
5151 size_t new_size = size * 2;
5152 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5153 if (tmp == NULL) {
5154 xmlErrMemory(ctxt, NULL);
5155 xmlFree(buf);
5156 ctxt->instate = state;
5157 return;
5158 }
5159 buf = tmp;
5160 size = new_size;
5161 }
5162 count++;
5163 if (count > 50) {
5164 GROW;
5165 if (ctxt->instate == XML_PARSER_EOF) {
5166 xmlFree(buf);
5167 return;
5168 }
5169 count = 0;
5170 if ((len > XML_MAX_TEXT_LENGTH) &&
5171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173 "PI %s too big found", target);
5174 xmlFree(buf);
5175 ctxt->instate = state;
5176 return;
5177 }
5178 }
5179 COPY_BUF(l,buf,len,cur);
5180 NEXTL(l);
5181 cur = CUR_CHAR(l);
5182 if (cur == 0) {
5183 SHRINK;
5184 GROW;
5185 cur = CUR_CHAR(l);
5186 }
5187 }
5188 if ((len > XML_MAX_TEXT_LENGTH) &&
5189 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191 "PI %s too big found", target);
5192 xmlFree(buf);
5193 ctxt->instate = state;
5194 return;
5195 }
5196 buf[len] = 0;
5197 if (cur != '?') {
5198 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 "ParsePI: PI %s never end ...\n", target);
5200 } else {
5201 if (inputid != ctxt->input->id) {
5202 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 "PI declaration doesn't start and stop in"
5204 " the same entity\n");
5205 }
5206 SKIP(2);
5207
5208#ifdef LIBXML_CATALOG_ENABLED
5209 if (((state == XML_PARSER_MISC) ||
5210 (state == XML_PARSER_START)) &&
5211 (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 (allow == XML_CATA_ALLOW_ALL))
5215 xmlParseCatalogPI(ctxt, buf);
5216 }
5217#endif
5218
5219
5220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, buf);
5227 }
5228 xmlFree(buf);
5229 } else {
5230 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5231 }
5232 if (ctxt->instate != XML_PARSER_EOF)
5233 ctxt->instate = state;
5234 }
5235}
5236
5237/**
5238 * xmlParseNotationDecl:
5239 * @ctxt: an XML parser context
5240 *
5241 * parse a notation declaration
5242 *
5243 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5244 *
5245 * Hence there is actually 3 choices:
5246 * 'PUBLIC' S PubidLiteral
5247 * 'PUBLIC' S PubidLiteral S SystemLiteral
5248 * and 'SYSTEM' S SystemLiteral
5249 *
5250 * See the NOTE on xmlParseExternalID().
5251 */
5252
5253void
5254xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5255 const xmlChar *name;
5256 xmlChar *Pubid;
5257 xmlChar *Systemid;
5258
5259 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5260 int inputid = ctxt->input->id;
5261 SHRINK;
5262 SKIP(10);
5263 if (SKIP_BLANKS == 0) {
5264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 "Space required after '<!NOTATION'\n");
5266 return;
5267 }
5268
5269 name = xmlParseName(ctxt);
5270 if (name == NULL) {
5271 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5272 return;
5273 }
5274 if (xmlStrchr(name, ':') != NULL) {
5275 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5276 "colons are forbidden from notation names '%s'\n",
5277 name, NULL, NULL);
5278 }
5279 if (SKIP_BLANKS == 0) {
5280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space required after the NOTATION name'\n");
5282 return;
5283 }
5284
5285 /*
5286 * Parse the IDs.
5287 */
5288 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 SKIP_BLANKS;
5290
5291 if (RAW == '>') {
5292 if (inputid != ctxt->input->id) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 "Notation declaration doesn't start and stop"
5295 " in the same entity\n");
5296 }
5297 NEXT;
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->notationDecl != NULL))
5300 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 } else {
5302 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5303 }
5304 if (Systemid != NULL) xmlFree(Systemid);
5305 if (Pubid != NULL) xmlFree(Pubid);
5306 }
5307}
5308
5309/**
5310 * xmlParseEntityDecl:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse <!ENTITY declarations
5314 *
5315 * [70] EntityDecl ::= GEDecl | PEDecl
5316 *
5317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318 *
5319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320 *
5321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322 *
5323 * [74] PEDef ::= EntityValue | ExternalID
5324 *
5325 * [76] NDataDecl ::= S 'NDATA' S Name
5326 *
5327 * [ VC: Notation Declared ]
5328 * The Name must match the declared name of a notation.
5329 */
5330
5331void
5332xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5333 const xmlChar *name = NULL;
5334 xmlChar *value = NULL;
5335 xmlChar *URI = NULL, *literal = NULL;
5336 const xmlChar *ndata = NULL;
5337 int isParameter = 0;
5338 xmlChar *orig = NULL;
5339
5340 /* GROW; done in the caller */
5341 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5342 int inputid = ctxt->input->id;
5343 SHRINK;
5344 SKIP(8);
5345 if (SKIP_BLANKS == 0) {
5346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 "Space required after '<!ENTITY'\n");
5348 }
5349
5350 if (RAW == '%') {
5351 NEXT;
5352 if (SKIP_BLANKS == 0) {
5353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5354 "Space required after '%%'\n");
5355 }
5356 isParameter = 1;
5357 }
5358
5359 name = xmlParseName(ctxt);
5360 if (name == NULL) {
5361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 "xmlParseEntityDecl: no name\n");
5363 return;
5364 }
5365 if (xmlStrchr(name, ':') != NULL) {
5366 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5367 "colons are forbidden from entities names '%s'\n",
5368 name, NULL, NULL);
5369 }
5370 if (SKIP_BLANKS == 0) {
5371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "Space required after the entity name\n");
5373 }
5374
5375 ctxt->instate = XML_PARSER_ENTITY_DECL;
5376 /*
5377 * handle the various case of definitions...
5378 */
5379 if (isParameter) {
5380 if ((RAW == '"') || (RAW == '\'')) {
5381 value = xmlParseEntityValue(ctxt, &orig);
5382 if (value) {
5383 if ((ctxt->sax != NULL) &&
5384 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 ctxt->sax->entityDecl(ctxt->userData, name,
5386 XML_INTERNAL_PARAMETER_ENTITY,
5387 NULL, NULL, value);
5388 }
5389 } else {
5390 URI = xmlParseExternalID(ctxt, &literal, 1);
5391 if ((URI == NULL) && (literal == NULL)) {
5392 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5393 }
5394 if (URI) {
5395 xmlURIPtr uri;
5396
5397 uri = xmlParseURI((const char *) URI);
5398 if (uri == NULL) {
5399 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 "Invalid URI: %s\n", URI);
5401 /*
5402 * This really ought to be a well formedness error
5403 * but the XML Core WG decided otherwise c.f. issue
5404 * E26 of the XML erratas.
5405 */
5406 } else {
5407 if (uri->fragment != NULL) {
5408 /*
5409 * Okay this is foolish to block those but not
5410 * invalid URIs.
5411 */
5412 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5413 } else {
5414 if ((ctxt->sax != NULL) &&
5415 (!ctxt->disableSAX) &&
5416 (ctxt->sax->entityDecl != NULL))
5417 ctxt->sax->entityDecl(ctxt->userData, name,
5418 XML_EXTERNAL_PARAMETER_ENTITY,
5419 literal, URI, NULL);
5420 }
5421 xmlFreeURI(uri);
5422 }
5423 }
5424 }
5425 } else {
5426 if ((RAW == '"') || (RAW == '\'')) {
5427 value = xmlParseEntityValue(ctxt, &orig);
5428 if ((ctxt->sax != NULL) &&
5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 ctxt->sax->entityDecl(ctxt->userData, name,
5431 XML_INTERNAL_GENERAL_ENTITY,
5432 NULL, NULL, value);
5433 /*
5434 * For expat compatibility in SAX mode.
5435 */
5436 if ((ctxt->myDoc == NULL) ||
5437 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 if (ctxt->myDoc == NULL) {
5439 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5440 if (ctxt->myDoc == NULL) {
5441 xmlErrMemory(ctxt, "New Doc failed");
5442 return;
5443 }
5444 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5445 }
5446 if (ctxt->myDoc->intSubset == NULL)
5447 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 BAD_CAST "fake", NULL, NULL);
5449
5450 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 NULL, NULL, value);
5452 }
5453 } else {
5454 URI = xmlParseExternalID(ctxt, &literal, 1);
5455 if ((URI == NULL) && (literal == NULL)) {
5456 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5457 }
5458 if (URI) {
5459 xmlURIPtr uri;
5460
5461 uri = xmlParseURI((const char *)URI);
5462 if (uri == NULL) {
5463 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 "Invalid URI: %s\n", URI);
5465 /*
5466 * This really ought to be a well formedness error
5467 * but the XML Core WG decided otherwise c.f. issue
5468 * E26 of the XML erratas.
5469 */
5470 } else {
5471 if (uri->fragment != NULL) {
5472 /*
5473 * Okay this is foolish to block those but not
5474 * invalid URIs.
5475 */
5476 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5477 }
5478 xmlFreeURI(uri);
5479 }
5480 }
5481 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 "Space required before 'NDATA'\n");
5484 }
5485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5486 SKIP(5);
5487 if (SKIP_BLANKS == 0) {
5488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 "Space required after 'NDATA'\n");
5490 }
5491 ndata = xmlParseName(ctxt);
5492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 (ctxt->sax->unparsedEntityDecl != NULL))
5494 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 literal, URI, ndata);
5496 } else {
5497 if ((ctxt->sax != NULL) &&
5498 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 ctxt->sax->entityDecl(ctxt->userData, name,
5500 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 literal, URI, NULL);
5502 /*
5503 * For expat compatibility in SAX mode.
5504 * assuming the entity repalcement was asked for
5505 */
5506 if ((ctxt->replaceEntities != 0) &&
5507 ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
5515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 }
5517
5518 if (ctxt->myDoc->intSubset == NULL)
5519 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 BAD_CAST "fake", NULL, NULL);
5521 xmlSAX2EntityDecl(ctxt, name,
5522 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 literal, URI, NULL);
5524 }
5525 }
5526 }
5527 }
5528 if (ctxt->instate == XML_PARSER_EOF)
5529 goto done;
5530 SKIP_BLANKS;
5531 if (RAW != '>') {
5532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5533 "xmlParseEntityDecl: entity %s not terminated\n", name);
5534 xmlHaltParser(ctxt);
5535 } else {
5536 if (inputid != ctxt->input->id) {
5537 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5538 "Entity declaration doesn't start and stop in"
5539 " the same entity\n");
5540 }
5541 NEXT;
5542 }
5543 if (orig != NULL) {
5544 /*
5545 * Ugly mechanism to save the raw entity value.
5546 */
5547 xmlEntityPtr cur = NULL;
5548
5549 if (isParameter) {
5550 if ((ctxt->sax != NULL) &&
5551 (ctxt->sax->getParameterEntity != NULL))
5552 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 } else {
5554 if ((ctxt->sax != NULL) &&
5555 (ctxt->sax->getEntity != NULL))
5556 cur = ctxt->sax->getEntity(ctxt->userData, name);
5557 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5558 cur = xmlSAX2GetEntity(ctxt, name);
5559 }
5560 }
5561 if ((cur != NULL) && (cur->orig == NULL)) {
5562 cur->orig = orig;
5563 orig = NULL;
5564 }
5565 }
5566
5567done:
5568 if (value != NULL) xmlFree(value);
5569 if (URI != NULL) xmlFree(URI);
5570 if (literal != NULL) xmlFree(literal);
5571 if (orig != NULL) xmlFree(orig);
5572 }
5573}
5574
5575/**
5576 * xmlParseDefaultDecl:
5577 * @ctxt: an XML parser context
5578 * @value: Receive a possible fixed default value for the attribute
5579 *
5580 * Parse an attribute default declaration
5581 *
5582 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583 *
5584 * [ VC: Required Attribute ]
5585 * if the default declaration is the keyword #REQUIRED, then the
5586 * attribute must be specified for all elements of the type in the
5587 * attribute-list declaration.
5588 *
5589 * [ VC: Attribute Default Legal ]
5590 * The declared default value must meet the lexical constraints of
5591 * the declared attribute type c.f. xmlValidateAttributeDecl()
5592 *
5593 * [ VC: Fixed Attribute Default ]
5594 * if an attribute has a default value declared with the #FIXED
5595 * keyword, instances of that attribute must match the default value.
5596 *
5597 * [ WFC: No < in Attribute Values ]
5598 * handled in xmlParseAttValue()
5599 *
5600 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5601 * or XML_ATTRIBUTE_FIXED.
5602 */
5603
5604int
5605xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606 int val;
5607 xmlChar *ret;
5608
5609 *value = NULL;
5610 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5611 SKIP(9);
5612 return(XML_ATTRIBUTE_REQUIRED);
5613 }
5614 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5615 SKIP(8);
5616 return(XML_ATTRIBUTE_IMPLIED);
5617 }
5618 val = XML_ATTRIBUTE_NONE;
5619 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5620 SKIP(6);
5621 val = XML_ATTRIBUTE_FIXED;
5622 if (SKIP_BLANKS == 0) {
5623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 "Space required after '#FIXED'\n");
5625 }
5626 }
5627 ret = xmlParseAttValue(ctxt);
5628 ctxt->instate = XML_PARSER_DTD;
5629 if (ret == NULL) {
5630 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5631 "Attribute default value declaration error\n");
5632 } else
5633 *value = ret;
5634 return(val);
5635}
5636
5637/**
5638 * xmlParseNotationType:
5639 * @ctxt: an XML parser context
5640 *
5641 * parse an Notation attribute type.
5642 *
5643 * Note: the leading 'NOTATION' S part has already being parsed...
5644 *
5645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646 *
5647 * [ VC: Notation Attributes ]
5648 * Values of this type must match one of the notation names included
5649 * in the declaration; all notation names in the declaration must be declared.
5650 *
5651 * Returns: the notation attribute tree built while parsing
5652 */
5653
5654xmlEnumerationPtr
5655xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5656 const xmlChar *name;
5657 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5658
5659 if (RAW != '(') {
5660 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5661 return(NULL);
5662 }
5663 SHRINK;
5664 do {
5665 NEXT;
5666 SKIP_BLANKS;
5667 name = xmlParseName(ctxt);
5668 if (name == NULL) {
5669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 "Name expected in NOTATION declaration\n");
5671 xmlFreeEnumeration(ret);
5672 return(NULL);
5673 }
5674 tmp = ret;
5675 while (tmp != NULL) {
5676 if (xmlStrEqual(name, tmp->name)) {
5677 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 "standalone: attribute notation value token %s duplicated\n",
5679 name, NULL);
5680 if (!xmlDictOwns(ctxt->dict, name))
5681 xmlFree((xmlChar *) name);
5682 break;
5683 }
5684 tmp = tmp->next;
5685 }
5686 if (tmp == NULL) {
5687 cur = xmlCreateEnumeration(name);
5688 if (cur == NULL) {
5689 xmlFreeEnumeration(ret);
5690 return(NULL);
5691 }
5692 if (last == NULL) ret = last = cur;
5693 else {
5694 last->next = cur;
5695 last = cur;
5696 }
5697 }
5698 SKIP_BLANKS;
5699 } while (RAW == '|');
5700 if (RAW != ')') {
5701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5702 xmlFreeEnumeration(ret);
5703 return(NULL);
5704 }
5705 NEXT;
5706 return(ret);
5707}
5708
5709/**
5710 * xmlParseEnumerationType:
5711 * @ctxt: an XML parser context
5712 *
5713 * parse an Enumeration attribute type.
5714 *
5715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716 *
5717 * [ VC: Enumeration ]
5718 * Values of this type must match one of the Nmtoken tokens in
5719 * the declaration
5720 *
5721 * Returns: the enumeration attribute tree built while parsing
5722 */
5723
5724xmlEnumerationPtr
5725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name;
5727 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5728
5729 if (RAW != '(') {
5730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5731 return(NULL);
5732 }
5733 SHRINK;
5734 do {
5735 NEXT;
5736 SKIP_BLANKS;
5737 name = xmlParseNmtoken(ctxt);
5738 if (name == NULL) {
5739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5740 return(ret);
5741 }
5742 tmp = ret;
5743 while (tmp != NULL) {
5744 if (xmlStrEqual(name, tmp->name)) {
5745 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 "standalone: attribute enumeration value token %s duplicated\n",
5747 name, NULL);
5748 if (!xmlDictOwns(ctxt->dict, name))
5749 xmlFree(name);
5750 break;
5751 }
5752 tmp = tmp->next;
5753 }
5754 if (tmp == NULL) {
5755 cur = xmlCreateEnumeration(name);
5756 if (!xmlDictOwns(ctxt->dict, name))
5757 xmlFree(name);
5758 if (cur == NULL) {
5759 xmlFreeEnumeration(ret);
5760 return(NULL);
5761 }
5762 if (last == NULL) ret = last = cur;
5763 else {
5764 last->next = cur;
5765 last = cur;
5766 }
5767 }
5768 SKIP_BLANKS;
5769 } while (RAW == '|');
5770 if (RAW != ')') {
5771 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5772 return(ret);
5773 }
5774 NEXT;
5775 return(ret);
5776}
5777
5778/**
5779 * xmlParseEnumeratedType:
5780 * @ctxt: an XML parser context
5781 * @tree: the enumeration tree built while parsing
5782 *
5783 * parse an Enumerated attribute type.
5784 *
5785 * [57] EnumeratedType ::= NotationType | Enumeration
5786 *
5787 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788 *
5789 *
5790 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791 */
5792
5793int
5794xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5795 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5796 SKIP(8);
5797 if (SKIP_BLANKS == 0) {
5798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 "Space required after 'NOTATION'\n");
5800 return(0);
5801 }
5802 *tree = xmlParseNotationType(ctxt);
5803 if (*tree == NULL) return(0);
5804 return(XML_ATTRIBUTE_NOTATION);
5805 }
5806 *tree = xmlParseEnumerationType(ctxt);
5807 if (*tree == NULL) return(0);
5808 return(XML_ATTRIBUTE_ENUMERATION);
5809}
5810
5811/**
5812 * xmlParseAttributeType:
5813 * @ctxt: an XML parser context
5814 * @tree: the enumeration tree built while parsing
5815 *
5816 * parse the Attribute list def for an element
5817 *
5818 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819 *
5820 * [55] StringType ::= 'CDATA'
5821 *
5822 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824 *
5825 * Validity constraints for attribute values syntax are checked in
5826 * xmlValidateAttributeValue()
5827 *
5828 * [ VC: ID ]
5829 * Values of type ID must match the Name production. A name must not
5830 * appear more than once in an XML document as a value of this type;
5831 * i.e., ID values must uniquely identify the elements which bear them.
5832 *
5833 * [ VC: One ID per Element Type ]
5834 * No element type may have more than one ID attribute specified.
5835 *
5836 * [ VC: ID Attribute Default ]
5837 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838 *
5839 * [ VC: IDREF ]
5840 * Values of type IDREF must match the Name production, and values
5841 * of type IDREFS must match Names; each IDREF Name must match the value
5842 * of an ID attribute on some element in the XML document; i.e. IDREF
5843 * values must match the value of some ID attribute.
5844 *
5845 * [ VC: Entity Name ]
5846 * Values of type ENTITY must match the Name production, values
5847 * of type ENTITIES must match Names; each Entity Name must match the
5848 * name of an unparsed entity declared in the DTD.
5849 *
5850 * [ VC: Name Token ]
5851 * Values of type NMTOKEN must match the Nmtoken production; values
5852 * of type NMTOKENS must match Nmtokens.
5853 *
5854 * Returns the attribute type
5855 */
5856int
5857xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858 SHRINK;
5859 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5860 SKIP(5);
5861 return(XML_ATTRIBUTE_CDATA);
5862 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5863 SKIP(6);
5864 return(XML_ATTRIBUTE_IDREFS);
5865 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5866 SKIP(5);
5867 return(XML_ATTRIBUTE_IDREF);
5868 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869 SKIP(2);
5870 return(XML_ATTRIBUTE_ID);
5871 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5872 SKIP(6);
5873 return(XML_ATTRIBUTE_ENTITY);
5874 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5875 SKIP(8);
5876 return(XML_ATTRIBUTE_ENTITIES);
5877 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5878 SKIP(8);
5879 return(XML_ATTRIBUTE_NMTOKENS);
5880 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5881 SKIP(7);
5882 return(XML_ATTRIBUTE_NMTOKEN);
5883 }
5884 return(xmlParseEnumeratedType(ctxt, tree));
5885}
5886
5887/**
5888 * xmlParseAttributeListDecl:
5889 * @ctxt: an XML parser context
5890 *
5891 * : parse the Attribute list def for an element
5892 *
5893 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894 *
5895 * [53] AttDef ::= S Name S AttType S DefaultDecl
5896 *
5897 */
5898void
5899xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5900 const xmlChar *elemName;
5901 const xmlChar *attrName;
5902 xmlEnumerationPtr tree;
5903
5904 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5905 int inputid = ctxt->input->id;
5906
5907 SKIP(9);
5908 if (SKIP_BLANKS == 0) {
5909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5910 "Space required after '<!ATTLIST'\n");
5911 }
5912 elemName = xmlParseName(ctxt);
5913 if (elemName == NULL) {
5914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 "ATTLIST: no name for Element\n");
5916 return;
5917 }
5918 SKIP_BLANKS;
5919 GROW;
5920 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5921 int type;
5922 int def;
5923 xmlChar *defaultValue = NULL;
5924
5925 GROW;
5926 tree = NULL;
5927 attrName = xmlParseName(ctxt);
5928 if (attrName == NULL) {
5929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 "ATTLIST: no name for Attribute\n");
5931 break;
5932 }
5933 GROW;
5934 if (SKIP_BLANKS == 0) {
5935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5936 "Space required after the attribute name\n");
5937 break;
5938 }
5939
5940 type = xmlParseAttributeType(ctxt, &tree);
5941 if (type <= 0) {
5942 break;
5943 }
5944
5945 GROW;
5946 if (SKIP_BLANKS == 0) {
5947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 "Space required after the attribute type\n");
5949 if (tree != NULL)
5950 xmlFreeEnumeration(tree);
5951 break;
5952 }
5953
5954 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 if (def <= 0) {
5956 if (defaultValue != NULL)
5957 xmlFree(defaultValue);
5958 if (tree != NULL)
5959 xmlFreeEnumeration(tree);
5960 break;
5961 }
5962 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5964
5965 GROW;
5966 if (RAW != '>') {
5967 if (SKIP_BLANKS == 0) {
5968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5969 "Space required after the attribute default value\n");
5970 if (defaultValue != NULL)
5971 xmlFree(defaultValue);
5972 if (tree != NULL)
5973 xmlFreeEnumeration(tree);
5974 break;
5975 }
5976 }
5977 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 (ctxt->sax->attributeDecl != NULL))
5979 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 type, def, defaultValue, tree);
5981 else if (tree != NULL)
5982 xmlFreeEnumeration(tree);
5983
5984 if ((ctxt->sax2) && (defaultValue != NULL) &&
5985 (def != XML_ATTRIBUTE_IMPLIED) &&
5986 (def != XML_ATTRIBUTE_REQUIRED)) {
5987 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 }
5989 if (ctxt->sax2) {
5990 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 }
5992 if (defaultValue != NULL)
5993 xmlFree(defaultValue);
5994 GROW;
5995 }
5996 if (RAW == '>') {
5997 if (inputid != ctxt->input->id) {
5998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999 "Attribute list declaration doesn't start and"
6000 " stop in the same entity\n");
6001 }
6002 NEXT;
6003 }
6004 }
6005}
6006
6007/**
6008 * xmlParseElementMixedContentDecl:
6009 * @ctxt: an XML parser context
6010 * @inputchk: the input used for the current entity, needed for boundary checks
6011 *
6012 * parse the declaration for a Mixed Element content
6013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6014 *
6015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016 * '(' S? '#PCDATA' S? ')'
6017 *
6018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019 *
6020 * [ VC: No Duplicate Types ]
6021 * The same name must not appear more than once in a single
6022 * mixed-content declaration.
6023 *
6024 * returns: the list of the xmlElementContentPtr describing the element choices
6025 */
6026xmlElementContentPtr
6027xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6028 xmlElementContentPtr ret = NULL, cur = NULL, n;
6029 const xmlChar *elem = NULL;
6030
6031 GROW;
6032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6033 SKIP(7);
6034 SKIP_BLANKS;
6035 SHRINK;
6036 if (RAW == ')') {
6037 if (ctxt->input->id != inputchk) {
6038 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039 "Element content declaration doesn't start and"
6040 " stop in the same entity\n");
6041 }
6042 NEXT;
6043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6044 if (ret == NULL)
6045 return(NULL);
6046 if (RAW == '*') {
6047 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 NEXT;
6049 }
6050 return(ret);
6051 }
6052 if ((RAW == '(') || (RAW == '|')) {
6053 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6054 if (ret == NULL) return(NULL);
6055 }
6056 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6057 NEXT;
6058 if (elem == NULL) {
6059 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6060 if (ret == NULL) return(NULL);
6061 ret->c1 = cur;
6062 if (cur != NULL)
6063 cur->parent = ret;
6064 cur = ret;
6065 } else {
6066 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6067 if (n == NULL) return(NULL);
6068 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6069 if (n->c1 != NULL)
6070 n->c1->parent = n;
6071 cur->c2 = n;
6072 if (n != NULL)
6073 n->parent = cur;
6074 cur = n;
6075 }
6076 SKIP_BLANKS;
6077 elem = xmlParseName(ctxt);
6078 if (elem == NULL) {
6079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6080 "xmlParseElementMixedContentDecl : Name expected\n");
6081 xmlFreeDocElementContent(ctxt->myDoc, ret);
6082 return(NULL);
6083 }
6084 SKIP_BLANKS;
6085 GROW;
6086 }
6087 if ((RAW == ')') && (NXT(1) == '*')) {
6088 if (elem != NULL) {
6089 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6090 XML_ELEMENT_CONTENT_ELEMENT);
6091 if (cur->c2 != NULL)
6092 cur->c2->parent = cur;
6093 }
6094 if (ret != NULL)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 if (ctxt->input->id != inputchk) {
6097 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098 "Element content declaration doesn't start and"
6099 " stop in the same entity\n");
6100 }
6101 SKIP(2);
6102 } else {
6103 xmlFreeDocElementContent(ctxt->myDoc, ret);
6104 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6105 return(NULL);
6106 }
6107
6108 } else {
6109 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6110 }
6111 return(ret);
6112}
6113
/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * parse the declaration for an element children content model (one
 * parenthesized choice or sequence group, including its optional
 * occurrence indicator). The function recurses for nested groups.
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * The recursion is refused beyond a depth of 128, or 2048 when the
 * XML_PARSE_HUGE option is set.
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy, or NULL when an error is detected by this level.
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree built for this group
     * cur:  most recently created operator node (or the first child while
     *       no separator has been seen)
     * last: most recently parsed child not yet attached to the tree
     * op:   operator node being created for the current separator
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    /* Separator used by this group (',' or '|'), 0 until the first one */
    xmlChar type = 0;

    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        /*
         * NOTE(review): the result is not checked here, so a failed nested
         * group leaves cur and ret NULL and parsing goes on - confirm that
         * this is intended.
         */
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        SKIP_BLANKS;
        GROW;
    } else {
        /* First child is a plain Name with an optional occurrence mark */
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    SHRINK;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Each loop we parse one separator and one element.
         */
        if (RAW == ',') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name | Name , Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /*
                 * Later separators: hang the new operator on the right of
                 * the previous one and move the pending child below it
                 */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else if (RAW == '|') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name , Name | Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /* Same chaining as in the ',' case above */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else {
            /* Neither ')' nor a separator: the group is malformed */
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            if ((last != NULL) && (last != ret))
                xmlFreeDocElementContent(ctxt->myDoc, last);
            if (ret != NULL)
                xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;
        if (RAW == '(') {
            int inputid = ctxt->input->id;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            /*
             * NOTE(review): as for the first child, a NULL result is not
             * checked before the loop goes on - confirm that this is
             * intended.
             */
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }
    /* Attach the still pending child to the right of the last operator */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        if (last != NULL)
            last->parent = cur;
    }
    if (ctxt->input->id != inputchk) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                       "Element content declaration doesn't start and stop in"
                       " the same entity\n");
    }
    /* Step over the character that ended the loop, normally ')' */
    NEXT;
    /* Occurrence indicator applying to the whole group */
    if (RAW == '?') {
        if (ret != NULL) {
            /* '?' on a root already marked '+' or '*' is stored as '*' */
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            /* '+' on a root already marked '?' or '*' is stored as '*' */
            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             */
            /*
             * NOTE(review): unlike the '*' branch, cur is not reset to ret
             * here, so the walk starts at the last operator node created by
             * the loop above - confirm whether alternatives closer to the
             * root are meant to be left untouched.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}
6411
6412/**
6413 * xmlParseElementChildrenContentDecl:
6414 * @ctxt: an XML parser context
6415 * @inputchk: the input used for the current entity, needed for boundary checks
6416 *
6417 * parse the declaration for a Mixed Element content
6418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419 *
6420 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421 *
6422 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423 *
6424 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425 *
6426 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427 *
6428 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429 * TODO Parameter-entity replacement text must be properly nested
6430 * with parenthesized groups. That is to say, if either of the
6431 * opening or closing parentheses in a choice, seq, or Mixed
6432 * construct is contained in the replacement text for a parameter
6433 * entity, both must be contained in the same replacement text. For
6434 * interoperability, if a parameter-entity reference appears in a
6435 * choice, seq, or Mixed construct, its replacement text should not
6436 * be empty, and neither the first nor last non-blank character of
6437 * the replacement text should be a connector (| or ,).
6438 *
6439 * Returns the tree of xmlElementContentPtr describing the element
6440 * hierarchy.
6441 */
6442xmlElementContentPtr
6443xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444 /* stub left for API/ABI compat */
6445 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446}
6447
6448/**
6449 * xmlParseElementContentDecl:
6450 * @ctxt: an XML parser context
6451 * @name: the name of the element being defined.
6452 * @result: the Element Content pointer will be stored here if any
6453 *
6454 * parse the declaration for an Element content either Mixed or Children,
6455 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6456 *
6457 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458 *
6459 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460 */
6461
6462int
6463xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6464 xmlElementContentPtr *result) {
6465
6466 xmlElementContentPtr tree = NULL;
6467 int inputid = ctxt->input->id;
6468 int res;
6469
6470 *result = NULL;
6471
6472 if (RAW != '(') {
6473 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6474 "xmlParseElementContentDecl : %s '(' expected\n", name);
6475 return(-1);
6476 }
6477 NEXT;
6478 GROW;
6479 if (ctxt->instate == XML_PARSER_EOF)
6480 return(-1);
6481 SKIP_BLANKS;
6482 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6483 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6484 res = XML_ELEMENT_TYPE_MIXED;
6485 } else {
6486 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6487 res = XML_ELEMENT_TYPE_ELEMENT;
6488 }
6489 SKIP_BLANKS;
6490 *result = tree;
6491 return(res);
6492}
6493
6494/**
6495 * xmlParseElementDecl:
6496 * @ctxt: an XML parser context
6497 *
6498 * parse an Element declaration.
6499 *
6500 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501 *
6502 * [ VC: Unique Element Type Declaration ]
6503 * No element type may be declared more than once
6504 *
6505 * Returns the type of the element, or -1 in case of error
6506 */
6507int
6508xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6509 const xmlChar *name;
6510 int ret = -1;
6511 xmlElementContentPtr content = NULL;
6512
6513 /* GROW; done in the caller */
6514 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6515 int inputid = ctxt->input->id;
6516
6517 SKIP(9);
6518 if (SKIP_BLANKS == 0) {
6519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 "Space required after 'ELEMENT'\n");
6521 return(-1);
6522 }
6523 name = xmlParseName(ctxt);
6524 if (name == NULL) {
6525 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 "xmlParseElementDecl: no name for Element\n");
6527 return(-1);
6528 }
6529 if (SKIP_BLANKS == 0) {
6530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 "Space required after the element name\n");
6532 }
6533 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6534 SKIP(5);
6535 /*
6536 * Element must always be empty.
6537 */
6538 ret = XML_ELEMENT_TYPE_EMPTY;
6539 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 (NXT(2) == 'Y')) {
6541 SKIP(3);
6542 /*
6543 * Element is a generic container.
6544 */
6545 ret = XML_ELEMENT_TYPE_ANY;
6546 } else if (RAW == '(') {
6547 ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 } else {
6549 /*
6550 * [ WFC: PEs in Internal Subset ] error handling.
6551 */
6552 if ((RAW == '%') && (ctxt->external == 0) &&
6553 (ctxt->inputNr == 1)) {
6554 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6555 "PEReference: forbidden within markup decl in internal subset\n");
6556 } else {
6557 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559 }
6560 return(-1);
6561 }
6562
6563 SKIP_BLANKS;
6564
6565 if (RAW != '>') {
6566 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6567 if (content != NULL) {
6568 xmlFreeDocElementContent(ctxt->myDoc, content);
6569 }
6570 } else {
6571 if (inputid != ctxt->input->id) {
6572 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6573 "Element declaration doesn't start and stop in"
6574 " the same entity\n");
6575 }
6576
6577 NEXT;
6578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6579 (ctxt->sax->elementDecl != NULL)) {
6580 if (content != NULL)
6581 content->parent = NULL;
6582 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 content);
6584 if ((content != NULL) && (content->parent == NULL)) {
6585 /*
6586 * this is a trick: if xmlAddElementDecl is called,
6587 * instead of copying the full tree it is plugged directly
6588 * if called from the parser. Avoid duplicating the
6589 * interfaces or change the API/ABI
6590 */
6591 xmlFreeDocElementContent(ctxt->myDoc, content);
6592 }
6593 } else if (content != NULL) {
6594 xmlFreeDocElementContent(ctxt->myDoc, content);
6595 }
6596 }
6597 }
6598 return(ret);
6599}
6600
6601/**
6602 * xmlParseConditionalSections
6603 * @ctxt: an XML parser context
6604 *
6605 * [61] conditionalSect ::= includeSect | ignoreSect
6606 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6607 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610 */
6611
6612static void
6613xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6614 int id = ctxt->input->id;
6615
6616 SKIP(3);
6617 SKIP_BLANKS;
6618 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6619 SKIP(7);
6620 SKIP_BLANKS;
6621 if (RAW != '[') {
6622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6623 xmlHaltParser(ctxt);
6624 return;
6625 } else {
6626 if (ctxt->input->id != id) {
6627 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 "All markup of the conditional section is not"
6629 " in the same entity\n");
6630 }
6631 NEXT;
6632 }
6633 if (xmlParserDebugEntities) {
6634 if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 xmlGenericError(xmlGenericErrorContext,
6636 "%s(%d): ", ctxt->input->filename,
6637 ctxt->input->line);
6638 xmlGenericError(xmlGenericErrorContext,
6639 "Entering INCLUDE Conditional Section\n");
6640 }
6641
6642 SKIP_BLANKS;
6643 GROW;
6644 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6646 const xmlChar *check = CUR_PTR;
6647 unsigned int cons = ctxt->input->consumed;
6648
6649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 xmlParseConditionalSections(ctxt);
6651 } else
6652 xmlParseMarkupDecl(ctxt);
6653
6654 SKIP_BLANKS;
6655 GROW;
6656
6657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6658 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6659 xmlHaltParser(ctxt);
6660 break;
6661 }
6662 }
6663 if (xmlParserDebugEntities) {
6664 if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 xmlGenericError(xmlGenericErrorContext,
6666 "%s(%d): ", ctxt->input->filename,
6667 ctxt->input->line);
6668 xmlGenericError(xmlGenericErrorContext,
6669 "Leaving INCLUDE Conditional Section\n");
6670 }
6671
6672 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6673 int state;
6674 xmlParserInputState instate;
6675 int depth = 0;
6676
6677 SKIP(6);
6678 SKIP_BLANKS;
6679 if (RAW != '[') {
6680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6681 xmlHaltParser(ctxt);
6682 return;
6683 } else {
6684 if (ctxt->input->id != id) {
6685 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 "All markup of the conditional section is not"
6687 " in the same entity\n");
6688 }
6689 NEXT;
6690 }
6691 if (xmlParserDebugEntities) {
6692 if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 xmlGenericError(xmlGenericErrorContext,
6694 "%s(%d): ", ctxt->input->filename,
6695 ctxt->input->line);
6696 xmlGenericError(xmlGenericErrorContext,
6697 "Entering IGNORE Conditional Section\n");
6698 }
6699
6700 /*
6701 * Parse up to the end of the conditional section
6702 * But disable SAX event generating DTD building in the meantime
6703 */
6704 state = ctxt->disableSAX;
6705 instate = ctxt->instate;
6706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6707 ctxt->instate = XML_PARSER_IGNORE;
6708
6709 while (((depth >= 0) && (RAW != 0)) &&
6710 (ctxt->instate != XML_PARSER_EOF)) {
6711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 depth++;
6713 SKIP(3);
6714 continue;
6715 }
6716 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 if (--depth >= 0) SKIP(3);
6718 continue;
6719 }
6720 NEXT;
6721 continue;
6722 }
6723
6724 ctxt->disableSAX = state;
6725 ctxt->instate = instate;
6726
6727 if (xmlParserDebugEntities) {
6728 if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 xmlGenericError(xmlGenericErrorContext,
6730 "%s(%d): ", ctxt->input->filename,
6731 ctxt->input->line);
6732 xmlGenericError(xmlGenericErrorContext,
6733 "Leaving IGNORE Conditional Section\n");
6734 }
6735
6736 } else {
6737 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6738 xmlHaltParser(ctxt);
6739 return;
6740 }
6741
6742 if (RAW == 0)
6743 SHRINK;
6744
6745 if (RAW == 0) {
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6747 } else {
6748 if (ctxt->input->id != id) {
6749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "All markup of the conditional section is not in"
6751 " the same entity\n");
6752 }
6753 if ((ctxt-> instate != XML_PARSER_EOF) &&
6754 ((ctxt->input->cur + 3) <= ctxt->input->end))
6755 SKIP(3);
6756 }
6757}
6758
6759/**
6760 * xmlParseMarkupDecl:
6761 * @ctxt: an XML parser context
6762 *
6763 * parse Markup declarations
6764 *
6765 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766 * NotationDecl | PI | Comment
6767 *
6768 * [ VC: Proper Declaration/PE Nesting ]
6769 * Parameter-entity replacement text must be properly nested with
6770 * markup declarations. That is to say, if either the first character
6771 * or the last character of a markup declaration (markupdecl above) is
6772 * contained in the replacement text for a parameter-entity reference,
6773 * both must be contained in the same replacement text.
6774 *
6775 * [ WFC: PEs in Internal Subset ]
6776 * In the internal DTD subset, parameter-entity references can occur
6777 * only where markup declarations can occur, not within markup declarations.
6778 * (This does not apply to references that occur in external parameter
6779 * entities or to the external subset.)
6780 */
6781void
6782xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783 GROW;
6784 if (CUR == '<') {
6785 if (NXT(1) == '!') {
6786 switch (NXT(2)) {
6787 case 'E':
6788 if (NXT(3) == 'L')
6789 xmlParseElementDecl(ctxt);
6790 else if (NXT(3) == 'N')
6791 xmlParseEntityDecl(ctxt);
6792 break;
6793 case 'A':
6794 xmlParseAttributeListDecl(ctxt);
6795 break;
6796 case 'N':
6797 xmlParseNotationDecl(ctxt);
6798 break;
6799 case '-':
6800 xmlParseComment(ctxt);
6801 break;
6802 default:
6803 /* there is an error but it will be detected later */
6804 break;
6805 }
6806 } else if (NXT(1) == '?') {
6807 xmlParsePI(ctxt);
6808 }
6809 }
6810
6811 /*
6812 * detect requirement to exit there and act accordingly
6813 * and avoid having instate overriden later on
6814 */
6815 if (ctxt->instate == XML_PARSER_EOF)
6816 return;
6817
6818 /*
6819 * Conditional sections are allowed from entities included
6820 * by PE References in the internal subset.
6821 */
6822 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 }
6826 }
6827
6828 ctxt->instate = XML_PARSER_DTD;
6829}
6830
6831/**
6832 * xmlParseTextDecl:
6833 * @ctxt: an XML parser context
6834 *
6835 * parse an XML declaration header for external entities
6836 *
6837 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6838 */
6839
6840void
6841xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842 xmlChar *version;
6843 const xmlChar *encoding;
6844
6845 /*
6846 * We know that '<?xml' is here.
6847 */
6848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6849 SKIP(5);
6850 } else {
6851 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6852 return;
6853 }
6854
6855 if (SKIP_BLANKS == 0) {
6856 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 "Space needed after '<?xml'\n");
6858 }
6859
6860 /*
6861 * We may have the VersionInfo here.
6862 */
6863 version = xmlParseVersionInfo(ctxt);
6864 if (version == NULL)
6865 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6866 else {
6867 if (SKIP_BLANKS == 0) {
6868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 "Space needed here\n");
6870 }
6871 }
6872 ctxt->input->version = version;
6873
6874 /*
6875 * We must have the encoding declaration
6876 */
6877 encoding = xmlParseEncodingDecl(ctxt);
6878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 /*
6880 * The XML REC instructs us to stop parsing right here
6881 */
6882 return;
6883 }
6884 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 "Missing encoding in text declaration\n");
6887 }
6888
6889 SKIP_BLANKS;
6890 if ((RAW == '?') && (NXT(1) == '>')) {
6891 SKIP(2);
6892 } else if (RAW == '>') {
6893 /* Deprecated old WD ... */
6894 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6895 NEXT;
6896 } else {
6897 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6898 MOVETO_ENDTAG(CUR_PTR);
6899 NEXT;
6900 }
6901}
6902
6903/**
6904 * xmlParseExternalSubset:
6905 * @ctxt: an XML parser context
6906 * @ExternalID: the external identifier
6907 * @SystemID: the system identifier (or URL)
6908 *
6909 * parse Markup declarations from an external subset
6910 *
6911 * [30] extSubset ::= textDecl? extSubsetDecl
6912 *
6913 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914 */
6915void
6916xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917 const xmlChar *SystemID) {
6918 xmlDetectSAX2(ctxt);
6919 GROW;
6920
6921 if ((ctxt->encoding == NULL) &&
6922 (ctxt->input->end - ctxt->input->cur >= 4)) {
6923 xmlChar start[4];
6924 xmlCharEncoding enc;
6925
6926 start[0] = RAW;
6927 start[1] = NXT(1);
6928 start[2] = NXT(2);
6929 start[3] = NXT(3);
6930 enc = xmlDetectCharEncoding(start, 4);
6931 if (enc != XML_CHAR_ENCODING_NONE)
6932 xmlSwitchEncoding(ctxt, enc);
6933 }
6934
6935 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6936 xmlParseTextDecl(ctxt);
6937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 /*
6939 * The XML REC instructs us to stop parsing right here
6940 */
6941 xmlHaltParser(ctxt);
6942 return;
6943 }
6944 }
6945 if (ctxt->myDoc == NULL) {
6946 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6947 if (ctxt->myDoc == NULL) {
6948 xmlErrMemory(ctxt, "New Doc failed");
6949 return;
6950 }
6951 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6952 }
6953 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955
6956 ctxt->instate = XML_PARSER_DTD;
6957 ctxt->external = 1;
6958 SKIP_BLANKS;
6959 while (((RAW == '<') && (NXT(1) == '?')) ||
6960 ((RAW == '<') && (NXT(1) == '!')) ||
6961 (RAW == '%')) {
6962 const xmlChar *check = CUR_PTR;
6963 unsigned int cons = ctxt->input->consumed;
6964
6965 GROW;
6966 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 xmlParseConditionalSections(ctxt);
6968 } else
6969 xmlParseMarkupDecl(ctxt);
6970 SKIP_BLANKS;
6971
6972 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6973 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6974 break;
6975 }
6976 }
6977
6978 if (RAW != 0) {
6979 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6980 }
6981
6982}
6983
6984/**
6985 * xmlParseReference:
6986 * @ctxt: an XML parser context
6987 *
6988 * parse and handle entity references in content, depending on the SAX
6989 * interface, this may end-up in a call to character() if this is a
6990 * CharRef, a predefined entity, if there is no reference() callback.
6991 * or if the parser was asked to switch to that mode.
6992 *
6993 * [67] Reference ::= EntityRef | CharRef
6994 */
6995void
6996xmlParseReference(xmlParserCtxtPtr ctxt) {
6997 xmlEntityPtr ent;
6998 xmlChar *val;
6999 int was_checked;
7000 xmlNodePtr list = NULL;
7001 xmlParserErrors ret = XML_ERR_OK;
7002
7003
7004 if (RAW != '&')
7005 return;
7006
7007 /*
7008 * Simple case of a CharRef
7009 */
7010 if (NXT(1) == '#') {
7011 int i = 0;
7012 xmlChar out[10];
7013 int hex = NXT(2);
7014 int value = xmlParseCharRef(ctxt);
7015
7016 if (value == 0)
7017 return;
7018 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7019 /*
7020 * So we are using non-UTF-8 buffers
7021 * Check that the char fit on 8bits, if not
7022 * generate a CharRef.
7023 */
7024 if (value <= 0xFF) {
7025 out[0] = value;
7026 out[1] = 0;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7028 (!ctxt->disableSAX))
7029 ctxt->sax->characters(ctxt->userData, out, 1);
7030 } else {
7031 if ((hex == 'x') || (hex == 'X'))
7032 snprintf((char *)out, sizeof(out), "#x%X", value);
7033 else
7034 snprintf((char *)out, sizeof(out), "#%d", value);
7035 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7036 (!ctxt->disableSAX))
7037 ctxt->sax->reference(ctxt->userData, out);
7038 }
7039 } else {
7040 /*
7041 * Just encode the value in UTF-8
7042 */
7043 COPY_BUF(0 ,out, i, value);
7044 out[i] = 0;
7045 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7046 (!ctxt->disableSAX))
7047 ctxt->sax->characters(ctxt->userData, out, i);
7048 }
7049 return;
7050 }
7051
7052 /*
7053 * We are seeing an entity reference
7054 */
7055 ent = xmlParseEntityRef(ctxt);
7056 if (ent == NULL) return;
7057 if (!ctxt->wellFormed)
7058 return;
7059 was_checked = ent->checked;
7060
7061 /* special case of predefined entities */
7062 if ((ent->name == NULL) ||
7063 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7064 val = ent->content;
7065 if (val == NULL) return;
7066 /*
7067 * inline the entity.
7068 */
7069 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7070 (!ctxt->disableSAX))
7071 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7072 return;
7073 }
7074
7075 /*
7076 * The first reference to the entity trigger a parsing phase
7077 * where the ent->children is filled with the result from
7078 * the parsing.
7079 * Note: external parsed entities will not be loaded, it is not
7080 * required for a non-validating parser, unless the parsing option
7081 * of validating, or substituting entities were given. Doing so is
7082 * far more secure as the parser will only process data coming from
7083 * the document entity by default.
7084 */
7085 if (((ent->checked == 0) ||
7086 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7087 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7088 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7089 unsigned long oldnbent = ctxt->nbentities;
7090
7091 /*
7092 * This is a bit hackish but this seems the best
7093 * way to make sure both SAX and DOM entity support
7094 * behaves okay.
7095 */
7096 void *user_data;
7097 if (ctxt->userData == ctxt)
7098 user_data = NULL;
7099 else
7100 user_data = ctxt->userData;
7101
7102 /*
7103 * Check that this entity is well formed
7104 * 4.3.2: An internal general parsed entity is well-formed
7105 * if its replacement text matches the production labeled
7106 * content.
7107 */
7108 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7109 ctxt->depth++;
7110 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7111 user_data, &list);
7112 ctxt->depth--;
7113
7114 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7115 ctxt->depth++;
7116 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7117 user_data, ctxt->depth, ent->URI,
7118 ent->ExternalID, &list);
7119 ctxt->depth--;
7120 } else {
7121 ret = XML_ERR_ENTITY_PE_INTERNAL;
7122 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7123 "invalid entity type found\n", NULL);
7124 }
7125
7126 /*
7127 * Store the number of entities needing parsing for this entity
7128 * content and do checkings
7129 */
7130 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7131 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7132 ent->checked |= 1;
7133 if (ret == XML_ERR_ENTITY_LOOP) {
7134 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7135 xmlFreeNodeList(list);
7136 return;
7137 }
7138 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7139 xmlFreeNodeList(list);
7140 return;
7141 }
7142
7143 if ((ret == XML_ERR_OK) && (list != NULL)) {
7144 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7145 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7146 (ent->children == NULL)) {
7147 ent->children = list;
7148 if (ctxt->replaceEntities) {
7149 /*
7150 * Prune it directly in the generated document
7151 * except for single text nodes.
7152 */
7153 if (((list->type == XML_TEXT_NODE) &&
7154 (list->next == NULL)) ||
7155 (ctxt->parseMode == XML_PARSE_READER)) {
7156 list->parent = (xmlNodePtr) ent;
7157 list = NULL;
7158 ent->owner = 1;
7159 } else {
7160 ent->owner = 0;
7161 while (list != NULL) {
7162 list->parent = (xmlNodePtr) ctxt->node;
7163 list->doc = ctxt->myDoc;
7164 if (list->next == NULL)
7165 ent->last = list;
7166 list = list->next;
7167 }
7168 list = ent->children;
7169#ifdef LIBXML_LEGACY_ENABLED
7170 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7171 xmlAddEntityReference(ent, list, NULL);
7172#endif /* LIBXML_LEGACY_ENABLED */
7173 }
7174 } else {
7175 ent->owner = 1;
7176 while (list != NULL) {
7177 list->parent = (xmlNodePtr) ent;
7178 xmlSetTreeDoc(list, ent->doc);
7179 if (list->next == NULL)
7180 ent->last = list;
7181 list = list->next;
7182 }
7183 }
7184 } else {
7185 xmlFreeNodeList(list);
7186 list = NULL;
7187 }
7188 } else if ((ret != XML_ERR_OK) &&
7189 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7190 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7191 "Entity '%s' failed to parse\n", ent->name);
7192 if (ent->content != NULL)
7193 ent->content[0] = 0;
7194 xmlParserEntityCheck(ctxt, 0, ent, 0);
7195 } else if (list != NULL) {
7196 xmlFreeNodeList(list);
7197 list = NULL;
7198 }
7199 if (ent->checked == 0)
7200 ent->checked = 2;
7201
7202 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7203 was_checked = 0;
7204 } else if (ent->checked != 1) {
7205 ctxt->nbentities += ent->checked / 2;
7206 }
7207
7208 /*
7209 * Now that the entity content has been gathered
7210 * provide it to the application, this can take different forms based
7211 * on the parsing modes.
7212 */
7213 if (ent->children == NULL) {
7214 /*
7215 * Probably running in SAX mode and the callbacks don't
7216 * build the entity content. So unless we already went
7217 * though parsing for first checking go though the entity
7218 * content to generate callbacks associated to the entity
7219 */
7220 if (was_checked != 0) {
7221 void *user_data;
7222 /*
7223 * This is a bit hackish but this seems the best
7224 * way to make sure both SAX and DOM entity support
7225 * behaves okay.
7226 */
7227 if (ctxt->userData == ctxt)
7228 user_data = NULL;
7229 else
7230 user_data = ctxt->userData;
7231
7232 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7233 ctxt->depth++;
7234 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7235 ent->content, user_data, NULL);
7236 ctxt->depth--;
7237 } else if (ent->etype ==
7238 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7239 ctxt->depth++;
7240 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7241 ctxt->sax, user_data, ctxt->depth,
7242 ent->URI, ent->ExternalID, NULL);
7243 ctxt->depth--;
7244 } else {
7245 ret = XML_ERR_ENTITY_PE_INTERNAL;
7246 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7247 "invalid entity type found\n", NULL);
7248 }
7249 if (ret == XML_ERR_ENTITY_LOOP) {
7250 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7251 return;
7252 }
7253 }
7254 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7255 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7256 /*
7257 * Entity reference callback comes second, it's somewhat
7258 * superfluous but a compatibility to historical behaviour
7259 */
7260 ctxt->sax->reference(ctxt->userData, ent->name);
7261 }
7262 return;
7263 }
7264
7265 /*
7266 * If we didn't get any children for the entity being built
7267 */
7268 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7269 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7270 /*
7271 * Create a node.
7272 */
7273 ctxt->sax->reference(ctxt->userData, ent->name);
7274 return;
7275 }
7276
7277 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7278 /*
7279 * There is a problem on the handling of _private for entities
7280 * (bug 155816): Should we copy the content of the field from
7281 * the entity (possibly overwriting some value set by the user
7282 * when a copy is created), should we leave it alone, or should
7283 * we try to take care of different situations? The problem
7284 * is exacerbated by the usage of this field by the xmlReader.
7285 * To fix this bug, we look at _private on the created node
7286 * and, if it's NULL, we copy in whatever was in the entity.
7287 * If it's not NULL we leave it alone. This is somewhat of a
7288 * hack - maybe we should have further tests to determine
7289 * what to do.
7290 */
7291 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7292 /*
7293 * Seems we are generating the DOM content, do
7294 * a simple tree copy for all references except the first
7295 * In the first occurrence list contains the replacement.
7296 */
7297 if (((list == NULL) && (ent->owner == 0)) ||
7298 (ctxt->parseMode == XML_PARSE_READER)) {
7299 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7300
7301 /*
7302 * We are copying here, make sure there is no abuse
7303 */
7304 ctxt->sizeentcopy += ent->length + 5;
7305 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7306 return;
7307
7308 /*
7309 * when operating on a reader, the entities definitions
7310 * are always owning the entities subtree.
7311 if (ctxt->parseMode == XML_PARSE_READER)
7312 ent->owner = 1;
7313 */
7314
7315 cur = ent->children;
7316 while (cur != NULL) {
7317 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7318 if (nw != NULL) {
7319 if (nw->_private == NULL)
7320 nw->_private = cur->_private;
7321 if (firstChild == NULL){
7322 firstChild = nw;
7323 }
7324 nw = xmlAddChild(ctxt->node, nw);
7325 }
7326 if (cur == ent->last) {
7327 /*
7328 * needed to detect some strange empty
7329 * node cases in the reader tests
7330 */
7331 if ((ctxt->parseMode == XML_PARSE_READER) &&
7332 (nw != NULL) &&
7333 (nw->type == XML_ELEMENT_NODE) &&
7334 (nw->children == NULL))
7335 nw->extra = 1;
7336
7337 break;
7338 }
7339 cur = cur->next;
7340 }
7341#ifdef LIBXML_LEGACY_ENABLED
7342 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7343 xmlAddEntityReference(ent, firstChild, nw);
7344#endif /* LIBXML_LEGACY_ENABLED */
7345 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7346 xmlNodePtr nw = NULL, cur, next, last,
7347 firstChild = NULL;
7348
7349 /*
7350 * We are copying here, make sure there is no abuse
7351 */
7352 ctxt->sizeentcopy += ent->length + 5;
7353 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7354 return;
7355
7356 /*
7357 * Copy the entity child list and make it the new
7358 * entity child list. The goal is to make sure any
7359 * ID or REF referenced will be the one from the
7360 * document content and not the entity copy.
7361 */
7362 cur = ent->children;
7363 ent->children = NULL;
7364 last = ent->last;
7365 ent->last = NULL;
7366 while (cur != NULL) {
7367 next = cur->next;
7368 cur->next = NULL;
7369 cur->parent = NULL;
7370 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7371 if (nw != NULL) {
7372 if (nw->_private == NULL)
7373 nw->_private = cur->_private;
7374 if (firstChild == NULL){
7375 firstChild = cur;
7376 }
7377 xmlAddChild((xmlNodePtr) ent, nw);
7378 xmlAddChild(ctxt->node, cur);
7379 }
7380 if (cur == last)
7381 break;
7382 cur = next;
7383 }
7384 if (ent->owner == 0)
7385 ent->owner = 1;
7386#ifdef LIBXML_LEGACY_ENABLED
7387 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7388 xmlAddEntityReference(ent, firstChild, nw);
7389#endif /* LIBXML_LEGACY_ENABLED */
7390 } else {
7391 const xmlChar *nbktext;
7392
7393 /*
7394 * the name change is to avoid coalescing of the
7395 * node with a possible previous text one which
7396 * would make ent->children a dangling pointer
7397 */
7398 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7399 -1);
7400 if (ent->children->type == XML_TEXT_NODE)
7401 ent->children->name = nbktext;
7402 if ((ent->last != ent->children) &&
7403 (ent->last->type == XML_TEXT_NODE))
7404 ent->last->name = nbktext;
7405 xmlAddChildList(ctxt->node, ent->children);
7406 }
7407
7408 /*
7409 * This is to avoid a nasty side effect, see
7410 * characters() in SAX.c
7411 */
7412 ctxt->nodemem = 0;
7413 ctxt->nodelen = 0;
7414 return;
7415 }
7416 }
7417}
7418
7419/**
7420 * xmlParseEntityRef:
7421 * @ctxt: an XML parser context
7422 *
7423 * parse ENTITY references declarations
7424 *
7425 * [68] EntityRef ::= '&' Name ';'
7426 *
7427 * [ WFC: Entity Declared ]
7428 * In a document without any DTD, a document with only an internal DTD
7429 * subset which contains no parameter entity references, or a document
7430 * with "standalone='yes'", the Name given in the entity reference
7431 * must match that in an entity declaration, except that well-formed
7432 * documents need not declare any of the following entities: amp, lt,
7433 * gt, apos, quot. The declaration of a parameter entity must precede
7434 * any reference to it. Similarly, the declaration of a general entity
7435 * must precede any reference to it which appears in a default value in an
7436 * attribute-list declaration. Note that if entities are declared in the
7437 * external subset or in external parameter entities, a non-validating
7438 * processor is not obligated to read and process their declarations;
7439 * for such documents, the rule that an entity must be declared is a
7440 * well-formedness constraint only if standalone='yes'.
7441 *
7442 * [ WFC: Parsed Entity ]
7443 * An entity reference must not contain the name of an unparsed entity
7444 *
7445 * Returns the xmlEntityPtr if found, or NULL otherwise.
7446 */
7447xmlEntityPtr
7448xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7449 const xmlChar *name;
7450 xmlEntityPtr ent = NULL;
7451
7452 GROW;
7453 if (ctxt->instate == XML_PARSER_EOF)
7454 return(NULL);
7455
7456 if (RAW != '&')
7457 return(NULL);
7458 NEXT;
7459 name = xmlParseName(ctxt);
7460 if (name == NULL) {
7461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 "xmlParseEntityRef: no name\n");
7463 return(NULL);
7464 }
7465 if (RAW != ';') {
7466 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 return(NULL);
7468 }
7469 NEXT;
7470
7471 /*
7472 * Predefined entities override any extra definition
7473 */
7474 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475 ent = xmlGetPredefinedEntity(name);
7476 if (ent != NULL)
7477 return(ent);
7478 }
7479
7480 /*
7481 * Increase the number of entity references parsed
7482 */
7483 ctxt->nbentities++;
7484
7485 /*
7486 * Ask first SAX for entity resolution, otherwise try the
7487 * entities which may have stored in the parser context.
7488 */
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
7492 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7493 (ctxt->options & XML_PARSE_OLDSAX))
7494 ent = xmlGetPredefinedEntity(name);
7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->userData==ctxt)) {
7497 ent = xmlSAX2GetEntity(ctxt, name);
7498 }
7499 }
7500 if (ctxt->instate == XML_PARSER_EOF)
7501 return(NULL);
7502 /*
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an
7505 * internal DTD subset which contains no parameter entity
7506 * references, or a document with "standalone='yes'", the
7507 * Name given in the entity reference must match that in an
7508 * entity declaration, except that well-formed documents
7509 * need not declare any of the following entities: amp, lt,
7510 * gt, apos, quot.
7511 * The declaration of a parameter entity must precede any
7512 * reference to it.
7513 * Similarly, the declaration of a general entity must
7514 * precede any reference to it which appears in a default
7515 * value in an attribute-list declaration. Note that if
7516 * entities are declared in the external subset or in
7517 * external parameter entities, a non-validating processor
7518 * is not obligated to read and process their declarations;
7519 * for such documents, the rule that an entity must be
7520 * declared is a well-formedness constraint only if
7521 * standalone='yes'.
7522 */
7523 if (ent == NULL) {
7524 if ((ctxt->standalone == 1) ||
7525 ((ctxt->hasExternalSubset == 0) &&
7526 (ctxt->hasPErefs == 0))) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n", name);
7529 } else {
7530 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 if ((ctxt->inSubset == 0) &&
7533 (ctxt->sax != NULL) &&
7534 (ctxt->sax->reference != NULL)) {
7535 ctxt->sax->reference(ctxt->userData, name);
7536 }
7537 }
7538 xmlParserEntityCheck(ctxt, 0, ent, 0);
7539 ctxt->valid = 0;
7540 }
7541
7542 /*
7543 * [ WFC: Parsed Entity ]
7544 * An entity reference must not contain the name of an
7545 * unparsed entity
7546 */
7547 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 "Entity reference to unparsed entity %s\n", name);
7550 }
7551
7552 /*
7553 * [ WFC: No External Entity References ]
7554 * Attribute values cannot contain direct or indirect
7555 * entity references to external entities.
7556 */
7557 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 "Attribute references external entity '%s'\n", name);
7561 }
7562 /*
7563 * [ WFC: No < in Attribute Values ]
7564 * The replacement text of any entity referred to directly or
7565 * indirectly in an attribute value (other than "&lt;") must
7566 * not contain a <.
7567 */
7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7569 (ent != NULL) &&
7570 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7571 if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7573 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 "'<' in entity '%s' is not allowed in attributes values\n", name);
7575 }
7576 }
7577
7578 /*
7579 * Internal check, no parameter entities here ...
7580 */
7581 else {
7582 switch (ent->etype) {
7583 case XML_INTERNAL_PARAMETER_ENTITY:
7584 case XML_EXTERNAL_PARAMETER_ENTITY:
7585 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 "Attempt to reference the parameter entity '%s'\n",
7587 name);
7588 break;
7589 default:
7590 break;
7591 }
7592 }
7593
7594 /*
7595 * [ WFC: No Recursion ]
7596 * A parsed entity must not contain a recursive reference
7597 * to itself, either directly or indirectly.
7598 * Done somewhere else
7599 */
7600 return(ent);
7601}
7602
7603/**
7604 * xmlParseStringEntityRef:
7605 * @ctxt: an XML parser context
7606 * @str: a pointer to an index in the string
7607 *
7608 * parse ENTITY references declarations, but this version parses it from
7609 * a string value.
7610 *
7611 * [68] EntityRef ::= '&' Name ';'
7612 *
7613 * [ WFC: Entity Declared ]
7614 * In a document without any DTD, a document with only an internal DTD
7615 * subset which contains no parameter entity references, or a document
7616 * with "standalone='yes'", the Name given in the entity reference
7617 * must match that in an entity declaration, except that well-formed
7618 * documents need not declare any of the following entities: amp, lt,
7619 * gt, apos, quot. The declaration of a parameter entity must precede
7620 * any reference to it. Similarly, the declaration of a general entity
7621 * must precede any reference to it which appears in a default value in an
7622 * attribute-list declaration. Note that if entities are declared in the
7623 * external subset or in external parameter entities, a non-validating
7624 * processor is not obligated to read and process their declarations;
7625 * for such documents, the rule that an entity must be declared is a
7626 * well-formedness constraint only if standalone='yes'.
7627 *
7628 * [ WFC: Parsed Entity ]
7629 * An entity reference must not contain the name of an unparsed entity
7630 *
7631 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632 * is updated to the current location in the string.
7633 */
7634static xmlEntityPtr
7635xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636 xmlChar *name;
7637 const xmlChar *ptr;
7638 xmlChar cur;
7639 xmlEntityPtr ent = NULL;
7640
7641 if ((str == NULL) || (*str == NULL))
7642 return(NULL);
7643 ptr = *str;
7644 cur = *ptr;
7645 if (cur != '&')
7646 return(NULL);
7647
7648 ptr++;
7649 name = xmlParseStringName(ctxt, &ptr);
7650 if (name == NULL) {
7651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 "xmlParseStringEntityRef: no name\n");
7653 *str = ptr;
7654 return(NULL);
7655 }
7656 if (*ptr != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658 xmlFree(name);
7659 *str = ptr;
7660 return(NULL);
7661 }
7662 ptr++;
7663
7664
7665 /*
7666 * Predefined entities override any extra definition
7667 */
7668 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669 ent = xmlGetPredefinedEntity(name);
7670 if (ent != NULL) {
7671 xmlFree(name);
7672 *str = ptr;
7673 return(ent);
7674 }
7675 }
7676
7677 /*
7678 * Increate the number of entity references parsed
7679 */
7680 ctxt->nbentities++;
7681
7682 /*
7683 * Ask first SAX for entity resolution, otherwise try the
7684 * entities which may have stored in the parser context.
7685 */
7686 if (ctxt->sax != NULL) {
7687 if (ctxt->sax->getEntity != NULL)
7688 ent = ctxt->sax->getEntity(ctxt->userData, name);
7689 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 ent = xmlGetPredefinedEntity(name);
7691 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 ent = xmlSAX2GetEntity(ctxt, name);
7693 }
7694 }
7695 if (ctxt->instate == XML_PARSER_EOF) {
7696 xmlFree(name);
7697 return(NULL);
7698 }
7699
7700 /*
7701 * [ WFC: Entity Declared ]
7702 * In a document without any DTD, a document with only an
7703 * internal DTD subset which contains no parameter entity
7704 * references, or a document with "standalone='yes'", the
7705 * Name given in the entity reference must match that in an
7706 * entity declaration, except that well-formed documents
7707 * need not declare any of the following entities: amp, lt,
7708 * gt, apos, quot.
7709 * The declaration of a parameter entity must precede any
7710 * reference to it.
7711 * Similarly, the declaration of a general entity must
7712 * precede any reference to it which appears in a default
7713 * value in an attribute-list declaration. Note that if
7714 * entities are declared in the external subset or in
7715 * external parameter entities, a non-validating processor
7716 * is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be
7718 * declared is a well-formedness constraint only if
7719 * standalone='yes'.
7720 */
7721 if (ent == NULL) {
7722 if ((ctxt->standalone == 1) ||
7723 ((ctxt->hasExternalSubset == 0) &&
7724 (ctxt->hasPErefs == 0))) {
7725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 "Entity '%s' not defined\n", name);
7727 } else {
7728 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n",
7730 name);
7731 }
7732 xmlParserEntityCheck(ctxt, 0, ent, 0);
7733 /* TODO ? check regressions ctxt->valid = 0; */
7734 }
7735
7736 /*
7737 * [ WFC: Parsed Entity ]
7738 * An entity reference must not contain the name of an
7739 * unparsed entity
7740 */
7741 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 "Entity reference to unparsed entity %s\n", name);
7744 }
7745
7746 /*
7747 * [ WFC: No External Entity References ]
7748 * Attribute values cannot contain direct or indirect
7749 * entity references to external entities.
7750 */
7751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 "Attribute references external entity '%s'\n", name);
7755 }
7756 /*
7757 * [ WFC: No < in Attribute Values ]
7758 * The replacement text of any entity referred to directly or
7759 * indirectly in an attribute value (other than "&lt;") must
7760 * not contain a <.
7761 */
7762 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 (ent != NULL) && (ent->content != NULL) &&
7764 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7765 (xmlStrchr(ent->content, '<'))) {
7766 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767 "'<' in entity '%s' is not allowed in attributes values\n",
7768 name);
7769 }
7770
7771 /*
7772 * Internal check, no parameter entities here ...
7773 */
7774 else {
7775 switch (ent->etype) {
7776 case XML_INTERNAL_PARAMETER_ENTITY:
7777 case XML_EXTERNAL_PARAMETER_ENTITY:
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 "Attempt to reference the parameter entity '%s'\n",
7780 name);
7781 break;
7782 default:
7783 break;
7784 }
7785 }
7786
7787 /*
7788 * [ WFC: No Recursion ]
7789 * A parsed entity must not contain a recursive reference
7790 * to itself, either directly or indirectly.
7791 * Done somewhere else
7792 */
7793
7794 xmlFree(name);
7795 *str = ptr;
7796 return(ent);
7797}
7798
7799/**
7800 * xmlParsePEReference:
7801 * @ctxt: an XML parser context
7802 *
7803 * parse PEReference declarations
7804 * The entity content is handled directly by pushing it's content as
7805 * a new input stream.
7806 *
7807 * [69] PEReference ::= '%' Name ';'
7808 *
7809 * [ WFC: No Recursion ]
7810 * A parsed entity must not contain a recursive
7811 * reference to itself, either directly or indirectly.
7812 *
7813 * [ WFC: Entity Declared ]
7814 * In a document without any DTD, a document with only an internal DTD
7815 * subset which contains no parameter entity references, or a document
7816 * with "standalone='yes'", ... ... The declaration of a parameter
7817 * entity must precede any reference to it...
7818 *
7819 * [ VC: Entity Declared ]
7820 * In a document with an external subset or external parameter entities
7821 * with "standalone='no'", ... ... The declaration of a parameter entity
7822 * must precede any reference to it...
7823 *
7824 * [ WFC: In DTD ]
7825 * Parameter-entity references may only appear in the DTD.
7826 * NOTE: misleading but this is handled.
7827 */
7828void
7829xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830{
7831 const xmlChar *name;
7832 xmlEntityPtr entity = NULL;
7833 xmlParserInputPtr input;
7834
7835 if (RAW != '%')
7836 return;
7837 NEXT;
7838 name = xmlParseName(ctxt);
7839 if (name == NULL) {
7840 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7841 return;
7842 }
7843 if (xmlParserDebugEntities)
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PEReference: %s\n", name);
7846 if (RAW != ';') {
7847 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7848 return;
7849 }
7850
7851 NEXT;
7852
7853 /*
7854 * Increate the number of entity references parsed
7855 */
7856 ctxt->nbentities++;
7857
7858 /*
7859 * Request the entity from SAX
7860 */
7861 if ((ctxt->sax != NULL) &&
7862 (ctxt->sax->getParameterEntity != NULL))
7863 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864 if (ctxt->instate == XML_PARSER_EOF)
7865 return;
7866 if (entity == NULL) {
7867 /*
7868 * [ WFC: Entity Declared ]
7869 * In a document without any DTD, a document with only an
7870 * internal DTD subset which contains no parameter entity
7871 * references, or a document with "standalone='yes'", ...
7872 * ... The declaration of a parameter entity must precede
7873 * any reference to it...
7874 */
7875 if ((ctxt->standalone == 1) ||
7876 ((ctxt->hasExternalSubset == 0) &&
7877 (ctxt->hasPErefs == 0))) {
7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 "PEReference: %%%s; not found\n",
7880 name);
7881 } else {
7882 /*
7883 * [ VC: Entity Declared ]
7884 * In a document with an external subset or external
7885 * parameter entities with "standalone='no'", ...
7886 * ... The declaration of a parameter entity must
7887 * precede any reference to it...
7888 */
7889 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891 "PEReference: %%%s; not found\n",
7892 name, NULL);
7893 } else
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7896 name, NULL);
7897 ctxt->valid = 0;
7898 }
7899 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7900 } else {
7901 /*
7902 * Internal checking in case the entity quest barfed
7903 */
7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "Internal: %%%s; is not a parameter entity\n",
7908 name, NULL);
7909 } else {
7910 xmlChar start[4];
7911 xmlCharEncoding enc;
7912
7913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 (ctxt->replaceEntities == 0) &&
7919 (ctxt->validate == 0))
7920 return;
7921
7922 input = xmlNewEntityInputStream(ctxt, entity);
7923 if (xmlPushInput(ctxt, input) < 0) {
7924 xmlFreeInputStream(input);
7925 return;
7926 }
7927
7928 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929 /*
7930 * Get the 4 first bytes and decode the charset
7931 * if enc != XML_CHAR_ENCODING_NONE
7932 * plug some encoding conversion routines.
7933 * Note that, since we may have some non-UTF8
7934 * encoding (like UTF16, bug 135229), the 'length'
7935 * is not known, but we can calculate based upon
7936 * the amount of data in the buffer.
7937 */
7938 GROW
7939 if (ctxt->instate == XML_PARSER_EOF)
7940 return;
7941 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942 start[0] = RAW;
7943 start[1] = NXT(1);
7944 start[2] = NXT(2);
7945 start[3] = NXT(3);
7946 enc = xmlDetectCharEncoding(start, 4);
7947 if (enc != XML_CHAR_ENCODING_NONE) {
7948 xmlSwitchEncoding(ctxt, enc);
7949 }
7950 }
7951
7952 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953 (IS_BLANK_CH(NXT(5)))) {
7954 xmlParseTextDecl(ctxt);
7955 }
7956 }
7957 }
7958 }
7959 ctxt->hasPErefs = 1;
7960}
7961
7962/**
7963 * xmlLoadEntityContent:
7964 * @ctxt: an XML parser context
7965 * @entity: an unloaded system entity
7966 *
7967 * Load the original content of the given system entity from the
7968 * ExternalID/SystemID given. This is to be used for Included in Literal
7969 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970 *
7971 * Returns 0 in case of success and -1 in case of failure
7972 */
7973static int
7974xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975 xmlParserInputPtr input;
7976 xmlBufferPtr buf;
7977 int l, c;
7978 int count = 0;
7979
7980 if ((ctxt == NULL) || (entity == NULL) ||
7981 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 (entity->content != NULL)) {
7984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 "xmlLoadEntityContent parameter error");
7986 return(-1);
7987 }
7988
7989 if (xmlParserDebugEntities)
7990 xmlGenericError(xmlGenericErrorContext,
7991 "Reading %s entity content input\n", entity->name);
7992
7993 buf = xmlBufferCreate();
7994 if (buf == NULL) {
7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 "xmlLoadEntityContent parameter error");
7997 return(-1);
7998 }
7999
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (input == NULL) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent input error");
8004 xmlBufferFree(buf);
8005 return(-1);
8006 }
8007
8008 /*
8009 * Push the entity as the current input, read char by char
8010 * saving to the buffer until the end of the entity or an error
8011 */
8012 if (xmlPushInput(ctxt, input) < 0) {
8013 xmlBufferFree(buf);
8014 return(-1);
8015 }
8016
8017 GROW;
8018 c = CUR_CHAR(l);
8019 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020 (IS_CHAR(c))) {
8021 xmlBufferAdd(buf, ctxt->input->cur, l);
8022 if (count++ > XML_PARSER_CHUNK_SIZE) {
8023 count = 0;
8024 GROW;
8025 if (ctxt->instate == XML_PARSER_EOF) {
8026 xmlBufferFree(buf);
8027 return(-1);
8028 }
8029 }
8030 NEXTL(l);
8031 c = CUR_CHAR(l);
8032 if (c == 0) {
8033 count = 0;
8034 GROW;
8035 if (ctxt->instate == XML_PARSER_EOF) {
8036 xmlBufferFree(buf);
8037 return(-1);
8038 }
8039 c = CUR_CHAR(l);
8040 }
8041 }
8042
8043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044 xmlPopInput(ctxt);
8045 } else if (!IS_CHAR(c)) {
8046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047 "xmlLoadEntityContent: invalid char value %d\n",
8048 c);
8049 xmlBufferFree(buf);
8050 return(-1);
8051 }
8052 entity->content = buf->content;
8053 buf->content = NULL;
8054 xmlBufferFree(buf);
8055
8056 return(0);
8057}
8058
8059/**
8060 * xmlParseStringPEReference:
8061 * @ctxt: an XML parser context
8062 * @str: a pointer to an index in the string
8063 *
8064 * parse PEReference declarations
8065 *
8066 * [69] PEReference ::= '%' Name ';'
8067 *
8068 * [ WFC: No Recursion ]
8069 * A parsed entity must not contain a recursive
8070 * reference to itself, either directly or indirectly.
8071 *
8072 * [ WFC: Entity Declared ]
8073 * In a document without any DTD, a document with only an internal DTD
8074 * subset which contains no parameter entity references, or a document
8075 * with "standalone='yes'", ... ... The declaration of a parameter
8076 * entity must precede any reference to it...
8077 *
8078 * [ VC: Entity Declared ]
8079 * In a document with an external subset or external parameter entities
8080 * with "standalone='no'", ... ... The declaration of a parameter entity
8081 * must precede any reference to it...
8082 *
8083 * [ WFC: In DTD ]
8084 * Parameter-entity references may only appear in the DTD.
8085 * NOTE: misleading but this is handled.
8086 *
8087 * Returns the string of the entity content.
8088 * str is updated to the current value of the index
8089 */
8090static xmlEntityPtr
8091xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092 const xmlChar *ptr;
8093 xmlChar cur;
8094 xmlChar *name;
8095 xmlEntityPtr entity = NULL;
8096
8097 if ((str == NULL) || (*str == NULL)) return(NULL);
8098 ptr = *str;
8099 cur = *ptr;
8100 if (cur != '%')
8101 return(NULL);
8102 ptr++;
8103 name = xmlParseStringName(ctxt, &ptr);
8104 if (name == NULL) {
8105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 "xmlParseStringPEReference: no name\n");
8107 *str = ptr;
8108 return(NULL);
8109 }
8110 cur = *ptr;
8111 if (cur != ';') {
8112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 xmlFree(name);
8114 *str = ptr;
8115 return(NULL);
8116 }
8117 ptr++;
8118
8119 /*
8120 * Increate the number of entity references parsed
8121 */
8122 ctxt->nbentities++;
8123
8124 /*
8125 * Request the entity from SAX
8126 */
8127 if ((ctxt->sax != NULL) &&
8128 (ctxt->sax->getParameterEntity != NULL))
8129 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlFree(name);
8132 *str = ptr;
8133 return(NULL);
8134 }
8135 if (entity == NULL) {
8136 /*
8137 * [ WFC: Entity Declared ]
8138 * In a document without any DTD, a document with only an
8139 * internal DTD subset which contains no parameter entity
8140 * references, or a document with "standalone='yes'", ...
8141 * ... The declaration of a parameter entity must precede
8142 * any reference to it...
8143 */
8144 if ((ctxt->standalone == 1) ||
8145 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 "PEReference: %%%s; not found\n", name);
8148 } else {
8149 /*
8150 * [ VC: Entity Declared ]
8151 * In a document with an external subset or external
8152 * parameter entities with "standalone='no'", ...
8153 * ... The declaration of a parameter entity must
8154 * precede any reference to it...
8155 */
8156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 "PEReference: %%%s; not found\n",
8158 name, NULL);
8159 ctxt->valid = 0;
8160 }
8161 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8162 } else {
8163 /*
8164 * Internal checking in case the entity quest barfed
8165 */
8166 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 "%%%s; is not a parameter entity\n",
8170 name, NULL);
8171 }
8172 }
8173 ctxt->hasPErefs = 1;
8174 xmlFree(name);
8175 *str = ptr;
8176 return(entity);
8177}
8178
8179/**
8180 * xmlParseDocTypeDecl:
8181 * @ctxt: an XML parser context
8182 *
8183 * parse a DOCTYPE declaration
8184 *
8185 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8186 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187 *
8188 * [ VC: Root Element Type ]
8189 * The Name in the document type declaration must match the element
8190 * type of the root element.
8191 */
8192
8193void
8194xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8195 const xmlChar *name = NULL;
8196 xmlChar *ExternalID = NULL;
8197 xmlChar *URI = NULL;
8198
8199 /*
8200 * We know that '<!DOCTYPE' has been detected.
8201 */
8202 SKIP(9);
8203
8204 SKIP_BLANKS;
8205
8206 /*
8207 * Parse the DOCTYPE name.
8208 */
8209 name = xmlParseName(ctxt);
8210 if (name == NULL) {
8211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8213 }
8214 ctxt->intSubName = name;
8215
8216 SKIP_BLANKS;
8217
8218 /*
8219 * Check for SystemID and ExternalID
8220 */
8221 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222
8223 if ((URI != NULL) || (ExternalID != NULL)) {
8224 ctxt->hasExternalSubset = 1;
8225 }
8226 ctxt->extSubURI = URI;
8227 ctxt->extSubSystem = ExternalID;
8228
8229 SKIP_BLANKS;
8230
8231 /*
8232 * Create and update the internal subset.
8233 */
8234 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 (!ctxt->disableSAX))
8236 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8237 if (ctxt->instate == XML_PARSER_EOF)
8238 return;
8239
8240 /*
8241 * Is there any internal subset declarations ?
8242 * they are handled separately in xmlParseInternalSubset()
8243 */
8244 if (RAW == '[')
8245 return;
8246
8247 /*
8248 * We should be at the end of the DOCTYPE declaration.
8249 */
8250 if (RAW != '>') {
8251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8252 }
8253 NEXT;
8254}
8255
8256/**
8257 * xmlParseInternalSubset:
8258 * @ctxt: an XML parser context
8259 *
8260 * parse the internal subset declaration
8261 *
8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263 */
8264
8265static void
8266xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267 /*
8268 * Is there any DTD definition ?
8269 */
8270 if (RAW == '[') {
8271 int baseInputNr = ctxt->inputNr;
8272 ctxt->instate = XML_PARSER_DTD;
8273 NEXT;
8274 /*
8275 * Parse the succession of Markup declarations and
8276 * PEReferences.
8277 * Subsequence (markupdecl | PEReference | S)*
8278 */
8279 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8280 (ctxt->instate != XML_PARSER_EOF)) {
8281 const xmlChar *check = CUR_PTR;
8282 unsigned int cons = ctxt->input->consumed;
8283
8284 SKIP_BLANKS;
8285 xmlParseMarkupDecl(ctxt);
8286 xmlParsePEReference(ctxt);
8287
8288 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8290 "xmlParseInternalSubset: error detected in Markup declaration\n");
8291 if (ctxt->inputNr > baseInputNr)
8292 xmlPopInput(ctxt);
8293 else
8294 break;
8295 }
8296 }
8297 if (RAW == ']') {
8298 NEXT;
8299 SKIP_BLANKS;
8300 }
8301 }
8302
8303 /*
8304 * We should be at the end of the DOCTYPE declaration.
8305 */
8306 if (RAW != '>') {
8307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8308 return;
8309 }
8310 NEXT;
8311}
8312
8313#ifdef LIBXML_SAX1_ENABLED
8314/**
8315 * xmlParseAttribute:
8316 * @ctxt: an XML parser context
8317 * @value: a xmlChar ** used to store the value of the attribute
8318 *
8319 * parse an attribute
8320 *
8321 * [41] Attribute ::= Name Eq AttValue
8322 *
8323 * [ WFC: No External Entity References ]
8324 * Attribute values cannot contain direct or indirect entity references
8325 * to external entities.
8326 *
8327 * [ WFC: No < in Attribute Values ]
8328 * The replacement text of any entity referred to directly or indirectly in
8329 * an attribute value (other than "&lt;") must not contain a <.
8330 *
8331 * [ VC: Attribute Value Type ]
8332 * The attribute must have been declared; the value must be of the type
8333 * declared for it.
8334 *
8335 * [25] Eq ::= S? '=' S?
8336 *
8337 * With namespace:
8338 *
8339 * [NS 11] Attribute ::= QName Eq AttValue
8340 *
8341 * Also the case QName == xmlns:??? is handled independently as a namespace
8342 * definition.
8343 *
8344 * Returns the attribute name, and the value in *value.
8345 */
8346
8347const xmlChar *
8348xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8349 const xmlChar *name;
8350 xmlChar *val;
8351
8352 *value = NULL;
8353 GROW;
8354 name = xmlParseName(ctxt);
8355 if (name == NULL) {
8356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8357 "error parsing attribute name\n");
8358 return(NULL);
8359 }
8360
8361 /*
8362 * read the value
8363 */
8364 SKIP_BLANKS;
8365 if (RAW == '=') {
8366 NEXT;
8367 SKIP_BLANKS;
8368 val = xmlParseAttValue(ctxt);
8369 ctxt->instate = XML_PARSER_CONTENT;
8370 } else {
8371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8372 "Specification mandates value for attribute %s\n", name);
8373 return(NULL);
8374 }
8375
8376 /*
8377 * Check that xml:lang conforms to the specification
8378 * No more registered as an error, just generate a warning now
8379 * since this was deprecated in XML second edition
8380 */
8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 if (!xmlCheckLanguageID(val)) {
8383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 "Malformed value for xml:lang : %s\n",
8385 val, NULL);
8386 }
8387 }
8388
8389 /*
8390 * Check that xml:space conforms to the specification
8391 */
8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 if (xmlStrEqual(val, BAD_CAST "default"))
8394 *(ctxt->space) = 0;
8395 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 *(ctxt->space) = 1;
8397 else {
8398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8399"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8400 val, NULL);
8401 }
8402 }
8403
8404 *value = val;
8405 return(name);
8406}
8407
8408/**
8409 * xmlParseStartTag:
8410 * @ctxt: an XML parser context
8411 *
8412 * parse a start of tag either for rule element or
8413 * EmptyElement. In both case we don't parse the tag closing chars.
8414 *
8415 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
8419 * empty-element tag.
8420 *
8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422 *
8423 * [ WFC: Unique Att Spec ]
8424 * No attribute name may appear more than once in the same start-tag or
8425 * empty-element tag.
8426 *
8427 * With namespace:
8428 *
8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430 *
8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432 *
8433 * Returns the element name parsed
8434 */
8435
8436const xmlChar *
8437xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8438 const xmlChar *name;
8439 const xmlChar *attname;
8440 xmlChar *attvalue;
8441 const xmlChar **atts = ctxt->atts;
8442 int nbatts = 0;
8443 int maxatts = ctxt->maxatts;
8444 int i;
8445
8446 if (RAW != '<') return(NULL);
8447 NEXT1;
8448
8449 name = xmlParseName(ctxt);
8450 if (name == NULL) {
8451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8452 "xmlParseStartTag: invalid element name\n");
8453 return(NULL);
8454 }
8455
8456 /*
8457 * Now parse the attributes, it ends up with the ending
8458 *
8459 * (S Attribute)* S?
8460 */
8461 SKIP_BLANKS;
8462 GROW;
8463
8464 while (((RAW != '>') &&
8465 ((RAW != '/') || (NXT(1) != '>')) &&
8466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8467 const xmlChar *q = CUR_PTR;
8468 unsigned int cons = ctxt->input->consumed;
8469
8470 attname = xmlParseAttribute(ctxt, &attvalue);
8471 if ((attname != NULL) && (attvalue != NULL)) {
8472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
8475 * start-tag or empty-element tag.
8476 */
8477 for (i = 0; i < nbatts;i += 2) {
8478 if (xmlStrEqual(atts[i], attname)) {
8479 xmlErrAttributeDup(ctxt, NULL, attname);
8480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 }
8484 /*
8485 * Add the pair to atts
8486 */
8487 if (atts == NULL) {
8488 maxatts = 22; /* allow for 10 attrs by default */
8489 atts = (const xmlChar **)
8490 xmlMalloc(maxatts * sizeof(xmlChar *));
8491 if (atts == NULL) {
8492 xmlErrMemory(ctxt, NULL);
8493 if (attvalue != NULL)
8494 xmlFree(attvalue);
8495 goto failed;
8496 }
8497 ctxt->atts = atts;
8498 ctxt->maxatts = maxatts;
8499 } else if (nbatts + 4 > maxatts) {
8500 const xmlChar **n;
8501
8502 maxatts *= 2;
8503 n = (const xmlChar **) xmlRealloc((void *) atts,
8504 maxatts * sizeof(const xmlChar *));
8505 if (n == NULL) {
8506 xmlErrMemory(ctxt, NULL);
8507 if (attvalue != NULL)
8508 xmlFree(attvalue);
8509 goto failed;
8510 }
8511 atts = n;
8512 ctxt->atts = atts;
8513 ctxt->maxatts = maxatts;
8514 }
8515 atts[nbatts++] = attname;
8516 atts[nbatts++] = attvalue;
8517 atts[nbatts] = NULL;
8518 atts[nbatts + 1] = NULL;
8519 } else {
8520 if (attvalue != NULL)
8521 xmlFree(attvalue);
8522 }
8523
8524failed:
8525
8526 GROW
8527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 break;
8529 if (SKIP_BLANKS == 0) {
8530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 "attributes construct error\n");
8532 }
8533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534 (attname == NULL) && (attvalue == NULL)) {
8535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 "xmlParseStartTag: problem parsing attributes\n");
8537 break;
8538 }
8539 SHRINK;
8540 GROW;
8541 }
8542
8543 /*
8544 * SAX: Start of Element !
8545 */
8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8547 (!ctxt->disableSAX)) {
8548 if (nbatts > 0)
8549 ctxt->sax->startElement(ctxt->userData, name, atts);
8550 else
8551 ctxt->sax->startElement(ctxt->userData, name, NULL);
8552 }
8553
8554 if (atts != NULL) {
8555 /* Free only the content strings */
8556 for (i = 1;i < nbatts;i+=2)
8557 if (atts[i] != NULL)
8558 xmlFree((xmlChar *) atts[i]);
8559 }
8560 return(name);
8561}
8562
8563/**
8564 * xmlParseEndTag1:
8565 * @ctxt: an XML parser context
8566 * @line: line of the start tag
8567 * @nsNr: number of namespaces on the start tag
8568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
8578static void
8579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8580 const xmlChar *name;
8581
8582 GROW;
8583 if ((RAW != '<') || (NXT(1) != '/')) {
8584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8585 "xmlParseEndTag: '</' not found\n");
8586 return;
8587 }
8588 SKIP(2);
8589
8590 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8591
8592 /*
8593 * We should definitely be at the ending "S? '>'" part
8594 */
8595 GROW;
8596 SKIP_BLANKS;
8597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8599 } else
8600 NEXT1;
8601
8602 /*
8603 * [ WFC: Element Type Match ]
8604 * The Name in an element's end-tag must match the element type in the
8605 * start-tag.
8606 *
8607 */
8608 if (name != (xmlChar*)1) {
8609 if (name == NULL) name = BAD_CAST "unparseable";
8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8611 "Opening and ending tag mismatch: %s line %d and %s\n",
8612 ctxt->name, line, name);
8613 }
8614
8615 /*
8616 * SAX: End of Tag
8617 */
8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 (!ctxt->disableSAX))
8620 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8621
8622 namePop(ctxt);
8623 spacePop(ctxt);
8624 return;
8625}
8626
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * parse an end of tag
 *
 * This is a thin wrapper forwarding to xmlParseEndTag1() with 0 passed
 * as the line of the start tag.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    xmlParseEndTag1(ctxt, 0);
}
8644#endif /* LIBXML_SAX1_ENABLED */
8645
8646/************************************************************************
8647 * *
8648 * SAX 2 specific operations *
8649 * *
8650 ************************************************************************/
8651
8652/*
8653 * xmlGetNamespace:
8654 * @ctxt: an XML parser context
8655 * @prefix: the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
8658 * The prefix must come from the @ctxt->dict dictionary
8659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662static const xmlChar *
8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664 int i;
8665
8666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8667 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8668 if (ctxt->nsTab[i] == prefix) {
8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 return(NULL);
8671 return(ctxt->nsTab[i + 1]);
8672 }
8673 return(NULL);
8674}
8675
8676/**
8677 * xmlParseQName:
8678 * @ctxt: an XML parser context
8679 * @prefix: pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6] QName ::= (Prefix ':')? LocalPart
8684 * [7] Prefix ::= NCName
8685 * [8] LocalPart ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690static const xmlChar *
8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692 const xmlChar *l, *p;
8693
8694 GROW;
8695
8696 l = xmlParseNCName(ctxt);
8697 if (l == NULL) {
8698 if (CUR == ':') {
8699 l = xmlParseName(ctxt);
8700 if (l != NULL) {
8701 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702 "Failed to parse QName '%s'\n", l, NULL, NULL);
8703 *prefix = NULL;
8704 return(l);
8705 }
8706 }
8707 return(NULL);
8708 }
8709 if (CUR == ':') {
8710 NEXT;
8711 p = l;
8712 l = xmlParseNCName(ctxt);
8713 if (l == NULL) {
8714 xmlChar *tmp;
8715
8716 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8718 l = xmlParseNmtoken(ctxt);
8719 if (l == NULL)
8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 else {
8722 tmp = xmlBuildQName(l, p, NULL, 0);
8723 xmlFree((char *)l);
8724 }
8725 p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 if (tmp != NULL) xmlFree(tmp);
8727 *prefix = NULL;
8728 return(p);
8729 }
8730 if (CUR == ':') {
8731 xmlChar *tmp;
8732
8733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8735 NEXT;
8736 tmp = (xmlChar *) xmlParseName(ctxt);
8737 if (tmp != NULL) {
8738 tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 if (tmp != NULL) xmlFree(tmp);
8741 *prefix = p;
8742 return(l);
8743 }
8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 if (tmp != NULL) xmlFree(tmp);
8747 *prefix = p;
8748 return(l);
8749 }
8750 *prefix = p;
8751 } else
8752 *prefix = NULL;
8753 return(l);
8754}
8755
8756/**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt: an XML parser context
8759 * @name: the localname
8760 * @prefix: the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769static const xmlChar *
8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771 xmlChar const *prefix) {
8772 const xmlChar *cmp;
8773 const xmlChar *in;
8774 const xmlChar *ret;
8775 const xmlChar *prefix2;
8776
8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779 GROW;
8780 in = ctxt->input->cur;
8781
8782 cmp = prefix;
8783 while (*in != 0 && *in == *cmp) {
8784 ++in;
8785 ++cmp;
8786 }
8787 if ((*cmp == 0) && (*in == ':')) {
8788 in++;
8789 cmp = name;
8790 while (*in != 0 && *in == *cmp) {
8791 ++in;
8792 ++cmp;
8793 }
8794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8795 /* success */
8796 ctxt->input->cur = in;
8797 return((const xmlChar*) 1);
8798 }
8799 }
8800 /*
8801 * all strings coms from the dictionary, equality can be done directly
8802 */
8803 ret = xmlParseQName (ctxt, &prefix2);
8804 if ((ret == name) && (prefix == prefix2))
8805 return((const xmlChar*) 1);
8806 return ret;
8807}
8808
8809/**
8810 * xmlParseAttValueInternal:
8811 * @ctxt: an XML parser context
8812 * @len: attribute len result
8813 * @alloc: whether the attribute was reallocated as a new string
8814 * @normalize: if 1 then further non-CDATA normalization must be done
8815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 * directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
8822 * checked for validity, the XML processor must normalize it as follows:
8823 * - a character reference is processed by appending the referenced
8824 * character to the attribute value
8825 * - an entity reference is processed by recursively processing the
8826 * replacement text of the entity
8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 * appending #x20 to the normalized value, except that only a single
8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8830 * parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
8832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
8835 * (#x20) characters by a single space (#x20) character.
8836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 * caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843static xmlChar *
8844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845 int normalize)
8846{
8847 xmlChar limit = 0;
8848 const xmlChar *in = NULL, *start, *end, *last;
8849 xmlChar *ret = NULL;
8850 int line, col;
8851
8852 GROW;
8853 in = (xmlChar *) CUR_PTR;
8854 line = ctxt->input->line;
8855 col = ctxt->input->col;
8856 if (*in != '"' && *in != '\'') {
8857 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8858 return (NULL);
8859 }
8860 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8861
8862 /*
8863 * try to handle in this routine the most common case where no
8864 * allocation of a new string is required and where content is
8865 * pure ASCII.
8866 */
8867 limit = *in++;
8868 col++;
8869 end = ctxt->input->end;
8870 start = in;
8871 if (in >= end) {
8872 const xmlChar *oldbase = ctxt->input->base;
8873 GROW;
8874 if (oldbase != ctxt->input->base) {
8875 long delta = ctxt->input->base - oldbase;
8876 start = start + delta;
8877 in = in + delta;
8878 }
8879 end = ctxt->input->end;
8880 }
8881 if (normalize) {
8882 /*
8883 * Skip any leading spaces
8884 */
8885 while ((in < end) && (*in != limit) &&
8886 ((*in == 0x20) || (*in == 0x9) ||
8887 (*in == 0xA) || (*in == 0xD))) {
8888 if (*in == 0xA) {
8889 line++; col = 1;
8890 } else {
8891 col++;
8892 }
8893 in++;
8894 start = in;
8895 if (in >= end) {
8896 const xmlChar *oldbase = ctxt->input->base;
8897 GROW;
8898 if (ctxt->instate == XML_PARSER_EOF)
8899 return(NULL);
8900 if (oldbase != ctxt->input->base) {
8901 long delta = ctxt->input->base - oldbase;
8902 start = start + delta;
8903 in = in + delta;
8904 }
8905 end = ctxt->input->end;
8906 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8909 "AttValue length too long\n");
8910 return(NULL);
8911 }
8912 }
8913 }
8914 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8916 col++;
8917 if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 if (in >= end) {
8919 const xmlChar *oldbase = ctxt->input->base;
8920 GROW;
8921 if (ctxt->instate == XML_PARSER_EOF)
8922 return(NULL);
8923 if (oldbase != ctxt->input->base) {
8924 long delta = ctxt->input->base - oldbase;
8925 start = start + delta;
8926 in = in + delta;
8927 }
8928 end = ctxt->input->end;
8929 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8932 "AttValue length too long\n");
8933 return(NULL);
8934 }
8935 }
8936 }
8937 last = in;
8938 /*
8939 * skip the trailing blanks
8940 */
8941 while ((last[-1] == 0x20) && (last > start)) last--;
8942 while ((in < end) && (*in != limit) &&
8943 ((*in == 0x20) || (*in == 0x9) ||
8944 (*in == 0xA) || (*in == 0xD))) {
8945 if (*in == 0xA) {
8946 line++, col = 1;
8947 } else {
8948 col++;
8949 }
8950 in++;
8951 if (in >= end) {
8952 const xmlChar *oldbase = ctxt->input->base;
8953 GROW;
8954 if (ctxt->instate == XML_PARSER_EOF)
8955 return(NULL);
8956 if (oldbase != ctxt->input->base) {
8957 long delta = ctxt->input->base - oldbase;
8958 start = start + delta;
8959 in = in + delta;
8960 last = last + delta;
8961 }
8962 end = ctxt->input->end;
8963 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8966 "AttValue length too long\n");
8967 return(NULL);
8968 }
8969 }
8970 }
8971 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8974 "AttValue length too long\n");
8975 return(NULL);
8976 }
8977 if (*in != limit) goto need_complex;
8978 } else {
8979 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 in++;
8982 col++;
8983 if (in >= end) {
8984 const xmlChar *oldbase = ctxt->input->base;
8985 GROW;
8986 if (ctxt->instate == XML_PARSER_EOF)
8987 return(NULL);
8988 if (oldbase != ctxt->input->base) {
8989 long delta = ctxt->input->base - oldbase;
8990 start = start + delta;
8991 in = in + delta;
8992 }
8993 end = ctxt->input->end;
8994 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8997 "AttValue length too long\n");
8998 return(NULL);
8999 }
9000 }
9001 }
9002 last = in;
9003 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9006 "AttValue length too long\n");
9007 return(NULL);
9008 }
9009 if (*in != limit) goto need_complex;
9010 }
9011 in++;
9012 col++;
9013 if (len != NULL) {
9014 *len = last - start;
9015 ret = (xmlChar *) start;
9016 } else {
9017 if (alloc) *alloc = 1;
9018 ret = xmlStrndup(start, last - start);
9019 }
9020 CUR_PTR = in;
9021 ctxt->input->line = line;
9022 ctxt->input->col = col;
9023 if (alloc) *alloc = 0;
9024 return ret;
9025need_complex:
9026 if (alloc) *alloc = 1;
9027 return xmlParseAttValueComplex(ctxt, len, normalize);
9028}
9029
9030/**
9031 * xmlParseAttribute2:
9032 * @ctxt: an XML parser context
9033 * @pref: the element prefix
9034 * @elem: the element name
9035 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9036 * @value: a xmlChar ** used to store the value of the attribute
9037 * @len: an int * to save the length of the attribute
9038 * @alloc: an int * to indicate if the attribute was allocated
9039 *
9040 * parse an attribute in the new SAX2 framework.
9041 *
9042 * Returns the attribute name, and the value in *value, .
9043 */
9044
9045static const xmlChar *
9046xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9047 const xmlChar * pref, const xmlChar * elem,
9048 const xmlChar ** prefix, xmlChar ** value,
9049 int *len, int *alloc)
9050{
9051 const xmlChar *name;
9052 xmlChar *val, *internal_val = NULL;
9053 int normalize = 0;
9054
9055 *value = NULL;
9056 GROW;
9057 name = xmlParseQName(ctxt, prefix);
9058 if (name == NULL) {
9059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060 "error parsing attribute name\n");
9061 return (NULL);
9062 }
9063
9064 /*
9065 * get the type if needed
9066 */
9067 if (ctxt->attsSpecial != NULL) {
9068 int type;
9069
9070 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071 pref, elem, *prefix, name);
9072 if (type != 0)
9073 normalize = 1;
9074 }
9075
9076 /*
9077 * read the value
9078 */
9079 SKIP_BLANKS;
9080 if (RAW == '=') {
9081 NEXT;
9082 SKIP_BLANKS;
9083 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 if (normalize) {
9085 /*
9086 * Sometimes a second normalisation pass for spaces is needed
9087 * but that only happens if charrefs or entities refernces
9088 * have been used in the attribute value, i.e. the attribute
9089 * value have been extracted in an allocated string already.
9090 */
9091 if (*alloc) {
9092 const xmlChar *val2;
9093
9094 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9095 if ((val2 != NULL) && (val2 != val)) {
9096 xmlFree(val);
9097 val = (xmlChar *) val2;
9098 }
9099 }
9100 }
9101 ctxt->instate = XML_PARSER_CONTENT;
9102 } else {
9103 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9104 "Specification mandates value for attribute %s\n",
9105 name);
9106 return (NULL);
9107 }
9108
9109 if (*prefix == ctxt->str_xml) {
9110 /*
9111 * Check that xml:lang conforms to the specification
9112 * No more registered as an error, just generate a warning now
9113 * since this was deprecated in XML second edition
9114 */
9115 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116 internal_val = xmlStrndup(val, *len);
9117 if (!xmlCheckLanguageID(internal_val)) {
9118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119 "Malformed value for xml:lang : %s\n",
9120 internal_val, NULL);
9121 }
9122 }
9123
9124 /*
9125 * Check that xml:space conforms to the specification
9126 */
9127 if (xmlStrEqual(name, BAD_CAST "space")) {
9128 internal_val = xmlStrndup(val, *len);
9129 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130 *(ctxt->space) = 0;
9131 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132 *(ctxt->space) = 1;
9133 else {
9134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136 internal_val, NULL);
9137 }
9138 }
9139 if (internal_val) {
9140 xmlFree(internal_val);
9141 }
9142 }
9143
9144 *value = val;
9145 return (name);
9146}
9147/**
9148 * xmlParseStartTag2:
9149 * @ctxt: an XML parser context
9150 *
9151 * parse a start of tag either for rule element or
9152 * EmptyElement. In both case we don't parse the tag closing chars.
9153 * This routine is called when running SAX2 parsing
9154 *
9155 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156 *
9157 * [ WFC: Unique Att Spec ]
9158 * No attribute name may appear more than once in the same start-tag or
9159 * empty-element tag.
9160 *
9161 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162 *
9163 * [ WFC: Unique Att Spec ]
9164 * No attribute name may appear more than once in the same start-tag or
9165 * empty-element tag.
9166 *
9167 * With namespace:
9168 *
9169 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170 *
9171 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172 *
9173 * Returns the element name parsed
9174 */
9175
9176static const xmlChar *
9177xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9178 const xmlChar **URI, int *tlen) {
9179 const xmlChar *localname;
9180 const xmlChar *prefix;
9181 const xmlChar *attname;
9182 const xmlChar *aprefix;
9183 const xmlChar *nsname;
9184 xmlChar *attvalue;
9185 const xmlChar **atts = ctxt->atts;
9186 int maxatts = ctxt->maxatts;
9187 int nratts, nbatts, nbdef, inputid;
9188 int i, j, nbNs, attval;
9189 unsigned long cur;
9190 int nsNr = ctxt->nsNr;
9191
9192 if (RAW != '<') return(NULL);
9193 NEXT1;
9194
9195 /*
9196 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197 * point since the attribute values may be stored as pointers to
9198 * the buffer and calling SHRINK would destroy them !
9199 * The Shrinking is only possible once the full set of attribute
9200 * callbacks have been done.
9201 */
9202 SHRINK;
9203 cur = ctxt->input->cur - ctxt->input->base;
9204 inputid = ctxt->input->id;
9205 nbatts = 0;
9206 nratts = 0;
9207 nbdef = 0;
9208 nbNs = 0;
9209 attval = 0;
9210 /* Forget any namespaces added during an earlier parse of this element. */
9211 ctxt->nsNr = nsNr;
9212
9213 localname = xmlParseQName(ctxt, &prefix);
9214 if (localname == NULL) {
9215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 "StartTag: invalid element name\n");
9217 return(NULL);
9218 }
9219 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9220
9221 /*
9222 * Now parse the attributes, it ends up with the ending
9223 *
9224 * (S Attribute)* S?
9225 */
9226 SKIP_BLANKS;
9227 GROW;
9228
9229 while (((RAW != '>') &&
9230 ((RAW != '/') || (NXT(1) != '>')) &&
9231 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9232 const xmlChar *q = CUR_PTR;
9233 unsigned int cons = ctxt->input->consumed;
9234 int len = -1, alloc = 0;
9235
9236 attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 &aprefix, &attvalue, &len, &alloc);
9238 if ((attname == NULL) || (attvalue == NULL))
9239 goto next_attr;
9240 if (len < 0) len = xmlStrlen(attvalue);
9241
9242 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244 xmlURIPtr uri;
9245
9246 if (URL == NULL) {
9247 xmlErrMemory(ctxt, "dictionary allocation failure");
9248 if ((attvalue != NULL) && (alloc != 0))
9249 xmlFree(attvalue);
9250 return(NULL);
9251 }
9252 if (*URL != 0) {
9253 uri = xmlParseURI((const char *) URL);
9254 if (uri == NULL) {
9255 xmlNsErr(ctxt, XML_WAR_NS_URI,
9256 "xmlns: '%s' is not a valid URI\n",
9257 URL, NULL, NULL);
9258 } else {
9259 if (uri->scheme == NULL) {
9260 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261 "xmlns: URI %s is not absolute\n",
9262 URL, NULL, NULL);
9263 }
9264 xmlFreeURI(uri);
9265 }
9266 if (URL == ctxt->str_xml_ns) {
9267 if (attname != ctxt->str_xml) {
9268 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269 "xml namespace URI cannot be the default namespace\n",
9270 NULL, NULL, NULL);
9271 }
9272 goto next_attr;
9273 }
9274 if ((len == 29) &&
9275 (xmlStrEqual(URL,
9276 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278 "reuse of the xmlns namespace name is forbidden\n",
9279 NULL, NULL, NULL);
9280 goto next_attr;
9281 }
9282 }
9283 /*
9284 * check that it's not a defined namespace
9285 */
9286 for (j = 1;j <= nbNs;j++)
9287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288 break;
9289 if (j <= nbNs)
9290 xmlErrAttributeDup(ctxt, NULL, attname);
9291 else
9292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9293
9294 } else if (aprefix == ctxt->str_xmlns) {
9295 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296 xmlURIPtr uri;
9297
9298 if (attname == ctxt->str_xml) {
9299 if (URL != ctxt->str_xml_ns) {
9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301 "xml namespace prefix mapped to wrong URI\n",
9302 NULL, NULL, NULL);
9303 }
9304 /*
9305 * Do not keep a namespace definition node
9306 */
9307 goto next_attr;
9308 }
9309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI mapped to wrong prefix\n",
9313 NULL, NULL, NULL);
9314 }
9315 goto next_attr;
9316 }
9317 if (attname == ctxt->str_xmlns) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "redefinition of the xmlns prefix is forbidden\n",
9320 NULL, NULL, NULL);
9321 goto next_attr;
9322 }
9323 if ((len == 29) &&
9324 (xmlStrEqual(URL,
9325 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327 "reuse of the xmlns namespace name is forbidden\n",
9328 NULL, NULL, NULL);
9329 goto next_attr;
9330 }
9331 if ((URL == NULL) || (URL[0] == 0)) {
9332 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333 "xmlns:%s: Empty XML namespace is not allowed\n",
9334 attname, NULL, NULL);
9335 goto next_attr;
9336 } else {
9337 uri = xmlParseURI((const char *) URL);
9338 if (uri == NULL) {
9339 xmlNsErr(ctxt, XML_WAR_NS_URI,
9340 "xmlns:%s: '%s' is not a valid URI\n",
9341 attname, URL, NULL);
9342 } else {
9343 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345 "xmlns:%s: URI %s is not absolute\n",
9346 attname, URL, NULL);
9347 }
9348 xmlFreeURI(uri);
9349 }
9350 }
9351
9352 /*
9353 * check that it's not a defined namespace
9354 */
9355 for (j = 1;j <= nbNs;j++)
9356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357 break;
9358 if (j <= nbNs)
9359 xmlErrAttributeDup(ctxt, aprefix, attname);
9360 else
9361 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362
9363 } else {
9364 /*
9365 * Add the pair to atts
9366 */
9367 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369 goto next_attr;
9370 }
9371 maxatts = ctxt->maxatts;
9372 atts = ctxt->atts;
9373 }
9374 ctxt->attallocs[nratts++] = alloc;
9375 atts[nbatts++] = attname;
9376 atts[nbatts++] = aprefix;
9377 /*
9378 * The namespace URI field is used temporarily to point at the
9379 * base of the current input buffer for non-alloced attributes.
9380 * When the input buffer is reallocated, all the pointers become
9381 * invalid, but they can be reconstructed later.
9382 */
9383 if (alloc)
9384 atts[nbatts++] = NULL;
9385 else
9386 atts[nbatts++] = ctxt->input->base;
9387 atts[nbatts++] = attvalue;
9388 attvalue += len;
9389 atts[nbatts++] = attvalue;
9390 /*
9391 * tag if some deallocation is needed
9392 */
9393 if (alloc != 0) attval = 1;
9394 attvalue = NULL; /* moved into atts */
9395 }
9396
9397next_attr:
9398 if ((attvalue != NULL) && (alloc != 0)) {
9399 xmlFree(attvalue);
9400 attvalue = NULL;
9401 }
9402
9403 GROW
9404 if (ctxt->instate == XML_PARSER_EOF)
9405 break;
9406 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 break;
9408 if (SKIP_BLANKS == 0) {
9409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 "attributes construct error\n");
9411 break;
9412 }
9413 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414 (attname == NULL) && (attvalue == NULL)) {
9415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9416 "xmlParseStartTag: problem parsing attributes\n");
9417 break;
9418 }
9419 GROW;
9420 }
9421
9422 if (ctxt->input->id != inputid) {
9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424 "Unexpected change of input\n");
9425 localname = NULL;
9426 goto done;
9427 }
9428
9429 /* Reconstruct attribute value pointers. */
9430 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431 if (atts[i+2] != NULL) {
9432 /*
9433 * Arithmetic on dangling pointers is technically undefined
9434 * behavior, but well...
9435 */
9436 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9438 atts[i+3] += offset; /* value */
9439 atts[i+4] += offset; /* valuend */
9440 }
9441 }
9442
9443 /*
9444 * The attributes defaulting
9445 */
9446 if (ctxt->attsDefault != NULL) {
9447 xmlDefAttrsPtr defaults;
9448
9449 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 if (defaults != NULL) {
9451 for (i = 0;i < defaults->nbAttrs;i++) {
9452 attname = defaults->values[5 * i];
9453 aprefix = defaults->values[5 * i + 1];
9454
9455 /*
9456 * special work for namespaces defaulted defs
9457 */
9458 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 /*
9460 * check that it's not a defined namespace
9461 */
9462 for (j = 1;j <= nbNs;j++)
9463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 break;
9465 if (j <= nbNs) continue;
9466
9467 nsname = xmlGetNamespace(ctxt, NULL);
9468 if (nsname != defaults->values[5 * i + 2]) {
9469 if (nsPush(ctxt, NULL,
9470 defaults->values[5 * i + 2]) > 0)
9471 nbNs++;
9472 }
9473 } else if (aprefix == ctxt->str_xmlns) {
9474 /*
9475 * check that it's not a defined namespace
9476 */
9477 for (j = 1;j <= nbNs;j++)
9478 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 break;
9480 if (j <= nbNs) continue;
9481
9482 nsname = xmlGetNamespace(ctxt, attname);
9483 if (nsname != defaults->values[2]) {
9484 if (nsPush(ctxt, attname,
9485 defaults->values[5 * i + 2]) > 0)
9486 nbNs++;
9487 }
9488 } else {
9489 /*
9490 * check that it's not a defined attribute
9491 */
9492 for (j = 0;j < nbatts;j+=5) {
9493 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 break;
9495 }
9496 if (j < nbatts) continue;
9497
9498 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9500 return(NULL);
9501 }
9502 maxatts = ctxt->maxatts;
9503 atts = ctxt->atts;
9504 }
9505 atts[nbatts++] = attname;
9506 atts[nbatts++] = aprefix;
9507 if (aprefix == NULL)
9508 atts[nbatts++] = NULL;
9509 else
9510 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9511 atts[nbatts++] = defaults->values[5 * i + 2];
9512 atts[nbatts++] = defaults->values[5 * i + 3];
9513 if ((ctxt->standalone == 1) &&
9514 (defaults->values[5 * i + 4] != NULL)) {
9515 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9516 "standalone: attribute %s on %s defaulted from external subset\n",
9517 attname, localname);
9518 }
9519 nbdef++;
9520 }
9521 }
9522 }
9523 }
9524
9525 /*
9526 * The attributes checkings
9527 */
9528 for (i = 0; i < nbatts;i += 5) {
9529 /*
9530 * The default namespace does not apply to attribute names.
9531 */
9532 if (atts[i + 1] != NULL) {
9533 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 if (nsname == NULL) {
9535 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 "Namespace prefix %s for %s on %s is not defined\n",
9537 atts[i + 1], atts[i], localname);
9538 }
9539 atts[i + 2] = nsname;
9540 } else
9541 nsname = NULL;
9542 /*
9543 * [ WFC: Unique Att Spec ]
9544 * No attribute name may appear more than once in the same
9545 * start-tag or empty-element tag.
9546 * As extended by the Namespace in XML REC.
9547 */
9548 for (j = 0; j < i;j += 5) {
9549 if (atts[i] == atts[j]) {
9550 if (atts[i+1] == atts[j+1]) {
9551 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 break;
9553 }
9554 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 "Namespaced Attribute %s in '%s' redefined\n",
9557 atts[i], nsname, NULL);
9558 break;
9559 }
9560 }
9561 }
9562 }
9563
9564 nsname = xmlGetNamespace(ctxt, prefix);
9565 if ((prefix != NULL) && (nsname == NULL)) {
9566 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 "Namespace prefix %s on %s is not defined\n",
9568 prefix, localname, NULL);
9569 }
9570 *pref = prefix;
9571 *URI = nsname;
9572
9573 /*
9574 * SAX: Start of Element !
9575 */
9576 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 (!ctxt->disableSAX)) {
9578 if (nbNs > 0)
9579 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 nbatts / 5, nbdef, atts);
9582 else
9583 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585 }
9586
9587done:
9588 /*
9589 * Free up attribute allocated strings if needed
9590 */
9591 if (attval != 0) {
9592 for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 xmlFree((xmlChar *) atts[i]);
9595 }
9596
9597 return(localname);
9598}
9599
9600/**
9601 * xmlParseEndTag2:
9602 * @ctxt: an XML parser context
9603 * @line: line of the start tag
9604 * @nsNr: number of namespaces on the start tag
9605 *
9606 * parse an end of tag
9607 *
9608 * [42] ETag ::= '</' Name S? '>'
9609 *
9610 * With namespace
9611 *
9612 * [NS 9] ETag ::= '</' QName S? '>'
9613 */
9614
9615static void
9616xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9617 const xmlChar *URI, int line, int nsNr, int tlen) {
9618 const xmlChar *name;
9619 size_t curLength;
9620
9621 GROW;
9622 if ((RAW != '<') || (NXT(1) != '/')) {
9623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9624 return;
9625 }
9626 SKIP(2);
9627
9628 curLength = ctxt->input->end - ctxt->input->cur;
9629 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631 if ((curLength >= (size_t)(tlen + 1)) &&
9632 (ctxt->input->cur[tlen] == '>')) {
9633 ctxt->input->cur += tlen + 1;
9634 ctxt->input->col += tlen + 1;
9635 goto done;
9636 }
9637 ctxt->input->cur += tlen;
9638 ctxt->input->col += tlen;
9639 name = (xmlChar*)1;
9640 } else {
9641 if (prefix == NULL)
9642 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 else
9644 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645 }
9646
9647 /*
9648 * We should definitely be at the ending "S? '>'" part
9649 */
9650 GROW;
9651 if (ctxt->instate == XML_PARSER_EOF)
9652 return;
9653 SKIP_BLANKS;
9654 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9655 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9656 } else
9657 NEXT1;
9658
9659 /*
9660 * [ WFC: Element Type Match ]
9661 * The Name in an element's end-tag must match the element type in the
9662 * start-tag.
9663 *
9664 */
9665 if (name != (xmlChar*)1) {
9666 if (name == NULL) name = BAD_CAST "unparseable";
9667 if ((line == 0) && (ctxt->node != NULL))
9668 line = ctxt->node->line;
9669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9670 "Opening and ending tag mismatch: %s line %d and %s\n",
9671 ctxt->name, line, name);
9672 }
9673
9674 /*
9675 * SAX: End of Tag
9676 */
9677done:
9678 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 (!ctxt->disableSAX))
9680 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681
9682 spacePop(ctxt);
9683 if (nsNr != 0)
9684 nsPop(ctxt, nsNr);
9685 return;
9686}
9687
9688/**
9689 * xmlParseCDSect:
9690 * @ctxt: an XML parser context
9691 *
9692 * Parse escaped pure raw content.
9693 *
9694 * [18] CDSect ::= CDStart CData CDEnd
9695 *
9696 * [19] CDStart ::= '<![CDATA['
9697 *
9698 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699 *
9700 * [21] CDEnd ::= ']]>'
9701 */
9702void
9703xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704 xmlChar *buf = NULL;
9705 int len = 0;
9706 int size = XML_PARSER_BUFFER_SIZE;
9707 int r, rl;
9708 int s, sl;
9709 int cur, l;
9710 int count = 0;
9711
9712 /* Check 2.6.0 was NXT(0) not RAW */
9713 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9714 SKIP(9);
9715 } else
9716 return;
9717
9718 ctxt->instate = XML_PARSER_CDATA_SECTION;
9719 r = CUR_CHAR(rl);
9720 if (!IS_CHAR(r)) {
9721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9722 ctxt->instate = XML_PARSER_CONTENT;
9723 return;
9724 }
9725 NEXTL(rl);
9726 s = CUR_CHAR(sl);
9727 if (!IS_CHAR(s)) {
9728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9729 ctxt->instate = XML_PARSER_CONTENT;
9730 return;
9731 }
9732 NEXTL(sl);
9733 cur = CUR_CHAR(l);
9734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9735 if (buf == NULL) {
9736 xmlErrMemory(ctxt, NULL);
9737 return;
9738 }
9739 while (IS_CHAR(cur) &&
9740 ((r != ']') || (s != ']') || (cur != '>'))) {
9741 if (len + 5 >= size) {
9742 xmlChar *tmp;
9743
9744 if ((size > XML_MAX_TEXT_LENGTH) &&
9745 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747 "CData section too big found", NULL);
9748 xmlFree (buf);
9749 return;
9750 }
9751 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9752 if (tmp == NULL) {
9753 xmlFree(buf);
9754 xmlErrMemory(ctxt, NULL);
9755 return;
9756 }
9757 buf = tmp;
9758 size *= 2;
9759 }
9760 COPY_BUF(rl,buf,len,r);
9761 r = s;
9762 rl = sl;
9763 s = cur;
9764 sl = l;
9765 count++;
9766 if (count > 50) {
9767 GROW;
9768 if (ctxt->instate == XML_PARSER_EOF) {
9769 xmlFree(buf);
9770 return;
9771 }
9772 count = 0;
9773 }
9774 NEXTL(l);
9775 cur = CUR_CHAR(l);
9776 }
9777 buf[len] = 0;
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 if (cur != '>') {
9780 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9781 "CData section not finished\n%.50s\n", buf);
9782 xmlFree(buf);
9783 return;
9784 }
9785 NEXTL(l);
9786
9787 /*
9788 * OK the buffer is to be consumed as cdata.
9789 */
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 if (ctxt->sax->cdataBlock != NULL)
9792 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9793 else if (ctxt->sax->characters != NULL)
9794 ctxt->sax->characters(ctxt->userData, buf, len);
9795 }
9796 xmlFree(buf);
9797}
9798
9799/**
9800 * xmlParseContent:
9801 * @ctxt: an XML parser context
9802 *
9803 * Parse a content:
9804 *
9805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806 */
9807
9808void
9809xmlParseContent(xmlParserCtxtPtr ctxt) {
9810 GROW;
9811 while ((RAW != 0) &&
9812 ((RAW != '<') || (NXT(1) != '/')) &&
9813 (ctxt->instate != XML_PARSER_EOF)) {
9814 const xmlChar *test = CUR_PTR;
9815 unsigned int cons = ctxt->input->consumed;
9816 const xmlChar *cur = ctxt->input->cur;
9817
9818 /*
9819 * First case : a Processing Instruction.
9820 */
9821 if ((*cur == '<') && (cur[1] == '?')) {
9822 xmlParsePI(ctxt);
9823 }
9824
9825 /*
9826 * Second case : a CDSection
9827 */
9828 /* 2.6.0 test was *cur not RAW */
9829 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9830 xmlParseCDSect(ctxt);
9831 }
9832
9833 /*
9834 * Third case : a comment
9835 */
9836 else if ((*cur == '<') && (NXT(1) == '!') &&
9837 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 xmlParseComment(ctxt);
9839 ctxt->instate = XML_PARSER_CONTENT;
9840 }
9841
9842 /*
9843 * Fourth case : a sub-element.
9844 */
9845 else if (*cur == '<') {
9846 xmlParseElement(ctxt);
9847 }
9848
9849 /*
9850 * Fifth case : a reference. If if has not been resolved,
9851 * parsing returns it's Name, create the node
9852 */
9853
9854 else if (*cur == '&') {
9855 xmlParseReference(ctxt);
9856 }
9857
9858 /*
9859 * Last case, text. Note that References are handled directly.
9860 */
9861 else {
9862 xmlParseCharData(ctxt, 0);
9863 }
9864
9865 GROW;
9866 SHRINK;
9867
9868 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9869 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 "detected an error in element content\n");
9871 xmlHaltParser(ctxt);
9872 break;
9873 }
9874 }
9875}
9876
9877/**
9878 * xmlParseElement:
9879 * @ctxt: an XML parser context
9880 *
9881 * parse an XML element, this is highly recursive
9882 *
9883 * [39] element ::= EmptyElemTag | STag content ETag
9884 *
9885 * [ WFC: Element Type Match ]
9886 * The Name in an element's end-tag must match the element type in the
9887 * start-tag.
9888 *
9889 */
9890
9891void
9892xmlParseElement(xmlParserCtxtPtr ctxt) {
9893 const xmlChar *name;
9894 const xmlChar *prefix = NULL;
9895 const xmlChar *URI = NULL;
9896 xmlParserNodeInfo node_info;
9897 int line, tlen = 0;
9898 xmlNodePtr ret;
9899 int nsNr = ctxt->nsNr;
9900
9901 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 xmlParserMaxDepth);
9906 xmlHaltParser(ctxt);
9907 return;
9908 }
9909
9910 /* Capture start position */
9911 if (ctxt->record_info) {
9912 node_info.begin_pos = ctxt->input->consumed +
9913 (CUR_PTR - ctxt->input->base);
9914 node_info.begin_line = ctxt->input->line;
9915 }
9916
9917 if (ctxt->spaceNr == 0)
9918 spacePush(ctxt, -1);
9919 else if (*ctxt->space == -2)
9920 spacePush(ctxt, -1);
9921 else
9922 spacePush(ctxt, *ctxt->space);
9923
9924 line = ctxt->input->line;
9925#ifdef LIBXML_SAX1_ENABLED
9926 if (ctxt->sax2)
9927#endif /* LIBXML_SAX1_ENABLED */
9928 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9929#ifdef LIBXML_SAX1_ENABLED
9930 else
9931 name = xmlParseStartTag(ctxt);
9932#endif /* LIBXML_SAX1_ENABLED */
9933 if (ctxt->instate == XML_PARSER_EOF)
9934 return;
9935 if (name == NULL) {
9936 spacePop(ctxt);
9937 return;
9938 }
9939 namePush(ctxt, name);
9940 ret = ctxt->node;
9941
9942#ifdef LIBXML_VALID_ENABLED
9943 /*
9944 * [ VC: Root Element Type ]
9945 * The Name in the document type declaration must match the element
9946 * type of the root element.
9947 */
9948 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9951#endif /* LIBXML_VALID_ENABLED */
9952
9953 /*
9954 * Check for an Empty Element.
9955 */
9956 if ((RAW == '/') && (NXT(1) == '>')) {
9957 SKIP(2);
9958 if (ctxt->sax2) {
9959 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 (!ctxt->disableSAX))
9961 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9962#ifdef LIBXML_SAX1_ENABLED
9963 } else {
9964 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 (!ctxt->disableSAX))
9966 ctxt->sax->endElement(ctxt->userData, name);
9967#endif /* LIBXML_SAX1_ENABLED */
9968 }
9969 namePop(ctxt);
9970 spacePop(ctxt);
9971 if (nsNr != ctxt->nsNr)
9972 nsPop(ctxt, ctxt->nsNr - nsNr);
9973 if ( ret != NULL && ctxt->record_info ) {
9974 node_info.end_pos = ctxt->input->consumed +
9975 (CUR_PTR - ctxt->input->base);
9976 node_info.end_line = ctxt->input->line;
9977 node_info.node = ret;
9978 xmlParserAddNodeInfo(ctxt, &node_info);
9979 }
9980 return;
9981 }
9982 if (RAW == '>') {
9983 NEXT1;
9984 } else {
9985 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 "Couldn't find end of Start Tag %s line %d\n",
9987 name, line, NULL);
9988
9989 /*
9990 * end of parsing of this node.
9991 */
9992 nodePop(ctxt);
9993 namePop(ctxt);
9994 spacePop(ctxt);
9995 if (nsNr != ctxt->nsNr)
9996 nsPop(ctxt, ctxt->nsNr - nsNr);
9997
9998 /*
9999 * Capture end position and add node
10000 */
10001 if ( ret != NULL && ctxt->record_info ) {
10002 node_info.end_pos = ctxt->input->consumed +
10003 (CUR_PTR - ctxt->input->base);
10004 node_info.end_line = ctxt->input->line;
10005 node_info.node = ret;
10006 xmlParserAddNodeInfo(ctxt, &node_info);
10007 }
10008 return;
10009 }
10010
10011 /*
10012 * Parse the content of the element:
10013 */
10014 xmlParseContent(ctxt);
10015 if (ctxt->instate == XML_PARSER_EOF)
10016 return;
10017 if (!IS_BYTE_CHAR(RAW)) {
10018 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10019 "Premature end of data in tag %s line %d\n",
10020 name, line, NULL);
10021
10022 /*
10023 * end of parsing of this node.
10024 */
10025 nodePop(ctxt);
10026 namePop(ctxt);
10027 spacePop(ctxt);
10028 if (nsNr != ctxt->nsNr)
10029 nsPop(ctxt, ctxt->nsNr - nsNr);
10030 return;
10031 }
10032
10033 /*
10034 * parse the end of tag: '</' should be here.
10035 */
10036 if (ctxt->sax2) {
10037 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10038 namePop(ctxt);
10039 }
10040#ifdef LIBXML_SAX1_ENABLED
10041 else
10042 xmlParseEndTag1(ctxt, line);
10043#endif /* LIBXML_SAX1_ENABLED */
10044
10045 /*
10046 * Capture end position and add node
10047 */
10048 if ( ret != NULL && ctxt->record_info ) {
10049 node_info.end_pos = ctxt->input->consumed +
10050 (CUR_PTR - ctxt->input->base);
10051 node_info.end_line = ctxt->input->line;
10052 node_info.node = ret;
10053 xmlParserAddNodeInfo(ctxt, &node_info);
10054 }
10055}
10056
10057/**
10058 * xmlParseVersionNum:
10059 * @ctxt: an XML parser context
10060 *
10061 * parse the XML version value.
10062 *
10063 * [26] VersionNum ::= '1.' [0-9]+
10064 *
10065 * In practice allow [0-9].[0-9]+ at that level
10066 *
10067 * Returns the string giving the XML version number, or NULL
10068 */
10069xmlChar *
10070xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071 xmlChar *buf = NULL;
10072 int len = 0;
10073 int size = 10;
10074 xmlChar cur;
10075
10076 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10077 if (buf == NULL) {
10078 xmlErrMemory(ctxt, NULL);
10079 return(NULL);
10080 }
10081 cur = CUR;
10082 if (!((cur >= '0') && (cur <= '9'))) {
10083 xmlFree(buf);
10084 return(NULL);
10085 }
10086 buf[len++] = cur;
10087 NEXT;
10088 cur=CUR;
10089 if (cur != '.') {
10090 xmlFree(buf);
10091 return(NULL);
10092 }
10093 buf[len++] = cur;
10094 NEXT;
10095 cur=CUR;
10096 while ((cur >= '0') && (cur <= '9')) {
10097 if (len + 1 >= size) {
10098 xmlChar *tmp;
10099
10100 size *= 2;
10101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 if (tmp == NULL) {
10103 xmlFree(buf);
10104 xmlErrMemory(ctxt, NULL);
10105 return(NULL);
10106 }
10107 buf = tmp;
10108 }
10109 buf[len++] = cur;
10110 NEXT;
10111 cur=CUR;
10112 }
10113 buf[len] = 0;
10114 return(buf);
10115}
10116
10117/**
10118 * xmlParseVersionInfo:
10119 * @ctxt: an XML parser context
10120 *
10121 * parse the XML version.
10122 *
10123 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10124 *
10125 * [25] Eq ::= S? '=' S?
10126 *
10127 * Returns the version string, e.g. "1.0"
10128 */
10129
10130xmlChar *
10131xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132 xmlChar *version = NULL;
10133
10134 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10135 SKIP(7);
10136 SKIP_BLANKS;
10137 if (RAW != '=') {
10138 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10139 return(NULL);
10140 }
10141 NEXT;
10142 SKIP_BLANKS;
10143 if (RAW == '"') {
10144 NEXT;
10145 version = xmlParseVersionNum(ctxt);
10146 if (RAW != '"') {
10147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10148 } else
10149 NEXT;
10150 } else if (RAW == '\''){
10151 NEXT;
10152 version = xmlParseVersionNum(ctxt);
10153 if (RAW != '\'') {
10154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10155 } else
10156 NEXT;
10157 } else {
10158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10159 }
10160 }
10161 return(version);
10162}
10163
10164/**
10165 * xmlParseEncName:
10166 * @ctxt: an XML parser context
10167 *
10168 * parse the XML encoding name
10169 *
10170 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171 *
10172 * Returns the encoding name value or NULL
10173 */
10174xmlChar *
10175xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176 xmlChar *buf = NULL;
10177 int len = 0;
10178 int size = 10;
10179 xmlChar cur;
10180
10181 cur = CUR;
10182 if (((cur >= 'a') && (cur <= 'z')) ||
10183 ((cur >= 'A') && (cur <= 'Z'))) {
10184 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10185 if (buf == NULL) {
10186 xmlErrMemory(ctxt, NULL);
10187 return(NULL);
10188 }
10189
10190 buf[len++] = cur;
10191 NEXT;
10192 cur = CUR;
10193 while (((cur >= 'a') && (cur <= 'z')) ||
10194 ((cur >= 'A') && (cur <= 'Z')) ||
10195 ((cur >= '0') && (cur <= '9')) ||
10196 (cur == '.') || (cur == '_') ||
10197 (cur == '-')) {
10198 if (len + 1 >= size) {
10199 xmlChar *tmp;
10200
10201 size *= 2;
10202 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 if (tmp == NULL) {
10204 xmlErrMemory(ctxt, NULL);
10205 xmlFree(buf);
10206 return(NULL);
10207 }
10208 buf = tmp;
10209 }
10210 buf[len++] = cur;
10211 NEXT;
10212 cur = CUR;
10213 if (cur == 0) {
10214 SHRINK;
10215 GROW;
10216 cur = CUR;
10217 }
10218 }
10219 buf[len] = 0;
10220 } else {
10221 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10222 }
10223 return(buf);
10224}
10225
10226/**
10227 * xmlParseEncodingDecl:
10228 * @ctxt: an XML parser context
10229 *
10230 * parse the XML encoding declaration
10231 *
10232 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10233 *
10234 * this setups the conversion filters.
10235 *
10236 * Returns the encoding value or NULL
10237 */
10238
10239const xmlChar *
10240xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241 xmlChar *encoding = NULL;
10242
10243 SKIP_BLANKS;
10244 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10245 SKIP(8);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
10248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
10255 encoding = xmlParseEncName(ctxt);
10256 if (RAW != '"') {
10257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 xmlFree((xmlChar *) encoding);
10259 return(NULL);
10260 } else
10261 NEXT;
10262 } else if (RAW == '\''){
10263 NEXT;
10264 encoding = xmlParseEncName(ctxt);
10265 if (RAW != '\'') {
10266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10267 xmlFree((xmlChar *) encoding);
10268 return(NULL);
10269 } else
10270 NEXT;
10271 } else {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10273 }
10274
10275 /*
10276 * Non standard parsing, allowing the user to ignore encoding
10277 */
10278 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 xmlFree((xmlChar *) encoding);
10280 return(NULL);
10281 }
10282
10283 /*
10284 * UTF-16 encoding stwich has already taken place at this stage,
10285 * more over the little-endian/big-endian selection is already done
10286 */
10287 if ((encoding != NULL) &&
10288 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10290 /*
10291 * If no encoding was passed to the parser, that we are
10292 * using UTF-16 and no decoder is present i.e. the
10293 * document is apparently UTF-8 compatible, then raise an
10294 * encoding mismatch fatal error
10295 */
10296 if ((ctxt->encoding == NULL) &&
10297 (ctxt->input->buf != NULL) &&
10298 (ctxt->input->buf->encoder == NULL)) {
10299 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 "Document labelled UTF-16 but has UTF-8 content\n");
10301 }
10302 if (ctxt->encoding != NULL)
10303 xmlFree((xmlChar *) ctxt->encoding);
10304 ctxt->encoding = encoding;
10305 }
10306 /*
10307 * UTF-8 encoding is handled natively
10308 */
10309 else if ((encoding != NULL) &&
10310 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10312 if (ctxt->encoding != NULL)
10313 xmlFree((xmlChar *) ctxt->encoding);
10314 ctxt->encoding = encoding;
10315 }
10316 else if (encoding != NULL) {
10317 xmlCharEncodingHandlerPtr handler;
10318
10319 if (ctxt->input->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->input->encoding);
10321 ctxt->input->encoding = encoding;
10322
10323 handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 if (handler != NULL) {
10325 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 /* failed to convert */
10327 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 return(NULL);
10329 }
10330 } else {
10331 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10332 "Unsupported encoding %s\n", encoding);
10333 return(NULL);
10334 }
10335 }
10336 }
10337 return(encoding);
10338}
10339
10340/**
10341 * xmlParseSDDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * parse the XML standalone declaration
10345 *
10346 * [32] SDDecl ::= S 'standalone' Eq
10347 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10348 *
10349 * [ VC: Standalone Document Declaration ]
10350 * TODO The standalone document declaration must have the value "no"
10351 * if any external markup declarations contain declarations of:
10352 * - attributes with default values, if elements to which these
10353 * attributes apply appear in the document without specifications
10354 * of values for these attributes, or
10355 * - entities (other than amp, lt, gt, apos, quot), if references
10356 * to those entities appear in the document, or
10357 * - attributes with values subject to normalization, where the
10358 * attribute appears in the document with a value which will change
10359 * as a result of normalization, or
10360 * - element types with element content, if white space occurs directly
10361 * within any instance of those types.
10362 *
10363 * Returns:
10364 * 1 if standalone="yes"
10365 * 0 if standalone="no"
10366 * -2 if standalone attribute is missing or invalid
10367 * (A standalone value of -2 means that the XML declaration was found,
10368 * but no value was specified for the standalone attribute).
10369 */
10370
10371int
10372xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10373 int standalone = -2;
10374
10375 SKIP_BLANKS;
10376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10377 SKIP(10);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10381 return(standalone);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '\''){
10386 NEXT;
10387 if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 standalone = 0;
10389 SKIP(2);
10390 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 (NXT(2) == 's')) {
10392 standalone = 1;
10393 SKIP(3);
10394 } else {
10395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10396 }
10397 if (RAW != '\'') {
10398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10399 } else
10400 NEXT;
10401 } else if (RAW == '"'){
10402 NEXT;
10403 if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 standalone = 0;
10405 SKIP(2);
10406 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 (NXT(2) == 's')) {
10408 standalone = 1;
10409 SKIP(3);
10410 } else {
10411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10412 }
10413 if (RAW != '"') {
10414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10415 } else
10416 NEXT;
10417 } else {
10418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10419 }
10420 }
10421 return(standalone);
10422}
10423
10424/**
10425 * xmlParseXMLDecl:
10426 * @ctxt: an XML parser context
10427 *
10428 * parse an XML declaration header
10429 *
10430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431 */
10432
10433void
10434xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435 xmlChar *version;
10436
10437 /*
10438 * This value for standalone indicates that the document has an
10439 * XML declaration but it does not have a standalone attribute.
10440 * It will be overwritten later if a standalone attribute is found.
10441 */
10442 ctxt->input->standalone = -2;
10443
10444 /*
10445 * We know that '<?xml' is here.
10446 */
10447 SKIP(5);
10448
10449 if (!IS_BLANK_CH(RAW)) {
10450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 "Blank needed after '<?xml'\n");
10452 }
10453 SKIP_BLANKS;
10454
10455 /*
10456 * We must have the VersionInfo here.
10457 */
10458 version = xmlParseVersionInfo(ctxt);
10459 if (version == NULL) {
10460 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10461 } else {
10462 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 /*
10464 * Changed here for XML-1.0 5th edition
10465 */
10466 if (ctxt->options & XML_PARSE_OLD10) {
10467 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 "Unsupported version '%s'\n",
10469 version);
10470 } else {
10471 if ((version[0] == '1') && ((version[1] == '.'))) {
10472 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 "Unsupported version '%s'\n",
10474 version, NULL);
10475 } else {
10476 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 "Unsupported version '%s'\n",
10478 version);
10479 }
10480 }
10481 }
10482 if (ctxt->version != NULL)
10483 xmlFree((void *) ctxt->version);
10484 ctxt->version = version;
10485 }
10486
10487 /*
10488 * We may have the encoding declaration
10489 */
10490 if (!IS_BLANK_CH(RAW)) {
10491 if ((RAW == '?') && (NXT(1) == '>')) {
10492 SKIP(2);
10493 return;
10494 }
10495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10496 }
10497 xmlParseEncodingDecl(ctxt);
10498 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499 (ctxt->instate == XML_PARSER_EOF)) {
10500 /*
10501 * The XML REC instructs us to stop parsing right here
10502 */
10503 return;
10504 }
10505
10506 /*
10507 * We may have the standalone status.
10508 */
10509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10510 if ((RAW == '?') && (NXT(1) == '>')) {
10511 SKIP(2);
10512 return;
10513 }
10514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10515 }
10516
10517 /*
10518 * We can grow the input buffer freely at that point
10519 */
10520 GROW;
10521
10522 SKIP_BLANKS;
10523 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524
10525 SKIP_BLANKS;
10526 if ((RAW == '?') && (NXT(1) == '>')) {
10527 SKIP(2);
10528 } else if (RAW == '>') {
10529 /* Deprecated old WD ... */
10530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10531 NEXT;
10532 } else {
10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534 MOVETO_ENDTAG(CUR_PTR);
10535 NEXT;
10536 }
10537}
10538
10539/**
10540 * xmlParseMisc:
10541 * @ctxt: an XML parser context
10542 *
10543 * parse an XML Misc* optional field.
10544 *
10545 * [27] Misc ::= Comment | PI | S
10546 */
10547
10548void
10549xmlParseMisc(xmlParserCtxtPtr ctxt) {
10550 while ((ctxt->instate != XML_PARSER_EOF) &&
10551 (((RAW == '<') && (NXT(1) == '?')) ||
10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553 IS_BLANK_CH(CUR))) {
10554 if ((RAW == '<') && (NXT(1) == '?')) {
10555 xmlParsePI(ctxt);
10556 } else if (IS_BLANK_CH(CUR)) {
10557 NEXT;
10558 } else
10559 xmlParseComment(ctxt);
10560 }
10561}
10562
10563/**
10564 * xmlParseDocument:
10565 * @ctxt: an XML parser context
10566 *
10567 * parse an XML document (and build a tree if using the standard SAX
10568 * interface).
10569 *
10570 * [1] document ::= prolog element Misc*
10571 *
10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573 *
10574 * Returns 0, -1 in case of error. the parser context is augmented
10575 * as a result of the parsing.
10576 */
10577
10578int
10579xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580 xmlChar start[4];
10581 xmlCharEncoding enc;
10582
10583 xmlInitParser();
10584
10585 if ((ctxt == NULL) || (ctxt->input == NULL))
10586 return(-1);
10587
10588 GROW;
10589
10590 /*
10591 * SAX: detecting the level.
10592 */
10593 xmlDetectSAX2(ctxt);
10594
10595 /*
10596 * SAX: beginning of the document processing.
10597 */
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10600 if (ctxt->instate == XML_PARSER_EOF)
10601 return(-1);
10602
10603 if ((ctxt->encoding == NULL) &&
10604 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10605 /*
10606 * Get the 4 first bytes and decode the charset
10607 * if enc != XML_CHAR_ENCODING_NONE
10608 * plug some encoding conversion routines.
10609 */
10610 start[0] = RAW;
10611 start[1] = NXT(1);
10612 start[2] = NXT(2);
10613 start[3] = NXT(3);
10614 enc = xmlDetectCharEncoding(&start[0], 4);
10615 if (enc != XML_CHAR_ENCODING_NONE) {
10616 xmlSwitchEncoding(ctxt, enc);
10617 }
10618 }
10619
10620
10621 if (CUR == 0) {
10622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10623 return(-1);
10624 }
10625
10626 /*
10627 * Check for the XMLDecl in the Prolog.
10628 * do not GROW here to avoid the detected encoder to decode more
10629 * than just the first line, unless the amount of data is really
10630 * too small to hold "<?xml version="1.0" encoding="foo"
10631 */
10632 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633 GROW;
10634 }
10635 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10636
10637 /*
10638 * Note that we will switch encoding on the fly.
10639 */
10640 xmlParseXMLDecl(ctxt);
10641 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 (ctxt->instate == XML_PARSER_EOF)) {
10643 /*
10644 * The XML REC instructs us to stop parsing right here
10645 */
10646 return(-1);
10647 }
10648 ctxt->standalone = ctxt->input->standalone;
10649 SKIP_BLANKS;
10650 } else {
10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652 }
10653 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654 ctxt->sax->startDocument(ctxt->userData);
10655 if (ctxt->instate == XML_PARSER_EOF)
10656 return(-1);
10657 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660 }
10661
10662 /*
10663 * The Misc part of the Prolog
10664 */
10665 GROW;
10666 xmlParseMisc(ctxt);
10667
10668 /*
10669 * Then possibly doc type declaration(s) and more Misc
10670 * (doctypedecl Misc*)?
10671 */
10672 GROW;
10673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10674
10675 ctxt->inSubset = 1;
10676 xmlParseDocTypeDecl(ctxt);
10677 if (RAW == '[') {
10678 ctxt->instate = XML_PARSER_DTD;
10679 xmlParseInternalSubset(ctxt);
10680 if (ctxt->instate == XML_PARSER_EOF)
10681 return(-1);
10682 }
10683
10684 /*
10685 * Create and update the external subset.
10686 */
10687 ctxt->inSubset = 2;
10688 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 (!ctxt->disableSAX))
10690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 ctxt->extSubSystem, ctxt->extSubURI);
10692 if (ctxt->instate == XML_PARSER_EOF)
10693 return(-1);
10694 ctxt->inSubset = 0;
10695
10696 xmlCleanSpecialAttr(ctxt);
10697
10698 ctxt->instate = XML_PARSER_PROLOG;
10699 xmlParseMisc(ctxt);
10700 }
10701
10702 /*
10703 * Time to start parsing the tree itself
10704 */
10705 GROW;
10706 if (RAW != '<') {
10707 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 "Start tag expected, '<' not found\n");
10709 } else {
10710 ctxt->instate = XML_PARSER_CONTENT;
10711 xmlParseElement(ctxt);
10712 ctxt->instate = XML_PARSER_EPILOG;
10713
10714
10715 /*
10716 * The Misc part at the end
10717 */
10718 xmlParseMisc(ctxt);
10719
10720 if (RAW != 0) {
10721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10722 }
10723 ctxt->instate = XML_PARSER_EOF;
10724 }
10725
10726 /*
10727 * SAX: end of the document processing.
10728 */
10729 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10730 ctxt->sax->endDocument(ctxt->userData);
10731
10732 /*
10733 * Remove locally kept entity definitions if the tree was not built
10734 */
10735 if ((ctxt->myDoc != NULL) &&
10736 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 xmlFreeDoc(ctxt->myDoc);
10738 ctxt->myDoc = NULL;
10739 }
10740
10741 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 if (ctxt->valid)
10744 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 if (ctxt->nsWellFormed)
10746 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 if (ctxt->options & XML_PARSE_OLD10)
10748 ctxt->myDoc->properties |= XML_DOC_OLD10;
10749 }
10750 if (! ctxt->wellFormed) {
10751 ctxt->valid = 0;
10752 return(-1);
10753 }
10754 return(0);
10755}
10756
10757/**
10758 * xmlParseExtParsedEnt:
10759 * @ctxt: an XML parser context
10760 *
10761 * parse a general parsed entity
10762 * An external general parsed entity is well-formed if it matches the
10763 * production labeled extParsedEnt.
10764 *
10765 * [78] extParsedEnt ::= TextDecl? content
10766 *
10767 * Returns 0, -1 in case of error. the parser context is augmented
10768 * as a result of the parsing.
10769 */
10770
10771int
10772xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773 xmlChar start[4];
10774 xmlCharEncoding enc;
10775
10776 if ((ctxt == NULL) || (ctxt->input == NULL))
10777 return(-1);
10778
10779 xmlDefaultSAXHandlerInit();
10780
10781 xmlDetectSAX2(ctxt);
10782
10783 GROW;
10784
10785 /*
10786 * SAX: beginning of the document processing.
10787 */
10788 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790
10791 /*
10792 * Get the 4 first bytes and decode the charset
10793 * if enc != XML_CHAR_ENCODING_NONE
10794 * plug some encoding conversion routines.
10795 */
10796 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 start[0] = RAW;
10798 start[1] = NXT(1);
10799 start[2] = NXT(2);
10800 start[3] = NXT(3);
10801 enc = xmlDetectCharEncoding(start, 4);
10802 if (enc != XML_CHAR_ENCODING_NONE) {
10803 xmlSwitchEncoding(ctxt, enc);
10804 }
10805 }
10806
10807
10808 if (CUR == 0) {
10809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10810 }
10811
10812 /*
10813 * Check for the XMLDecl in the Prolog.
10814 */
10815 GROW;
10816 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10817
10818 /*
10819 * Note that we will switch encoding on the fly.
10820 */
10821 xmlParseXMLDecl(ctxt);
10822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 /*
10824 * The XML REC instructs us to stop parsing right here
10825 */
10826 return(-1);
10827 }
10828 SKIP_BLANKS;
10829 } else {
10830 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831 }
10832 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833 ctxt->sax->startDocument(ctxt->userData);
10834 if (ctxt->instate == XML_PARSER_EOF)
10835 return(-1);
10836
10837 /*
10838 * Doing validity checking on chunk doesn't make sense
10839 */
10840 ctxt->instate = XML_PARSER_CONTENT;
10841 ctxt->validate = 0;
10842 ctxt->loadsubset = 0;
10843 ctxt->depth = 0;
10844
10845 xmlParseContent(ctxt);
10846 if (ctxt->instate == XML_PARSER_EOF)
10847 return(-1);
10848
10849 if ((RAW == '<') && (NXT(1) == '/')) {
10850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10851 } else if (RAW != 0) {
10852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10853 }
10854
10855 /*
10856 * SAX: end of the document processing.
10857 */
10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 ctxt->sax->endDocument(ctxt->userData);
10860
10861 if (! ctxt->wellFormed) return(-1);
10862 return(0);
10863}
10864
10865#ifdef LIBXML_PUSH_ENABLED
10866/************************************************************************
10867 * *
10868 * Progressive parsing interfaces *
10869 * *
10870 ************************************************************************/
10871
10872/**
10873 * xmlParseLookupSequence:
10874 * @ctxt: an XML parser context
10875 * @first: the first char to lookup
10876 * @next: the next char to lookup or zero
10877 * @third: the next char to lookup or zero
10878 *
10879 * Try to find if a sequence (first, next, third) or just (first next) or
10880 * (first) is available in the input stream.
10881 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882 * to avoid rescanning sequences of bytes, it DOES change the state of the
10883 * parser, do not use liberally.
10884 *
10885 * Returns the index to the current parsing point if the full sequence
10886 * is available, -1 otherwise.
10887 */
10888static int
10889xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890 xmlChar next, xmlChar third) {
10891 int base, len;
10892 xmlParserInputPtr in;
10893 const xmlChar *buf;
10894
10895 in = ctxt->input;
10896 if (in == NULL) return(-1);
10897 base = in->cur - in->base;
10898 if (base < 0) return(-1);
10899 if (ctxt->checkIndex > base)
10900 base = ctxt->checkIndex;
10901 if (in->buf == NULL) {
10902 buf = in->base;
10903 len = in->length;
10904 } else {
10905 buf = xmlBufContent(in->buf->buffer);
10906 len = xmlBufUse(in->buf->buffer);
10907 }
10908 /* take into account the sequence length */
10909 if (third) len -= 2;
10910 else if (next) len --;
10911 for (;base < len;base++) {
10912 if (buf[base] == first) {
10913 if (third != 0) {
10914 if ((buf[base + 1] != next) ||
10915 (buf[base + 2] != third)) continue;
10916 } else if (next != 0) {
10917 if (buf[base + 1] != next) continue;
10918 }
10919 ctxt->checkIndex = 0;
10920#ifdef DEBUG_PUSH
10921 if (next == 0)
10922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c' found at %d\n",
10924 first, base);
10925 else if (third == 0)
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: lookup '%c%c' found at %d\n",
10928 first, next, base);
10929 else
10930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: lookup '%c%c%c' found at %d\n",
10932 first, next, third, base);
10933#endif
10934 return(base - (in->cur - in->base));
10935 }
10936 }
10937 ctxt->checkIndex = base;
10938#ifdef DEBUG_PUSH
10939 if (next == 0)
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: lookup '%c' failed\n", first);
10942 else if (third == 0)
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c%c' failed\n", first, next);
10945 else
10946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c%c' failed\n", first, next, third);
10948#endif
10949 return(-1);
10950}
10951
10952/**
10953 * xmlParseGetLasts:
10954 * @ctxt: an XML parser context
10955 * @lastlt: pointer to store the last '<' from the input
10956 * @lastgt: pointer to store the last '>' from the input
10957 *
10958 * Lookup the last < and > in the current chunk
10959 */
10960static void
10961xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962 const xmlChar **lastgt) {
10963 const xmlChar *tmp;
10964
10965 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 xmlGenericError(xmlGenericErrorContext,
10967 "Internal error: xmlParseGetLasts\n");
10968 return;
10969 }
10970 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10971 tmp = ctxt->input->end;
10972 tmp--;
10973 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10974 if (tmp < ctxt->input->base) {
10975 *lastlt = NULL;
10976 *lastgt = NULL;
10977 } else {
10978 *lastlt = tmp;
10979 tmp++;
10980 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 if (*tmp == '\'') {
10982 tmp++;
10983 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 if (tmp < ctxt->input->end) tmp++;
10985 } else if (*tmp == '"') {
10986 tmp++;
10987 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 if (tmp < ctxt->input->end) tmp++;
10989 } else
10990 tmp++;
10991 }
10992 if (tmp < ctxt->input->end)
10993 *lastgt = tmp;
10994 else {
10995 tmp = *lastlt;
10996 tmp--;
10997 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 if (tmp >= ctxt->input->base)
10999 *lastgt = tmp;
11000 else
11001 *lastgt = NULL;
11002 }
11003 }
11004 } else {
11005 *lastlt = NULL;
11006 *lastgt = NULL;
11007 }
11008}
11009/**
11010 * xmlCheckCdataPush:
11011 * @cur: pointer to the block of characters
11012 * @len: length of the block in bytes
11013 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11014 *
11015 * Check that the block of characters is okay as SCdata content [20]
11016 *
11017 * Returns the number of bytes to pass if okay, a negative index where an
11018 * UTF-8 error occurred otherwise
11019 */
11020static int
11021xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11022 int ix;
11023 unsigned char c;
11024 int codepoint;
11025
11026 if ((utf == NULL) || (len <= 0))
11027 return(0);
11028
11029 for (ix = 0; ix < len;) { /* string is 0-terminated */
11030 c = utf[ix];
11031 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11032 if (c >= 0x20)
11033 ix++;
11034 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 ix++;
11036 else
11037 return(-ix);
11038 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11039 if (ix + 2 > len) return(complete ? -ix : ix);
11040 if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 return(-ix);
11042 codepoint = (utf[ix] & 0x1f) << 6;
11043 codepoint |= utf[ix+1] & 0x3f;
11044 if (!xmlIsCharQ(codepoint))
11045 return(-ix);
11046 ix += 2;
11047 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11048 if (ix + 3 > len) return(complete ? -ix : ix);
11049 if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 ((utf[ix+2] & 0xc0) != 0x80))
11051 return(-ix);
11052 codepoint = (utf[ix] & 0xf) << 12;
11053 codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 codepoint |= utf[ix+2] & 0x3f;
11055 if (!xmlIsCharQ(codepoint))
11056 return(-ix);
11057 ix += 3;
11058 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11059 if (ix + 4 > len) return(complete ? -ix : ix);
11060 if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 ((utf[ix+2] & 0xc0) != 0x80) ||
11062 ((utf[ix+3] & 0xc0) != 0x80))
11063 return(-ix);
11064 codepoint = (utf[ix] & 0x7) << 18;
11065 codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 codepoint |= utf[ix+3] & 0x3f;
11068 if (!xmlIsCharQ(codepoint))
11069 return(-ix);
11070 ix += 4;
11071 } else /* unknown encoding */
11072 return(-ix);
11073 }
11074 return(ix);
11075}
11076
11077/**
11078 * xmlParseTryOrFinish:
11079 * @ctxt: an XML parser context
11080 * @terminate: last chunk indicator
11081 *
11082 * Try to progress on parsing
11083 *
11084 * Returns zero if no parsing was possible
11085 */
11086static int
11087xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088 int ret = 0;
11089 int avail, tlen;
11090 xmlChar cur, next;
11091 const xmlChar *lastlt, *lastgt;
11092
11093 if (ctxt->input == NULL)
11094 return(0);
11095
11096#ifdef DEBUG_PUSH
11097 switch (ctxt->instate) {
11098 case XML_PARSER_EOF:
11099 xmlGenericError(xmlGenericErrorContext,
11100 "PP: try EOF\n"); break;
11101 case XML_PARSER_START:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try START\n"); break;
11104 case XML_PARSER_MISC:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try MISC\n");break;
11107 case XML_PARSER_COMMENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try COMMENT\n");break;
11110 case XML_PARSER_PROLOG:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try PROLOG\n");break;
11113 case XML_PARSER_START_TAG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try START_TAG\n");break;
11116 case XML_PARSER_CONTENT:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try CONTENT\n");break;
11119 case XML_PARSER_CDATA_SECTION:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CDATA_SECTION\n");break;
11122 case XML_PARSER_END_TAG:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try END_TAG\n");break;
11125 case XML_PARSER_ENTITY_DECL:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try ENTITY_DECL\n");break;
11128 case XML_PARSER_ENTITY_VALUE:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_VALUE\n");break;
11131 case XML_PARSER_ATTRIBUTE_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ATTRIBUTE_VALUE\n");break;
11134 case XML_PARSER_DTD:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try DTD\n");break;
11137 case XML_PARSER_EPILOG:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try EPILOG\n");break;
11140 case XML_PARSER_PI:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try PI\n");break;
11143 case XML_PARSER_IGNORE:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try IGNORE\n");break;
11146 }
11147#endif
11148
11149 if ((ctxt->input != NULL) &&
11150 (ctxt->input->cur - ctxt->input->base > 4096)) {
11151 xmlSHRINK(ctxt);
11152 ctxt->checkIndex = 0;
11153 }
11154 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11155
11156 while (ctxt->instate != XML_PARSER_EOF) {
11157 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11158 return(0);
11159
11160 if (ctxt->input == NULL) break;
11161 if (ctxt->input->buf == NULL)
11162 avail = ctxt->input->length -
11163 (ctxt->input->cur - ctxt->input->base);
11164 else {
11165 /*
11166 * If we are operating on converted input, try to flush
11167 * remainng chars to avoid them stalling in the non-converted
11168 * buffer. But do not do this in document start where
11169 * encoding="..." may not have been read and we work on a
11170 * guessed encoding.
11171 */
11172 if ((ctxt->instate != XML_PARSER_START) &&
11173 (ctxt->input->buf->raw != NULL) &&
11174 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11175 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176 ctxt->input);
11177 size_t current = ctxt->input->cur - ctxt->input->base;
11178
11179 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181 base, current);
11182 }
11183 avail = xmlBufUse(ctxt->input->buf->buffer) -
11184 (ctxt->input->cur - ctxt->input->base);
11185 }
11186 if (avail < 1)
11187 goto done;
11188 switch (ctxt->instate) {
11189 case XML_PARSER_EOF:
11190 /*
11191 * Document parsing is done !
11192 */
11193 goto done;
11194 case XML_PARSER_START:
11195 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 xmlChar start[4];
11197 xmlCharEncoding enc;
11198
11199 /*
11200 * Very first chars read from the document flow.
11201 */
11202 if (avail < 4)
11203 goto done;
11204
11205 /*
11206 * Get the 4 first bytes and decode the charset
11207 * if enc != XML_CHAR_ENCODING_NONE
11208 * plug some encoding conversion routines,
11209 * else xmlSwitchEncoding will set to (default)
11210 * UTF8.
11211 */
11212 start[0] = RAW;
11213 start[1] = NXT(1);
11214 start[2] = NXT(2);
11215 start[3] = NXT(3);
11216 enc = xmlDetectCharEncoding(start, 4);
11217 xmlSwitchEncoding(ctxt, enc);
11218 break;
11219 }
11220
11221 if (avail < 2)
11222 goto done;
11223 cur = ctxt->input->cur[0];
11224 next = ctxt->input->cur[1];
11225 if (cur == 0) {
11226 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 ctxt->sax->setDocumentLocator(ctxt->userData,
11228 &xmlDefaultSAXLocator);
11229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230 xmlHaltParser(ctxt);
11231#ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering EOF\n");
11234#endif
11235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 ctxt->sax->endDocument(ctxt->userData);
11237 goto done;
11238 }
11239 if ((cur == '<') && (next == '?')) {
11240 /* PI or XML decl */
11241 if (avail < 5) return(ret);
11242 if ((!terminate) &&
11243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 return(ret);
11245 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 ctxt->sax->setDocumentLocator(ctxt->userData,
11247 &xmlDefaultSAXLocator);
11248 if ((ctxt->input->cur[2] == 'x') &&
11249 (ctxt->input->cur[3] == 'm') &&
11250 (ctxt->input->cur[4] == 'l') &&
11251 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11252 ret += 5;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing XML Decl\n");
11256#endif
11257 xmlParseXMLDecl(ctxt);
11258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 /*
11260 * The XML REC instructs us to stop parsing right
11261 * here
11262 */
11263 xmlHaltParser(ctxt);
11264 return(0);
11265 }
11266 ctxt->standalone = ctxt->input->standalone;
11267 if ((ctxt->encoding == NULL) &&
11268 (ctxt->input->encoding != NULL))
11269 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 (!ctxt->disableSAX))
11272 ctxt->sax->startDocument(ctxt->userData);
11273 ctxt->instate = XML_PARSER_MISC;
11274#ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: entering MISC\n");
11277#endif
11278 } else {
11279 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 (!ctxt->disableSAX))
11282 ctxt->sax->startDocument(ctxt->userData);
11283 ctxt->instate = XML_PARSER_MISC;
11284#ifdef DEBUG_PUSH
11285 xmlGenericError(xmlGenericErrorContext,
11286 "PP: entering MISC\n");
11287#endif
11288 }
11289 } else {
11290 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 ctxt->sax->setDocumentLocator(ctxt->userData,
11292 &xmlDefaultSAXLocator);
11293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294 if (ctxt->version == NULL) {
11295 xmlErrMemory(ctxt, NULL);
11296 break;
11297 }
11298 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 (!ctxt->disableSAX))
11300 ctxt->sax->startDocument(ctxt->userData);
11301 ctxt->instate = XML_PARSER_MISC;
11302#ifdef DEBUG_PUSH
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: entering MISC\n");
11305#endif
11306 }
11307 break;
11308 case XML_PARSER_START_TAG: {
11309 const xmlChar *name;
11310 const xmlChar *prefix = NULL;
11311 const xmlChar *URI = NULL;
11312 int nsNr = ctxt->nsNr;
11313
11314 if ((avail < 2) && (ctxt->inputNr == 1))
11315 goto done;
11316 cur = ctxt->input->cur[0];
11317 if (cur != '<') {
11318 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11319 xmlHaltParser(ctxt);
11320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 ctxt->sax->endDocument(ctxt->userData);
11322 goto done;
11323 }
11324 if (!terminate) {
11325 if (ctxt->progressive) {
11326 /* > can be found unescaped in attribute values */
11327 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11328 goto done;
11329 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 goto done;
11331 }
11332 }
11333 if (ctxt->spaceNr == 0)
11334 spacePush(ctxt, -1);
11335 else if (*ctxt->space == -2)
11336 spacePush(ctxt, -1);
11337 else
11338 spacePush(ctxt, *ctxt->space);
11339#ifdef LIBXML_SAX1_ENABLED
11340 if (ctxt->sax2)
11341#endif /* LIBXML_SAX1_ENABLED */
11342 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11343#ifdef LIBXML_SAX1_ENABLED
11344 else
11345 name = xmlParseStartTag(ctxt);
11346#endif /* LIBXML_SAX1_ENABLED */
11347 if (ctxt->instate == XML_PARSER_EOF)
11348 goto done;
11349 if (name == NULL) {
11350 spacePop(ctxt);
11351 xmlHaltParser(ctxt);
11352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 ctxt->sax->endDocument(ctxt->userData);
11354 goto done;
11355 }
11356#ifdef LIBXML_VALID_ENABLED
11357 /*
11358 * [ VC: Root Element Type ]
11359 * The Name in the document type declaration must match
11360 * the element type of the root element.
11361 */
11362 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11365#endif /* LIBXML_VALID_ENABLED */
11366
11367 /*
11368 * Check for an Empty Element.
11369 */
11370 if ((RAW == '/') && (NXT(1) == '>')) {
11371 SKIP(2);
11372
11373 if (ctxt->sax2) {
11374 if ((ctxt->sax != NULL) &&
11375 (ctxt->sax->endElementNs != NULL) &&
11376 (!ctxt->disableSAX))
11377 ctxt->sax->endElementNs(ctxt->userData, name,
11378 prefix, URI);
11379 if (ctxt->nsNr - nsNr > 0)
11380 nsPop(ctxt, ctxt->nsNr - nsNr);
11381#ifdef LIBXML_SAX1_ENABLED
11382 } else {
11383 if ((ctxt->sax != NULL) &&
11384 (ctxt->sax->endElement != NULL) &&
11385 (!ctxt->disableSAX))
11386 ctxt->sax->endElement(ctxt->userData, name);
11387#endif /* LIBXML_SAX1_ENABLED */
11388 }
11389 if (ctxt->instate == XML_PARSER_EOF)
11390 goto done;
11391 spacePop(ctxt);
11392 if (ctxt->nameNr == 0) {
11393 ctxt->instate = XML_PARSER_EPILOG;
11394 } else {
11395 ctxt->instate = XML_PARSER_CONTENT;
11396 }
11397 ctxt->progressive = 1;
11398 break;
11399 }
11400 if (RAW == '>') {
11401 NEXT;
11402 } else {
11403 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11404 "Couldn't find end of Start Tag %s\n",
11405 name);
11406 nodePop(ctxt);
11407 spacePop(ctxt);
11408 }
11409 if (ctxt->sax2)
11410 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11411#ifdef LIBXML_SAX1_ENABLED
11412 else
11413 namePush(ctxt, name);
11414#endif /* LIBXML_SAX1_ENABLED */
11415
11416 ctxt->instate = XML_PARSER_CONTENT;
11417 ctxt->progressive = 1;
11418 break;
11419 }
11420 case XML_PARSER_CONTENT: {
11421 const xmlChar *test;
11422 unsigned int cons;
11423 if ((avail < 2) && (ctxt->inputNr == 1))
11424 goto done;
11425 cur = ctxt->input->cur[0];
11426 next = ctxt->input->cur[1];
11427
11428 test = CUR_PTR;
11429 cons = ctxt->input->consumed;
11430 if ((cur == '<') && (next == '/')) {
11431 ctxt->instate = XML_PARSER_END_TAG;
11432 break;
11433 } else if ((cur == '<') && (next == '?')) {
11434 if ((!terminate) &&
11435 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436 ctxt->progressive = XML_PARSER_PI;
11437 goto done;
11438 }
11439 xmlParsePI(ctxt);
11440 ctxt->instate = XML_PARSER_CONTENT;
11441 ctxt->progressive = 1;
11442 } else if ((cur == '<') && (next != '!')) {
11443 ctxt->instate = XML_PARSER_START_TAG;
11444 break;
11445 } else if ((cur == '<') && (next == '!') &&
11446 (ctxt->input->cur[2] == '-') &&
11447 (ctxt->input->cur[3] == '-')) {
11448 int term;
11449
11450 if (avail < 4)
11451 goto done;
11452 ctxt->input->cur += 4;
11453 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 ctxt->input->cur -= 4;
11455 if ((!terminate) && (term < 0)) {
11456 ctxt->progressive = XML_PARSER_COMMENT;
11457 goto done;
11458 }
11459 xmlParseComment(ctxt);
11460 ctxt->instate = XML_PARSER_CONTENT;
11461 ctxt->progressive = 1;
11462 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 (ctxt->input->cur[2] == '[') &&
11464 (ctxt->input->cur[3] == 'C') &&
11465 (ctxt->input->cur[4] == 'D') &&
11466 (ctxt->input->cur[5] == 'A') &&
11467 (ctxt->input->cur[6] == 'T') &&
11468 (ctxt->input->cur[7] == 'A') &&
11469 (ctxt->input->cur[8] == '[')) {
11470 SKIP(9);
11471 ctxt->instate = XML_PARSER_CDATA_SECTION;
11472 break;
11473 } else if ((cur == '<') && (next == '!') &&
11474 (avail < 9)) {
11475 goto done;
11476 } else if (cur == '&') {
11477 if ((!terminate) &&
11478 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 goto done;
11480 xmlParseReference(ctxt);
11481 } else {
11482 /* TODO Avoid the extra copy, handle directly !!! */
11483 /*
11484 * Goal of the following test is:
11485 * - minimize calls to the SAX 'character' callback
11486 * when they are mergeable
11487 * - handle an problem for isBlank when we only parse
11488 * a sequence of blank chars and the next one is
11489 * not available to check against '<' presence.
11490 * - tries to homogenize the differences in SAX
11491 * callbacks between the push and pull versions
11492 * of the parser.
11493 */
11494 if ((ctxt->inputNr == 1) &&
11495 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 if (!terminate) {
11497 if (ctxt->progressive) {
11498 if ((lastlt == NULL) ||
11499 (ctxt->input->cur > lastlt))
11500 goto done;
11501 } else if (xmlParseLookupSequence(ctxt,
11502 '<', 0, 0) < 0) {
11503 goto done;
11504 }
11505 }
11506 }
11507 ctxt->checkIndex = 0;
11508 xmlParseCharData(ctxt, 0);
11509 }
11510 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 "detected an error in element content\n");
11513 xmlHaltParser(ctxt);
11514 break;
11515 }
11516 break;
11517 }
11518 case XML_PARSER_END_TAG:
11519 if (avail < 2)
11520 goto done;
11521 if (!terminate) {
11522 if (ctxt->progressive) {
11523 /* > can be found unescaped in attribute values */
11524 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11525 goto done;
11526 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 goto done;
11528 }
11529 }
11530 if (ctxt->sax2) {
11531 xmlParseEndTag2(ctxt,
11532 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 (int) (ptrdiff_t)
11535 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11536 nameNsPop(ctxt);
11537 }
11538#ifdef LIBXML_SAX1_ENABLED
11539 else
11540 xmlParseEndTag1(ctxt, 0);
11541#endif /* LIBXML_SAX1_ENABLED */
11542 if (ctxt->instate == XML_PARSER_EOF) {
11543 /* Nothing */
11544 } else if (ctxt->nameNr == 0) {
11545 ctxt->instate = XML_PARSER_EPILOG;
11546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
11548 }
11549 break;
11550 case XML_PARSER_CDATA_SECTION: {
11551 /*
11552 * The Push mode need to have the SAX callback for
11553 * cdataBlock merge back contiguous callbacks.
11554 */
11555 int base;
11556
11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 if (base < 0) {
11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11560 int tmp;
11561
11562 tmp = xmlCheckCdataPush(ctxt->input->cur,
11563 XML_PARSER_BIG_BUFFER_SIZE, 0);
11564 if (tmp < 0) {
11565 tmp = -tmp;
11566 ctxt->input->cur += tmp;
11567 goto encoding_error;
11568 }
11569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 if (ctxt->sax->cdataBlock != NULL)
11571 ctxt->sax->cdataBlock(ctxt->userData,
11572 ctxt->input->cur, tmp);
11573 else if (ctxt->sax->characters != NULL)
11574 ctxt->sax->characters(ctxt->userData,
11575 ctxt->input->cur, tmp);
11576 }
11577 if (ctxt->instate == XML_PARSER_EOF)
11578 goto done;
11579 SKIPL(tmp);
11580 ctxt->checkIndex = 0;
11581 }
11582 goto done;
11583 } else {
11584 int tmp;
11585
11586 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11587 if ((tmp < 0) || (tmp != base)) {
11588 tmp = -tmp;
11589 ctxt->input->cur += tmp;
11590 goto encoding_error;
11591 }
11592 if ((ctxt->sax != NULL) && (base == 0) &&
11593 (ctxt->sax->cdataBlock != NULL) &&
11594 (!ctxt->disableSAX)) {
11595 /*
11596 * Special case to provide identical behaviour
11597 * between pull and push parsers on enpty CDATA
11598 * sections
11599 */
11600 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 (!strncmp((const char *)&ctxt->input->cur[-9],
11602 "<![CDATA[", 9)))
11603 ctxt->sax->cdataBlock(ctxt->userData,
11604 BAD_CAST "", 0);
11605 } else if ((ctxt->sax != NULL) && (base > 0) &&
11606 (!ctxt->disableSAX)) {
11607 if (ctxt->sax->cdataBlock != NULL)
11608 ctxt->sax->cdataBlock(ctxt->userData,
11609 ctxt->input->cur, base);
11610 else if (ctxt->sax->characters != NULL)
11611 ctxt->sax->characters(ctxt->userData,
11612 ctxt->input->cur, base);
11613 }
11614 if (ctxt->instate == XML_PARSER_EOF)
11615 goto done;
11616 SKIPL(base + 3);
11617 ctxt->checkIndex = 0;
11618 ctxt->instate = XML_PARSER_CONTENT;
11619#ifdef DEBUG_PUSH
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: entering CONTENT\n");
11622#endif
11623 }
11624 break;
11625 }
11626 case XML_PARSER_MISC:
11627 SKIP_BLANKS;
11628 if (ctxt->input->buf == NULL)
11629 avail = ctxt->input->length -
11630 (ctxt->input->cur - ctxt->input->base);
11631 else
11632 avail = xmlBufUse(ctxt->input->buf->buffer) -
11633 (ctxt->input->cur - ctxt->input->base);
11634 if (avail < 2)
11635 goto done;
11636 cur = ctxt->input->cur[0];
11637 next = ctxt->input->cur[1];
11638 if ((cur == '<') && (next == '?')) {
11639 if ((!terminate) &&
11640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641 ctxt->progressive = XML_PARSER_PI;
11642 goto done;
11643 }
11644#ifdef DEBUG_PUSH
11645 xmlGenericError(xmlGenericErrorContext,
11646 "PP: Parsing PI\n");
11647#endif
11648 xmlParsePI(ctxt);
11649 if (ctxt->instate == XML_PARSER_EOF)
11650 goto done;
11651 ctxt->instate = XML_PARSER_MISC;
11652 ctxt->progressive = 1;
11653 ctxt->checkIndex = 0;
11654 } else if ((cur == '<') && (next == '!') &&
11655 (ctxt->input->cur[2] == '-') &&
11656 (ctxt->input->cur[3] == '-')) {
11657 if ((!terminate) &&
11658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659 ctxt->progressive = XML_PARSER_COMMENT;
11660 goto done;
11661 }
11662#ifdef DEBUG_PUSH
11663 xmlGenericError(xmlGenericErrorContext,
11664 "PP: Parsing Comment\n");
11665#endif
11666 xmlParseComment(ctxt);
11667 if (ctxt->instate == XML_PARSER_EOF)
11668 goto done;
11669 ctxt->instate = XML_PARSER_MISC;
11670 ctxt->progressive = 1;
11671 ctxt->checkIndex = 0;
11672 } else if ((cur == '<') && (next == '!') &&
11673 (ctxt->input->cur[2] == 'D') &&
11674 (ctxt->input->cur[3] == 'O') &&
11675 (ctxt->input->cur[4] == 'C') &&
11676 (ctxt->input->cur[5] == 'T') &&
11677 (ctxt->input->cur[6] == 'Y') &&
11678 (ctxt->input->cur[7] == 'P') &&
11679 (ctxt->input->cur[8] == 'E')) {
11680 if ((!terminate) &&
11681 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682 ctxt->progressive = XML_PARSER_DTD;
11683 goto done;
11684 }
11685#ifdef DEBUG_PUSH
11686 xmlGenericError(xmlGenericErrorContext,
11687 "PP: Parsing internal subset\n");
11688#endif
11689 ctxt->inSubset = 1;
11690 ctxt->progressive = 0;
11691 ctxt->checkIndex = 0;
11692 xmlParseDocTypeDecl(ctxt);
11693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
11695 if (RAW == '[') {
11696 ctxt->instate = XML_PARSER_DTD;
11697#ifdef DEBUG_PUSH
11698 xmlGenericError(xmlGenericErrorContext,
11699 "PP: entering DTD\n");
11700#endif
11701 } else {
11702 /*
11703 * Create and update the external subset.
11704 */
11705 ctxt->inSubset = 2;
11706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 (ctxt->sax->externalSubset != NULL))
11708 ctxt->sax->externalSubset(ctxt->userData,
11709 ctxt->intSubName, ctxt->extSubSystem,
11710 ctxt->extSubURI);
11711 ctxt->inSubset = 0;
11712 xmlCleanSpecialAttr(ctxt);
11713 ctxt->instate = XML_PARSER_PROLOG;
11714#ifdef DEBUG_PUSH
11715 xmlGenericError(xmlGenericErrorContext,
11716 "PP: entering PROLOG\n");
11717#endif
11718 }
11719 } else if ((cur == '<') && (next == '!') &&
11720 (avail < 9)) {
11721 goto done;
11722 } else {
11723 ctxt->instate = XML_PARSER_START_TAG;
11724 ctxt->progressive = XML_PARSER_START_TAG;
11725 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11726#ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering START_TAG\n");
11729#endif
11730 }
11731 break;
11732 case XML_PARSER_PROLOG:
11733 SKIP_BLANKS;
11734 if (ctxt->input->buf == NULL)
11735 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 else
11737 avail = xmlBufUse(ctxt->input->buf->buffer) -
11738 (ctxt->input->cur - ctxt->input->base);
11739 if (avail < 2)
11740 goto done;
11741 cur = ctxt->input->cur[0];
11742 next = ctxt->input->cur[1];
11743 if ((cur == '<') && (next == '?')) {
11744 if ((!terminate) &&
11745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746 ctxt->progressive = XML_PARSER_PI;
11747 goto done;
11748 }
11749#ifdef DEBUG_PUSH
11750 xmlGenericError(xmlGenericErrorContext,
11751 "PP: Parsing PI\n");
11752#endif
11753 xmlParsePI(ctxt);
11754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
11756 ctxt->instate = XML_PARSER_PROLOG;
11757 ctxt->progressive = 1;
11758 } else if ((cur == '<') && (next == '!') &&
11759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 if ((!terminate) &&
11761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762 ctxt->progressive = XML_PARSER_COMMENT;
11763 goto done;
11764 }
11765#ifdef DEBUG_PUSH
11766 xmlGenericError(xmlGenericErrorContext,
11767 "PP: Parsing Comment\n");
11768#endif
11769 xmlParseComment(ctxt);
11770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
11772 ctxt->instate = XML_PARSER_PROLOG;
11773 ctxt->progressive = 1;
11774 } else if ((cur == '<') && (next == '!') &&
11775 (avail < 4)) {
11776 goto done;
11777 } else {
11778 ctxt->instate = XML_PARSER_START_TAG;
11779 if (ctxt->progressive == 0)
11780 ctxt->progressive = XML_PARSER_START_TAG;
11781 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11782#ifdef DEBUG_PUSH
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: entering START_TAG\n");
11785#endif
11786 }
11787 break;
11788 case XML_PARSER_EPILOG:
11789 SKIP_BLANKS;
11790 if (ctxt->input->buf == NULL)
11791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 else
11793 avail = xmlBufUse(ctxt->input->buf->buffer) -
11794 (ctxt->input->cur - ctxt->input->base);
11795 if (avail < 2)
11796 goto done;
11797 cur = ctxt->input->cur[0];
11798 next = ctxt->input->cur[1];
11799 if ((cur == '<') && (next == '?')) {
11800 if ((!terminate) &&
11801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802 ctxt->progressive = XML_PARSER_PI;
11803 goto done;
11804 }
11805#ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing PI\n");
11808#endif
11809 xmlParsePI(ctxt);
11810 if (ctxt->instate == XML_PARSER_EOF)
11811 goto done;
11812 ctxt->instate = XML_PARSER_EPILOG;
11813 ctxt->progressive = 1;
11814 } else if ((cur == '<') && (next == '!') &&
11815 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 if ((!terminate) &&
11817 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818 ctxt->progressive = XML_PARSER_COMMENT;
11819 goto done;
11820 }
11821#ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: Parsing Comment\n");
11824#endif
11825 xmlParseComment(ctxt);
11826 if (ctxt->instate == XML_PARSER_EOF)
11827 goto done;
11828 ctxt->instate = XML_PARSER_EPILOG;
11829 ctxt->progressive = 1;
11830 } else if ((cur == '<') && (next == '!') &&
11831 (avail < 4)) {
11832 goto done;
11833 } else {
11834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11835 xmlHaltParser(ctxt);
11836#ifdef DEBUG_PUSH
11837 xmlGenericError(xmlGenericErrorContext,
11838 "PP: entering EOF\n");
11839#endif
11840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11841 ctxt->sax->endDocument(ctxt->userData);
11842 goto done;
11843 }
11844 break;
11845 case XML_PARSER_DTD: {
11846 /*
11847 * Sorry but progressive parsing of the internal subset
11848 * is not expected to be supported. We first check that
11849 * the full content of the internal subset is available and
11850 * the parsing is launched only at that point.
11851 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 * section and not in a ']]>' sequence which are conditional
11853 * sections (whoever argued to keep that crap in XML deserve
11854 * a place in hell !).
11855 */
11856 int base, i;
11857 xmlChar *buf;
11858 xmlChar quote = 0;
11859 size_t use;
11860
11861 base = ctxt->input->cur - ctxt->input->base;
11862 if (base < 0) return(0);
11863 if (ctxt->checkIndex > base)
11864 base = ctxt->checkIndex;
11865 buf = xmlBufContent(ctxt->input->buf->buffer);
11866 use = xmlBufUse(ctxt->input->buf->buffer);
11867 for (;(unsigned int) base < use; base++) {
11868 if (quote != 0) {
11869 if (buf[base] == quote)
11870 quote = 0;
11871 continue;
11872 }
11873 if ((quote == 0) && (buf[base] == '<')) {
11874 int found = 0;
11875 /* special handling of comments */
11876 if (((unsigned int) base + 4 < use) &&
11877 (buf[base + 1] == '!') &&
11878 (buf[base + 2] == '-') &&
11879 (buf[base + 3] == '-')) {
11880 for (;(unsigned int) base + 3 < use; base++) {
11881 if ((buf[base] == '-') &&
11882 (buf[base + 1] == '-') &&
11883 (buf[base + 2] == '>')) {
11884 found = 1;
11885 base += 2;
11886 break;
11887 }
11888 }
11889 if (!found) {
11890#if 0
11891 fprintf(stderr, "unfinished comment\n");
11892#endif
11893 break; /* for */
11894 }
11895 continue;
11896 }
11897 }
11898 if (buf[base] == '"') {
11899 quote = '"';
11900 continue;
11901 }
11902 if (buf[base] == '\'') {
11903 quote = '\'';
11904 continue;
11905 }
11906 if (buf[base] == ']') {
11907#if 0
11908 fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 buf[base + 1], buf[base + 2], buf[base + 3]);
11910#endif
11911 if ((unsigned int) base +1 >= use)
11912 break;
11913 if (buf[base + 1] == ']') {
11914 /* conditional crap, skip both ']' ! */
11915 base++;
11916 continue;
11917 }
11918 for (i = 1; (unsigned int) base + i < use; i++) {
11919 if (buf[base + i] == '>') {
11920#if 0
11921 fprintf(stderr, "found\n");
11922#endif
11923 goto found_end_int_subset;
11924 }
11925 if (!IS_BLANK_CH(buf[base + i])) {
11926#if 0
11927 fprintf(stderr, "not found\n");
11928#endif
11929 goto not_end_of_int_subset;
11930 }
11931 }
11932#if 0
11933 fprintf(stderr, "end of stream\n");
11934#endif
11935 break;
11936
11937 }
11938not_end_of_int_subset:
11939 continue; /* for */
11940 }
11941 /*
11942 * We didn't found the end of the Internal subset
11943 */
11944 if (quote == 0)
11945 ctxt->checkIndex = base;
11946 else
11947 ctxt->checkIndex = 0;
11948#ifdef DEBUG_PUSH
11949 if (next == 0)
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: lookup of int subset end filed\n");
11952#endif
11953 goto done;
11954
11955found_end_int_subset:
11956 ctxt->checkIndex = 0;
11957 xmlParseInternalSubset(ctxt);
11958 if (ctxt->instate == XML_PARSER_EOF)
11959 goto done;
11960 ctxt->inSubset = 2;
11961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 (ctxt->sax->externalSubset != NULL))
11963 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 ctxt->extSubSystem, ctxt->extSubURI);
11965 ctxt->inSubset = 0;
11966 xmlCleanSpecialAttr(ctxt);
11967 if (ctxt->instate == XML_PARSER_EOF)
11968 goto done;
11969 ctxt->instate = XML_PARSER_PROLOG;
11970 ctxt->checkIndex = 0;
11971#ifdef DEBUG_PUSH
11972 xmlGenericError(xmlGenericErrorContext,
11973 "PP: entering PROLOG\n");
11974#endif
11975 break;
11976 }
11977 case XML_PARSER_COMMENT:
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: internal error, state == COMMENT\n");
11980 ctxt->instate = XML_PARSER_CONTENT;
11981#ifdef DEBUG_PUSH
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: entering CONTENT\n");
11984#endif
11985 break;
11986 case XML_PARSER_IGNORE:
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: internal error, state == IGNORE");
11989 ctxt->instate = XML_PARSER_DTD;
11990#ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering DTD\n");
11993#endif
11994 break;
11995 case XML_PARSER_PI:
11996 xmlGenericError(xmlGenericErrorContext,
11997 "PP: internal error, state == PI\n");
11998 ctxt->instate = XML_PARSER_CONTENT;
11999#ifdef DEBUG_PUSH
12000 xmlGenericError(xmlGenericErrorContext,
12001 "PP: entering CONTENT\n");
12002#endif
12003 break;
12004 case XML_PARSER_ENTITY_DECL:
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: internal error, state == ENTITY_DECL\n");
12007 ctxt->instate = XML_PARSER_DTD;
12008#ifdef DEBUG_PUSH
12009 xmlGenericError(xmlGenericErrorContext,
12010 "PP: entering DTD\n");
12011#endif
12012 break;
12013 case XML_PARSER_ENTITY_VALUE:
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: internal error, state == ENTITY_VALUE\n");
12016 ctxt->instate = XML_PARSER_CONTENT;
12017#ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: entering DTD\n");
12020#endif
12021 break;
12022 case XML_PARSER_ATTRIBUTE_VALUE:
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 ctxt->instate = XML_PARSER_START_TAG;
12026#ifdef DEBUG_PUSH
12027 xmlGenericError(xmlGenericErrorContext,
12028 "PP: entering START_TAG\n");
12029#endif
12030 break;
12031 case XML_PARSER_SYSTEM_LITERAL:
12032 xmlGenericError(xmlGenericErrorContext,
12033 "PP: internal error, state == SYSTEM_LITERAL\n");
12034 ctxt->instate = XML_PARSER_START_TAG;
12035#ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038#endif
12039 break;
12040 case XML_PARSER_PUBLIC_LITERAL:
12041 xmlGenericError(xmlGenericErrorContext,
12042 "PP: internal error, state == PUBLIC_LITERAL\n");
12043 ctxt->instate = XML_PARSER_START_TAG;
12044#ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: entering START_TAG\n");
12047#endif
12048 break;
12049 }
12050 }
12051done:
12052#ifdef DEBUG_PUSH
12053 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054#endif
12055 return(ret);
12056encoding_error:
12057 {
12058 char buffer[150];
12059
12060 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 ctxt->input->cur[0], ctxt->input->cur[1],
12062 ctxt->input->cur[2], ctxt->input->cur[3]);
12063 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 "Input is not proper UTF-8, indicate encoding !\n%s",
12065 BAD_CAST buffer, NULL);
12066 }
12067 return(0);
12068}
12069
12070/**
12071 * xmlParseCheckTransition:
12072 * @ctxt: an XML parser context
12073 * @chunk: a char array
12074 * @size: the size in byte of the chunk
12075 *
12076 * Check depending on the current parser state if the chunk given must be
12077 * processed immediately or one need more data to advance on parsing.
12078 *
12079 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080 */
12081static int
12082xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084 return(-1);
12085 if (ctxt->instate == XML_PARSER_START_TAG) {
12086 if (memchr(chunk, '>', size) != NULL)
12087 return(1);
12088 return(0);
12089 }
12090 if (ctxt->progressive == XML_PARSER_COMMENT) {
12091 if (memchr(chunk, '>', size) != NULL)
12092 return(1);
12093 return(0);
12094 }
12095 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096 if (memchr(chunk, '>', size) != NULL)
12097 return(1);
12098 return(0);
12099 }
12100 if (ctxt->progressive == XML_PARSER_PI) {
12101 if (memchr(chunk, '>', size) != NULL)
12102 return(1);
12103 return(0);
12104 }
12105 if (ctxt->instate == XML_PARSER_END_TAG) {
12106 if (memchr(chunk, '>', size) != NULL)
12107 return(1);
12108 return(0);
12109 }
12110 if ((ctxt->progressive == XML_PARSER_DTD) ||
12111 (ctxt->instate == XML_PARSER_DTD)) {
12112 if (memchr(chunk, '>', size) != NULL)
12113 return(1);
12114 return(0);
12115 }
12116 return(1);
12117}
12118
12119/**
12120 * xmlParseChunk:
12121 * @ctxt: an XML parser context
12122 * @chunk: an char array
12123 * @size: the size in byte of the chunk
12124 * @terminate: last chunk indicator
12125 *
12126 * Parse a Chunk of memory
12127 *
12128 * Returns zero if no error, the xmlParserErrors otherwise.
12129 */
12130int
12131xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12132 int terminate) {
12133 int end_in_lf = 0;
12134 int remain = 0;
12135 size_t old_avail = 0;
12136 size_t avail = 0;
12137
12138 if (ctxt == NULL)
12139 return(XML_ERR_INTERNAL_ERROR);
12140 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12141 return(ctxt->errNo);
12142 if (ctxt->instate == XML_PARSER_EOF)
12143 return(-1);
12144 if (ctxt->instate == XML_PARSER_START)
12145 xmlDetectSAX2(ctxt);
12146 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12147 (chunk[size - 1] == '\r')) {
12148 end_in_lf = 1;
12149 size--;
12150 }
12151
12152xmldecl_done:
12153
12154 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12155 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12156 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12157 size_t cur = ctxt->input->cur - ctxt->input->base;
12158 int res;
12159
12160 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12161 /*
12162 * Specific handling if we autodetected an encoding, we should not
12163 * push more than the first line ... which depend on the encoding
12164 * And only push the rest once the final encoding was detected
12165 */
12166 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12167 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12168 unsigned int len = 45;
12169
12170 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12171 BAD_CAST "UTF-16")) ||
12172 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173 BAD_CAST "UTF16")))
12174 len = 90;
12175 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176 BAD_CAST "UCS-4")) ||
12177 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178 BAD_CAST "UCS4")))
12179 len = 180;
12180
12181 if (ctxt->input->buf->rawconsumed < len)
12182 len -= ctxt->input->buf->rawconsumed;
12183
12184 /*
12185 * Change size for reading the initial declaration only
12186 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12187 * will blindly copy extra bytes from memory.
12188 */
12189 if ((unsigned int) size > len) {
12190 remain = size - len;
12191 size = len;
12192 } else {
12193 remain = 0;
12194 }
12195 }
12196 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12197 if (res < 0) {
12198 ctxt->errNo = XML_PARSER_EOF;
12199 xmlHaltParser(ctxt);
12200 return (XML_PARSER_EOF);
12201 }
12202 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12203#ifdef DEBUG_PUSH
12204 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12205#endif
12206
12207 } else if (ctxt->instate != XML_PARSER_EOF) {
12208 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12209 xmlParserInputBufferPtr in = ctxt->input->buf;
12210 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12211 (in->raw != NULL)) {
12212 int nbchars;
12213 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12214 size_t current = ctxt->input->cur - ctxt->input->base;
12215
12216 nbchars = xmlCharEncInput(in, terminate);
12217 if (nbchars < 0) {
12218 /* TODO 2.6.0 */
12219 xmlGenericError(xmlGenericErrorContext,
12220 "xmlParseChunk: encoder error\n");
12221 xmlHaltParser(ctxt);
12222 return(XML_ERR_INVALID_ENCODING);
12223 }
12224 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12225 }
12226 }
12227 }
12228 if (remain != 0) {
12229 xmlParseTryOrFinish(ctxt, 0);
12230 } else {
12231 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232 avail = xmlBufUse(ctxt->input->buf->buffer);
12233 /*
12234 * Depending on the current state it may not be such
12235 * a good idea to try parsing if there is nothing in the chunk
12236 * which would be worth doing a parser state transition and we
12237 * need to wait for more data
12238 */
12239 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240 (old_avail == 0) || (avail == 0) ||
12241 (xmlParseCheckTransition(ctxt,
12242 (const char *)&ctxt->input->base[old_avail],
12243 avail - old_avail)))
12244 xmlParseTryOrFinish(ctxt, terminate);
12245 }
12246 if (ctxt->instate == XML_PARSER_EOF)
12247 return(ctxt->errNo);
12248
12249 if ((ctxt->input != NULL) &&
12250 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12254 xmlHaltParser(ctxt);
12255 }
12256 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257 return(ctxt->errNo);
12258
12259 if (remain != 0) {
12260 chunk += size;
12261 size = remain;
12262 remain = 0;
12263 goto xmldecl_done;
12264 }
12265 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266 (ctxt->input->buf != NULL)) {
12267 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 ctxt->input);
12269 size_t current = ctxt->input->cur - ctxt->input->base;
12270
12271 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12272
12273 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 base, current);
12275 }
12276 if (terminate) {
12277 /*
12278 * Check for termination
12279 */
12280 int cur_avail = 0;
12281
12282 if (ctxt->input != NULL) {
12283 if (ctxt->input->buf == NULL)
12284 cur_avail = ctxt->input->length -
12285 (ctxt->input->cur - ctxt->input->base);
12286 else
12287 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 (ctxt->input->cur - ctxt->input->base);
12289 }
12290
12291 if ((ctxt->instate != XML_PARSER_EOF) &&
12292 (ctxt->instate != XML_PARSER_EPILOG)) {
12293 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12294 }
12295 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12297 }
12298 if (ctxt->instate != XML_PARSER_EOF) {
12299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12300 ctxt->sax->endDocument(ctxt->userData);
12301 }
12302 ctxt->instate = XML_PARSER_EOF;
12303 }
12304 if (ctxt->wellFormed == 0)
12305 return((xmlParserErrors) ctxt->errNo);
12306 else
12307 return(0);
12308}
12309
12310/************************************************************************
12311 * *
12312 * I/O front end functions to the parser *
12313 * *
12314 ************************************************************************/
12315
12316/**
12317 * xmlCreatePushParserCtxt:
12318 * @sax: a SAX handler
12319 * @user_data: The user data returned on SAX callbacks
12320 * @chunk: a pointer to an array of chars
12321 * @size: number of chars in the array
12322 * @filename: an optional file name or URI
12323 *
12324 * Create a parser context for using the XML parser in push mode.
12325 * If @buffer and @size are non-NULL, the data is used to detect
12326 * the encoding. The remaining characters will be parsed so they
12327 * don't need to be fed in again through xmlParseChunk.
12328 * To allow content encoding detection, @size should be >= 4
12329 * The value of @filename is used for fetching external entities
12330 * and error/warning reports.
12331 *
12332 * Returns the new parser context or NULL
12333 */
12334
12335xmlParserCtxtPtr
12336xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12337 const char *chunk, int size, const char *filename) {
12338 xmlParserCtxtPtr ctxt;
12339 xmlParserInputPtr inputStream;
12340 xmlParserInputBufferPtr buf;
12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342
12343 /*
12344 * plug some encoding conversion routines
12345 */
12346 if ((chunk != NULL) && (size >= 4))
12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348
12349 buf = xmlAllocParserInputBuffer(enc);
12350 if (buf == NULL) return(NULL);
12351
12352 ctxt = xmlNewParserCtxt();
12353 if (ctxt == NULL) {
12354 xmlErrMemory(NULL, "creating parser: out of memory\n");
12355 xmlFreeParserInputBuffer(buf);
12356 return(NULL);
12357 }
12358 ctxt->dictNames = 1;
12359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360 if (ctxt->pushTab == NULL) {
12361 xmlErrMemory(ctxt, NULL);
12362 xmlFreeParserInputBuffer(buf);
12363 xmlFreeParserCtxt(ctxt);
12364 return(NULL);
12365 }
12366 if (sax != NULL) {
12367#ifdef LIBXML_SAX1_ENABLED
12368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12369#endif /* LIBXML_SAX1_ENABLED */
12370 xmlFree(ctxt->sax);
12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 if (ctxt->sax == NULL) {
12373 xmlErrMemory(ctxt, NULL);
12374 xmlFreeParserInputBuffer(buf);
12375 xmlFreeParserCtxt(ctxt);
12376 return(NULL);
12377 }
12378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 if (sax->initialized == XML_SAX2_MAGIC)
12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 else
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12383 if (user_data != NULL)
12384 ctxt->userData = user_data;
12385 }
12386 if (filename == NULL) {
12387 ctxt->directory = NULL;
12388 } else {
12389 ctxt->directory = xmlParserGetDirectory(filename);
12390 }
12391
12392 inputStream = xmlNewInputStream(ctxt);
12393 if (inputStream == NULL) {
12394 xmlFreeParserCtxt(ctxt);
12395 xmlFreeParserInputBuffer(buf);
12396 return(NULL);
12397 }
12398
12399 if (filename == NULL)
12400 inputStream->filename = NULL;
12401 else {
12402 inputStream->filename = (char *)
12403 xmlCanonicPath((const xmlChar *) filename);
12404 if (inputStream->filename == NULL) {
12405 xmlFreeParserCtxt(ctxt);
12406 xmlFreeParserInputBuffer(buf);
12407 return(NULL);
12408 }
12409 }
12410 inputStream->buf = buf;
12411 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12412 inputPush(ctxt, inputStream);
12413
12414 /*
12415 * If the caller didn't provide an initial 'chunk' for determining
12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417 * that it can be automatically determined later
12418 */
12419 if ((size == 0) || (chunk == NULL)) {
12420 ctxt->charset = XML_CHAR_ENCODING_NONE;
12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 size_t cur = ctxt->input->cur - ctxt->input->base;
12424
12425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12426
12427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12428#ifdef DEBUG_PUSH
12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430#endif
12431 }
12432
12433 if (enc != XML_CHAR_ENCODING_NONE) {
12434 xmlSwitchEncoding(ctxt, enc);
12435 }
12436
12437 return(ctxt);
12438}
12439#endif /* LIBXML_PUSH_ENABLED */
12440
12441/**
12442 * xmlHaltParser:
12443 * @ctxt: an XML parser context
12444 *
12445 * Blocks further parser processing don't override error
12446 * for internal use
12447 */
12448static void
12449xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450 if (ctxt == NULL)
12451 return;
12452 ctxt->instate = XML_PARSER_EOF;
12453 ctxt->disableSAX = 1;
12454 while (ctxt->inputNr > 1)
12455 xmlFreeInputStream(inputPop(ctxt));
12456 if (ctxt->input != NULL) {
12457 /*
12458 * in case there was a specific allocation deallocate before
12459 * overriding base
12460 */
12461 if (ctxt->input->free != NULL) {
12462 ctxt->input->free((xmlChar *) ctxt->input->base);
12463 ctxt->input->free = NULL;
12464 }
12465 ctxt->input->cur = BAD_CAST"";
12466 ctxt->input->base = ctxt->input->cur;
12467 ctxt->input->end = ctxt->input->cur;
12468 }
12469}
12470
12471/**
12472 * xmlStopParser:
12473 * @ctxt: an XML parser context
12474 *
12475 * Blocks further parser processing
12476 */
12477void
12478xmlStopParser(xmlParserCtxtPtr ctxt) {
12479 if (ctxt == NULL)
12480 return;
12481 xmlHaltParser(ctxt);
12482 ctxt->errNo = XML_ERR_USER_STOP;
12483}
12484
12485/**
12486 * xmlCreateIOParserCtxt:
12487 * @sax: a SAX handler
12488 * @user_data: The user data returned on SAX callbacks
12489 * @ioread: an I/O read function
12490 * @ioclose: an I/O close function
12491 * @ioctx: an I/O handler
12492 * @enc: the charset encoding if known
12493 *
12494 * Create a parser context for using the XML parser with an existing
12495 * I/O stream
12496 *
12497 * Returns the new parser context or NULL
12498 */
12499xmlParserCtxtPtr
12500xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12501 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12502 void *ioctx, xmlCharEncoding enc) {
12503 xmlParserCtxtPtr ctxt;
12504 xmlParserInputPtr inputStream;
12505 xmlParserInputBufferPtr buf;
12506
12507 if (ioread == NULL) return(NULL);
12508
12509 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12510 if (buf == NULL) {
12511 if (ioclose != NULL)
12512 ioclose(ioctx);
12513 return (NULL);
12514 }
12515
12516 ctxt = xmlNewParserCtxt();
12517 if (ctxt == NULL) {
12518 xmlFreeParserInputBuffer(buf);
12519 return(NULL);
12520 }
12521 if (sax != NULL) {
12522#ifdef LIBXML_SAX1_ENABLED
12523 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12524#endif /* LIBXML_SAX1_ENABLED */
12525 xmlFree(ctxt->sax);
12526 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12527 if (ctxt->sax == NULL) {
12528 xmlErrMemory(ctxt, NULL);
12529 xmlFreeParserCtxt(ctxt);
12530 return(NULL);
12531 }
12532 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12533 if (sax->initialized == XML_SAX2_MAGIC)
12534 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12535 else
12536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12537 if (user_data != NULL)
12538 ctxt->userData = user_data;
12539 }
12540
12541 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12542 if (inputStream == NULL) {
12543 xmlFreeParserCtxt(ctxt);
12544 return(NULL);
12545 }
12546 inputPush(ctxt, inputStream);
12547
12548 return(ctxt);
12549}
12550
12551#ifdef LIBXML_VALID_ENABLED
12552/************************************************************************
12553 * *
12554 * Front ends when parsing a DTD *
12555 * *
12556 ************************************************************************/
12557
12558/**
12559 * xmlIOParseDTD:
12560 * @sax: the SAX handler block or NULL
12561 * @input: an Input Buffer
12562 * @enc: the charset encoding if known
12563 *
12564 * Load and parse a DTD
12565 *
12566 * Returns the resulting xmlDtdPtr or NULL in case of error.
12567 * @input will be freed by the function in any case.
12568 */
12569
12570xmlDtdPtr
12571xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12572 xmlCharEncoding enc) {
12573 xmlDtdPtr ret = NULL;
12574 xmlParserCtxtPtr ctxt;
12575 xmlParserInputPtr pinput = NULL;
12576 xmlChar start[4];
12577
12578 if (input == NULL)
12579 return(NULL);
12580
12581 ctxt = xmlNewParserCtxt();
12582 if (ctxt == NULL) {
12583 xmlFreeParserInputBuffer(input);
12584 return(NULL);
12585 }
12586
12587 /* We are loading a DTD */
12588 ctxt->options |= XML_PARSE_DTDLOAD;
12589
12590 /*
12591 * Set-up the SAX context
12592 */
12593 if (sax != NULL) {
12594 if (ctxt->sax != NULL)
12595 xmlFree(ctxt->sax);
12596 ctxt->sax = sax;
12597 ctxt->userData = ctxt;
12598 }
12599 xmlDetectSAX2(ctxt);
12600
12601 /*
12602 * generate a parser input from the I/O handler
12603 */
12604
12605 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12606 if (pinput == NULL) {
12607 if (sax != NULL) ctxt->sax = NULL;
12608 xmlFreeParserInputBuffer(input);
12609 xmlFreeParserCtxt(ctxt);
12610 return(NULL);
12611 }
12612
12613 /*
12614 * plug some encoding conversion routines here.
12615 */
12616 if (xmlPushInput(ctxt, pinput) < 0) {
12617 if (sax != NULL) ctxt->sax = NULL;
12618 xmlFreeParserCtxt(ctxt);
12619 return(NULL);
12620 }
12621 if (enc != XML_CHAR_ENCODING_NONE) {
12622 xmlSwitchEncoding(ctxt, enc);
12623 }
12624
12625 pinput->filename = NULL;
12626 pinput->line = 1;
12627 pinput->col = 1;
12628 pinput->base = ctxt->input->cur;
12629 pinput->cur = ctxt->input->cur;
12630 pinput->free = NULL;
12631
12632 /*
12633 * let's parse that entity knowing it's an external subset.
12634 */
12635 ctxt->inSubset = 2;
12636 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12637 if (ctxt->myDoc == NULL) {
12638 xmlErrMemory(ctxt, "New Doc failed");
12639 return(NULL);
12640 }
12641 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12642 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12643 BAD_CAST "none", BAD_CAST "none");
12644
12645 if ((enc == XML_CHAR_ENCODING_NONE) &&
12646 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12647 /*
12648 * Get the 4 first bytes and decode the charset
12649 * if enc != XML_CHAR_ENCODING_NONE
12650 * plug some encoding conversion routines.
12651 */
12652 start[0] = RAW;
12653 start[1] = NXT(1);
12654 start[2] = NXT(2);
12655 start[3] = NXT(3);
12656 enc = xmlDetectCharEncoding(start, 4);
12657 if (enc != XML_CHAR_ENCODING_NONE) {
12658 xmlSwitchEncoding(ctxt, enc);
12659 }
12660 }
12661
12662 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12663
12664 if (ctxt->myDoc != NULL) {
12665 if (ctxt->wellFormed) {
12666 ret = ctxt->myDoc->extSubset;
12667 ctxt->myDoc->extSubset = NULL;
12668 if (ret != NULL) {
12669 xmlNodePtr tmp;
12670
12671 ret->doc = NULL;
12672 tmp = ret->children;
12673 while (tmp != NULL) {
12674 tmp->doc = NULL;
12675 tmp = tmp->next;
12676 }
12677 }
12678 } else {
12679 ret = NULL;
12680 }
12681 xmlFreeDoc(ctxt->myDoc);
12682 ctxt->myDoc = NULL;
12683 }
12684 if (sax != NULL) ctxt->sax = NULL;
12685 xmlFreeParserCtxt(ctxt);
12686
12687 return(ret);
12688}
12689
12690/**
12691 * xmlSAXParseDTD:
12692 * @sax: the SAX handler block
12693 * @ExternalID: a NAME* containing the External ID of the DTD
12694 * @SystemID: a NAME* containing the URL to the DTD
12695 *
12696 * Load and parse an external subset.
12697 *
12698 * Returns the resulting xmlDtdPtr or NULL in case of error.
12699 */
12700
12701xmlDtdPtr
12702xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12703 const xmlChar *SystemID) {
12704 xmlDtdPtr ret = NULL;
12705 xmlParserCtxtPtr ctxt;
12706 xmlParserInputPtr input = NULL;
12707 xmlCharEncoding enc;
12708 xmlChar* systemIdCanonic;
12709
12710 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12711
12712 ctxt = xmlNewParserCtxt();
12713 if (ctxt == NULL) {
12714 return(NULL);
12715 }
12716
12717 /* We are loading a DTD */
12718 ctxt->options |= XML_PARSE_DTDLOAD;
12719
12720 /*
12721 * Set-up the SAX context
12722 */
12723 if (sax != NULL) {
12724 if (ctxt->sax != NULL)
12725 xmlFree(ctxt->sax);
12726 ctxt->sax = sax;
12727 ctxt->userData = ctxt;
12728 }
12729
12730 /*
12731 * Canonicalise the system ID
12732 */
12733 systemIdCanonic = xmlCanonicPath(SystemID);
12734 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12735 xmlFreeParserCtxt(ctxt);
12736 return(NULL);
12737 }
12738
12739 /*
12740 * Ask the Entity resolver to load the damn thing
12741 */
12742
12743 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12744 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12745 systemIdCanonic);
12746 if (input == NULL) {
12747 if (sax != NULL) ctxt->sax = NULL;
12748 xmlFreeParserCtxt(ctxt);
12749 if (systemIdCanonic != NULL)
12750 xmlFree(systemIdCanonic);
12751 return(NULL);
12752 }
12753
12754 /*
12755 * plug some encoding conversion routines here.
12756 */
12757 if (xmlPushInput(ctxt, input) < 0) {
12758 if (sax != NULL) ctxt->sax = NULL;
12759 xmlFreeParserCtxt(ctxt);
12760 if (systemIdCanonic != NULL)
12761 xmlFree(systemIdCanonic);
12762 return(NULL);
12763 }
12764 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12765 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12766 xmlSwitchEncoding(ctxt, enc);
12767 }
12768
12769 if (input->filename == NULL)
12770 input->filename = (char *) systemIdCanonic;
12771 else
12772 xmlFree(systemIdCanonic);
12773 input->line = 1;
12774 input->col = 1;
12775 input->base = ctxt->input->cur;
12776 input->cur = ctxt->input->cur;
12777 input->free = NULL;
12778
12779 /*
12780 * let's parse that entity knowing it's an external subset.
12781 */
12782 ctxt->inSubset = 2;
12783 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12784 if (ctxt->myDoc == NULL) {
12785 xmlErrMemory(ctxt, "New Doc failed");
12786 if (sax != NULL) ctxt->sax = NULL;
12787 xmlFreeParserCtxt(ctxt);
12788 return(NULL);
12789 }
12790 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12791 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12792 ExternalID, SystemID);
12793 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12794
12795 if (ctxt->myDoc != NULL) {
12796 if (ctxt->wellFormed) {
12797 ret = ctxt->myDoc->extSubset;
12798 ctxt->myDoc->extSubset = NULL;
12799 if (ret != NULL) {
12800 xmlNodePtr tmp;
12801
12802 ret->doc = NULL;
12803 tmp = ret->children;
12804 while (tmp != NULL) {
12805 tmp->doc = NULL;
12806 tmp = tmp->next;
12807 }
12808 }
12809 } else {
12810 ret = NULL;
12811 }
12812 xmlFreeDoc(ctxt->myDoc);
12813 ctxt->myDoc = NULL;
12814 }
12815 if (sax != NULL) ctxt->sax = NULL;
12816 xmlFreeParserCtxt(ctxt);
12817
12818 return(ret);
12819}
12820
12821
12822/**
12823 * xmlParseDTD:
12824 * @ExternalID: a NAME* containing the External ID of the DTD
12825 * @SystemID: a NAME* containing the URL to the DTD
12826 *
12827 * Load and parse an external subset.
12828 *
12829 * Returns the resulting xmlDtdPtr or NULL in case of error.
12830 */
12831
12832xmlDtdPtr
12833xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12834 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12835}
12836#endif /* LIBXML_VALID_ENABLED */
12837
12838/************************************************************************
12839 * *
12840 * Front ends when parsing an Entity *
12841 * *
12842 ************************************************************************/
12843
12844/**
12845 * xmlParseCtxtExternalEntity:
12846 * @ctx: the existing parsing context
12847 * @URL: the URL for the entity to load
12848 * @ID: the System ID for the entity to load
12849 * @lst: the return value for the set of parsed nodes
12850 *
12851 * Parse an external general entity within an existing parsing context
12852 * An external general parsed entity is well-formed if it matches the
12853 * production labeled extParsedEnt.
12854 *
12855 * [78] extParsedEnt ::= TextDecl? content
12856 *
12857 * Returns 0 if the entity is well formed, -1 in case of args problem and
12858 * the parser error code otherwise
12859 */
12860
12861int
12862xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12863 const xmlChar *ID, xmlNodePtr *lst) {
12864 xmlParserCtxtPtr ctxt;
12865 xmlDocPtr newDoc;
12866 xmlNodePtr newRoot;
12867 xmlSAXHandlerPtr oldsax = NULL;
12868 int ret = 0;
12869 xmlChar start[4];
12870 xmlCharEncoding enc;
12871
12872 if (ctx == NULL) return(-1);
12873
12874 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12875 (ctx->depth > 1024)) {
12876 return(XML_ERR_ENTITY_LOOP);
12877 }
12878
12879 if (lst != NULL)
12880 *lst = NULL;
12881 if ((URL == NULL) && (ID == NULL))
12882 return(-1);
12883 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12884 return(-1);
12885
12886 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12887 if (ctxt == NULL) {
12888 return(-1);
12889 }
12890
12891 oldsax = ctxt->sax;
12892 ctxt->sax = ctx->sax;
12893 xmlDetectSAX2(ctxt);
12894 newDoc = xmlNewDoc(BAD_CAST "1.0");
12895 if (newDoc == NULL) {
12896 xmlFreeParserCtxt(ctxt);
12897 return(-1);
12898 }
12899 newDoc->properties = XML_DOC_INTERNAL;
12900 if (ctx->myDoc->dict) {
12901 newDoc->dict = ctx->myDoc->dict;
12902 xmlDictReference(newDoc->dict);
12903 }
12904 if (ctx->myDoc != NULL) {
12905 newDoc->intSubset = ctx->myDoc->intSubset;
12906 newDoc->extSubset = ctx->myDoc->extSubset;
12907 }
12908 if (ctx->myDoc->URL != NULL) {
12909 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12910 }
12911 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12912 if (newRoot == NULL) {
12913 ctxt->sax = oldsax;
12914 xmlFreeParserCtxt(ctxt);
12915 newDoc->intSubset = NULL;
12916 newDoc->extSubset = NULL;
12917 xmlFreeDoc(newDoc);
12918 return(-1);
12919 }
12920 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12921 nodePush(ctxt, newDoc->children);
12922 if (ctx->myDoc == NULL) {
12923 ctxt->myDoc = newDoc;
12924 } else {
12925 ctxt->myDoc = ctx->myDoc;
12926 newDoc->children->doc = ctx->myDoc;
12927 }
12928
12929 /*
12930 * Get the 4 first bytes and decode the charset
12931 * if enc != XML_CHAR_ENCODING_NONE
12932 * plug some encoding conversion routines.
12933 */
12934 GROW
12935 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12936 start[0] = RAW;
12937 start[1] = NXT(1);
12938 start[2] = NXT(2);
12939 start[3] = NXT(3);
12940 enc = xmlDetectCharEncoding(start, 4);
12941 if (enc != XML_CHAR_ENCODING_NONE) {
12942 xmlSwitchEncoding(ctxt, enc);
12943 }
12944 }
12945
12946 /*
12947 * Parse a possible text declaration first
12948 */
12949 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12950 xmlParseTextDecl(ctxt);
12951 /*
12952 * An XML-1.0 document can't reference an entity not XML-1.0
12953 */
12954 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12955 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12956 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12957 "Version mismatch between document and entity\n");
12958 }
12959 }
12960
12961 /*
12962 * If the user provided its own SAX callbacks then reuse the
12963 * useData callback field, otherwise the expected setup in a
12964 * DOM builder is to have userData == ctxt
12965 */
12966 if (ctx->userData == ctx)
12967 ctxt->userData = ctxt;
12968 else
12969 ctxt->userData = ctx->userData;
12970
12971 /*
12972 * Doing validity checking on chunk doesn't make sense
12973 */
12974 ctxt->instate = XML_PARSER_CONTENT;
12975 ctxt->validate = ctx->validate;
12976 ctxt->valid = ctx->valid;
12977 ctxt->loadsubset = ctx->loadsubset;
12978 ctxt->depth = ctx->depth + 1;
12979 ctxt->replaceEntities = ctx->replaceEntities;
12980 if (ctxt->validate) {
12981 ctxt->vctxt.error = ctx->vctxt.error;
12982 ctxt->vctxt.warning = ctx->vctxt.warning;
12983 } else {
12984 ctxt->vctxt.error = NULL;
12985 ctxt->vctxt.warning = NULL;
12986 }
12987 ctxt->vctxt.nodeTab = NULL;
12988 ctxt->vctxt.nodeNr = 0;
12989 ctxt->vctxt.nodeMax = 0;
12990 ctxt->vctxt.node = NULL;
12991 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12992 ctxt->dict = ctx->dict;
12993 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12994 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12995 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12996 ctxt->dictNames = ctx->dictNames;
12997 ctxt->attsDefault = ctx->attsDefault;
12998 ctxt->attsSpecial = ctx->attsSpecial;
12999 ctxt->linenumbers = ctx->linenumbers;
13000
13001 xmlParseContent(ctxt);
13002
13003 ctx->validate = ctxt->validate;
13004 ctx->valid = ctxt->valid;
13005 if ((RAW == '<') && (NXT(1) == '/')) {
13006 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13007 } else if (RAW != 0) {
13008 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13009 }
13010 if (ctxt->node != newDoc->children) {
13011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012 }
13013
13014 if (!ctxt->wellFormed) {
13015 if (ctxt->errNo == 0)
13016 ret = 1;
13017 else
13018 ret = ctxt->errNo;
13019 } else {
13020 if (lst != NULL) {
13021 xmlNodePtr cur;
13022
13023 /*
13024 * Return the newly created nodeset after unlinking it from
13025 * they pseudo parent.
13026 */
13027 cur = newDoc->children->children;
13028 *lst = cur;
13029 while (cur != NULL) {
13030 cur->parent = NULL;
13031 cur = cur->next;
13032 }
13033 newDoc->children->children = NULL;
13034 }
13035 ret = 0;
13036 }
13037 ctxt->sax = oldsax;
13038 ctxt->dict = NULL;
13039 ctxt->attsDefault = NULL;
13040 ctxt->attsSpecial = NULL;
13041 xmlFreeParserCtxt(ctxt);
13042 newDoc->intSubset = NULL;
13043 newDoc->extSubset = NULL;
13044 xmlFreeDoc(newDoc);
13045
13046 return(ret);
13047}
13048
13049/**
13050 * xmlParseExternalEntityPrivate:
13051 * @doc: the document the chunk pertains to
13052 * @oldctxt: the previous parser context if available
13053 * @sax: the SAX handler bloc (possibly NULL)
13054 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13055 * @depth: Used for loop detection, use 0
13056 * @URL: the URL for the entity to load
13057 * @ID: the System ID for the entity to load
13058 * @list: the return value for the set of parsed nodes
13059 *
13060 * Private version of xmlParseExternalEntity()
13061 *
13062 * Returns 0 if the entity is well formed, -1 in case of args problem and
13063 * the parser error code otherwise
13064 */
13065
13066static xmlParserErrors
13067xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13068 xmlSAXHandlerPtr sax,
13069 void *user_data, int depth, const xmlChar *URL,
13070 const xmlChar *ID, xmlNodePtr *list) {
13071 xmlParserCtxtPtr ctxt;
13072 xmlDocPtr newDoc;
13073 xmlNodePtr newRoot;
13074 xmlSAXHandlerPtr oldsax = NULL;
13075 xmlParserErrors ret = XML_ERR_OK;
13076 xmlChar start[4];
13077 xmlCharEncoding enc;
13078
13079 if (((depth > 40) &&
13080 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13081 (depth > 1024)) {
13082 return(XML_ERR_ENTITY_LOOP);
13083 }
13084
13085 if (list != NULL)
13086 *list = NULL;
13087 if ((URL == NULL) && (ID == NULL))
13088 return(XML_ERR_INTERNAL_ERROR);
13089 if (doc == NULL)
13090 return(XML_ERR_INTERNAL_ERROR);
13091
13092
13093 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13094 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13095 ctxt->userData = ctxt;
13096 if (oldctxt != NULL) {
13097 ctxt->_private = oldctxt->_private;
13098 ctxt->loadsubset = oldctxt->loadsubset;
13099 ctxt->validate = oldctxt->validate;
13100 ctxt->external = oldctxt->external;
13101 ctxt->record_info = oldctxt->record_info;
13102 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13103 ctxt->node_seq.length = oldctxt->node_seq.length;
13104 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13105 } else {
13106 /*
13107 * Doing validity checking on chunk without context
13108 * doesn't make sense
13109 */
13110 ctxt->_private = NULL;
13111 ctxt->validate = 0;
13112 ctxt->external = 2;
13113 ctxt->loadsubset = 0;
13114 }
13115 if (sax != NULL) {
13116 oldsax = ctxt->sax;
13117 ctxt->sax = sax;
13118 if (user_data != NULL)
13119 ctxt->userData = user_data;
13120 }
13121 xmlDetectSAX2(ctxt);
13122 newDoc = xmlNewDoc(BAD_CAST "1.0");
13123 if (newDoc == NULL) {
13124 ctxt->node_seq.maximum = 0;
13125 ctxt->node_seq.length = 0;
13126 ctxt->node_seq.buffer = NULL;
13127 xmlFreeParserCtxt(ctxt);
13128 return(XML_ERR_INTERNAL_ERROR);
13129 }
13130 newDoc->properties = XML_DOC_INTERNAL;
13131 newDoc->intSubset = doc->intSubset;
13132 newDoc->extSubset = doc->extSubset;
13133 newDoc->dict = doc->dict;
13134 xmlDictReference(newDoc->dict);
13135
13136 if (doc->URL != NULL) {
13137 newDoc->URL = xmlStrdup(doc->URL);
13138 }
13139 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13140 if (newRoot == NULL) {
13141 if (sax != NULL)
13142 ctxt->sax = oldsax;
13143 ctxt->node_seq.maximum = 0;
13144 ctxt->node_seq.length = 0;
13145 ctxt->node_seq.buffer = NULL;
13146 xmlFreeParserCtxt(ctxt);
13147 newDoc->intSubset = NULL;
13148 newDoc->extSubset = NULL;
13149 xmlFreeDoc(newDoc);
13150 return(XML_ERR_INTERNAL_ERROR);
13151 }
13152 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13153 nodePush(ctxt, newDoc->children);
13154 ctxt->myDoc = doc;
13155 newRoot->doc = doc;
13156
13157 /*
13158 * Get the 4 first bytes and decode the charset
13159 * if enc != XML_CHAR_ENCODING_NONE
13160 * plug some encoding conversion routines.
13161 */
13162 GROW;
13163 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13164 start[0] = RAW;
13165 start[1] = NXT(1);
13166 start[2] = NXT(2);
13167 start[3] = NXT(3);
13168 enc = xmlDetectCharEncoding(start, 4);
13169 if (enc != XML_CHAR_ENCODING_NONE) {
13170 xmlSwitchEncoding(ctxt, enc);
13171 }
13172 }
13173
13174 /*
13175 * Parse a possible text declaration first
13176 */
13177 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13178 xmlParseTextDecl(ctxt);
13179 }
13180
13181 ctxt->instate = XML_PARSER_CONTENT;
13182 ctxt->depth = depth;
13183
13184 xmlParseContent(ctxt);
13185
13186 if ((RAW == '<') && (NXT(1) == '/')) {
13187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13188 } else if (RAW != 0) {
13189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13190 }
13191 if (ctxt->node != newDoc->children) {
13192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193 }
13194
13195 if (!ctxt->wellFormed) {
13196 if (ctxt->errNo == 0)
13197 ret = XML_ERR_INTERNAL_ERROR;
13198 else
13199 ret = (xmlParserErrors)ctxt->errNo;
13200 } else {
13201 if (list != NULL) {
13202 xmlNodePtr cur;
13203
13204 /*
13205 * Return the newly created nodeset after unlinking it from
13206 * they pseudo parent.
13207 */
13208 cur = newDoc->children->children;
13209 *list = cur;
13210 while (cur != NULL) {
13211 cur->parent = NULL;
13212 cur = cur->next;
13213 }
13214 newDoc->children->children = NULL;
13215 }
13216 ret = XML_ERR_OK;
13217 }
13218
13219 /*
13220 * Record in the parent context the number of entities replacement
13221 * done when parsing that reference.
13222 */
13223 if (oldctxt != NULL)
13224 oldctxt->nbentities += ctxt->nbentities;
13225
13226 /*
13227 * Also record the size of the entity parsed
13228 */
13229 if (ctxt->input != NULL && oldctxt != NULL) {
13230 oldctxt->sizeentities += ctxt->input->consumed;
13231 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13232 }
13233 /*
13234 * And record the last error if any
13235 */
13236 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13237 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13238
13239 if (sax != NULL)
13240 ctxt->sax = oldsax;
13241 if (oldctxt != NULL) {
13242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243 oldctxt->node_seq.length = ctxt->node_seq.length;
13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245 }
13246 ctxt->node_seq.maximum = 0;
13247 ctxt->node_seq.length = 0;
13248 ctxt->node_seq.buffer = NULL;
13249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
13253
13254 return(ret);
13255}
13256
13257#ifdef LIBXML_SAX1_ENABLED
13258/**
13259 * xmlParseExternalEntity:
13260 * @doc: the document the chunk pertains to
13261 * @sax: the SAX handler bloc (possibly NULL)
13262 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13263 * @depth: Used for loop detection, use 0
13264 * @URL: the URL for the entity to load
13265 * @ID: the System ID for the entity to load
13266 * @lst: the return value for the set of parsed nodes
13267 *
13268 * Parse an external general entity
13269 * An external general parsed entity is well-formed if it matches the
13270 * production labeled extParsedEnt.
13271 *
13272 * [78] extParsedEnt ::= TextDecl? content
13273 *
13274 * Returns 0 if the entity is well formed, -1 in case of args problem and
13275 * the parser error code otherwise
13276 */
13277
13278int
13279xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13280 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13281 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13282 ID, lst));
13283}
13284
13285/**
13286 * xmlParseBalancedChunkMemory:
13287 * @doc: the document the chunk pertains to
13288 * @sax: the SAX handler bloc (possibly NULL)
13289 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13290 * @depth: Used for loop detection, use 0
13291 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13292 * @lst: the return value for the set of parsed nodes
13293 *
13294 * Parse a well-balanced chunk of an XML document
13295 * called by the parser
13296 * The allowed sequence for the Well Balanced Chunk is the one defined by
13297 * the content production in the XML grammar:
13298 *
13299 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300 *
13301 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13302 * the parser error code otherwise
13303 */
13304
13305int
13306xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13307 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13308 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13309 depth, string, lst, 0 );
13310}
13311#endif /* LIBXML_SAX1_ENABLED */
13312
13313/**
13314 * xmlParseBalancedChunkMemoryInternal:
13315 * @oldctxt: the existing parsing context
13316 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13317 * @user_data: the user data field for the parser context
13318 * @lst: the return value for the set of parsed nodes
13319 *
13320 *
13321 * Parse a well-balanced chunk of an XML document
13322 * called by the parser
13323 * The allowed sequence for the Well Balanced Chunk is the one defined by
13324 * the content production in the XML grammar:
13325 *
13326 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327 *
13328 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13329 * error code otherwise
13330 *
13331 * In case recover is set to 1, the nodelist will not be empty even if
13332 * the parsed chunk is not well balanced.
13333 */
13334static xmlParserErrors
13335xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13336 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13337 xmlParserCtxtPtr ctxt;
13338 xmlDocPtr newDoc = NULL;
13339 xmlNodePtr newRoot;
13340 xmlSAXHandlerPtr oldsax = NULL;
13341 xmlNodePtr content = NULL;
13342 xmlNodePtr last = NULL;
13343 int size;
13344 xmlParserErrors ret = XML_ERR_OK;
13345#ifdef SAX2
13346 int i;
13347#endif
13348
13349 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13350 (oldctxt->depth > 1024)) {
13351 return(XML_ERR_ENTITY_LOOP);
13352 }
13353
13354
13355 if (lst != NULL)
13356 *lst = NULL;
13357 if (string == NULL)
13358 return(XML_ERR_INTERNAL_ERROR);
13359
13360 size = xmlStrlen(string);
13361
13362 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13363 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13364 if (user_data != NULL)
13365 ctxt->userData = user_data;
13366 else
13367 ctxt->userData = ctxt;
13368 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13369 ctxt->dict = oldctxt->dict;
13370 ctxt->input_id = oldctxt->input_id + 1;
13371 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13372 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13373 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13374
13375#ifdef SAX2
13376 /* propagate namespaces down the entity */
13377 for (i = 0;i < oldctxt->nsNr;i += 2) {
13378 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13379 }
13380#endif
13381
13382 oldsax = ctxt->sax;
13383 ctxt->sax = oldctxt->sax;
13384 xmlDetectSAX2(ctxt);
13385 ctxt->replaceEntities = oldctxt->replaceEntities;
13386 ctxt->options = oldctxt->options;
13387
13388 ctxt->_private = oldctxt->_private;
13389 if (oldctxt->myDoc == NULL) {
13390 newDoc = xmlNewDoc(BAD_CAST "1.0");
13391 if (newDoc == NULL) {
13392 ctxt->sax = oldsax;
13393 ctxt->dict = NULL;
13394 xmlFreeParserCtxt(ctxt);
13395 return(XML_ERR_INTERNAL_ERROR);
13396 }
13397 newDoc->properties = XML_DOC_INTERNAL;
13398 newDoc->dict = ctxt->dict;
13399 xmlDictReference(newDoc->dict);
13400 ctxt->myDoc = newDoc;
13401 } else {
13402 ctxt->myDoc = oldctxt->myDoc;
13403 content = ctxt->myDoc->children;
13404 last = ctxt->myDoc->last;
13405 }
13406 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13407 if (newRoot == NULL) {
13408 ctxt->sax = oldsax;
13409 ctxt->dict = NULL;
13410 xmlFreeParserCtxt(ctxt);
13411 if (newDoc != NULL) {
13412 xmlFreeDoc(newDoc);
13413 }
13414 return(XML_ERR_INTERNAL_ERROR);
13415 }
13416 ctxt->myDoc->children = NULL;
13417 ctxt->myDoc->last = NULL;
13418 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13419 nodePush(ctxt, ctxt->myDoc->children);
13420 ctxt->instate = XML_PARSER_CONTENT;
13421 ctxt->depth = oldctxt->depth + 1;
13422
13423 ctxt->validate = 0;
13424 ctxt->loadsubset = oldctxt->loadsubset;
13425 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13426 /*
13427 * ID/IDREF registration will be done in xmlValidateElement below
13428 */
13429 ctxt->loadsubset |= XML_SKIP_IDS;
13430 }
13431 ctxt->dictNames = oldctxt->dictNames;
13432 ctxt->attsDefault = oldctxt->attsDefault;
13433 ctxt->attsSpecial = oldctxt->attsSpecial;
13434
13435 xmlParseContent(ctxt);
13436 if ((RAW == '<') && (NXT(1) == '/')) {
13437 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13438 } else if (RAW != 0) {
13439 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13440 }
13441 if (ctxt->node != ctxt->myDoc->children) {
13442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13443 }
13444
13445 if (!ctxt->wellFormed) {
13446 if (ctxt->errNo == 0)
13447 ret = XML_ERR_INTERNAL_ERROR;
13448 else
13449 ret = (xmlParserErrors)ctxt->errNo;
13450 } else {
13451 ret = XML_ERR_OK;
13452 }
13453
13454 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13455 xmlNodePtr cur;
13456
13457 /*
13458 * Return the newly created nodeset after unlinking it from
13459 * they pseudo parent.
13460 */
13461 cur = ctxt->myDoc->children->children;
13462 *lst = cur;
13463 while (cur != NULL) {
13464#ifdef LIBXML_VALID_ENABLED
13465 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13466 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13467 (cur->type == XML_ELEMENT_NODE)) {
13468 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13469 oldctxt->myDoc, cur);
13470 }
13471#endif /* LIBXML_VALID_ENABLED */
13472 cur->parent = NULL;
13473 cur = cur->next;
13474 }
13475 ctxt->myDoc->children->children = NULL;
13476 }
13477 if (ctxt->myDoc != NULL) {
13478 xmlFreeNode(ctxt->myDoc->children);
13479 ctxt->myDoc->children = content;
13480 ctxt->myDoc->last = last;
13481 }
13482
13483 /*
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13486 */
13487 if (oldctxt != NULL)
13488 oldctxt->nbentities += ctxt->nbentities;
13489
13490 /*
13491 * Also record the last error if any
13492 */
13493 if (ctxt->lastError.code != XML_ERR_OK)
13494 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13495
13496 ctxt->sax = oldsax;
13497 ctxt->dict = NULL;
13498 ctxt->attsDefault = NULL;
13499 ctxt->attsSpecial = NULL;
13500 xmlFreeParserCtxt(ctxt);
13501 if (newDoc != NULL) {
13502 xmlFreeDoc(newDoc);
13503 }
13504
13505 return(ret);
13506}
13507
13508/**
13509 * xmlParseInNodeContext:
13510 * @node: the context node
13511 * @data: the input string
13512 * @datalen: the input string length in bytes
13513 * @options: a combination of xmlParserOption
13514 * @lst: the return value for the set of parsed nodes
13515 *
13516 * Parse a well-balanced chunk of an XML document
13517 * within the context (DTD, namespaces, etc ...) of the given node.
13518 *
13519 * The allowed sequence for the data is a Well Balanced Chunk defined by
13520 * the content production in the XML grammar:
13521 *
13522 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13523 *
13524 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13525 * error code otherwise
13526 */
13527xmlParserErrors
13528xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13529 int options, xmlNodePtr *lst) {
13530#ifdef SAX2
13531 xmlParserCtxtPtr ctxt;
13532 xmlDocPtr doc = NULL;
13533 xmlNodePtr fake, cur;
13534 int nsnr = 0;
13535
13536 xmlParserErrors ret = XML_ERR_OK;
13537
13538 /*
13539 * check all input parameters, grab the document
13540 */
13541 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13542 return(XML_ERR_INTERNAL_ERROR);
13543 switch (node->type) {
13544 case XML_ELEMENT_NODE:
13545 case XML_ATTRIBUTE_NODE:
13546 case XML_TEXT_NODE:
13547 case XML_CDATA_SECTION_NODE:
13548 case XML_ENTITY_REF_NODE:
13549 case XML_PI_NODE:
13550 case XML_COMMENT_NODE:
13551 case XML_DOCUMENT_NODE:
13552 case XML_HTML_DOCUMENT_NODE:
13553 break;
13554 default:
13555 return(XML_ERR_INTERNAL_ERROR);
13556
13557 }
13558 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13559 (node->type != XML_DOCUMENT_NODE) &&
13560 (node->type != XML_HTML_DOCUMENT_NODE))
13561 node = node->parent;
13562 if (node == NULL)
13563 return(XML_ERR_INTERNAL_ERROR);
13564 if (node->type == XML_ELEMENT_NODE)
13565 doc = node->doc;
13566 else
13567 doc = (xmlDocPtr) node;
13568 if (doc == NULL)
13569 return(XML_ERR_INTERNAL_ERROR);
13570
13571 /*
13572 * allocate a context and set-up everything not related to the
13573 * node position in the tree
13574 */
13575 if (doc->type == XML_DOCUMENT_NODE)
13576 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13577#ifdef LIBXML_HTML_ENABLED
13578 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13579 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13580 /*
13581 * When parsing in context, it makes no sense to add implied
13582 * elements like html/body/etc...
13583 */
13584 options |= HTML_PARSE_NOIMPLIED;
13585 }
13586#endif
13587 else
13588 return(XML_ERR_INTERNAL_ERROR);
13589
13590 if (ctxt == NULL)
13591 return(XML_ERR_NO_MEMORY);
13592
13593 /*
13594 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13595 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13596 * we must wait until the last moment to free the original one.
13597 */
13598 if (doc->dict != NULL) {
13599 if (ctxt->dict != NULL)
13600 xmlDictFree(ctxt->dict);
13601 ctxt->dict = doc->dict;
13602 } else
13603 options |= XML_PARSE_NODICT;
13604
13605 if (doc->encoding != NULL) {
13606 xmlCharEncodingHandlerPtr hdlr;
13607
13608 if (ctxt->encoding != NULL)
13609 xmlFree((xmlChar *) ctxt->encoding);
13610 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13611
13612 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13613 if (hdlr != NULL) {
13614 xmlSwitchToEncoding(ctxt, hdlr);
13615 } else {
13616 return(XML_ERR_UNSUPPORTED_ENCODING);
13617 }
13618 }
13619
13620 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13621 xmlDetectSAX2(ctxt);
13622 ctxt->myDoc = doc;
13623 /* parsing in context, i.e. as within existing content */
13624 ctxt->input_id = 2;
13625 ctxt->instate = XML_PARSER_CONTENT;
13626
13627 fake = xmlNewComment(NULL);
13628 if (fake == NULL) {
13629 xmlFreeParserCtxt(ctxt);
13630 return(XML_ERR_NO_MEMORY);
13631 }
13632 xmlAddChild(node, fake);
13633
13634 if (node->type == XML_ELEMENT_NODE) {
13635 nodePush(ctxt, node);
13636 /*
13637 * initialize the SAX2 namespaces stack
13638 */
13639 cur = node;
13640 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641 xmlNsPtr ns = cur->nsDef;
13642 const xmlChar *iprefix, *ihref;
13643
13644 while (ns != NULL) {
13645 if (ctxt->dict) {
13646 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648 } else {
13649 iprefix = ns->prefix;
13650 ihref = ns->href;
13651 }
13652
13653 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654 nsPush(ctxt, iprefix, ihref);
13655 nsnr++;
13656 }
13657 ns = ns->next;
13658 }
13659 cur = cur->parent;
13660 }
13661 }
13662
13663 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664 /*
13665 * ID/IDREF registration will be done in xmlValidateElement below
13666 */
13667 ctxt->loadsubset |= XML_SKIP_IDS;
13668 }
13669
13670#ifdef LIBXML_HTML_ENABLED
13671 if (doc->type == XML_HTML_DOCUMENT_NODE)
13672 __htmlParseContent(ctxt);
13673 else
13674#endif
13675 xmlParseContent(ctxt);
13676
13677 nsPop(ctxt, nsnr);
13678 if ((RAW == '<') && (NXT(1) == '/')) {
13679 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680 } else if (RAW != 0) {
13681 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682 }
13683 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 ctxt->wellFormed = 0;
13686 }
13687
13688 if (!ctxt->wellFormed) {
13689 if (ctxt->errNo == 0)
13690 ret = XML_ERR_INTERNAL_ERROR;
13691 else
13692 ret = (xmlParserErrors)ctxt->errNo;
13693 } else {
13694 ret = XML_ERR_OK;
13695 }
13696
13697 /*
13698 * Return the newly created nodeset after unlinking it from
13699 * the pseudo sibling.
13700 */
13701
13702 cur = fake->next;
13703 fake->next = NULL;
13704 node->last = fake;
13705
13706 if (cur != NULL) {
13707 cur->prev = NULL;
13708 }
13709
13710 *lst = cur;
13711
13712 while (cur != NULL) {
13713 cur->parent = NULL;
13714 cur = cur->next;
13715 }
13716
13717 xmlUnlinkNode(fake);
13718 xmlFreeNode(fake);
13719
13720
13721 if (ret != XML_ERR_OK) {
13722 xmlFreeNodeList(*lst);
13723 *lst = NULL;
13724 }
13725
13726 if (doc->dict != NULL)
13727 ctxt->dict = NULL;
13728 xmlFreeParserCtxt(ctxt);
13729
13730 return(ret);
13731#else /* !SAX2 */
13732 return(XML_ERR_INTERNAL_ERROR);
13733#endif
13734}
13735
13736#ifdef LIBXML_SAX1_ENABLED
13737/**
13738 * xmlParseBalancedChunkMemoryRecover:
13739 * @doc: the document the chunk pertains to
13740 * @sax: the SAX handler bloc (possibly NULL)
13741 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13742 * @depth: Used for loop detection, use 0
13743 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13744 * @lst: the return value for the set of parsed nodes
13745 * @recover: return nodes even if the data is broken (use 0)
13746 *
13747 *
13748 * Parse a well-balanced chunk of an XML document
13749 * called by the parser
13750 * The allowed sequence for the Well Balanced Chunk is the one defined by
13751 * the content production in the XML grammar:
13752 *
13753 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754 *
13755 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756 * the parser error code otherwise
13757 *
13758 * In case recover is set to 1, the nodelist will not be empty even if
13759 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13760 * some extent.
13761 */
13762int
13763xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13764 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13765 int recover) {
13766 xmlParserCtxtPtr ctxt;
13767 xmlDocPtr newDoc;
13768 xmlSAXHandlerPtr oldsax = NULL;
13769 xmlNodePtr content, newRoot;
13770 int size;
13771 int ret = 0;
13772
13773 if (depth > 40) {
13774 return(XML_ERR_ENTITY_LOOP);
13775 }
13776
13777
13778 if (lst != NULL)
13779 *lst = NULL;
13780 if (string == NULL)
13781 return(-1);
13782
13783 size = xmlStrlen(string);
13784
13785 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786 if (ctxt == NULL) return(-1);
13787 ctxt->userData = ctxt;
13788 if (sax != NULL) {
13789 oldsax = ctxt->sax;
13790 ctxt->sax = sax;
13791 if (user_data != NULL)
13792 ctxt->userData = user_data;
13793 }
13794 newDoc = xmlNewDoc(BAD_CAST "1.0");
13795 if (newDoc == NULL) {
13796 xmlFreeParserCtxt(ctxt);
13797 return(-1);
13798 }
13799 newDoc->properties = XML_DOC_INTERNAL;
13800 if ((doc != NULL) && (doc->dict != NULL)) {
13801 xmlDictFree(ctxt->dict);
13802 ctxt->dict = doc->dict;
13803 xmlDictReference(ctxt->dict);
13804 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807 ctxt->dictNames = 1;
13808 } else {
13809 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13810 }
13811 if (doc != NULL) {
13812 newDoc->intSubset = doc->intSubset;
13813 newDoc->extSubset = doc->extSubset;
13814 }
13815 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816 if (newRoot == NULL) {
13817 if (sax != NULL)
13818 ctxt->sax = oldsax;
13819 xmlFreeParserCtxt(ctxt);
13820 newDoc->intSubset = NULL;
13821 newDoc->extSubset = NULL;
13822 xmlFreeDoc(newDoc);
13823 return(-1);
13824 }
13825 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826 nodePush(ctxt, newRoot);
13827 if (doc == NULL) {
13828 ctxt->myDoc = newDoc;
13829 } else {
13830 ctxt->myDoc = newDoc;
13831 newDoc->children->doc = doc;
13832 /* Ensure that doc has XML spec namespace */
13833 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834 newDoc->oldNs = doc->oldNs;
13835 }
13836 ctxt->instate = XML_PARSER_CONTENT;
13837 ctxt->input_id = 2;
13838 ctxt->depth = depth;
13839
13840 /*
13841 * Doing validity checking on chunk doesn't make sense
13842 */
13843 ctxt->validate = 0;
13844 ctxt->loadsubset = 0;
13845 xmlDetectSAX2(ctxt);
13846
13847 if ( doc != NULL ){
13848 content = doc->children;
13849 doc->children = NULL;
13850 xmlParseContent(ctxt);
13851 doc->children = content;
13852 }
13853 else {
13854 xmlParseContent(ctxt);
13855 }
13856 if ((RAW == '<') && (NXT(1) == '/')) {
13857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13858 } else if (RAW != 0) {
13859 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13860 }
13861 if (ctxt->node != newDoc->children) {
13862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13863 }
13864
13865 if (!ctxt->wellFormed) {
13866 if (ctxt->errNo == 0)
13867 ret = 1;
13868 else
13869 ret = ctxt->errNo;
13870 } else {
13871 ret = 0;
13872 }
13873
13874 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13875 xmlNodePtr cur;
13876
13877 /*
13878 * Return the newly created nodeset after unlinking it from
13879 * they pseudo parent.
13880 */
13881 cur = newDoc->children->children;
13882 *lst = cur;
13883 while (cur != NULL) {
13884 xmlSetTreeDoc(cur, doc);
13885 cur->parent = NULL;
13886 cur = cur->next;
13887 }
13888 newDoc->children->children = NULL;
13889 }
13890
13891 if (sax != NULL)
13892 ctxt->sax = oldsax;
13893 xmlFreeParserCtxt(ctxt);
13894 newDoc->intSubset = NULL;
13895 newDoc->extSubset = NULL;
13896 newDoc->oldNs = NULL;
13897 xmlFreeDoc(newDoc);
13898
13899 return(ret);
13900}
13901
13902/**
13903 * xmlSAXParseEntity:
13904 * @sax: the SAX handler block
13905 * @filename: the filename
13906 *
13907 * parse an XML external entity out of context and build a tree.
13908 * It use the given SAX function block to handle the parsing callback.
13909 * If sax is NULL, fallback to the default DOM tree building routines.
13910 *
13911 * [78] extParsedEnt ::= TextDecl? content
13912 *
13913 * This correspond to a "Well Balanced" chunk
13914 *
13915 * Returns the resulting document tree
13916 */
13917
13918xmlDocPtr
13919xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13920 xmlDocPtr ret;
13921 xmlParserCtxtPtr ctxt;
13922
13923 ctxt = xmlCreateFileParserCtxt(filename);
13924 if (ctxt == NULL) {
13925 return(NULL);
13926 }
13927 if (sax != NULL) {
13928 if (ctxt->sax != NULL)
13929 xmlFree(ctxt->sax);
13930 ctxt->sax = sax;
13931 ctxt->userData = NULL;
13932 }
13933
13934 xmlParseExtParsedEnt(ctxt);
13935
13936 if (ctxt->wellFormed)
13937 ret = ctxt->myDoc;
13938 else {
13939 ret = NULL;
13940 xmlFreeDoc(ctxt->myDoc);
13941 ctxt->myDoc = NULL;
13942 }
13943 if (sax != NULL)
13944 ctxt->sax = NULL;
13945 xmlFreeParserCtxt(ctxt);
13946
13947 return(ret);
13948}
13949
13950/**
13951 * xmlParseEntity:
13952 * @filename: the filename
13953 *
13954 * parse an XML external entity out of context and build a tree.
13955 *
13956 * [78] extParsedEnt ::= TextDecl? content
13957 *
13958 * This correspond to a "Well Balanced" chunk
13959 *
13960 * Returns the resulting document tree
13961 */
13962
13963xmlDocPtr
13964xmlParseEntity(const char *filename) {
13965 return(xmlSAXParseEntity(NULL, filename));
13966}
13967#endif /* LIBXML_SAX1_ENABLED */
13968
13969/**
13970 * xmlCreateEntityParserCtxtInternal:
13971 * @URL: the entity URL
13972 * @ID: the entity PUBLIC ID
13973 * @base: a possible base for the target URI
13974 * @pctx: parser context used to set options on new context
13975 *
13976 * Create a parser context for an external entity
13977 * Automatic support for ZLIB/Compress compressed document is provided
13978 * by default if found at compile-time.
13979 *
13980 * Returns the new parser context or NULL
13981 */
13982static xmlParserCtxtPtr
13983xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13984 const xmlChar *base, xmlParserCtxtPtr pctx) {
13985 xmlParserCtxtPtr ctxt;
13986 xmlParserInputPtr inputStream;
13987 char *directory = NULL;
13988 xmlChar *uri;
13989
13990 ctxt = xmlNewParserCtxt();
13991 if (ctxt == NULL) {
13992 return(NULL);
13993 }
13994
13995 if (pctx != NULL) {
13996 ctxt->options = pctx->options;
13997 ctxt->_private = pctx->_private;
13998 /*
13999 * this is a subparser of pctx, so the input_id should be
14000 * incremented to distinguish from main entity
14001 */
14002 ctxt->input_id = pctx->input_id + 1;
14003 }
14004
14005 uri = xmlBuildURI(URL, base);
14006
14007 if (uri == NULL) {
14008 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14009 if (inputStream == NULL) {
14010 xmlFreeParserCtxt(ctxt);
14011 return(NULL);
14012 }
14013
14014 inputPush(ctxt, inputStream);
14015
14016 if ((ctxt->directory == NULL) && (directory == NULL))
14017 directory = xmlParserGetDirectory((char *)URL);
14018 if ((ctxt->directory == NULL) && (directory != NULL))
14019 ctxt->directory = directory;
14020 } else {
14021 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14022 if (inputStream == NULL) {
14023 xmlFree(uri);
14024 xmlFreeParserCtxt(ctxt);
14025 return(NULL);
14026 }
14027
14028 inputPush(ctxt, inputStream);
14029
14030 if ((ctxt->directory == NULL) && (directory == NULL))
14031 directory = xmlParserGetDirectory((char *)uri);
14032 if ((ctxt->directory == NULL) && (directory != NULL))
14033 ctxt->directory = directory;
14034 xmlFree(uri);
14035 }
14036 return(ctxt);
14037}
14038
14039/**
14040 * xmlCreateEntityParserCtxt:
14041 * @URL: the entity URL
14042 * @ID: the entity PUBLIC ID
14043 * @base: a possible base for the target URI
14044 *
14045 * Create a parser context for an external entity
14046 * Automatic support for ZLIB/Compress compressed document is provided
14047 * by default if found at compile-time.
14048 *
14049 * Returns the new parser context or NULL
14050 */
14051xmlParserCtxtPtr
14052xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14053 const xmlChar *base) {
14054 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14055
14056}
14057
14058/************************************************************************
14059 * *
14060 * Front ends when parsing from a file *
14061 * *
14062 ************************************************************************/
14063
14064/**
14065 * xmlCreateURLParserCtxt:
14066 * @filename: the filename or URL
14067 * @options: a combination of xmlParserOption
14068 *
14069 * Create a parser context for a file or URL content.
14070 * Automatic support for ZLIB/Compress compressed document is provided
14071 * by default if found at compile-time and for file accesses
14072 *
14073 * Returns the new parser context or NULL
14074 */
14075xmlParserCtxtPtr
14076xmlCreateURLParserCtxt(const char *filename, int options)
14077{
14078 xmlParserCtxtPtr ctxt;
14079 xmlParserInputPtr inputStream;
14080 char *directory = NULL;
14081
14082 ctxt = xmlNewParserCtxt();
14083 if (ctxt == NULL) {
14084 xmlErrMemory(NULL, "cannot allocate parser context");
14085 return(NULL);
14086 }
14087
14088 if (options)
14089 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14090 ctxt->linenumbers = 1;
14091
14092 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14093 if (inputStream == NULL) {
14094 xmlFreeParserCtxt(ctxt);
14095 return(NULL);
14096 }
14097
14098 inputPush(ctxt, inputStream);
14099 if ((ctxt->directory == NULL) && (directory == NULL))
14100 directory = xmlParserGetDirectory(filename);
14101 if ((ctxt->directory == NULL) && (directory != NULL))
14102 ctxt->directory = directory;
14103
14104 return(ctxt);
14105}
14106
14107/**
14108 * xmlCreateFileParserCtxt:
14109 * @filename: the filename
14110 *
14111 * Create a parser context for a file content.
14112 * Automatic support for ZLIB/Compress compressed document is provided
14113 * by default if found at compile-time.
14114 *
14115 * Returns the new parser context or NULL
14116 */
14117xmlParserCtxtPtr
14118xmlCreateFileParserCtxt(const char *filename)
14119{
14120 return(xmlCreateURLParserCtxt(filename, 0));
14121}
14122
14123#ifdef LIBXML_SAX1_ENABLED
14124/**
14125 * xmlSAXParseFileWithData:
14126 * @sax: the SAX handler block
14127 * @filename: the filename
14128 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14129 * documents
14130 * @data: the userdata
14131 *
14132 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14133 * compressed document is provided by default if found at compile-time.
14134 * It use the given SAX function block to handle the parsing callback.
14135 * If sax is NULL, fallback to the default DOM tree building routines.
14136 *
14137 * User data (void *) is stored within the parser context in the
14138 * context's _private member, so it is available nearly everywhere in libxml
14139 *
14140 * Returns the resulting document tree
14141 */
14142
14143xmlDocPtr
14144xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14145 int recovery, void *data) {
14146 xmlDocPtr ret;
14147 xmlParserCtxtPtr ctxt;
14148
14149 xmlInitParser();
14150
14151 ctxt = xmlCreateFileParserCtxt(filename);
14152 if (ctxt == NULL) {
14153 return(NULL);
14154 }
14155 if (sax != NULL) {
14156 if (ctxt->sax != NULL)
14157 xmlFree(ctxt->sax);
14158 ctxt->sax = sax;
14159 }
14160 xmlDetectSAX2(ctxt);
14161 if (data!=NULL) {
14162 ctxt->_private = data;
14163 }
14164
14165 if (ctxt->directory == NULL)
14166 ctxt->directory = xmlParserGetDirectory(filename);
14167
14168 ctxt->recovery = recovery;
14169
14170 xmlParseDocument(ctxt);
14171
14172 if ((ctxt->wellFormed) || recovery) {
14173 ret = ctxt->myDoc;
14174 if (ret != NULL) {
14175 if (ctxt->input->buf->compressed > 0)
14176 ret->compression = 9;
14177 else
14178 ret->compression = ctxt->input->buf->compressed;
14179 }
14180 }
14181 else {
14182 ret = NULL;
14183 xmlFreeDoc(ctxt->myDoc);
14184 ctxt->myDoc = NULL;
14185 }
14186 if (sax != NULL)
14187 ctxt->sax = NULL;
14188 xmlFreeParserCtxt(ctxt);
14189
14190 return(ret);
14191}
14192
14193/**
14194 * xmlSAXParseFile:
14195 * @sax: the SAX handler block
14196 * @filename: the filename
14197 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14198 * documents
14199 *
14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201 * compressed document is provided by default if found at compile-time.
14202 * It use the given SAX function block to handle the parsing callback.
14203 * If sax is NULL, fallback to the default DOM tree building routines.
14204 *
14205 * Returns the resulting document tree
14206 */
14207
14208xmlDocPtr
14209xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14210 int recovery) {
14211 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14212}
14213
14214/**
14215 * xmlRecoverDoc:
14216 * @cur: a pointer to an array of xmlChar
14217 *
14218 * parse an XML in-memory document and build a tree.
14219 * In the case the document is not Well Formed, a attempt to build a
14220 * tree is tried anyway
14221 *
14222 * Returns the resulting document tree or NULL in case of failure
14223 */
14224
14225xmlDocPtr
14226xmlRecoverDoc(const xmlChar *cur) {
14227 return(xmlSAXParseDoc(NULL, cur, 1));
14228}
14229
14230/**
14231 * xmlParseFile:
14232 * @filename: the filename
14233 *
14234 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14235 * compressed document is provided by default if found at compile-time.
14236 *
14237 * Returns the resulting document tree if the file was wellformed,
14238 * NULL otherwise.
14239 */
14240
14241xmlDocPtr
14242xmlParseFile(const char *filename) {
14243 return(xmlSAXParseFile(NULL, filename, 0));
14244}
14245
14246/**
14247 * xmlRecoverFile:
14248 * @filename: the filename
14249 *
14250 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14251 * compressed document is provided by default if found at compile-time.
14252 * In the case the document is not Well Formed, it attempts to build
14253 * a tree anyway
14254 *
14255 * Returns the resulting document tree or NULL in case of failure
14256 */
14257
14258xmlDocPtr
14259xmlRecoverFile(const char *filename) {
14260 return(xmlSAXParseFile(NULL, filename, 1));
14261}
14262
14263
14264/**
14265 * xmlSetupParserForBuffer:
14266 * @ctxt: an XML parser context
14267 * @buffer: a xmlChar * buffer
14268 * @filename: a file name
14269 *
14270 * Setup the parser context to parse a new buffer; Clears any prior
14271 * contents from the parser context. The buffer parameter must not be
14272 * NULL, but the filename parameter can be
14273 */
14274void
14275xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14276 const char* filename)
14277{
14278 xmlParserInputPtr input;
14279
14280 if ((ctxt == NULL) || (buffer == NULL))
14281 return;
14282
14283 input = xmlNewInputStream(ctxt);
14284 if (input == NULL) {
14285 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14286 xmlClearParserCtxt(ctxt);
14287 return;
14288 }
14289
14290 xmlClearParserCtxt(ctxt);
14291 if (filename != NULL)
14292 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14293 input->base = buffer;
14294 input->cur = buffer;
14295 input->end = &buffer[xmlStrlen(buffer)];
14296 inputPush(ctxt, input);
14297}
14298
14299/**
14300 * xmlSAXUserParseFile:
14301 * @sax: a SAX handler
14302 * @user_data: The user data returned on SAX callbacks
14303 * @filename: a file name
14304 *
14305 * parse an XML file and call the given SAX handler routines.
14306 * Automatic support for ZLIB/Compress compressed document is provided
14307 *
14308 * Returns 0 in case of success or a error number otherwise
14309 */
14310int
14311xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14312 const char *filename) {
14313 int ret = 0;
14314 xmlParserCtxtPtr ctxt;
14315
14316 ctxt = xmlCreateFileParserCtxt(filename);
14317 if (ctxt == NULL) return -1;
14318 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14319 xmlFree(ctxt->sax);
14320 ctxt->sax = sax;
14321 xmlDetectSAX2(ctxt);
14322
14323 if (user_data != NULL)
14324 ctxt->userData = user_data;
14325
14326 xmlParseDocument(ctxt);
14327
14328 if (ctxt->wellFormed)
14329 ret = 0;
14330 else {
14331 if (ctxt->errNo != 0)
14332 ret = ctxt->errNo;
14333 else
14334 ret = -1;
14335 }
14336 if (sax != NULL)
14337 ctxt->sax = NULL;
14338 if (ctxt->myDoc != NULL) {
14339 xmlFreeDoc(ctxt->myDoc);
14340 ctxt->myDoc = NULL;
14341 }
14342 xmlFreeParserCtxt(ctxt);
14343
14344 return ret;
14345}
14346#endif /* LIBXML_SAX1_ENABLED */
14347
14348/************************************************************************
14349 * *
14350 * Front ends when parsing from memory *
14351 * *
14352 ************************************************************************/
14353
14354/**
14355 * xmlCreateMemoryParserCtxt:
14356 * @buffer: a pointer to a char array
14357 * @size: the size of the array
14358 *
14359 * Create a parser context for an XML in-memory document.
14360 *
14361 * Returns the new parser context or NULL
14362 */
14363xmlParserCtxtPtr
14364xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14365 xmlParserCtxtPtr ctxt;
14366 xmlParserInputPtr input;
14367 xmlParserInputBufferPtr buf;
14368
14369 if (buffer == NULL)
14370 return(NULL);
14371 if (size <= 0)
14372 return(NULL);
14373
14374 ctxt = xmlNewParserCtxt();
14375 if (ctxt == NULL)
14376 return(NULL);
14377
14378 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14379 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14380 if (buf == NULL) {
14381 xmlFreeParserCtxt(ctxt);
14382 return(NULL);
14383 }
14384
14385 input = xmlNewInputStream(ctxt);
14386 if (input == NULL) {
14387 xmlFreeParserInputBuffer(buf);
14388 xmlFreeParserCtxt(ctxt);
14389 return(NULL);
14390 }
14391
14392 input->filename = NULL;
14393 input->buf = buf;
14394 xmlBufResetInput(input->buf->buffer, input);
14395
14396 inputPush(ctxt, input);
14397 return(ctxt);
14398}
14399
14400#ifdef LIBXML_SAX1_ENABLED
14401/**
14402 * xmlSAXParseMemoryWithData:
14403 * @sax: the SAX handler block
14404 * @buffer: an pointer to a char array
14405 * @size: the size of the array
14406 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14407 * documents
14408 * @data: the userdata
14409 *
14410 * parse an XML in-memory block and use the given SAX function block
14411 * to handle the parsing callback. If sax is NULL, fallback to the default
14412 * DOM tree building routines.
14413 *
14414 * User data (void *) is stored within the parser context in the
14415 * context's _private member, so it is available nearly everywhere in libxml
14416 *
14417 * Returns the resulting document tree
14418 */
14419
14420xmlDocPtr
14421xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14422 int size, int recovery, void *data) {
14423 xmlDocPtr ret;
14424 xmlParserCtxtPtr ctxt;
14425
14426 xmlInitParser();
14427
14428 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14429 if (ctxt == NULL) return(NULL);
14430 if (sax != NULL) {
14431 if (ctxt->sax != NULL)
14432 xmlFree(ctxt->sax);
14433 ctxt->sax = sax;
14434 }
14435 xmlDetectSAX2(ctxt);
14436 if (data!=NULL) {
14437 ctxt->_private=data;
14438 }
14439
14440 ctxt->recovery = recovery;
14441
14442 xmlParseDocument(ctxt);
14443
14444 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14445 else {
14446 ret = NULL;
14447 xmlFreeDoc(ctxt->myDoc);
14448 ctxt->myDoc = NULL;
14449 }
14450 if (sax != NULL)
14451 ctxt->sax = NULL;
14452 xmlFreeParserCtxt(ctxt);
14453
14454 return(ret);
14455}
14456
14457/**
14458 * xmlSAXParseMemory:
14459 * @sax: the SAX handler block
14460 * @buffer: an pointer to a char array
14461 * @size: the size of the array
14462 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14463 * documents
14464 *
14465 * parse an XML in-memory block and use the given SAX function block
14466 * to handle the parsing callback. If sax is NULL, fallback to the default
14467 * DOM tree building routines.
14468 *
14469 * Returns the resulting document tree
14470 */
14471xmlDocPtr
14472xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14473 int size, int recovery) {
14474 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14475}
14476
14477/**
14478 * xmlParseMemory:
14479 * @buffer: an pointer to a char array
14480 * @size: the size of the array
14481 *
14482 * parse an XML in-memory block and build a tree.
14483 *
14484 * Returns the resulting document tree
14485 */
14486
14487xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14488 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14489}
14490
14491/**
14492 * xmlRecoverMemory:
14493 * @buffer: an pointer to a char array
14494 * @size: the size of the array
14495 *
14496 * parse an XML in-memory block and build a tree.
14497 * In the case the document is not Well Formed, an attempt to
14498 * build a tree is tried anyway
14499 *
14500 * Returns the resulting document tree or NULL in case of error
14501 */
14502
14503xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14504 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14505}
14506
14507/**
14508 * xmlSAXUserParseMemory:
14509 * @sax: a SAX handler
14510 * @user_data: The user data returned on SAX callbacks
14511 * @buffer: an in-memory XML document input
14512 * @size: the length of the XML document in bytes
14513 *
14514 * A better SAX parsing routine.
14515 * parse an XML in-memory buffer and call the given SAX handler routines.
14516 *
14517 * Returns 0 in case of success or a error number otherwise
14518 */
14519int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14520 const char *buffer, int size) {
14521 int ret = 0;
14522 xmlParserCtxtPtr ctxt;
14523
14524 xmlInitParser();
14525
14526 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14527 if (ctxt == NULL) return -1;
14528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14529 xmlFree(ctxt->sax);
14530 ctxt->sax = sax;
14531 xmlDetectSAX2(ctxt);
14532
14533 if (user_data != NULL)
14534 ctxt->userData = user_data;
14535
14536 xmlParseDocument(ctxt);
14537
14538 if (ctxt->wellFormed)
14539 ret = 0;
14540 else {
14541 if (ctxt->errNo != 0)
14542 ret = ctxt->errNo;
14543 else
14544 ret = -1;
14545 }
14546 if (sax != NULL)
14547 ctxt->sax = NULL;
14548 if (ctxt->myDoc != NULL) {
14549 xmlFreeDoc(ctxt->myDoc);
14550 ctxt->myDoc = NULL;
14551 }
14552 xmlFreeParserCtxt(ctxt);
14553
14554 return ret;
14555}
14556#endif /* LIBXML_SAX1_ENABLED */
14557
14558/**
14559 * xmlCreateDocParserCtxt:
14560 * @cur: a pointer to an array of xmlChar
14561 *
14562 * Creates a parser context for an XML in-memory document.
14563 *
14564 * Returns the new parser context or NULL
14565 */
14566xmlParserCtxtPtr
14567xmlCreateDocParserCtxt(const xmlChar *cur) {
14568 int len;
14569
14570 if (cur == NULL)
14571 return(NULL);
14572 len = xmlStrlen(cur);
14573 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14574}
14575
14576#ifdef LIBXML_SAX1_ENABLED
14577/**
14578 * xmlSAXParseDoc:
14579 * @sax: the SAX handler block
14580 * @cur: a pointer to an array of xmlChar
14581 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14582 * documents
14583 *
14584 * parse an XML in-memory document and build a tree.
14585 * It use the given SAX function block to handle the parsing callback.
14586 * If sax is NULL, fallback to the default DOM tree building routines.
14587 *
14588 * Returns the resulting document tree
14589 */
14590
14591xmlDocPtr
14592xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14593 xmlDocPtr ret;
14594 xmlParserCtxtPtr ctxt;
14595 xmlSAXHandlerPtr oldsax = NULL;
14596
14597 if (cur == NULL) return(NULL);
14598
14599
14600 ctxt = xmlCreateDocParserCtxt(cur);
14601 if (ctxt == NULL) return(NULL);
14602 if (sax != NULL) {
14603 oldsax = ctxt->sax;
14604 ctxt->sax = sax;
14605 ctxt->userData = NULL;
14606 }
14607 xmlDetectSAX2(ctxt);
14608
14609 xmlParseDocument(ctxt);
14610 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14611 else {
14612 ret = NULL;
14613 xmlFreeDoc(ctxt->myDoc);
14614 ctxt->myDoc = NULL;
14615 }
14616 if (sax != NULL)
14617 ctxt->sax = oldsax;
14618 xmlFreeParserCtxt(ctxt);
14619
14620 return(ret);
14621}
14622
14623/**
14624 * xmlParseDoc:
14625 * @cur: a pointer to an array of xmlChar
14626 *
14627 * parse an XML in-memory document and build a tree.
14628 *
14629 * Returns the resulting document tree
14630 */
14631
14632xmlDocPtr
14633xmlParseDoc(const xmlChar *cur) {
14634 return(xmlSAXParseDoc(NULL, cur, 0));
14635}
14636#endif /* LIBXML_SAX1_ENABLED */
14637
14638#ifdef LIBXML_LEGACY_ENABLED
14639/************************************************************************
14640 * *
14641 * Specific function to keep track of entities references *
14642 * and used by the XSLT debugger *
14643 * *
14644 ************************************************************************/
14645
14646static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14647
14648/**
14649 * xmlAddEntityReference:
14650 * @ent : A valid entity
14651 * @firstNode : A valid first node for children of entity
14652 * @lastNode : A valid last node of children entity
14653 *
14654 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14655 */
14656static void
14657xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14658 xmlNodePtr lastNode)
14659{
14660 if (xmlEntityRefFunc != NULL) {
14661 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14662 }
14663}
14664
14665
14666/**
14667 * xmlSetEntityReferenceFunc:
14668 * @func: A valid function
14669 *
14670 * Set the function to call call back when a xml reference has been made
14671 */
14672void
14673xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14674{
14675 xmlEntityRefFunc = func;
14676}
14677#endif /* LIBXML_LEGACY_ENABLED */
14678
14679/************************************************************************
14680 * *
14681 * Miscellaneous *
14682 * *
14683 ************************************************************************/
14684
14685#ifdef LIBXML_XPATH_ENABLED
14686#include <libxml/xpath.h>
14687#endif
14688
14689extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14690static int xmlParserInitialized = 0;
14691
14692/**
14693 * xmlInitParser:
14694 *
14695 * Initialization function for the XML parser.
14696 * This is not reentrant. Call once before processing in case of
14697 * use in multithreaded programs.
14698 */
14699
14700void
14701xmlInitParser(void) {
14702 if (xmlParserInitialized != 0)
14703 return;
14704
14705#ifdef LIBXML_THREAD_ENABLED
14706 __xmlGlobalInitMutexLock();
14707 if (xmlParserInitialized == 0) {
14708#endif
14709 xmlInitThreads();
14710 xmlInitGlobals();
14711 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14712 (xmlGenericError == NULL))
14713 initGenericErrorDefaultFunc(NULL);
14714 xmlInitMemory();
14715 xmlInitializeDict();
14716 xmlInitCharEncodingHandlers();
14717 xmlDefaultSAXHandlerInit();
14718 xmlRegisterDefaultInputCallbacks();
14719#ifdef LIBXML_OUTPUT_ENABLED
14720 xmlRegisterDefaultOutputCallbacks();
14721#endif /* LIBXML_OUTPUT_ENABLED */
14722#ifdef LIBXML_HTML_ENABLED
14723 htmlInitAutoClose();
14724 htmlDefaultSAXHandlerInit();
14725#endif
14726#ifdef LIBXML_XPATH_ENABLED
14727 xmlXPathInit();
14728#endif
14729 xmlParserInitialized = 1;
14730#ifdef LIBXML_THREAD_ENABLED
14731 }
14732 __xmlGlobalInitMutexUnlock();
14733#endif
14734}
14735
14736/**
14737 * xmlCleanupParser:
14738 *
14739 * This function name is somewhat misleading. It does not clean up
14740 * parser state, it cleans up memory allocated by the library itself.
14741 * It is a cleanup function for the XML library. It tries to reclaim all
14742 * related global memory allocated for the library processing.
14743 * It doesn't deallocate any document related memory. One should
14744 * call xmlCleanupParser() only when the process has finished using
14745 * the library and all XML/HTML documents built with it.
14746 * See also xmlInitParser() which has the opposite function of preparing
14747 * the library for operations.
14748 *
14749 * WARNING: if your application is multithreaded or has plugin support
14750 * calling this may crash the application if another thread or
14751 * a plugin is still using libxml2. It's sometimes very hard to
14752 * guess if libxml2 is in use in the application, some libraries
14753 * or plugins may use it without notice. In case of doubt abstain
14754 * from calling this function or do it just before calling exit()
14755 * to avoid leak reports from valgrind !
14756 */
14757
14758void
14759xmlCleanupParser(void) {
14760 if (!xmlParserInitialized)
14761 return;
14762
14763 xmlCleanupCharEncodingHandlers();
14764#ifdef LIBXML_CATALOG_ENABLED
14765 xmlCatalogCleanup();
14766#endif
14767 xmlDictCleanup();
14768 xmlCleanupInputCallbacks();
14769#ifdef LIBXML_OUTPUT_ENABLED
14770 xmlCleanupOutputCallbacks();
14771#endif
14772#ifdef LIBXML_SCHEMAS_ENABLED
14773 xmlSchemaCleanupTypes();
14774 xmlRelaxNGCleanupTypes();
14775#endif
14776 xmlResetLastError();
14777 xmlCleanupGlobals();
14778 xmlCleanupThreads(); /* must be last if called not from the main thread */
14779 xmlCleanupMemory();
14780 xmlParserInitialized = 0;
14781}
14782
14783/************************************************************************
14784 * *
14785 * New set (2.6.0) of simpler and more flexible APIs *
14786 * *
14787 ************************************************************************/
14788
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else. @str is evaluated more than
 * once, so it must not have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14800
14801/**
14802 * xmlCtxtReset:
14803 * @ctxt: an XML parser context
14804 *
14805 * Reset a parser context
14806 */
14807void
14808xmlCtxtReset(xmlParserCtxtPtr ctxt)
14809{
14810 xmlParserInputPtr input;
14811 xmlDictPtr dict;
14812
14813 if (ctxt == NULL)
14814 return;
14815
14816 dict = ctxt->dict;
14817
14818 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14819 xmlFreeInputStream(input);
14820 }
14821 ctxt->inputNr = 0;
14822 ctxt->input = NULL;
14823
14824 ctxt->spaceNr = 0;
14825 if (ctxt->spaceTab != NULL) {
14826 ctxt->spaceTab[0] = -1;
14827 ctxt->space = &ctxt->spaceTab[0];
14828 } else {
14829 ctxt->space = NULL;
14830 }
14831
14832
14833 ctxt->nodeNr = 0;
14834 ctxt->node = NULL;
14835
14836 ctxt->nameNr = 0;
14837 ctxt->name = NULL;
14838
14839 DICT_FREE(ctxt->version);
14840 ctxt->version = NULL;
14841 DICT_FREE(ctxt->encoding);
14842 ctxt->encoding = NULL;
14843 DICT_FREE(ctxt->directory);
14844 ctxt->directory = NULL;
14845 DICT_FREE(ctxt->extSubURI);
14846 ctxt->extSubURI = NULL;
14847 DICT_FREE(ctxt->extSubSystem);
14848 ctxt->extSubSystem = NULL;
14849 if (ctxt->myDoc != NULL)
14850 xmlFreeDoc(ctxt->myDoc);
14851 ctxt->myDoc = NULL;
14852
14853 ctxt->standalone = -1;
14854 ctxt->hasExternalSubset = 0;
14855 ctxt->hasPErefs = 0;
14856 ctxt->html = 0;
14857 ctxt->external = 0;
14858 ctxt->instate = XML_PARSER_START;
14859 ctxt->token = 0;
14860
14861 ctxt->wellFormed = 1;
14862 ctxt->nsWellFormed = 1;
14863 ctxt->disableSAX = 0;
14864 ctxt->valid = 1;
14865#if 0
14866 ctxt->vctxt.userData = ctxt;
14867 ctxt->vctxt.error = xmlParserValidityError;
14868 ctxt->vctxt.warning = xmlParserValidityWarning;
14869#endif
14870 ctxt->record_info = 0;
14871 ctxt->nbChars = 0;
14872 ctxt->checkIndex = 0;
14873 ctxt->inSubset = 0;
14874 ctxt->errNo = XML_ERR_OK;
14875 ctxt->depth = 0;
14876 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14877 ctxt->catalogs = NULL;
14878 ctxt->nbentities = 0;
14879 ctxt->sizeentities = 0;
14880 ctxt->sizeentcopy = 0;
14881 xmlInitNodeInfoSeq(&ctxt->node_seq);
14882
14883 if (ctxt->attsDefault != NULL) {
14884 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14885 ctxt->attsDefault = NULL;
14886 }
14887 if (ctxt->attsSpecial != NULL) {
14888 xmlHashFree(ctxt->attsSpecial, NULL);
14889 ctxt->attsSpecial = NULL;
14890 }
14891
14892#ifdef LIBXML_CATALOG_ENABLED
14893 if (ctxt->catalogs != NULL)
14894 xmlCatalogFreeLocal(ctxt->catalogs);
14895#endif
14896 if (ctxt->lastError.code != XML_ERR_OK)
14897 xmlResetError(&ctxt->lastError);
14898}
14899
14900/**
14901 * xmlCtxtResetPush:
14902 * @ctxt: an XML parser context
14903 * @chunk: a pointer to an array of chars
14904 * @size: number of chars in the array
14905 * @filename: an optional file name or URI
14906 * @encoding: the document encoding, or NULL
14907 *
14908 * Reset a push parser context
14909 *
14910 * Returns 0 in case of success and 1 in case of error
14911 */
14912int
14913xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14914 int size, const char *filename, const char *encoding)
14915{
14916 xmlParserInputPtr inputStream;
14917 xmlParserInputBufferPtr buf;
14918 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14919
14920 if (ctxt == NULL)
14921 return(1);
14922
14923 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14924 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14925
14926 buf = xmlAllocParserInputBuffer(enc);
14927 if (buf == NULL)
14928 return(1);
14929
14930 if (ctxt == NULL) {
14931 xmlFreeParserInputBuffer(buf);
14932 return(1);
14933 }
14934
14935 xmlCtxtReset(ctxt);
14936
14937 if (ctxt->pushTab == NULL) {
14938 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14939 sizeof(xmlChar *));
14940 if (ctxt->pushTab == NULL) {
14941 xmlErrMemory(ctxt, NULL);
14942 xmlFreeParserInputBuffer(buf);
14943 return(1);
14944 }
14945 }
14946
14947 if (filename == NULL) {
14948 ctxt->directory = NULL;
14949 } else {
14950 ctxt->directory = xmlParserGetDirectory(filename);
14951 }
14952
14953 inputStream = xmlNewInputStream(ctxt);
14954 if (inputStream == NULL) {
14955 xmlFreeParserInputBuffer(buf);
14956 return(1);
14957 }
14958
14959 if (filename == NULL)
14960 inputStream->filename = NULL;
14961 else
14962 inputStream->filename = (char *)
14963 xmlCanonicPath((const xmlChar *) filename);
14964 inputStream->buf = buf;
14965 xmlBufResetInput(buf->buffer, inputStream);
14966
14967 inputPush(ctxt, inputStream);
14968
14969 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14970 (ctxt->input->buf != NULL)) {
14971 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14972 size_t cur = ctxt->input->cur - ctxt->input->base;
14973
14974 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14975
14976 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14977#ifdef DEBUG_PUSH
14978 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14979#endif
14980 }
14981
14982 if (encoding != NULL) {
14983 xmlCharEncodingHandlerPtr hdlr;
14984
14985 if (ctxt->encoding != NULL)
14986 xmlFree((xmlChar *) ctxt->encoding);
14987 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14988
14989 hdlr = xmlFindCharEncodingHandler(encoding);
14990 if (hdlr != NULL) {
14991 xmlSwitchToEncoding(ctxt, hdlr);
14992 } else {
14993 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14994 "Unsupported encoding %s\n", BAD_CAST encoding);
14995 }
14996 } else if (enc != XML_CHAR_ENCODING_NONE) {
14997 xmlSwitchEncoding(ctxt, enc);
14998 }
14999
15000 return(0);
15001}
15002
15003
15004/**
15005 * xmlCtxtUseOptionsInternal:
15006 * @ctxt: an XML parser context
15007 * @options: a combination of xmlParserOption
15008 * @encoding: the user provided encoding to use
15009 *
15010 * Applies the options to the parser context
15011 *
15012 * Returns 0 in case of success, the set of unknown or unimplemented options
15013 * in case of error.
15014 */
15015static int
15016xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15017{
15018 if (ctxt == NULL)
15019 return(-1);
15020 if (encoding != NULL) {
15021 if (ctxt->encoding != NULL)
15022 xmlFree((xmlChar *) ctxt->encoding);
15023 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15024 }
15025 if (options & XML_PARSE_RECOVER) {
15026 ctxt->recovery = 1;
15027 options -= XML_PARSE_RECOVER;
15028 ctxt->options |= XML_PARSE_RECOVER;
15029 } else
15030 ctxt->recovery = 0;
15031 if (options & XML_PARSE_DTDLOAD) {
15032 ctxt->loadsubset = XML_DETECT_IDS;
15033 options -= XML_PARSE_DTDLOAD;
15034 ctxt->options |= XML_PARSE_DTDLOAD;
15035 } else
15036 ctxt->loadsubset = 0;
15037 if (options & XML_PARSE_DTDATTR) {
15038 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15039 options -= XML_PARSE_DTDATTR;
15040 ctxt->options |= XML_PARSE_DTDATTR;
15041 }
15042 if (options & XML_PARSE_NOENT) {
15043 ctxt->replaceEntities = 1;
15044 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15045 options -= XML_PARSE_NOENT;
15046 ctxt->options |= XML_PARSE_NOENT;
15047 } else
15048 ctxt->replaceEntities = 0;
15049 if (options & XML_PARSE_PEDANTIC) {
15050 ctxt->pedantic = 1;
15051 options -= XML_PARSE_PEDANTIC;
15052 ctxt->options |= XML_PARSE_PEDANTIC;
15053 } else
15054 ctxt->pedantic = 0;
15055 if (options & XML_PARSE_NOBLANKS) {
15056 ctxt->keepBlanks = 0;
15057 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15058 options -= XML_PARSE_NOBLANKS;
15059 ctxt->options |= XML_PARSE_NOBLANKS;
15060 } else
15061 ctxt->keepBlanks = 1;
15062 if (options & XML_PARSE_DTDVALID) {
15063 ctxt->validate = 1;
15064 if (options & XML_PARSE_NOWARNING)
15065 ctxt->vctxt.warning = NULL;
15066 if (options & XML_PARSE_NOERROR)
15067 ctxt->vctxt.error = NULL;
15068 options -= XML_PARSE_DTDVALID;
15069 ctxt->options |= XML_PARSE_DTDVALID;
15070 } else
15071 ctxt->validate = 0;
15072 if (options & XML_PARSE_NOWARNING) {
15073 ctxt->sax->warning = NULL;
15074 options -= XML_PARSE_NOWARNING;
15075 }
15076 if (options & XML_PARSE_NOERROR) {
15077 ctxt->sax->error = NULL;
15078 ctxt->sax->fatalError = NULL;
15079 options -= XML_PARSE_NOERROR;
15080 }
15081#ifdef LIBXML_SAX1_ENABLED
15082 if (options & XML_PARSE_SAX1) {
15083 ctxt->sax->startElement = xmlSAX2StartElement;
15084 ctxt->sax->endElement = xmlSAX2EndElement;
15085 ctxt->sax->startElementNs = NULL;
15086 ctxt->sax->endElementNs = NULL;
15087 ctxt->sax->initialized = 1;
15088 options -= XML_PARSE_SAX1;
15089 ctxt->options |= XML_PARSE_SAX1;
15090 }
15091#endif /* LIBXML_SAX1_ENABLED */
15092 if (options & XML_PARSE_NODICT) {
15093 ctxt->dictNames = 0;
15094 options -= XML_PARSE_NODICT;
15095 ctxt->options |= XML_PARSE_NODICT;
15096 } else {
15097 ctxt->dictNames = 1;
15098 }
15099 if (options & XML_PARSE_NOCDATA) {
15100 ctxt->sax->cdataBlock = NULL;
15101 options -= XML_PARSE_NOCDATA;
15102 ctxt->options |= XML_PARSE_NOCDATA;
15103 }
15104 if (options & XML_PARSE_NSCLEAN) {
15105 ctxt->options |= XML_PARSE_NSCLEAN;
15106 options -= XML_PARSE_NSCLEAN;
15107 }
15108 if (options & XML_PARSE_NONET) {
15109 ctxt->options |= XML_PARSE_NONET;
15110 options -= XML_PARSE_NONET;
15111 }
15112 if (options & XML_PARSE_COMPACT) {
15113 ctxt->options |= XML_PARSE_COMPACT;
15114 options -= XML_PARSE_COMPACT;
15115 }
15116 if (options & XML_PARSE_OLD10) {
15117 ctxt->options |= XML_PARSE_OLD10;
15118 options -= XML_PARSE_OLD10;
15119 }
15120 if (options & XML_PARSE_NOBASEFIX) {
15121 ctxt->options |= XML_PARSE_NOBASEFIX;
15122 options -= XML_PARSE_NOBASEFIX;
15123 }
15124 if (options & XML_PARSE_HUGE) {
15125 ctxt->options |= XML_PARSE_HUGE;
15126 options -= XML_PARSE_HUGE;
15127 if (ctxt->dict != NULL)
15128 xmlDictSetLimit(ctxt->dict, 0);
15129 }
15130 if (options & XML_PARSE_OLDSAX) {
15131 ctxt->options |= XML_PARSE_OLDSAX;
15132 options -= XML_PARSE_OLDSAX;
15133 }
15134 if (options & XML_PARSE_IGNORE_ENC) {
15135 ctxt->options |= XML_PARSE_IGNORE_ENC;
15136 options -= XML_PARSE_IGNORE_ENC;
15137 }
15138 if (options & XML_PARSE_BIG_LINES) {
15139 ctxt->options |= XML_PARSE_BIG_LINES;
15140 options -= XML_PARSE_BIG_LINES;
15141 }
15142 ctxt->linenumbers = 1;
15143 return (options);
15144}
15145
15146/**
15147 * xmlCtxtUseOptions:
15148 * @ctxt: an XML parser context
15149 * @options: a combination of xmlParserOption
15150 *
15151 * Applies the options to the parser context
15152 *
15153 * Returns 0 in case of success, the set of unknown or unimplemented options
15154 * in case of error.
15155 */
15156int
15157xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15158{
15159 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15160}
15161
15162/**
15163 * xmlDoRead:
15164 * @ctxt: an XML parser context
15165 * @URL: the base URL to use for the document
15166 * @encoding: the document encoding, or NULL
15167 * @options: a combination of xmlParserOption
15168 * @reuse: keep the context for reuse
15169 *
15170 * Common front-end for the xmlRead functions
15171 *
15172 * Returns the resulting document tree or NULL
15173 */
15174static xmlDocPtr
15175xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15176 int options, int reuse)
15177{
15178 xmlDocPtr ret;
15179
15180 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15181 if (encoding != NULL) {
15182 xmlCharEncodingHandlerPtr hdlr;
15183
15184 hdlr = xmlFindCharEncodingHandler(encoding);
15185 if (hdlr != NULL)
15186 xmlSwitchToEncoding(ctxt, hdlr);
15187 }
15188 if ((URL != NULL) && (ctxt->input != NULL) &&
15189 (ctxt->input->filename == NULL))
15190 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15191 xmlParseDocument(ctxt);
15192 if ((ctxt->wellFormed) || ctxt->recovery)
15193 ret = ctxt->myDoc;
15194 else {
15195 ret = NULL;
15196 if (ctxt->myDoc != NULL) {
15197 xmlFreeDoc(ctxt->myDoc);
15198 }
15199 }
15200 ctxt->myDoc = NULL;
15201 if (!reuse) {
15202 xmlFreeParserCtxt(ctxt);
15203 }
15204
15205 return (ret);
15206}
15207
15208/**
15209 * xmlReadDoc:
15210 * @cur: a pointer to a zero terminated string
15211 * @URL: the base URL to use for the document
15212 * @encoding: the document encoding, or NULL
15213 * @options: a combination of xmlParserOption
15214 *
15215 * parse an XML in-memory document and build a tree.
15216 *
15217 * Returns the resulting document tree
15218 */
15219xmlDocPtr
15220xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15221{
15222 xmlParserCtxtPtr ctxt;
15223
15224 if (cur == NULL)
15225 return (NULL);
15226 xmlInitParser();
15227
15228 ctxt = xmlCreateDocParserCtxt(cur);
15229 if (ctxt == NULL)
15230 return (NULL);
15231 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15232}
15233
15234/**
15235 * xmlReadFile:
15236 * @filename: a file or URL
15237 * @encoding: the document encoding, or NULL
15238 * @options: a combination of xmlParserOption
15239 *
15240 * parse an XML file from the filesystem or the network.
15241 *
15242 * Returns the resulting document tree
15243 */
15244xmlDocPtr
15245xmlReadFile(const char *filename, const char *encoding, int options)
15246{
15247 xmlParserCtxtPtr ctxt;
15248
15249 xmlInitParser();
15250 ctxt = xmlCreateURLParserCtxt(filename, options);
15251 if (ctxt == NULL)
15252 return (NULL);
15253 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15254}
15255
15256/**
15257 * xmlReadMemory:
15258 * @buffer: a pointer to a char array
15259 * @size: the size of the array
15260 * @URL: the base URL to use for the document
15261 * @encoding: the document encoding, or NULL
15262 * @options: a combination of xmlParserOption
15263 *
15264 * parse an XML in-memory document and build a tree.
15265 *
15266 * Returns the resulting document tree
15267 */
15268xmlDocPtr
15269xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15270{
15271 xmlParserCtxtPtr ctxt;
15272
15273 xmlInitParser();
15274 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15275 if (ctxt == NULL)
15276 return (NULL);
15277 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15278}
15279
15280/**
15281 * xmlReadFd:
15282 * @fd: an open file descriptor
15283 * @URL: the base URL to use for the document
15284 * @encoding: the document encoding, or NULL
15285 * @options: a combination of xmlParserOption
15286 *
15287 * parse an XML from a file descriptor and build a tree.
15288 * NOTE that the file descriptor will not be closed when the
15289 * reader is closed or reset.
15290 *
15291 * Returns the resulting document tree
15292 */
15293xmlDocPtr
15294xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15295{
15296 xmlParserCtxtPtr ctxt;
15297 xmlParserInputBufferPtr input;
15298 xmlParserInputPtr stream;
15299
15300 if (fd < 0)
15301 return (NULL);
15302 xmlInitParser();
15303
15304 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15305 if (input == NULL)
15306 return (NULL);
15307 input->closecallback = NULL;
15308 ctxt = xmlNewParserCtxt();
15309 if (ctxt == NULL) {
15310 xmlFreeParserInputBuffer(input);
15311 return (NULL);
15312 }
15313 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15314 if (stream == NULL) {
15315 xmlFreeParserInputBuffer(input);
15316 xmlFreeParserCtxt(ctxt);
15317 return (NULL);
15318 }
15319 inputPush(ctxt, stream);
15320 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15321}
15322
15323/**
15324 * xmlReadIO:
15325 * @ioread: an I/O read function
15326 * @ioclose: an I/O close function
15327 * @ioctx: an I/O handler
15328 * @URL: the base URL to use for the document
15329 * @encoding: the document encoding, or NULL
15330 * @options: a combination of xmlParserOption
15331 *
15332 * parse an XML document from I/O functions and source and build a tree.
15333 *
15334 * Returns the resulting document tree
15335 */
15336xmlDocPtr
15337xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15338 void *ioctx, const char *URL, const char *encoding, int options)
15339{
15340 xmlParserCtxtPtr ctxt;
15341 xmlParserInputBufferPtr input;
15342 xmlParserInputPtr stream;
15343
15344 if (ioread == NULL)
15345 return (NULL);
15346 xmlInitParser();
15347
15348 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15349 XML_CHAR_ENCODING_NONE);
15350 if (input == NULL) {
15351 if (ioclose != NULL)
15352 ioclose(ioctx);
15353 return (NULL);
15354 }
15355 ctxt = xmlNewParserCtxt();
15356 if (ctxt == NULL) {
15357 xmlFreeParserInputBuffer(input);
15358 return (NULL);
15359 }
15360 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15361 if (stream == NULL) {
15362 xmlFreeParserInputBuffer(input);
15363 xmlFreeParserCtxt(ctxt);
15364 return (NULL);
15365 }
15366 inputPush(ctxt, stream);
15367 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15368}
15369
15370/**
15371 * xmlCtxtReadDoc:
15372 * @ctxt: an XML parser context
15373 * @cur: a pointer to a zero terminated string
15374 * @URL: the base URL to use for the document
15375 * @encoding: the document encoding, or NULL
15376 * @options: a combination of xmlParserOption
15377 *
15378 * parse an XML in-memory document and build a tree.
15379 * This reuses the existing @ctxt parser context
15380 *
15381 * Returns the resulting document tree
15382 */
15383xmlDocPtr
15384xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15385 const char *URL, const char *encoding, int options)
15386{
15387 xmlParserInputPtr stream;
15388
15389 if (cur == NULL)
15390 return (NULL);
15391 if (ctxt == NULL)
15392 return (NULL);
15393 xmlInitParser();
15394
15395 xmlCtxtReset(ctxt);
15396
15397 stream = xmlNewStringInputStream(ctxt, cur);
15398 if (stream == NULL) {
15399 return (NULL);
15400 }
15401 inputPush(ctxt, stream);
15402 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15403}
15404
15405/**
15406 * xmlCtxtReadFile:
15407 * @ctxt: an XML parser context
15408 * @filename: a file or URL
15409 * @encoding: the document encoding, or NULL
15410 * @options: a combination of xmlParserOption
15411 *
15412 * parse an XML file from the filesystem or the network.
15413 * This reuses the existing @ctxt parser context
15414 *
15415 * Returns the resulting document tree
15416 */
15417xmlDocPtr
15418xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15419 const char *encoding, int options)
15420{
15421 xmlParserInputPtr stream;
15422
15423 if (filename == NULL)
15424 return (NULL);
15425 if (ctxt == NULL)
15426 return (NULL);
15427 xmlInitParser();
15428
15429 xmlCtxtReset(ctxt);
15430
15431 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15432 if (stream == NULL) {
15433 return (NULL);
15434 }
15435 inputPush(ctxt, stream);
15436 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15437}
15438
15439/**
15440 * xmlCtxtReadMemory:
15441 * @ctxt: an XML parser context
15442 * @buffer: a pointer to a char array
15443 * @size: the size of the array
15444 * @URL: the base URL to use for the document
15445 * @encoding: the document encoding, or NULL
15446 * @options: a combination of xmlParserOption
15447 *
15448 * parse an XML in-memory document and build a tree.
15449 * This reuses the existing @ctxt parser context
15450 *
15451 * Returns the resulting document tree
15452 */
15453xmlDocPtr
15454xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15455 const char *URL, const char *encoding, int options)
15456{
15457 xmlParserInputBufferPtr input;
15458 xmlParserInputPtr stream;
15459
15460 if (ctxt == NULL)
15461 return (NULL);
15462 if (buffer == NULL)
15463 return (NULL);
15464 xmlInitParser();
15465
15466 xmlCtxtReset(ctxt);
15467
15468 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15469 if (input == NULL) {
15470 return(NULL);
15471 }
15472
15473 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15474 if (stream == NULL) {
15475 xmlFreeParserInputBuffer(input);
15476 return(NULL);
15477 }
15478
15479 inputPush(ctxt, stream);
15480 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15481}
15482
15483/**
15484 * xmlCtxtReadFd:
15485 * @ctxt: an XML parser context
15486 * @fd: an open file descriptor
15487 * @URL: the base URL to use for the document
15488 * @encoding: the document encoding, or NULL
15489 * @options: a combination of xmlParserOption
15490 *
15491 * parse an XML from a file descriptor and build a tree.
15492 * This reuses the existing @ctxt parser context
15493 * NOTE that the file descriptor will not be closed when the
15494 * reader is closed or reset.
15495 *
15496 * Returns the resulting document tree
15497 */
15498xmlDocPtr
15499xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15500 const char *URL, const char *encoding, int options)
15501{
15502 xmlParserInputBufferPtr input;
15503 xmlParserInputPtr stream;
15504
15505 if (fd < 0)
15506 return (NULL);
15507 if (ctxt == NULL)
15508 return (NULL);
15509 xmlInitParser();
15510
15511 xmlCtxtReset(ctxt);
15512
15513
15514 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15515 if (input == NULL)
15516 return (NULL);
15517 input->closecallback = NULL;
15518 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15519 if (stream == NULL) {
15520 xmlFreeParserInputBuffer(input);
15521 return (NULL);
15522 }
15523 inputPush(ctxt, stream);
15524 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15525}
15526
15527/**
15528 * xmlCtxtReadIO:
15529 * @ctxt: an XML parser context
15530 * @ioread: an I/O read function
15531 * @ioclose: an I/O close function
15532 * @ioctx: an I/O handler
15533 * @URL: the base URL to use for the document
15534 * @encoding: the document encoding, or NULL
15535 * @options: a combination of xmlParserOption
15536 *
15537 * parse an XML document from I/O functions and source and build a tree.
15538 * This reuses the existing @ctxt parser context
15539 *
15540 * Returns the resulting document tree
15541 */
15542xmlDocPtr
15543xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15544 xmlInputCloseCallback ioclose, void *ioctx,
15545 const char *URL,
15546 const char *encoding, int options)
15547{
15548 xmlParserInputBufferPtr input;
15549 xmlParserInputPtr stream;
15550
15551 if (ioread == NULL)
15552 return (NULL);
15553 if (ctxt == NULL)
15554 return (NULL);
15555 xmlInitParser();
15556
15557 xmlCtxtReset(ctxt);
15558
15559 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15560 XML_CHAR_ENCODING_NONE);
15561 if (input == NULL) {
15562 if (ioclose != NULL)
15563 ioclose(ioctx);
15564 return (NULL);
15565 }
15566 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15567 if (stream == NULL) {
15568 xmlFreeParserInputBuffer(input);
15569 return (NULL);
15570 }
15571 inputPush(ctxt, stream);
15572 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15573}
15574
15575#define bottom_parser
15576#include "elfgcchack.h"
15577