parserInternals.c source code [ClickHouse/contrib/libxml2/parserInternals.c]

1	/*
2	* parserInternals.c : Internal routines (and obsolete ones) needed for the
3	* XML and HTML parsers.
4	*
5	* See Copyright for the status of this software.
6	*
7	* daniel@veillard.com
8	*/
9
10	#define IN_LIBXML
11	#include "libxml.h"
12
13	#if defined(_WIN32) && !defined (__CYGWIN__)
14	#define XML_DIR_SEP '\\'
15	#else
16	#define XML_DIR_SEP '/'
17	#endif
18
19	#include <string.h>
20	#ifdef HAVE_CTYPE_H
21	#include <ctype.h>
22	#endif
23	#ifdef HAVE_STDLIB_H
24	#include <stdlib.h>
25	#endif
26	#ifdef HAVE_SYS_STAT_H
27	#include <sys/stat.h>
28	#endif
29	#ifdef HAVE_FCNTL_H
30	#include <fcntl.h>
31	#endif
32	#ifdef HAVE_UNISTD_H
33	#include <unistd.h>
34	#endif
35	#ifdef LIBXML_ZLIB_ENABLED
36	#include <zlib.h>
37	#endif
38
39	#include <libxml/xmlmemory.h>
40	#include <libxml/tree.h>
41	#include <libxml/parser.h>
42	#include <libxml/parserInternals.h>
43	#include <libxml/valid.h>
44	#include <libxml/entities.h>
45	#include <libxml/xmlerror.h>
46	#include <libxml/encoding.h>
47	#include <libxml/valid.h>
48	#include <libxml/xmlIO.h>
49	#include <libxml/uri.h>
50	#include <libxml/dict.h>
51	#include <libxml/SAX.h>
52	#ifdef LIBXML_CATALOG_ENABLED
53	#include <libxml/catalog.h>
54	#endif
55	#include <libxml/globals.h>
56	#include <libxml/chvalid.h>
57
/*
 * Shorthand accessors for the current input of a parser context.
 * The argument and the whole expansion are parenthesized so that the
 * macros stay correct when given an arbitrary expression such as
 * CUR(&ctx) or CUR(table[i]).
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not run past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
61
62	#include "buf.h"
63	#include "enc.h"
64
65	/*
66	* Various global defaults for parsing
67	*/
68
69	/**
70	* xmlCheckVersion:
71	* @version: the include version number
72	*
73	* check the compiled lib version against the include one.
74	* This can warn or immediately kill the application
75	*/
76	void
77	xmlCheckVersion(int version) {
78	int myversion = (int) LIBXML_VERSION;
79
80	xmlInitParser();
81
82	if ((myversion / `10000`) != (version / `10000`)) {
83	xmlGenericError(xmlGenericErrorContext,
84	"Fatal: program compiled against libxml %d using libxml %d\n",
85	(version / `10000`), (myversion / `10000`));
86	fprintf(stderr,
87	"Fatal: program compiled against libxml %d using libxml %d\n",
88	(version / `10000`), (myversion / `10000`));
89	}
90	if ((myversion / `100`) < (version / `100`)) {
91	xmlGenericError(xmlGenericErrorContext,
92	"Warning: program compiled against libxml %d using older %d\n",
93	(version / `100`), (myversion / `100`));
94	}
95	}
96
97
98	/************************************************************************
99	* *
100	* Some factorized error routines *
101	* *
102	************************************************************************/
103
104
105	/**
106	* xmlErrMemory:
107	* @ctxt: an XML parser context
108	* @extra: extra informations
109	*
110	* Handle a redefinition of attribute error
111	*/
112	void
113	xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
114	{
115	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
116	(ctxt->instate == XML_PARSER_EOF))
117	return;
118	if (ctxt != NULL) {
119	ctxt->errNo = XML_ERR_NO_MEMORY;
120	ctxt->instate = XML_PARSER_EOF;
121	ctxt->disableSAX = `1`;
122	}
123	if (extra)
124	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
125	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`, extra,
126	NULL, NULL, `0`, `0`,
127	"Memory allocation failed : %s\n", extra);
128	else
129	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
130	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`, NULL,
131	NULL, NULL, `0`, `0`, "Memory allocation failed\n");
132	}
133
134	/**
135	* __xmlErrEncoding:
136	* @ctxt: an XML parser context
137	* @xmlerr: the error number
138	* @msg: the error message
139	* @str1: an string info
140	* @str2: an string info
141	*
142	* Handle an encoding error
143	*/
144	void
145	__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
146	const char msg, const* xmlChar * str1, const xmlChar * str2)
147	{
148	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
149	(ctxt->instate == XML_PARSER_EOF))
150	return;
151	if (ctxt != NULL)
152	ctxt->errNo = xmlerr;
153	__xmlRaiseError(NULL, NULL, NULL,
154	ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
155	NULL, `0`, (const char ) str1, (const* char *) str2,
156	NULL, `0`, `0`, msg, str1, str2);
157	if (ctxt != NULL) {
158	ctxt->wellFormed = `0`;
159	if (ctxt->recovery == `0`)
160	ctxt->disableSAX = `1`;
161	}
162	}
163
164	/**
165	* xmlErrInternal:
166	* @ctxt: an XML parser context
167	* @msg: the error message
168	* @str: error informations
169	*
170	* Handle an internal error
171	*/
172	static void LIBXML_ATTR_FORMAT(`2`,`0`)
173	xmlErrInternal(xmlParserCtxtPtr ctxt, const char msg, const* xmlChar * str)
174	{
175	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
176	(ctxt->instate == XML_PARSER_EOF))
177	return;
178	if (ctxt != NULL)
179	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
180	__xmlRaiseError(NULL, NULL, NULL,
181	ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
182	XML_ERR_FATAL, NULL, `0`, (const char *) str, NULL, NULL,
183	`0`, `0`, msg, str);
184	if (ctxt != NULL) {
185	ctxt->wellFormed = `0`;
186	if (ctxt->recovery == `0`)
187	ctxt->disableSAX = `1`;
188	}
189	}
190
191	/**
192	* xmlErrEncodingInt:
193	* @ctxt: an XML parser context
194	* @error: the error number
195	* @msg: the error message
196	* @val: an integer value
197	*
198	* n encoding error
199	*/
200	static void LIBXML_ATTR_FORMAT(`3`,`0`)
201	xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
202	const char msg, int* val)
203	{
204	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
205	(ctxt->instate == XML_PARSER_EOF))
206	return;
207	if (ctxt != NULL)
208	ctxt->errNo = error;
209	__xmlRaiseError(NULL, NULL, NULL,
210	ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
211	NULL, `0`, NULL, NULL, NULL, val, `0`, msg, val);
212	if (ctxt != NULL) {
213	ctxt->wellFormed = `0`;
214	if (ctxt->recovery == `0`)
215	ctxt->disableSAX = `1`;
216	}
217	}
218
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
232
233	/************************************************************************
234	* *
235	* Input handling functions for progressive parsing *
236	* *
237	************************************************************************/
238
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */
242
243
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined.  Reports
 * through the generic error handler when the base pointer does not
 * match the buffer content, or when the cursor lies before the base or
 * after the used part of the buffer, then dumps the buffer state.
 *
 * Pointers are printed with %p and the offset/size values are cast to
 * long types, so the output is not truncated where pointers or size_t
 * are wider than int.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (void *) in, (void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
272
273
274	/**
275	* xmlParserInputRead:
276	* @in: an XML parser input
277	* @len: an indicative size for the lookahead
278	*
279	* This function was internal and is deprecated.
280	*
281	* Returns -1 as this is an error to use it.
282	*/
283	int
284	xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
285	return(-`1`);
286	}
287
288	/**
289	* xmlParserInputGrow:
290	* @in: an XML parser input
291	* @len: an indicative size for the lookahead
292	*
293	* This function increase the input for the parser. It tries to
294	* preserve pointers to the input buffer, and keep already read data
295	*
296	* Returns the amount of char read, or -1 in case of error, 0 indicate the
297	* end of this entity
298	*/
299	int
300	xmlParserInputGrow(xmlParserInputPtr in, int len) {
301	int ret;
302	size_t indx;
303	const xmlChar *content;
304
305	if ((in == NULL) \|\| (len < `0`)) return(-`1`);
306	#ifdef DEBUG_INPUT
307	xmlGenericError(xmlGenericErrorContext, "Grow\n");
308	#endif
309	if (in->buf == NULL) return(-`1`);
310	if (in->base == NULL) return(-`1`);
311	if (in->cur == NULL) return(-`1`);
312	if (in->buf->buffer == NULL) return(-`1`);
313
314	CHECK_BUFFER(in);
315
316	indx = in->cur - in->base;
317	if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
318
319	CHECK_BUFFER(in);
320
321	return(`0`);
322	}
323	if (in->buf->readcallback != NULL) {
324	ret = xmlParserInputBufferGrow(in->buf, len);
325	} else
326	return(`0`);
327
328	/*
329	* NOTE : in->base may be a "dangling" i.e. freed pointer in this
330	* block, but we use it really as an integer to do some
331	* pointer arithmetic. Insure will raise it as a bug but in
332	* that specific case, that's not !
333	*/
334
335	content = xmlBufContent(in->buf->buffer);
336	if (in->base != content) {
337	/*
338	* the buffer has been reallocated
339	*/
340	indx = in->cur - in->base;
341	in->base = content;
342	in->cur = &content[indx];
343	}
344	in->end = xmlBufEnd(in->buf->buffer);
345
346	CHECK_BUFFER(in);
347
348	return(ret);
349	}
350
351	/**
352	* xmlParserInputShrink:
353	* @in: an XML parser input
354	*
355	* This function removes used input for the parser.
356	*/
357	void
358	xmlParserInputShrink(xmlParserInputPtr in) {
359	size_t used;
360	size_t ret;
361	size_t indx;
362	const xmlChar *content;
363
364	#ifdef DEBUG_INPUT
365	xmlGenericError(xmlGenericErrorContext, "Shrink\n");
366	#endif
367	if (in == NULL) return;
368	if (in->buf == NULL) return;
369	if (in->base == NULL) return;
370	if (in->cur == NULL) return;
371	if (in->buf->buffer == NULL) return;
372
373	CHECK_BUFFER(in);
374
375	used = in->cur - xmlBufContent(in->buf->buffer);
376	/*
377	* Do not shrink on large buffers whose only a tiny fraction
378	* was consumed
379	*/
380	if (used > INPUT_CHUNK) {
381	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
382	if (ret > `0`) {
383	in->cur -= ret;
384	in->consumed += ret;
385	}
386	in->end = xmlBufEnd(in->buf->buffer);
387	}
388
389	CHECK_BUFFER(in);
390
391	if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
392	return;
393	}
394	xmlParserInputBufferRead(in->buf, `2` * INPUT_CHUNK);
395	content = xmlBufContent(in->buf->buffer);
396	if (in->base != content) {
397	/*
398	* the buffer has been reallocated
399	*/
400	indx = in->cur - in->base;
401	in->base = content;
402	in->cur = &content[indx];
403	}
404	in->end = xmlBufEnd(in->buf->buffer);
405
406	CHECK_BUFFER(in);
407	}
408
409	/************************************************************************
410	* *
411	* UTF8 character input and related functions *
412	* *
413	************************************************************************/
414
415	/**
416	* xmlNextChar:
417	* @ctxt: the XML parser context
418	*
419	* Skip to the next char input char.
420	*/
421
422	void
423	xmlNextChar(xmlParserCtxtPtr ctxt)
424	{
425	if ((ctxt == NULL) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
426	(ctxt->input == NULL))
427	return;
428
429	if (!(VALID_CTXT(ctxt))) {
430	xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
431	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
432	xmlStopParser(ctxt);
433	return;
434	}
435
436	if ((*ctxt->input->cur == `0`) &&
437	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= `0`)) {
438	return;
439	}
440
441	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
442	const unsigned char *cur;
443	unsigned char c;
444
445	/*
446	* 2.11 End-of-Line Handling
447	* the literal two-character sequence "#xD#xA" or a standalone
448	* literal #xD, an XML processor must pass to the application
449	* the single character #xA.
450	*/
451	if (*(ctxt->input->cur) == `'\n'`) {
452	ctxt->input->line++; ctxt->input->col = `1`;
453	} else
454	ctxt->input->col++;
455
456	/*
457	* We are supposed to handle UTF8, check it's valid
458	* From rfc2044: encoding of the Unicode values on UTF-8:
459	*
460	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
461	* 0000 0000-0000 007F 0xxxxxxx
462	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
463	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
464	*
465	* Check for the 0x110000 limit too
466	*/
467	cur = ctxt->input->cur;
468
469	c = *cur;
470	if (c & `0x80`) {
471	if (c == `0xC0`)
472	goto encoding_error;
473	if (cur[`1`] == `0`) {
474	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
475	cur = ctxt->input->cur;
476	}
477	if ((cur[`1`] & `0xc0`) != `0x80`)
478	goto encoding_error;
479	if ((c & `0xe0`) == `0xe0`) {
480	unsigned int val;
481
482	if (cur[`2`] == `0`) {
483	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
484	cur = ctxt->input->cur;
485	}
486	if ((cur[`2`] & `0xc0`) != `0x80`)
487	goto encoding_error;
488	if ((c & `0xf0`) == `0xf0`) {
489	if (cur[`3`] == `0`) {
490	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
491	cur = ctxt->input->cur;
492	}
493	if (((c & `0xf8`) != `0xf0`) \|\|
494	((cur[`3`] & `0xc0`) != `0x80`))
495	goto encoding_error;
496	/ 4-byte code /
497	ctxt->input->cur += `4`;
498	val = (cur[`0`] & `0x7`) << `18`;
499	val \|= (cur[`1`] & `0x3f`) << `12`;
500	val \|= (cur[`2`] & `0x3f`) << `6`;
501	val \|= cur[`3`] & `0x3f`;
502	} else {
503	/ 3-byte code /
504	ctxt->input->cur += `3`;
505	val = (cur[`0`] & `0xf`) << `12`;
506	val \|= (cur[`1`] & `0x3f`) << `6`;
507	val \|= cur[`2`] & `0x3f`;
508	}
509	if (((val > `0xd7ff`) && (val < `0xe000`)) \|\|
510	((val > `0xfffd`) && (val < `0x10000`)) \|\|
511	(val >= `0x110000`)) {
512	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
513	"Char 0x%X out of allowed range\n",
514	val);
515	}
516	} else
517	/ 2-byte code /
518	ctxt->input->cur += `2`;
519	} else
520	/ 1-byte code /
521	ctxt->input->cur++;
522
523	ctxt->nbChars++;
524	} else {
525	/*
526	* Assume it's a fixed length encoding (1) with
527	* a compatible encoding for the ASCII set, since
528	* XML constructs only use < 128 chars
529	*/
530
531	if (*(ctxt->input->cur) == `'\n'`) {
532	ctxt->input->line++; ctxt->input->col = `1`;
533	} else
534	ctxt->input->col++;
535	ctxt->input->cur++;
536	ctxt->nbChars++;
537	}
538	if (*ctxt->input->cur == `0`)
539	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
540	return;
541	encoding_error:
542	/*
543	* If we detect an UTF8 error that probably mean that the
544	* input encoding didn't get properly advertised in the
545	* declaration header. Report the error and switch the encoding
546	* to ISO-Latin-1 (if you don't like this policy, just declare the
547	* encoding !)
548	*/
549	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
550	(ctxt->input->end - ctxt->input->cur < `4`)) {
551	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
552	"Input is not proper UTF-8, indicate encoding !\n",
553	NULL, NULL);
554	} else {
555	char buffer[`150`];
556
557	snprintf(buffer, `149`, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
558	ctxt->input->cur[`0`], ctxt->input->cur[`1`],
559	ctxt->input->cur[`2`], ctxt->input->cur[`3`]);
560	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
561	"Input is not proper UTF-8, indicate encoding !\n%s",
562	BAD_CAST buffer, NULL);
563	}
564	ctxt->charset = XML_CHAR_ENCODING_8859_1;
565	ctxt->input->cur++;
566	return;
567	}
568
569	/**
570	* xmlCurrentChar:
571	* @ctxt: the XML parser context
572	* @len: pointer to the length of the char read
573	*
574	* The current char value, if using UTF-8 this may actually span multiple
575	* bytes in the input buffer. Implement the end of line normalization:
576	* 2.11 End-of-Line Handling
577	* Wherever an external parsed entity or the literal entity value
578	* of an internal parsed entity contains either the literal two-character
579	* sequence "#xD#xA" or a standalone literal #xD, an XML processor
580	* must pass to the application the single character #xA.
581	* This behavior can conveniently be produced by normalizing all
582	* line breaks to #xA on input, before parsing.)
583	*
584	* Returns the current char value and its length
585	*/
586
587	int
588	xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
589	if ((ctxt == NULL) \|\| (len == NULL) \|\| (ctxt->input == NULL)) return(`0`);
590	if (ctxt->instate == XML_PARSER_EOF)
591	return(`0`);
592
593	if ((ctxt->input->cur >= `0x20`) && (ctxt->input->cur <= `0x7F`)) {
594	*len = `1`;
595	return((int) *ctxt->input->cur);
596	}
597	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
598	/*
599	* We are supposed to handle UTF8, check it's valid
600	* From rfc2044: encoding of the Unicode values on UTF-8:
601	*
602	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
603	* 0000 0000-0000 007F 0xxxxxxx
604	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
605	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
606	*
607	* Check for the 0x110000 limit too
608	*/
609	const unsigned char *cur = ctxt->input->cur;
610	unsigned char c;
611	unsigned int val;
612
613	c = *cur;
614	if (c & `0x80`) {
615	if (((c & `0x40`) == `0`) \|\| (c == `0xC0`))
616	goto encoding_error;
617	if (cur[`1`] == `0`) {
618	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
619	cur = ctxt->input->cur;
620	}
621	if ((cur[`1`] & `0xc0`) != `0x80`)
622	goto encoding_error;
623	if ((c & `0xe0`) == `0xe0`) {
624	if (cur[`2`] == `0`) {
625	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
626	cur = ctxt->input->cur;
627	}
628	if ((cur[`2`] & `0xc0`) != `0x80`)
629	goto encoding_error;
630	if ((c & `0xf0`) == `0xf0`) {
631	if (cur[`3`] == `0`) {
632	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
633	cur = ctxt->input->cur;
634	}
635	if (((c & `0xf8`) != `0xf0`) \|\|
636	((cur[`3`] & `0xc0`) != `0x80`))
637	goto encoding_error;
638	/ 4-byte code /
639	*len = `4`;
640	val = (cur[`0`] & `0x7`) << `18`;
641	val \|= (cur[`1`] & `0x3f`) << `12`;
642	val \|= (cur[`2`] & `0x3f`) << `6`;
643	val \|= cur[`3`] & `0x3f`;
644	if (val < `0x10000`)
645	goto encoding_error;
646	} else {
647	/ 3-byte code /
648	*len = `3`;
649	val = (cur[`0`] & `0xf`) << `12`;
650	val \|= (cur[`1`] & `0x3f`) << `6`;
651	val \|= cur[`2`] & `0x3f`;
652	if (val < `0x800`)
653	goto encoding_error;
654	}
655	} else {
656	/ 2-byte code /
657	*len = `2`;
658	val = (cur[`0`] & `0x1f`) << `6`;
659	val \|= cur[`1`] & `0x3f`;
660	if (val < `0x80`)
661	goto encoding_error;
662	}
663	if (!IS_CHAR(val)) {
664	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
665	"Char 0x%X out of allowed range\n", val);
666	}
667	return(val);
668	} else {
669	/ 1-byte code /
670	*len = `1`;
671	if (*ctxt->input->cur == `0`)
672	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
673	if ((*ctxt->input->cur == `0`) &&
674	(ctxt->input->end > ctxt->input->cur)) {
675	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
676	"Char 0x0 out of allowed range\n", `0`);
677	}
678	if (*ctxt->input->cur == `0xD`) {
679	if (ctxt->input->cur[`1`] == `0xA`) {
680	ctxt->nbChars++;
681	ctxt->input->cur++;
682	}
683	return(`0xA`);
684	}
685	return((int) *ctxt->input->cur);
686	}
687	}
688	/*
689	* Assume it's a fixed length encoding (1) with
690	* a compatible encoding for the ASCII set, since
691	* XML constructs only use < 128 chars
692	*/
693	*len = `1`;
694	if (*ctxt->input->cur == `0xD`) {
695	if (ctxt->input->cur[`1`] == `0xA`) {
696	ctxt->nbChars++;
697	ctxt->input->cur++;
698	}
699	return(`0xA`);
700	}
701	return((int) *ctxt->input->cur);
702	encoding_error:
703	/*
704	* An encoding problem may arise from a truncated input buffer
705	* splitting a character in the middle. In that case do not raise
706	* an error but return 0 to endicate an end of stream problem
707	*/
708	if (ctxt->input->end - ctxt->input->cur < `4`) {
709	*len = `0`;
710	return(`0`);
711	}
712
713	/*
714	* If we detect an UTF8 error that probably mean that the
715	* input encoding didn't get properly advertised in the
716	* declaration header. Report the error and switch the encoding
717	* to ISO-Latin-1 (if you don't like this policy, just declare the
718	* encoding !)
719	*/
720	{
721	char buffer[`150`];
722
723	snprintf(&buffer[`0`], `149`, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
724	ctxt->input->cur[`0`], ctxt->input->cur[`1`],
725	ctxt->input->cur[`2`], ctxt->input->cur[`3`]);
726	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
727	"Input is not proper UTF-8, indicate encoding !\n%s",
728	BAD_CAST buffer, NULL);
729	}
730	ctxt->charset = XML_CHAR_ENCODING_8859_1;
731	*len = `1`;
732	return((int) *ctxt->input->cur);
733	}
734
735	/**
736	* xmlStringCurrentChar:
737	* @ctxt: the XML parser context
738	* @cur: pointer to the beginning of the char
739	* @len: pointer to the length of the char read
740	*
741	* The current char value, if using UTF-8 this may actually span multiple
742	* bytes in the input buffer.
743	*
744	* Returns the current char value and its length
745	*/
746
747	int
748	xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
749	{
750	if ((len == NULL) \|\| (cur == NULL)) return(`0`);
751	if ((ctxt == NULL) \|\| (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
752	/*
753	* We are supposed to handle UTF8, check it's valid
754	* From rfc2044: encoding of the Unicode values on UTF-8:
755	*
756	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
757	* 0000 0000-0000 007F 0xxxxxxx
758	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
759	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
760	*
761	* Check for the 0x110000 limit too
762	*/
763	unsigned char c;
764	unsigned int val;
765
766	c = *cur;
767	if (c & `0x80`) {
768	if ((cur[`1`] & `0xc0`) != `0x80`)
769	goto encoding_error;
770	if ((c & `0xe0`) == `0xe0`) {
771
772	if ((cur[`2`] & `0xc0`) != `0x80`)
773	goto encoding_error;
774	if ((c & `0xf0`) == `0xf0`) {
775	if (((c & `0xf8`) != `0xf0`) \|\| ((cur[`3`] & `0xc0`) != `0x80`))
776	goto encoding_error;
777	/ 4-byte code /
778	*len = `4`;
779	val = (cur[`0`] & `0x7`) << `18`;
780	val \|= (cur[`1`] & `0x3f`) << `12`;
781	val \|= (cur[`2`] & `0x3f`) << `6`;
782	val \|= cur[`3`] & `0x3f`;
783	} else {
784	/ 3-byte code /
785	*len = `3`;
786	val = (cur[`0`] & `0xf`) << `12`;
787	val \|= (cur[`1`] & `0x3f`) << `6`;
788	val \|= cur[`2`] & `0x3f`;
789	}
790	} else {
791	/ 2-byte code /
792	*len = `2`;
793	val = (cur[`0`] & `0x1f`) << `6`;
794	val \|= cur[`1`] & `0x3f`;
795	}
796	if (!IS_CHAR(val)) {
797	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
798	"Char 0x%X out of allowed range\n", val);
799	}
800	return (val);
801	} else {
802	/ 1-byte code /
803	*len = `1`;
804	return ((int) *cur);
805	}
806	}
807	/*
808	* Assume it's a fixed length encoding (1) with
809	* a compatible encoding for the ASCII set, since
810	* XML constructs only use < 128 chars
811	*/
812	*len = `1`;
813	return ((int) *cur);
814	encoding_error:
815
816	/*
817	* An encoding problem may arise from a truncated input buffer
818	* splitting a character in the middle. In that case do not raise
819	* an error but return 0 to endicate an end of stream problem
820	*/
821	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
822	(ctxt->input->end - ctxt->input->cur < `4`)) {
823	*len = `0`;
824	return(`0`);
825	}
826	/*
827	* If we detect an UTF8 error that probably mean that the
828	* input encoding didn't get properly advertised in the
829	* declaration header. Report the error and switch the encoding
830	* to ISO-Latin-1 (if you don't like this policy, just declare the
831	* encoding !)
832	*/
833	{
834	char buffer[`150`];
835
836	snprintf(buffer, `149`, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
837	ctxt->input->cur[`0`], ctxt->input->cur[`1`],
838	ctxt->input->cur[`2`], ctxt->input->cur[`3`]);
839	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
840	"Input is not proper UTF-8, indicate encoding !\n%s",
841	BAD_CAST buffer, NULL);
842	}
843	*len = `1`;
844	return ((int) *cur);
845	}
846
847	/**
848	* xmlCopyCharMultiByte:
849	* @out: pointer to an array of xmlChar
850	* @val: the char value
851	*
852	* append the char value in the array
853	*
854	* Returns the number of xmlChar written
855	*/
856	int
857	xmlCopyCharMultiByte(xmlChar out, int* val) {
858	if (out == NULL) return(`0`);
859	/*
860	* We are supposed to handle UTF8, check it's valid
861	* From rfc2044: encoding of the Unicode values on UTF-8:
862	*
863	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
864	* 0000 0000-0000 007F 0xxxxxxx
865	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
866	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
867	*/
868	if (val >= `0x80`) {
869	xmlChar *savedout = out;
870	int bits;
871	if (val < `0x800`) { *out++= (val >> `6`) \| `0xC0`; bits= `0`; }
872	else if (val < `0x10000`) { *out++= (val >> `12`) \| `0xE0`; bits= `6`;}
873	else if (val < `0x110000`) { *out++= (val >> `18`) \| `0xF0`; bits= `12`; }
874	else {
875	xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
876	"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
877	val);
878	return(`0`);
879	}
880	for ( ; bits >= `0`; bits-= `6`)
881	*out++= ((val >> bits) & `0x3F`) \| `0x80` ;
882	return (out - savedout);
883	}
884	*out = (xmlChar) val;
885	return `1`;
886	}
887
888	/**
889	* xmlCopyChar:
890	* @len: Ignored, compatibility
891	* @out: pointer to an array of xmlChar
892	* @val: the char value
893	*
894	* append the char value in the array
895	*
896	* Returns the number of xmlChar written
897	*/
898
899	int
900	xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar out, int* val) {
901	if (out == NULL) return(`0`);
902	/ the len parameter is ignored /
903	if (val >= `0x80`) {
904	return(xmlCopyCharMultiByte (out, val));
905	}
906	*out = (xmlChar) val;
907	return `1`;
908	}
909
910	/************************************************************************
911	* *
912	* Commodity functions to switch encodings *
913	* *
914	************************************************************************/
915
916	static int
917	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
918	xmlCharEncodingHandlerPtr handler, int len);
919	static int
920	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
921	xmlCharEncodingHandlerPtr handler, int len);
922	/**
923	* xmlSwitchEncoding:
924	* @ctxt: the parser context
925	* @enc: the encoding value (number)
926	*
927	* change the input functions when discovering the character encoding
928	* of a given entity.
929	*
930	* Returns 0 in case of success, -1 otherwise
931	*/
932	int
933	xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
934	{
935	xmlCharEncodingHandlerPtr handler;
936	int len = -`1`;
937	int ret;
938
939	if (ctxt == NULL) return(-`1`);
940	switch (enc) {
941	case XML_CHAR_ENCODING_ERROR:
942	__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
943	"encoding unknown\n", NULL, NULL);
944	return(-`1`);
945	case XML_CHAR_ENCODING_NONE:
946	/ let's assume it's UTF-8 without the XML decl /
947	ctxt->charset = XML_CHAR_ENCODING_UTF8;
948	return(`0`);
949	case XML_CHAR_ENCODING_UTF8:
950	/ default encoding, no conversion should be needed /
951	ctxt->charset = XML_CHAR_ENCODING_UTF8;
952
953	/*
954	* Errata on XML-1.0 June 20 2001
955	* Specific handling of the Byte Order Mark for
956	* UTF-8
957	*/
958	if ((ctxt->input != NULL) &&
959	(ctxt->input->cur[`0`] == `0xEF`) &&
960	(ctxt->input->cur[`1`] == `0xBB`) &&
961	(ctxt->input->cur[`2`] == `0xBF`)) {
962	ctxt->input->cur += `3`;
963	}
964	return(`0`);
965	case XML_CHAR_ENCODING_UTF16LE:
966	case XML_CHAR_ENCODING_UTF16BE:
967	/The raw input characters are encoded*
968	*in UTF-16. As we expect this function
969	*to be called after xmlCharEncInFunc, we expect
970	*ctxt->input->cur to contain UTF-8 encoded characters.
971	*So the raw UTF16 Byte Order Mark
972	*has also been converted into
973	*an UTF-8 BOM. Let's skip that BOM.
974	*/
975	if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
976	(ctxt->input->cur[`0`] == `0xEF`) &&
977	(ctxt->input->cur[`1`] == `0xBB`) &&
978	(ctxt->input->cur[`2`] == `0xBF`)) {
979	ctxt->input->cur += `3`;
980	}
981	len = `90`;
982	break;
983	case XML_CHAR_ENCODING_UCS2:
984	len = `90`;
985	break;
986	case XML_CHAR_ENCODING_UCS4BE:
987	case XML_CHAR_ENCODING_UCS4LE:
988	case XML_CHAR_ENCODING_UCS4_2143:
989	case XML_CHAR_ENCODING_UCS4_3412:
990	len = `180`;
991	break;
992	case XML_CHAR_ENCODING_EBCDIC:
993	case XML_CHAR_ENCODING_8859_1:
994	case XML_CHAR_ENCODING_8859_2:
995	case XML_CHAR_ENCODING_8859_3:
996	case XML_CHAR_ENCODING_8859_4:
997	case XML_CHAR_ENCODING_8859_5:
998	case XML_CHAR_ENCODING_8859_6:
999	case XML_CHAR_ENCODING_8859_7:
1000	case XML_CHAR_ENCODING_8859_8:
1001	case XML_CHAR_ENCODING_8859_9:
1002	case XML_CHAR_ENCODING_ASCII:
1003	case XML_CHAR_ENCODING_2022_JP:
1004	case XML_CHAR_ENCODING_SHIFT_JIS:
1005	case XML_CHAR_ENCODING_EUC_JP:
1006	len = `45`;
1007	break;
1008	}
1009	handler = xmlGetCharEncodingHandler(enc);
1010	if (handler == NULL) {
1011	/*
1012	* Default handlers.
1013	*/
1014	switch (enc) {
1015	case XML_CHAR_ENCODING_ASCII:
1016	/ default encoding, no conversion should be needed /
1017	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1018	return(`0`);
1019	case XML_CHAR_ENCODING_UTF16LE:
1020	break;
1021	case XML_CHAR_ENCODING_UTF16BE:
1022	break;
1023	case XML_CHAR_ENCODING_UCS4LE:
1024	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1025	"encoding not supported %s\n",
1026	BAD_CAST "USC4 little endian", NULL);
1027	break;
1028	case XML_CHAR_ENCODING_UCS4BE:
1029	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1030	"encoding not supported %s\n",
1031	BAD_CAST "USC4 big endian", NULL);
1032	break;
1033	case XML_CHAR_ENCODING_EBCDIC:
1034	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1035	"encoding not supported %s\n",
1036	BAD_CAST "EBCDIC", NULL);
1037	break;
1038	case XML_CHAR_ENCODING_UCS4_2143:
1039	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1040	"encoding not supported %s\n",
1041	BAD_CAST "UCS4 2143", NULL);
1042	break;
1043	case XML_CHAR_ENCODING_UCS4_3412:
1044	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1045	"encoding not supported %s\n",
1046	BAD_CAST "UCS4 3412", NULL);
1047	break;
1048	case XML_CHAR_ENCODING_UCS2:
1049	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1050	"encoding not supported %s\n",
1051	BAD_CAST "UCS2", NULL);
1052	break;
1053	case XML_CHAR_ENCODING_8859_1:
1054	case XML_CHAR_ENCODING_8859_2:
1055	case XML_CHAR_ENCODING_8859_3:
1056	case XML_CHAR_ENCODING_8859_4:
1057	case XML_CHAR_ENCODING_8859_5:
1058	case XML_CHAR_ENCODING_8859_6:
1059	case XML_CHAR_ENCODING_8859_7:
1060	case XML_CHAR_ENCODING_8859_8:
1061	case XML_CHAR_ENCODING_8859_9:
1062	/*
1063	* We used to keep the internal content in the
1064	* document encoding however this turns being unmaintainable
1065	* So xmlGetCharEncodingHandler() will return non-null
1066	* values for this now.
1067	*/
1068	if ((ctxt->inputNr == `1`) &&
1069	(ctxt->encoding == NULL) &&
1070	(ctxt->input != NULL) &&
1071	(ctxt->input->encoding != NULL)) {
1072	ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1073	}
1074	ctxt->charset = enc;
1075	return(`0`);
1076	case XML_CHAR_ENCODING_2022_JP:
1077	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1078	"encoding not supported %s\n",
1079	BAD_CAST "ISO-2022-JP", NULL);
1080	break;
1081	case XML_CHAR_ENCODING_SHIFT_JIS:
1082	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1083	"encoding not supported %s\n",
1084	BAD_CAST "Shift_JIS", NULL);
1085	break;
1086	case XML_CHAR_ENCODING_EUC_JP:
1087	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1088	"encoding not supported %s\n",
1089	BAD_CAST "EUC-JP", NULL);
1090	break;
1091	default:
1092	break;
1093	}
1094	}
1095	/*
1096	* TODO: We could recover from errors in external entites if we
1097	* didn't stop the parser. But most callers of this function don't
1098	* check the return value.
1099	*/
1100	if (handler == NULL) {
1101	xmlStopParser(ctxt);
1102	return(-`1`);
1103	}
1104	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1105	ret = xmlSwitchToEncodingInt(ctxt, handler, len);
1106	if ((ret < `0`) \|\| (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1107	/*
1108	* on encoding conversion errors, stop the parser
1109	*/
1110	xmlStopParser(ctxt);
1111	ctxt->errNo = XML_I18N_CONV_FAILED;
1112	}
1113	return(ret);
1114	}
1115
1116	/**
1117	* xmlSwitchInputEncoding:
1118	* @ctxt: the parser context
1119	* @input: the input stream
1120	* @handler: the encoding handler
1121	* @len: the number of bytes to convert for the first line or -1
1122	*
1123	* change the input functions when discovering the character encoding
1124	* of a given entity.
1125	*
1126	* Returns 0 in case of success, -1 otherwise
1127	*/
1128	static int
1129	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1130	xmlCharEncodingHandlerPtr handler, int len)
1131	{
1132	int nbchars;
1133
1134	if (handler == NULL)
1135	return (-`1`);
1136	if (input == NULL)
1137	return (-`1`);
1138	if (input->buf != NULL) {
1139	if (input->buf->encoder != NULL) {
1140	/*
1141	* Check in case the auto encoding detetection triggered
1142	* in already.
1143	*/
1144	if (input->buf->encoder == handler)
1145	return (`0`);
1146
1147	/*
1148	* "UTF-16" can be used for both LE and BE
1149	if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1150	BAD_CAST "UTF-16", 6)) &&
1151	(!xmlStrncmp(BAD_CAST handler->name,
1152	BAD_CAST "UTF-16", 6))) {
1153	return(0);
1154	}
1155	*/
1156
1157	/*
1158	* Note: this is a bit dangerous, but that's what it
1159	* takes to use nearly compatible signature for different
1160	* encodings.
1161	*/
1162	xmlCharEncCloseFunc(input->buf->encoder);
1163	input->buf->encoder = handler;
1164	return (`0`);
1165	}
1166	input->buf->encoder = handler;
1167
1168	/*
1169	* Is there already some content down the pipe to convert ?
1170	*/
1171	if (xmlBufIsEmpty(input->buf->buffer) == `0`) {
1172	int processed;
1173	unsigned int use;
1174
1175	/*
1176	* Specific handling of the Byte Order Mark for
1177	* UTF-16
1178	*/
1179	if ((handler->name != NULL) &&
1180	(!strcmp(handler->name, "UTF-16LE") \|\|
1181	!strcmp(handler->name, "UTF-16")) &&
1182	(input->cur[`0`] == `0xFF`) && (input->cur[`1`] == `0xFE`)) {
1183	input->cur += `2`;
1184	}
1185	if ((handler->name != NULL) &&
1186	(!strcmp(handler->name, "UTF-16BE")) &&
1187	(input->cur[`0`] == `0xFE`) && (input->cur[`1`] == `0xFF`)) {
1188	input->cur += `2`;
1189	}
1190	/*
1191	* Errata on XML-1.0 June 20 2001
1192	* Specific handling of the Byte Order Mark for
1193	* UTF-8
1194	*/
1195	if ((handler->name != NULL) &&
1196	(!strcmp(handler->name, "UTF-8")) &&
1197	(input->cur[`0`] == `0xEF`) &&
1198	(input->cur[`1`] == `0xBB`) && (input->cur[`2`] == `0xBF`)) {
1199	input->cur += `3`;
1200	}
1201
1202	/*
1203	* Shrink the current input buffer.
1204	* Move it as the raw buffer and create a new input buffer
1205	*/
1206	processed = input->cur - input->base;
1207	xmlBufShrink(input->buf->buffer, processed);
1208	input->buf->raw = input->buf->buffer;
1209	input->buf->buffer = xmlBufCreate();
1210	input->buf->rawconsumed = processed;
1211	use = xmlBufUse(input->buf->raw);
1212
1213	if (ctxt->html) {
1214	/*
1215	* convert as much as possible of the buffer
1216	*/
1217	nbchars = xmlCharEncInput(input->buf, `0`);
1218	} else {
1219	/*
1220	* convert just enough to get
1221	* '<?xml version="1.0" encoding="xxx"?>'
1222	* parsed with the autodetected encoding
1223	* into the parser reading buffer.
1224	*/
1225	nbchars = xmlCharEncFirstLineInput(input->buf, len);
1226	}
1227	xmlBufResetInput(input->buf->buffer, input);
1228	if (nbchars < `0`) {
1229	xmlErrInternal(ctxt,
1230	"switching encoding: encoder error\n",
1231	NULL);
1232	return (-`1`);
1233	}
1234	input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1235	}
1236	return (`0`);
1237	} else if (input->length == `0`) {
1238	/*
1239	* When parsing a static memory array one must know the
1240	* size to be able to convert the buffer.
1241	*/
1242	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1243	return (-`1`);
1244	}
1245	return (`0`);
1246	}
1247
1248	/**
1249	* xmlSwitchInputEncoding:
1250	* @ctxt: the parser context
1251	* @input: the input stream
1252	* @handler: the encoding handler
1253	*
1254	* change the input functions when discovering the character encoding
1255	* of a given entity.
1256	*
1257	* Returns 0 in case of success, -1 otherwise
1258	*/
1259	int
1260	xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1261	xmlCharEncodingHandlerPtr handler) {
1262	return(xmlSwitchInputEncodingInt(ctxt, input, handler, -`1`));
1263	}
1264
1265	/**
1266	* xmlSwitchToEncodingInt:
1267	* @ctxt: the parser context
1268	* @handler: the encoding handler
1269	* @len: the length to convert or -1
1270	*
1271	* change the input functions when discovering the character encoding
1272	* of a given entity, and convert only @len bytes of the output, this
1273	* is needed on auto detect to allows any declared encoding later to
1274	* convert the actual content after the xmlDecl
1275	*
1276	* Returns 0 in case of success, -1 otherwise
1277	*/
1278	static int
1279	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1280	xmlCharEncodingHandlerPtr handler, int len) {
1281	int ret = `0`;
1282
1283	if (handler != NULL) {
1284	if (ctxt->input != NULL) {
1285	ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1286	} else {
1287	xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1288	NULL);
1289	return(-`1`);
1290	}
1291	/*
1292	* The parsing is now done in UTF8 natively
1293	*/
1294	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1295	} else
1296	return(-`1`);
1297	return(ret);
1298	}
1299
1300	/**
1301	* xmlSwitchToEncoding:
1302	* @ctxt: the parser context
1303	* @handler: the encoding handler
1304	*
1305	* change the input functions when discovering the character encoding
1306	* of a given entity.
1307	*
1308	* Returns 0 in case of success, -1 otherwise
1309	*/
1310	int
1311	xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1312	{
1313	return (xmlSwitchToEncodingInt(ctxt, handler, -`1`));
1314	}
1315
1316	/************************************************************************
1317	* *
1318	* Commodity functions to handle entities processing *
1319	* *
1320	************************************************************************/
1321
1322	/**
1323	* xmlFreeInputStream:
1324	* @input: an xmlParserInputPtr
1325	*
1326	* Free up an input stream.
1327	*/
1328	void
1329	xmlFreeInputStream(xmlParserInputPtr input) {
1330	if (input == NULL) return;
1331
1332	if (input->filename != NULL) xmlFree((char *) input->filename);
1333	if (input->directory != NULL) xmlFree((char *) input->directory);
1334	if (input->encoding != NULL) xmlFree((char *) input->encoding);
1335	if (input->version != NULL) xmlFree((char *) input->version);
1336	if ((input->free != NULL) && (input->base != NULL))
1337	input->free((xmlChar *) input->base);
1338	if (input->buf != NULL)
1339	xmlFreeParserInputBuffer(input->buf);
1340	xmlFree(input);
1341	}
1342
1343	/**
1344	* xmlNewInputStream:
1345	* @ctxt: an XML parser context
1346	*
1347	* Create a new input stream structure.
1348	*
1349	* Returns the new input stream or NULL
1350	*/
1351	xmlParserInputPtr
1352	xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1353	xmlParserInputPtr input;
1354
1355	input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1356	if (input == NULL) {
1357	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1358	return(NULL);
1359	}
1360	memset(input, `0`, sizeof(xmlParserInput));
1361	input->line = `1`;
1362	input->col = `1`;
1363	input->standalone = -`1`;
1364
1365	/*
1366	* If the context is NULL the id cannot be initialized, but that
1367	* should not happen while parsing which is the situation where
1368	* the id is actually needed.
1369	*/
1370	if (ctxt != NULL)
1371	input->id = ctxt->input_id++;
1372
1373	return(input);
1374	}
1375
1376	/**
1377	* xmlNewIOInputStream:
1378	* @ctxt: an XML parser context
1379	* @input: an I/O Input
1380	* @enc: the charset encoding if known
1381	*
1382	* Create a new input stream structure encapsulating the @input into
1383	* a stream suitable for the parser.
1384	*
1385	* Returns the new input stream or NULL
1386	*/
1387	xmlParserInputPtr
1388	xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1389	xmlCharEncoding enc) {
1390	xmlParserInputPtr inputStream;
1391
1392	if (input == NULL) return(NULL);
1393	if (xmlParserDebugEntities)
1394	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1395	inputStream = xmlNewInputStream(ctxt);
1396	if (inputStream == NULL) {
1397	return(NULL);
1398	}
1399	inputStream->filename = NULL;
1400	inputStream->buf = input;
1401	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1402
1403	if (enc != XML_CHAR_ENCODING_NONE) {
1404	xmlSwitchEncoding(ctxt, enc);
1405	}
1406
1407	return(inputStream);
1408	}
1409
1410	/**
1411	* xmlNewEntityInputStream:
1412	* @ctxt: an XML parser context
1413	* @entity: an Entity pointer
1414	*
1415	* Create a new input stream based on an xmlEntityPtr
1416	*
1417	* Returns the new input stream or NULL
1418	*/
1419	xmlParserInputPtr
1420	xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1421	xmlParserInputPtr input;
1422
1423	if (entity == NULL) {
1424	xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1425	NULL);
1426	return(NULL);
1427	}
1428	if (xmlParserDebugEntities)
1429	xmlGenericError(xmlGenericErrorContext,
1430	"new input from entity: %s\n", entity->name);
1431	if (entity->content == NULL) {
1432	switch (entity->etype) {
1433	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1434	xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1435	entity->name);
1436	break;
1437	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1438	case XML_EXTERNAL_PARAMETER_ENTITY:
1439	return(xmlLoadExternalEntity((char *) entity->URI,
1440	(char *) entity->ExternalID, ctxt));
1441	case XML_INTERNAL_GENERAL_ENTITY:
1442	xmlErrInternal(ctxt,
1443	"Internal entity %s without content !\n",
1444	entity->name);
1445	break;
1446	case XML_INTERNAL_PARAMETER_ENTITY:
1447	xmlErrInternal(ctxt,
1448	"Internal parameter entity %s without content !\n",
1449	entity->name);
1450	break;
1451	case XML_INTERNAL_PREDEFINED_ENTITY:
1452	xmlErrInternal(ctxt,
1453	"Predefined entity %s without content !\n",
1454	entity->name);
1455	break;
1456	}
1457	return(NULL);
1458	}
1459	input = xmlNewInputStream(ctxt);
1460	if (input == NULL) {
1461	return(NULL);
1462	}
1463	if (entity->URI != NULL)
1464	input->filename = (char ) xmlStrdup((xmlChar ) entity->URI);
1465	input->base = entity->content;
1466	if (entity->length == `0`)
1467	entity->length = xmlStrlen(entity->content);
1468	input->cur = entity->content;
1469	input->length = entity->length;
1470	input->end = &entity->content[input->length];
1471	return(input);
1472	}
1473
1474	/**
1475	* xmlNewStringInputStream:
1476	* @ctxt: an XML parser context
1477	* @buffer: an memory buffer
1478	*
1479	* Create a new input stream based on a memory buffer.
1480	* Returns the new input stream
1481	*/
1482	xmlParserInputPtr
1483	xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1484	xmlParserInputPtr input;
1485
1486	if (buffer == NULL) {
1487	xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1488	NULL);
1489	return(NULL);
1490	}
1491	if (xmlParserDebugEntities)
1492	xmlGenericError(xmlGenericErrorContext,
1493	"new fixed input: %.30s\n", buffer);
1494	input = xmlNewInputStream(ctxt);
1495	if (input == NULL) {
1496	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1497	return(NULL);
1498	}
1499	input->base = buffer;
1500	input->cur = buffer;
1501	input->length = xmlStrlen(buffer);
1502	input->end = &buffer[input->length];
1503	return(input);
1504	}
1505
1506	/**
1507	* xmlNewInputFromFile:
1508	* @ctxt: an XML parser context
1509	* @filename: the filename to use as entity
1510	*
1511	* Create a new input stream based on a file or an URL.
1512	*
1513	* Returns the new input stream or NULL in case of error
1514	*/
1515	xmlParserInputPtr
1516	xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1517	xmlParserInputBufferPtr buf;
1518	xmlParserInputPtr inputStream;
1519	char *directory = NULL;
1520	xmlChar *URI = NULL;
1521
1522	if (xmlParserDebugEntities)
1523	xmlGenericError(xmlGenericErrorContext,
1524	"new input from file: %s\n", filename);
1525	if (ctxt == NULL) return(NULL);
1526	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1527	if (buf == NULL) {
1528	if (filename == NULL)
1529	__xmlLoaderErr(ctxt,
1530	"failed to load external entity: NULL filename \n",
1531	NULL);
1532	else
1533	__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1534	(const char *) filename);
1535	return(NULL);
1536	}
1537
1538	inputStream = xmlNewInputStream(ctxt);
1539	if (inputStream == NULL)
1540	return(NULL);
1541
1542	inputStream->buf = buf;
1543	inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1544	if (inputStream == NULL)
1545	return(NULL);
1546
1547	if (inputStream->filename == NULL)
1548	URI = xmlStrdup((xmlChar *) filename);
1549	else
1550	URI = xmlStrdup((xmlChar *) inputStream->filename);
1551	directory = xmlParserGetDirectory((const char *) URI);
1552	if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1553	inputStream->filename = (char ) xmlCanonicPath((const* xmlChar *) URI);
1554	if (URI != NULL) xmlFree((char *) URI);
1555	inputStream->directory = directory;
1556
1557	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1558	if ((ctxt->directory == NULL) && (directory != NULL))
1559	ctxt->directory = (char ) xmlStrdup((const* xmlChar *) directory);
1560	return(inputStream);
1561	}
1562
1563	/************************************************************************
1564	* *
1565	* Commodity functions to handle parser contexts *
1566	* *
1567	************************************************************************/
1568
1569	/**
1570	* xmlInitParserCtxt:
1571	* @ctxt: an XML parser context
1572	*
1573	* Initialize a parser context
1574	*
1575	* Returns 0 in case of success and -1 in case of error
1576	*/
1577
1578	int
1579	xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1580	{
1581	xmlParserInputPtr input;
1582
1583	if(ctxt==NULL) {
1584	xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1585	return(-`1`);
1586	}
1587
1588	xmlDefaultSAXHandlerInit();
1589
1590	if (ctxt->dict == NULL)
1591	ctxt->dict = xmlDictCreate();
1592	if (ctxt->dict == NULL) {
1593	xmlErrMemory(NULL, "cannot initialize parser context\n");
1594	return(-`1`);
1595	}
1596	xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1597
1598	if (ctxt->sax == NULL)
1599	ctxt->sax = (xmlSAXHandler ) xmlMalloc(sizeof*(xmlSAXHandler));
1600	if (ctxt->sax == NULL) {
1601	xmlErrMemory(NULL, "cannot initialize parser context\n");
1602	return(-`1`);
1603	}
1604	else
1605	xmlSAXVersion(ctxt->sax, `2`);
1606
1607	ctxt->maxatts = `0`;
1608	ctxt->atts = NULL;
1609	/ Allocate the Input stack /
1610	if (ctxt->inputTab == NULL) {
1611	ctxt->inputTab = (xmlParserInputPtr *)
1612	xmlMalloc(`5` * sizeof(xmlParserInputPtr));
1613	ctxt->inputMax = `5`;
1614	}
1615	if (ctxt->inputTab == NULL) {
1616	xmlErrMemory(NULL, "cannot initialize parser context\n");
1617	ctxt->inputNr = `0`;
1618	ctxt->inputMax = `0`;
1619	ctxt->input = NULL;
1620	return(-`1`);
1621	}
1622	while ((input = inputPop(ctxt)) != NULL) { / Non consuming /
1623	xmlFreeInputStream(input);
1624	}
1625	ctxt->inputNr = `0`;
1626	ctxt->input = NULL;
1627
1628	ctxt->version = NULL;
1629	ctxt->encoding = NULL;
1630	ctxt->standalone = -`1`;
1631	ctxt->hasExternalSubset = `0`;
1632	ctxt->hasPErefs = `0`;
1633	ctxt->html = `0`;
1634	ctxt->external = `0`;
1635	ctxt->instate = XML_PARSER_START;
1636	ctxt->token = `0`;
1637	ctxt->directory = NULL;
1638
1639	/ Allocate the Node stack /
1640	if (ctxt->nodeTab == NULL) {
1641	ctxt->nodeTab = (xmlNodePtr ) xmlMalloc(`10` sizeof(xmlNodePtr));
1642	ctxt->nodeMax = `10`;
1643	}
1644	if (ctxt->nodeTab == NULL) {
1645	xmlErrMemory(NULL, "cannot initialize parser context\n");
1646	ctxt->nodeNr = `0`;
1647	ctxt->nodeMax = `0`;
1648	ctxt->node = NULL;
1649	ctxt->inputNr = `0`;
1650	ctxt->inputMax = `0`;
1651	ctxt->input = NULL;
1652	return(-`1`);
1653	}
1654	ctxt->nodeNr = `0`;
1655	ctxt->node = NULL;
1656
1657	/ Allocate the Name stack /
1658	if (ctxt->nameTab == NULL) {
1659	ctxt->nameTab = (const xmlChar *) xmlMalloc(`10` sizeof(xmlChar *));
1660	ctxt->nameMax = `10`;
1661	}
1662	if (ctxt->nameTab == NULL) {
1663	xmlErrMemory(NULL, "cannot initialize parser context\n");
1664	ctxt->nodeNr = `0`;
1665	ctxt->nodeMax = `0`;
1666	ctxt->node = NULL;
1667	ctxt->inputNr = `0`;
1668	ctxt->inputMax = `0`;
1669	ctxt->input = NULL;
1670	ctxt->nameNr = `0`;
1671	ctxt->nameMax = `0`;
1672	ctxt->name = NULL;
1673	return(-`1`);
1674	}
1675	ctxt->nameNr = `0`;
1676	ctxt->name = NULL;
1677
1678	/ Allocate the space stack /
1679	if (ctxt->spaceTab == NULL) {
1680	ctxt->spaceTab = (int ) xmlMalloc(`10` sizeof(int));
1681	ctxt->spaceMax = `10`;
1682	}
1683	if (ctxt->spaceTab == NULL) {
1684	xmlErrMemory(NULL, "cannot initialize parser context\n");
1685	ctxt->nodeNr = `0`;
1686	ctxt->nodeMax = `0`;
1687	ctxt->node = NULL;
1688	ctxt->inputNr = `0`;
1689	ctxt->inputMax = `0`;
1690	ctxt->input = NULL;
1691	ctxt->nameNr = `0`;
1692	ctxt->nameMax = `0`;
1693	ctxt->name = NULL;
1694	ctxt->spaceNr = `0`;
1695	ctxt->spaceMax = `0`;
1696	ctxt->space = NULL;
1697	return(-`1`);
1698	}
1699	ctxt->spaceNr = `1`;
1700	ctxt->spaceMax = `10`;
1701	ctxt->spaceTab[`0`] = -`1`;
1702	ctxt->space = &ctxt->spaceTab[`0`];
1703	ctxt->userData = ctxt;
1704	ctxt->myDoc = NULL;
1705	ctxt->wellFormed = `1`;
1706	ctxt->nsWellFormed = `1`;
1707	ctxt->valid = `1`;
1708	ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1709	if (ctxt->loadsubset) {
1710	ctxt->options \|= XML_PARSE_DTDLOAD;
1711	}
1712	ctxt->validate = xmlDoValidityCheckingDefaultValue;
1713	ctxt->pedantic = xmlPedanticParserDefaultValue;
1714	if (ctxt->pedantic) {
1715	ctxt->options \|= XML_PARSE_PEDANTIC;
1716	}
1717	ctxt->linenumbers = xmlLineNumbersDefaultValue;
1718	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1719	if (ctxt->keepBlanks == `0`) {
1720	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1721	ctxt->options \|= XML_PARSE_NOBLANKS;
1722	}
1723
1724	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1725	ctxt->vctxt.userData = ctxt;
1726	ctxt->vctxt.error = xmlParserValidityError;
1727	ctxt->vctxt.warning = xmlParserValidityWarning;
1728	if (ctxt->validate) {
1729	if (xmlGetWarningsDefaultValue == `0`)
1730	ctxt->vctxt.warning = NULL;
1731	else
1732	ctxt->vctxt.warning = xmlParserValidityWarning;
1733	ctxt->vctxt.nodeMax = `0`;
1734	ctxt->options \|= XML_PARSE_DTDVALID;
1735	}
1736	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1737	if (ctxt->replaceEntities) {
1738	ctxt->options \|= XML_PARSE_NOENT;
1739	}
1740	ctxt->record_info = `0`;
1741	ctxt->nbChars = `0`;
1742	ctxt->checkIndex = `0`;
1743	ctxt->inSubset = `0`;
1744	ctxt->errNo = XML_ERR_OK;
1745	ctxt->depth = `0`;
1746	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1747	ctxt->catalogs = NULL;
1748	ctxt->nbentities = `0`;
1749	ctxt->sizeentities = `0`;
1750	ctxt->sizeentcopy = `0`;
1751	ctxt->input_id = `1`;
1752	xmlInitNodeInfoSeq(&ctxt->node_seq);
1753	return(`0`);
1754	}
1755
1756	/**
1757	* xmlFreeParserCtxt:
1758	* @ctxt: an XML parser context
1759	*
1760	* Free all the memory used by a parser context. However the parsed
1761	* document in ctxt->myDoc is not freed.
1762	*/
1763
1764	void
1765	xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1766	{
1767	xmlParserInputPtr input;
1768
1769	if (ctxt == NULL) return;
1770
1771	while ((input = inputPop(ctxt)) != NULL) { / Non consuming /
1772	xmlFreeInputStream(input);
1773	}
1774	if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1775	if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1776	if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1777	if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1778	if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1779	if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1780	if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1781	if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1782	if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1783	#ifdef LIBXML_SAX1_ENABLED
1784	if ((ctxt->sax != NULL) &&
1785	(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1786	#else
1787	if (ctxt->sax != NULL)
1788	#endif /* LIBXML_SAX1_ENABLED */
1789	xmlFree(ctxt->sax);
1790	if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1791	if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1792	if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1793	if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1794	if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1795	if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1796	if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1797	if (ctxt->attsDefault != NULL)
1798	xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
1799	if (ctxt->attsSpecial != NULL)
1800	xmlHashFree(ctxt->attsSpecial, NULL);
1801	if (ctxt->freeElems != NULL) {
1802	xmlNodePtr cur, next;
1803
1804	cur = ctxt->freeElems;
1805	while (cur != NULL) {
1806	next = cur->next;
1807	xmlFree(cur);
1808	cur = next;
1809	}
1810	}
1811	if (ctxt->freeAttrs != NULL) {
1812	xmlAttrPtr cur, next;
1813
1814	cur = ctxt->freeAttrs;
1815	while (cur != NULL) {
1816	next = cur->next;
1817	xmlFree(cur);
1818	cur = next;
1819	}
1820	}
1821	/*
1822	* cleanup the error strings
1823	*/
1824	if (ctxt->lastError.message != NULL)
1825	xmlFree(ctxt->lastError.message);
1826	if (ctxt->lastError.file != NULL)
1827	xmlFree(ctxt->lastError.file);
1828	if (ctxt->lastError.str1 != NULL)
1829	xmlFree(ctxt->lastError.str1);
1830	if (ctxt->lastError.str2 != NULL)
1831	xmlFree(ctxt->lastError.str2);
1832	if (ctxt->lastError.str3 != NULL)
1833	xmlFree(ctxt->lastError.str3);
1834
1835	#ifdef LIBXML_CATALOG_ENABLED
1836	if (ctxt->catalogs != NULL)
1837	xmlCatalogFreeLocal(ctxt->catalogs);
1838	#endif
1839	xmlFree(ctxt);
1840	}
1841
1842	/**
1843	* xmlNewParserCtxt:
1844	*
1845	* Allocate and initialize a new parser context.
1846	*
1847	* Returns the xmlParserCtxtPtr or NULL
1848	*/
1849
1850	xmlParserCtxtPtr
1851	xmlNewParserCtxt(void)
1852	{
1853	xmlParserCtxtPtr ctxt;
1854
1855	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1856	if (ctxt == NULL) {
1857	xmlErrMemory(NULL, "cannot allocate parser context\n");
1858	return(NULL);
1859	}
1860	memset(ctxt, `0`, sizeof(xmlParserCtxt));
1861	if (xmlInitParserCtxt(ctxt) < `0`) {
1862	xmlFreeParserCtxt(ctxt);
1863	return(NULL);
1864	}
1865	return(ctxt);
1866	}
1867
1868	/************************************************************************
1869	* *
1870	* Handling of node informations *
1871	* *
1872	************************************************************************/
1873
1874	/**
1875	* xmlClearParserCtxt:
1876	* @ctxt: an XML parser context
1877	*
1878	* Clear (release owned resources) and reinitialize a parser context
1879	*/
1880
1881	void
1882	xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1883	{
1884	if (ctxt==NULL)
1885	return;
1886	xmlClearNodeInfoSeq(&ctxt->node_seq);
1887	xmlCtxtReset(ctxt);
1888	}
1889
1890
1891	/**
1892	* xmlParserFindNodeInfo:
1893	* @ctx: an XML parser context
1894	* @node: an XML node within the tree
1895	*
1896	* Find the parser node info struct for a given node
1897	*
1898	* Returns an xmlParserNodeInfo block pointer or NULL
1899	*/
1900	const xmlParserNodeInfo *
1901	xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1902	{
1903	unsigned long pos;
1904
1905	if ((ctx == NULL) \|\| (node == NULL))
1906	return (NULL);
1907	/ Find position where node should be at /
1908	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1909	if (pos < ctx->node_seq.length
1910	&& ctx->node_seq.buffer[pos].node == node)
1911	return &ctx->node_seq.buffer[pos];
1912	else
1913	return NULL;
1914	}
1915
1916
1917	/**
1918	* xmlInitNodeInfoSeq:
1919	* @seq: a node info sequence pointer
1920	*
1921	* -- Initialize (set to initial state) node info sequence
1922	*/
1923	void
1924	xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1925	{
1926	if (seq == NULL)
1927	return;
1928	seq->length = `0`;
1929	seq->maximum = `0`;
1930	seq->buffer = NULL;
1931	}
1932
1933	/**
1934	* xmlClearNodeInfoSeq:
1935	* @seq: a node info sequence pointer
1936	*
1937	* -- Clear (release memory and reinitialize) node
1938	* info sequence
1939	*/
1940	void
1941	xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1942	{
1943	if (seq == NULL)
1944	return;
1945	if (seq->buffer != NULL)
1946	xmlFree(seq->buffer);
1947	xmlInitNodeInfoSeq(seq);
1948	}
1949
1950	/**
1951	* xmlParserFindNodeInfoIndex:
1952	* @seq: a node info sequence pointer
1953	* @node: an XML node pointer
1954	*
1955	*
1956	* xmlParserFindNodeInfoIndex : Find the index that the info record for
1957	* the given node is or should be at in a sorted sequence
1958	*
1959	* Returns a long indicating the position of the record
1960	*/
1961	unsigned long
1962	xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1963	const xmlNodePtr node)
1964	{
1965	unsigned long upper, lower, middle;
1966	int found = `0`;
1967
1968	if ((seq == NULL) \|\| (node == NULL))
1969	return ((unsigned long) -`1`);
1970
1971	/ Do a binary search for the key /
1972	lower = `1`;
1973	upper = seq->length;
1974	middle = `0`;
1975	while (lower <= upper && !found) {
1976	middle = lower + (upper - lower) / `2`;
1977	if (node == seq->buffer[middle - `1`].node)
1978	found = `1`;
1979	else if (node < seq->buffer[middle - `1`].node)
1980	upper = middle - `1`;
1981	else
1982	lower = middle + `1`;
1983	}
1984
1985	/ Return position /
1986	if (middle == `0` \|\| seq->buffer[middle - `1`].node < node)
1987	return middle;
1988	else
1989	return middle - `1`;
1990	}
1991
1992
1993	/**
1994	* xmlParserAddNodeInfo:
1995	* @ctxt: an XML parser context
1996	* @info: a node info sequence pointer
1997	*
1998	* Insert node info record into the sorted sequence
1999	*/
2000	void
2001	xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2002	const xmlParserNodeInfoPtr info)
2003	{
2004	unsigned long pos;
2005
2006	if ((ctxt == NULL) \|\| (info == NULL)) return;
2007
2008	/ Find pos and check to see if node is already in the sequence /
2009	pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2010	info->node);
2011
2012	if ((pos < ctxt->node_seq.length) &&
2013	(ctxt->node_seq.buffer != NULL) &&
2014	(ctxt->node_seq.buffer[pos].node == info->node)) {
2015	ctxt->node_seq.buffer[pos] = *info;
2016	}
2017
2018	/ Otherwise, we need to add new node to buffer /
2019	else {
2020	if ((ctxt->node_seq.length + `1` > ctxt->node_seq.maximum) \|\|
2021	(ctxt->node_seq.buffer == NULL)) {
2022	xmlParserNodeInfo *tmp_buffer;
2023	unsigned int byte_size;
2024
2025	if (ctxt->node_seq.maximum == `0`)
2026	ctxt->node_seq.maximum = `2`;
2027	byte_size = (sizeof(ctxt->node_seq.buffer)
2028	(`2` * ctxt->node_seq.maximum));
2029
2030	if (ctxt->node_seq.buffer == NULL)
2031	tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2032	else
2033	tmp_buffer =
2034	(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2035	byte_size);
2036
2037	if (tmp_buffer == NULL) {
2038	xmlErrMemory(ctxt, "failed to allocate buffer\n");
2039	return;
2040	}
2041	ctxt->node_seq.buffer = tmp_buffer;
2042	ctxt->node_seq.maximum *= `2`;
2043	}
2044
2045	/ If position is not at end, move elements out of the way /
2046	if (pos != ctxt->node_seq.length) {
2047	unsigned long i;
2048
2049	for (i = ctxt->node_seq.length; i > pos; i--)
2050	ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - `1`];
2051	}
2052
2053	/ Copy element and increase length /
2054	ctxt->node_seq.buffer[pos] = *info;
2055	ctxt->node_seq.length++;
2056	}
2057	}
2058
2059	/************************************************************************
2060	* *
2061	* Defaults settings *
2062	* *
2063	************************************************************************/
2064	/**
2065	* xmlPedanticParserDefault:
2066	* @val: int 0 or 1
2067	*
2068	* Set and return the previous value for enabling pedantic warnings.
2069	*
2070	* Returns the last value for 0 for no substitution, 1 for substitution.
2071	*/
2072
2073	int
2074	xmlPedanticParserDefault(int val) {
2075	int old = xmlPedanticParserDefaultValue;
2076
2077	xmlPedanticParserDefaultValue = val;
2078	return(old);
2079	}
2080
2081	/**
2082	* xmlLineNumbersDefault:
2083	* @val: int 0 or 1
2084	*
2085	* Set and return the previous value for enabling line numbers in elements
2086	* contents. This may break on old application and is turned off by default.
2087	*
2088	* Returns the last value for 0 for no substitution, 1 for substitution.
2089	*/
2090
2091	int
2092	xmlLineNumbersDefault(int val) {
2093	int old = xmlLineNumbersDefaultValue;
2094
2095	xmlLineNumbersDefaultValue = val;
2096	return(old);
2097	}
2098
2099	/**
2100	* xmlSubstituteEntitiesDefault:
2101	* @val: int 0 or 1
2102	*
2103	* Set and return the previous value for default entity support.
2104	* Initially the parser always keep entity references instead of substituting
2105	* entity values in the output. This function has to be used to change the
2106	* default parser behavior
2107	* SAX::substituteEntities() has to be used for changing that on a file by
2108	* file basis.
2109	*
2110	* Returns the last value for 0 for no substitution, 1 for substitution.
2111	*/
2112
2113	int
2114	xmlSubstituteEntitiesDefault(int val) {
2115	int old = xmlSubstituteEntitiesDefaultValue;
2116
2117	xmlSubstituteEntitiesDefaultValue = val;
2118	return(old);
2119	}
2120
2121	/**
2122	* xmlKeepBlanksDefault:
2123	* @val: int 0 or 1
2124	*
2125	* Set and return the previous value for default blanks text nodes support.
2126	* The 1.x version of the parser used an heuristic to try to detect
2127	* ignorable white spaces. As a result the SAX callback was generating
2128	* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2129	* using the DOM output text nodes containing those blanks were not generated.
2130	* The 2.x and later version will switch to the XML standard way and
2131	* ignorableWhitespace() are only generated when running the parser in
2132	* validating mode and when the current element doesn't allow CDATA or
2133	* mixed content.
2134	* This function is provided as a way to force the standard behavior
2135	* on 1.X libs and to switch back to the old mode for compatibility when
2136	* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2137	* by using xmlIsBlankNode() commodity function to detect the "empty"
2138	* nodes generated.
2139	* This value also affect autogeneration of indentation when saving code
2140	* if blanks sections are kept, indentation is not generated.
2141	*
2142	* Returns the last value for 0 for no substitution, 1 for substitution.
2143	*/
2144
2145	int
2146	xmlKeepBlanksDefault(int val) {
2147	int old = xmlKeepBlanksDefaultValue;
2148
2149	xmlKeepBlanksDefaultValue = val;
2150	if (!val) xmlIndentTreeOutput = `1`;
2151	return(old);
2152	}
2153
2154	#define bottom_parserInternals
2155	#include "elfgcchack.h"
2156

Browse the source code of ClickHouse/contrib/libxml2/parserInternals.c