HTMLparser.c source code [ClickHouse/contrib/libxml2/HTMLparser.c]

1	/*
2	* HTMLparser.c : an HTML 4.0 non-verifying parser
3	*
4	* See Copyright for the status of this software.
5	*
6	* daniel@veillard.com
7	*/
8
9	#define IN_LIBXML
10	#include "libxml.h"
11	#ifdef LIBXML_HTML_ENABLED
12
13	#include <string.h>
14	#ifdef HAVE_CTYPE_H
15	#include <ctype.h>
16	#endif
17	#ifdef HAVE_STDLIB_H
18	#include <stdlib.h>
19	#endif
20	#ifdef HAVE_SYS_STAT_H
21	#include <sys/stat.h>
22	#endif
23	#ifdef HAVE_FCNTL_H
24	#include <fcntl.h>
25	#endif
26	#ifdef HAVE_UNISTD_H
27	#include <unistd.h>
28	#endif
29	#ifdef LIBXML_ZLIB_ENABLED
30	#include <zlib.h>
31	#endif
32
33	#include <libxml/xmlmemory.h>
34	#include <libxml/tree.h>
35	#include <libxml/parser.h>
36	#include <libxml/parserInternals.h>
37	#include <libxml/xmlerror.h>
38	#include <libxml/HTMLparser.h>
39	#include <libxml/HTMLtree.h>
40	#include <libxml/entities.h>
41	#include <libxml/encoding.h>
42	#include <libxml/valid.h>
43	#include <libxml/xmlIO.h>
44	#include <libxml/globals.h>
45	#include <libxml/uri.h>
46
47	#include "buf.h"
48	#include "enc.h"
49
50	#define HTML_MAX_NAMELEN 1000
51	#define HTML_PARSER_BIG_BUFFER_SIZE 1000
52	#define HTML_PARSER_BUFFER_SIZE 100
53
/* #define DEBUG */
/* #define DEBUG_PUSH */
56
/* File-scope flag, initialised to 1 (enabled). */
static int htmlOmittedDefaultValue = 1;
58
59	xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
60	xmlChar end, xmlChar end2, xmlChar end3);
61	static void htmlParseComment(htmlParserCtxtPtr ctxt);
62
63	/************************************************************************
64	* *
65	* Some factorized error routines *
66	* *
67	************************************************************************/
68
69	/**
70	* htmlErrMemory:
71	* @ctxt: an HTML parser context
72	* @extra: extra informations
73	*
74	* Handle a redefinition of attribute error
75	*/
76	static void
77	htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
78	{
79	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
80	(ctxt->instate == XML_PARSER_EOF))
81	return;
82	if (ctxt != NULL) {
83	ctxt->errNo = XML_ERR_NO_MEMORY;
84	ctxt->instate = XML_PARSER_EOF;
85	ctxt->disableSAX = `1`;
86	}
87	if (extra)
88	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
89	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`, extra,
90	NULL, NULL, `0`, `0`,
91	"Memory allocation failed : %s\n", extra);
92	else
93	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
94	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, `0`, NULL,
95	NULL, NULL, `0`, `0`, "Memory allocation failed\n");
96	}
97
98	/**
99	* htmlParseErr:
100	* @ctxt: an HTML parser context
101	* @error: the error number
102	* @msg: the error message
103	* @str1: string infor
104	* @str2: string infor
105	*
106	* Handle a fatal parser error, i.e. violating Well-Formedness constraints
107	*/
108	static void LIBXML_ATTR_FORMAT(`3`,`0`)
109	htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
110	const char msg, const* xmlChar str1, const* xmlChar *str2)
111	{
112	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
113	(ctxt->instate == XML_PARSER_EOF))
114	return;
115	if (ctxt != NULL)
116	ctxt->errNo = error;
117	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
118	XML_ERR_ERROR, NULL, `0`,
119	(const char ) str1, (const* char *) str2,
120	NULL, `0`, `0`,
121	msg, str1, str2);
122	if (ctxt != NULL)
123	ctxt->wellFormed = `0`;
124	}
125
126	/**
127	* htmlParseErrInt:
128	* @ctxt: an HTML parser context
129	* @error: the error number
130	* @msg: the error message
131	* @val: integer info
132	*
133	* Handle a fatal parser error, i.e. violating Well-Formedness constraints
134	*/
135	static void LIBXML_ATTR_FORMAT(`3`,`0`)
136	htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
137	const char msg, int* val)
138	{
139	if ((ctxt != NULL) && (ctxt->disableSAX != `0`) &&
140	(ctxt->instate == XML_PARSER_EOF))
141	return;
142	if (ctxt != NULL)
143	ctxt->errNo = error;
144	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
145	XML_ERR_ERROR, NULL, `0`, NULL, NULL,
146	NULL, val, `0`, msg, val);
147	if (ctxt != NULL)
148	ctxt->wellFormed = `0`;
149	}
150
151	/************************************************************************
152	* *
153	* Parser stacks related functions and macros *
154	* *
155	************************************************************************/
156
157	/**
158	* htmlnamePush:
159	* @ctxt: an HTML parser context
160	* @value: the element name
161	*
162	* Pushes a new element name on top of the name stack
163	*
164	* Returns 0 in case of error, the index in the stack otherwise
165	*/
166	static int
167	htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
168	{
169	if ((ctxt->html < `3`) && (xmlStrEqual(value, BAD_CAST "head")))
170	ctxt->html = `3`;
171	if ((ctxt->html < `10`) && (xmlStrEqual(value, BAD_CAST "body")))
172	ctxt->html = `10`;
173	if (ctxt->nameNr >= ctxt->nameMax) {
174	ctxt->nameMax *= `2`;
175	ctxt->nameTab = (const xmlChar * *)
176	xmlRealloc((xmlChar * *)ctxt->nameTab,
177	ctxt->nameMax *
178	sizeof(ctxt->nameTab[`0`]));
179	if (ctxt->nameTab == NULL) {
180	htmlErrMemory(ctxt, NULL);
181	return (`0`);
182	}
183	}
184	ctxt->nameTab[ctxt->nameNr] = value;
185	ctxt->name = value;
186	return (ctxt->nameNr++);
187	}
188	/**
189	* htmlnamePop:
190	* @ctxt: an HTML parser context
191	*
192	* Pops the top element name from the name stack
193	*
194	* Returns the name just removed
195	*/
196	static const xmlChar *
197	htmlnamePop(htmlParserCtxtPtr ctxt)
198	{
199	const xmlChar *ret;
200
201	if (ctxt->nameNr <= `0`)
202	return (NULL);
203	ctxt->nameNr--;
204	if (ctxt->nameNr < `0`)
205	return (NULL);
206	if (ctxt->nameNr > `0`)
207	ctxt->name = ctxt->nameTab[ctxt->nameNr - `1`];
208	else
209	ctxt->name = NULL;
210	ret = ctxt->nameTab[ctxt->nameNr];
211	ctxt->nameTab[ctxt->nameNr] = NULL;
212	return (ret);
213	}
214
215	/**
216	* htmlNodeInfoPush:
217	* @ctxt: an HTML parser context
218	* @value: the node info
219	*
220	* Pushes a new element name on top of the node info stack
221	*
222	* Returns 0 in case of error, the index in the stack otherwise
223	*/
224	static int
225	htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
226	{
227	if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
228	if (ctxt->nodeInfoMax == `0`)
229	ctxt->nodeInfoMax = `5`;
230	ctxt->nodeInfoMax *= `2`;
231	ctxt->nodeInfoTab = (htmlParserNodeInfo *)
232	xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
233	ctxt->nodeInfoMax *
234	sizeof(ctxt->nodeInfoTab[`0`]));
235	if (ctxt->nodeInfoTab == NULL) {
236	htmlErrMemory(ctxt, NULL);
237	return (`0`);
238	}
239	}
240	ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
241	ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
242	return (ctxt->nodeInfoNr++);
243	}
244
245	/**
246	* htmlNodeInfoPop:
247	* @ctxt: an HTML parser context
248	*
249	* Pops the top element name from the node info stack
250	*
251	* Returns 0 in case of error, the pointer to NodeInfo otherwise
252	*/
253	static htmlParserNodeInfo *
254	htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
255	{
256	if (ctxt->nodeInfoNr <= `0`)
257	return (NULL);
258	ctxt->nodeInfoNr--;
259	if (ctxt->nodeInfoNr < `0`)
260	return (NULL);
261	if (ctxt->nodeInfoNr > `0`)
262	ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - `1`];
263	else
264	ctxt->nodeInfo = NULL;
265	return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
266	}
267
268	/*
269	* Macros for accessing the content. Those should be used only by the parser,
270	* and not exported.
271	*
272	* Dirty macros, i.e. one need to make assumption on the context to use them
273	*
274	* CUR_PTR return the current pointer to the xmlChar to be parsed.
275	* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
276	* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
277	* in UNICODE mode. This should be used internally by the parser
278	* only to compare to ASCII values otherwise it would break when
279	* running with UTF-8 encoding.
280	* NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
281	* to compare on ASCII based substring.
282	* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
283	* it should be used only to compare on ASCII based substring.
284	* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
285	* strings without newlines within the parser.
286	*
287	* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
288	*
289	* CURRENT Returns the current char value, with the full decoding of
290	* UTF-8 if we are using this mode. It returns an int.
291	* NEXT Skip to the next character, this does the proper decoding
292	* in UTF-8 mode. It also pop-up unfinished entities on the fly.
293	* NEXTL(l) Skip the current unicode character of l xmlChars long.
294	* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
295	*/
296
/*
 * All macros below expect a variable named `ctxt` (the parser context)
 * to be in scope at the point of use.
 */

/* Current input byte, upper-cased. */
#define UPPER (toupper(*ctxt->input->cur))

/* Advance by val bytes; the char count and column advance by the same amount. */
#define SKIP(val) (ctxt->nbChars += (val), ctxt->input->cur += (val), ctxt->input->col += (val))

/* Input byte at offset val from the current position. */
#define NXT(val) ctxt->input->cur[(val)]

/* Same as NXT, upper-cased. */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * Shrink the input when a lot has been consumed and little remains.
 * Wrapped in do/while(0) so the macro is a single statement in any
 * if/else context.
 */
#define SHRINK do { \
    if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserInputShrink(ctxt->input); \
  } while (0)

/* Pull more input when not in progressive mode and little data remains. */
#define GROW do { \
    if ((ctxt->progressive == 0) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)

#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))


/*
 * Step over the current character, which is l bytes long, keeping the
 * line/column counters up to date and clearing any pending token.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l); ctxt->nbChars++;		\
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

/* l must be an lvalue: the decoder stores the character length in it. */
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v (encoded length l) to buffer b at index i and
 * advance i. Single-byte characters are stored directly.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l), &(b)[(i)], (v));			\
  } while (0)
348
349	/**
350	* htmlFindEncoding:
351	* @the HTML parser context
352	*
353	* Ty to find and encoding in the current data available in the input
354	* buffer this is needed to try to switch to the proper encoding when
355	* one face a character error.
356	* That's an heuristic, since it's operating outside of parsing it could
357	* try to use a meta which had been commented out, that's the reason it
358	* should only be used in case of error, not as a default.
359	*
360	* Returns an encoding string or NULL if not found, the string need to
361	* be freed
362	*/
363	static xmlChar *
364	htmlFindEncoding(xmlParserCtxtPtr ctxt) {
365	const xmlChar start, cur, *end;
366
367	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
368	(ctxt->input->encoding != NULL) \|\| (ctxt->input->buf == NULL) \|\|
369	(ctxt->input->buf->encoder != NULL))
370	return(NULL);
371	if ((ctxt->input->cur == NULL) \|\| (ctxt->input->end == NULL))
372	return(NULL);
373
374	start = ctxt->input->cur;
375	end = ctxt->input->end;
376	/ we also expect the input buffer to be zero terminated /
377	if (*end != `0`)
378	return(NULL);
379
380	cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");
381	if (cur == NULL)
382	return(NULL);
383	cur = xmlStrcasestr(cur, BAD_CAST "CONTENT");
384	if (cur == NULL)
385	return(NULL);
386	cur = xmlStrcasestr(cur, BAD_CAST "CHARSET=");
387	if (cur == NULL)
388	return(NULL);
389	cur += `8`;
390	start = cur;
391	while (((cur >= `'A'`) && (cur <= `'Z'`)) \|\|
392	((cur >= `'a'`) && (cur <= `'z'`)) \|\|
393	((cur >= `'0'`) && (cur <= `'9'`)) \|\|
394	(cur == `'-'`) \|\| (cur == `'_'`) \|\| (cur == `':'`) \|\| (cur == `'/'`))
395	cur++;
396	if (cur == start)
397	return(NULL);
398	return(xmlStrndup(start, cur - start));
399	}
400
401	/**
402	* htmlCurrentChar:
403	* @ctxt: the HTML parser context
404	* @len: pointer to the length of the char read
405	*
406	* The current char value, if using UTF-8 this may actually span multiple
407	* bytes in the input buffer. Implement the end of line normalization:
408	* 2.11 End-of-Line Handling
409	* If the encoding is unspecified, in the case we find an ISO-Latin-1
410	* char, then the encoding converter is plugged in automatically.
411	*
412	* Returns the current char value and its length
413	*/
414
415	static int
416	htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
417	if (ctxt->instate == XML_PARSER_EOF)
418	return(`0`);
419
420	if (ctxt->token != `0`) {
421	*len = `0`;
422	return(ctxt->token);
423	}
424	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
425	/*
426	* We are supposed to handle UTF8, check it's valid
427	* From rfc2044: encoding of the Unicode values on UTF-8:
428	*
429	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
430	* 0000 0000-0000 007F 0xxxxxxx
431	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
432	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
433	*
434	* Check for the 0x110000 limit too
435	*/
436	const unsigned char *cur = ctxt->input->cur;
437	unsigned char c;
438	unsigned int val;
439
440	c = *cur;
441	if (c & `0x80`) {
442	if (cur[`1`] == `0`) {
443	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
444	cur = ctxt->input->cur;
445	}
446	if ((cur[`1`] & `0xc0`) != `0x80`)
447	goto encoding_error;
448	if ((c & `0xe0`) == `0xe0`) {
449
450	if (cur[`2`] == `0`) {
451	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
452	cur = ctxt->input->cur;
453	}
454	if ((cur[`2`] & `0xc0`) != `0x80`)
455	goto encoding_error;
456	if ((c & `0xf0`) == `0xf0`) {
457	if (cur[`3`] == `0`) {
458	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
459	cur = ctxt->input->cur;
460	}
461	if (((c & `0xf8`) != `0xf0`) \|\|
462	((cur[`3`] & `0xc0`) != `0x80`))
463	goto encoding_error;
464	/ 4-byte code /
465	*len = `4`;
466	val = (cur[`0`] & `0x7`) << `18`;
467	val \|= (cur[`1`] & `0x3f`) << `12`;
468	val \|= (cur[`2`] & `0x3f`) << `6`;
469	val \|= cur[`3`] & `0x3f`;
470	} else {
471	/ 3-byte code /
472	*len = `3`;
473	val = (cur[`0`] & `0xf`) << `12`;
474	val \|= (cur[`1`] & `0x3f`) << `6`;
475	val \|= cur[`2`] & `0x3f`;
476	}
477	} else {
478	/ 2-byte code /
479	*len = `2`;
480	val = (cur[`0`] & `0x1f`) << `6`;
481	val \|= cur[`1`] & `0x3f`;
482	}
483	if (!IS_CHAR(val)) {
484	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
485	"Char 0x%X out of allowed range\n", val);
486	}
487	return(val);
488	} else {
489	if ((*ctxt->input->cur == `0`) &&
490	(ctxt->input->cur < ctxt->input->end)) {
491	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
492	"Char 0x%X out of allowed range\n", `0`);
493	*len = `1`;
494	return(`' '`);
495	}
496	/ 1-byte code /
497	*len = `1`;
498	return((int) *ctxt->input->cur);
499	}
500	}
501	/*
502	* Assume it's a fixed length encoding (1) with
503	* a compatible encoding for the ASCII set, since
504	* XML constructs only use < 128 chars
505	*/
506	*len = `1`;
507	if ((int) *ctxt->input->cur < `0x80`)
508	return((int) *ctxt->input->cur);
509
510	/*
511	* Humm this is bad, do an automatic flow conversion
512	*/
513	{
514	xmlChar * guess;
515	xmlCharEncodingHandlerPtr handler;
516
517	guess = htmlFindEncoding(ctxt);
518	if (guess == NULL) {
519	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
520	} else {
521	if (ctxt->input->encoding != NULL)
522	xmlFree((xmlChar *) ctxt->input->encoding);
523	ctxt->input->encoding = guess;
524	handler = xmlFindCharEncodingHandler((const char *) guess);
525	if (handler != NULL) {
526	xmlSwitchToEncoding(ctxt, handler);
527	} else {
528	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
529	"Unsupported encoding %s", guess, NULL);
530	}
531	}
532	ctxt->charset = XML_CHAR_ENCODING_UTF8;
533	}
534
535	return(xmlCurrentChar(ctxt, len));
536
537	encoding_error:
538	/*
539	* If we detect an UTF8 error that probably mean that the
540	* input encoding didn't get properly advertized in the
541	* declaration header. Report the error and switch the encoding
542	* to ISO-Latin-1 (if you don't like this policy, just declare the
543	* encoding !)
544	*/
545	{
546	char buffer[`150`];
547
548	if (ctxt->input->end - ctxt->input->cur >= `4`) {
549	snprintf(buffer, `149`, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
550	ctxt->input->cur[`0`], ctxt->input->cur[`1`],
551	ctxt->input->cur[`2`], ctxt->input->cur[`3`]);
552	} else {
553	snprintf(buffer, `149`, "Bytes: 0x%02X\n", ctxt->input->cur[`0`]);
554	}
555	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
556	"Input is not proper UTF-8, indicate encoding !\n",
557	BAD_CAST buffer, NULL);
558	}
559
560	ctxt->charset = XML_CHAR_ENCODING_8859_1;
561	*len = `1`;
562	return((int) *ctxt->input->cur);
563	}
564
565	/**
566	* htmlSkipBlankChars:
567	* @ctxt: the HTML parser context
568	*
569	* skip all blanks character found at that point in the input streams.
570	*
571	* Returns the number of space chars skipped
572	*/
573
574	static int
575	htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
576	int res = `0`;
577
578	while (IS_BLANK_CH(*(ctxt->input->cur))) {
579	if ((*ctxt->input->cur == `0`) &&
580	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= `0`)) {
581	xmlPopInput(ctxt);
582	} else {
583	if (*(ctxt->input->cur) == `'\n'`) {
584	ctxt->input->line++; ctxt->input->col = `1`;
585	} else ctxt->input->col++;
586	ctxt->input->cur++;
587	ctxt->nbChars++;
588	if (*ctxt->input->cur == `0`)
589	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
590	}
591	res++;
592	}
593	return(res);
594	}
595
596
597
598	/************************************************************************
599	* *
600	* The list of HTML elements and their properties *
601	* *
602	************************************************************************/
603
/*
 *  Start Tag: 1 means the start tag can be omitted
 *  End Tag:   1 means the end tag can be omitted
 *             2 means it's forbidden (empty elements)
 *             3 means the tag is stylistic and should be closed easily
 *  Depr:      this element is deprecated
 *  DTD:       1 means that this element is valid only in the Loose DTD
 *             2 means that this element is valid only in the Frameset DTD
 *
 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
 *	, subElements , impliedsubelt , Attributes, userdata
 */
616
/* Definitions and a couple of vars for HTML Elements */

/*
 * Each element class is a comma-separated list of element names, paired
 * with an NB_* macro giving the number of names in the list. The NB_*
 * sums are parenthesized so they can be used safely inside larger
 * expressions.
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define PCDATA
#define NB_PCDATA 0
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define MODIFIER
#define NB_MODIFIER 0
#define FLOW BLOCK,INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


/* NULL-terminated element name lists. */
static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata
650
651
/* ... and for HTML Attributes */

/*
 * Each attribute class is a comma-separated list of attribute names,
 * paired with an NB_* macro giving the number of names in the list.
 */
#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

/* NULL-terminated attribute name lists. */
static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;
671
672
673	/ Other declarations that should go inline ... /
674	static const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
675	"href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
676	"tabindex", "onfocus", "onblur", NULL } ;
677	static const char* const target_attr[] = { "target", NULL } ;
678	static const char* const rows_cols_attr[] = { "rows", "cols", NULL } ;
679	static const char* const alt_attr[] = { "alt", NULL } ;
680	static const char* const src_alt_attrs[] = { "src", "alt", NULL } ;
681	static const char* const href_attrs[] = { "href", NULL } ;
682	static const char* const clear_attrs[] = { "clear", NULL } ;
683	static const char* const inline_p[] = { INLINE, "p", NULL } ;
684
685	static const char* const flow_param[] = { FLOW, "param", NULL } ;
686	static const char* const applet_attrs[] = { COREATTRS , "codebase",
687	"archive", "alt", "name", "height", "width", "align",
688	"hspace", "vspace", NULL } ;
689	static const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
690	"tabindex", "accesskey", "onfocus", "onblur", NULL } ;
691	static const char* const basefont_attrs[] =
692	{ "id", "size", "color", "face", NULL } ;
693	static const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
694	static const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
695	static const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
696	static const char* const body_depr[] = { "background", "bgcolor", "text",
697	"link", "vlink", "alink", NULL } ;
698	static const char* const button_attrs[] = { ATTRS, "name", "value", "type",
699	"disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
700
701
702	static const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
703	static const char* const col_elt[] = { "col", NULL } ;
704	static const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
705	static const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
706	static const char* const dl_contents[] = { "dt", "dd", NULL } ;
707	static const char* const compact_attr[] = { "compact", NULL } ;
708	static const char* const label_attr[] = { "label", NULL } ;
709	static const char* const fieldset_contents[] = { FLOW, "legend" } ;
710	static const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
711	static const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
712	static const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
713	static const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
714	static const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
715	static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
716	static const char* const head_attrs[] = { I18N, "profile", NULL } ;
717	static const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ;
718	static const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ;
719	static const char* const version_attr[] = { "version", NULL } ;
720	static const char* const html_content[] = { "head", "body", "frameset", NULL } ;
721	static const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
722	static const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
723	static const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
724	static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
725	static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
726	static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
727	static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
728	static const char* const align_attr[] = { "align", NULL } ;
729	static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
730	static const char* const map_contents[] = { BLOCK, "area", NULL } ;
731	static const char* const name_attr[] = { "name", NULL } ;
732	static const char* const action_attr[] = { "action", NULL } ;
733	static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
734	static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
735	static const char* const content_attr[] = { "content", NULL } ;
736	static const char* const type_attr[] = { "type", NULL } ;
737	static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
738	static const char* const object_contents[] = { FLOW, "param", NULL } ;
739	static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
740	static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
741	static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ;
742	static const char* const option_elt[] = { "option", NULL } ;
743	static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
744	static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
745	static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
746	static const char* const width_attr[] = { "width", NULL } ;
747	static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;
748	static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
749	static const char* const language_attr[] = { "language", NULL } ;
750	static const char* const select_content[] = { "optgroup", "option", NULL } ;
751	static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
752	static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
753	static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
754	static const char* const table_depr[] = { "align", "bgcolor", NULL } ;
755	static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;
756	static const char* const tr_elt[] = { "tr", NULL } ;
757	static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;
758	static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
759	static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;
760	static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
761	static const char* const tr_contents[] = { "th", "td", NULL } ;
762	static const char* const bgcolor_attr[] = { "bgcolor", NULL } ;
763	static const char* const li_elt[] = { "li", NULL } ;
764	static const char* const ul_depr[] = { "type", "compact", NULL} ;
765	static const char* const dir_attr[] = { "dir", NULL} ;
766
767	#define DECL (const char**)
768
769	static const htmlElemDesc
770	html40ElementTable[] = {
771	{ "a", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "anchor ",
772	DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
773	},
774	{ "abbr", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "abbreviated form",
775	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
776	},
777	{ "acronym", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "",
778	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
779	},
780	{ "address", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "information on author ",
781	DECL inline_p , NULL , DECL html_attrs, NULL, NULL
782	},
783	{ "applet", `0`, `0`, `0`, `0`, `1`, `1`, `2`, "java applet ",
784	DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
785	},
786	{ "area", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "client-side image map area ",
787	EMPTY , NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
788	},
789	{ "b", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "bold text style",
790	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
791	},
792	{ "base", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "document base uri ",
793	EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
794	},
795	{ "basefont", `0`, `2`, `2`, `1`, `1`, `1`, `1`, "base font size " ,
796	EMPTY , NULL , NULL, DECL basefont_attrs, NULL
797	},
798	{ "bdo", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "i18n bidi over-ride ",
799	DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
800	},
801	{ "big", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "large text style",
802	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
803	},
804	{ "blockquote", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "long quotation ",
805	DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
806	},
807	{ "body", `1`, `1`, `0`, `0`, `0`, `0`, `0`, "document body ",
808	DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
809	},
810	{ "br", `0`, `2`, `2`, `1`, `0`, `0`, `1`, "forced line break ",
811	EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
812	},
813	{ "button", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "push button ",
814	DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL
815	},
816	{ "caption", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "table caption ",
817	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
818	},
819	{ "center", `0`, `3`, `0`, `0`, `1`, `1`, `0`, "shorthand for div align=center ",
820	DECL html_flow , NULL , NULL, DECL html_attrs, NULL
821	},
822	{ "cite", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "citation",
823	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
824	},
825	{ "code", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "computer code fragment",
826	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
827	},
828	{ "col", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "table column ",
829	EMPTY , NULL , DECL col_attrs , NULL, NULL
830	},
831	{ "colgroup", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "table column group ",
832	DECL col_elt , "col" , DECL col_attrs , NULL, NULL
833	},
834	{ "dd", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "definition description ",
835	DECL html_flow , NULL , DECL html_attrs, NULL, NULL
836	},
837	{ "del", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "deleted text ",
838	DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
839	},
840	{ "dfn", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "instance definition",
841	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
842	},
843	{ "dir", `0`, `0`, `0`, `0`, `1`, `1`, `0`, "directory list",
844	DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
845	},
846	{ "div", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "generic language/style container",
847	DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
848	},
849	{ "dl", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "definition list ",
850	DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
851	},
852	{ "dt", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "definition term ",
853	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
854	},
855	{ "em", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "emphasis",
856	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
857	},
858	{ "embed", `0`, `1`, `0`, `0`, `1`, `1`, `1`, "generic embedded object ",
859	EMPTY, NULL, DECL embed_attrs, NULL, NULL
860	},
861	{ "fieldset", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "form control group ",
862	DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
863	},
864	{ "font", `0`, `3`, `0`, `0`, `1`, `1`, `1`, "local change to font ",
865	DECL html_inline, NULL, NULL, DECL font_attrs, NULL
866	},
867	{ "form", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "interactive form ",
868	DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
869	},
870	{ "frame", `0`, `2`, `2`, `1`, `0`, `2`, `0`, "subwindow " ,
871	EMPTY, NULL, NULL, DECL frame_attrs, NULL
872	},
873	{ "frameset", `0`, `0`, `0`, `0`, `0`, `2`, `0`, "window subdivision" ,
874	DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
875	},
876	{ "h1", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
877	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
878	},
879	{ "h2", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
880	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
881	},
882	{ "h3", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
883	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
884	},
885	{ "h4", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
886	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
887	},
888	{ "h5", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
889	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
890	},
891	{ "h6", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "heading ",
892	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
893	},
894	{ "head", `1`, `1`, `0`, `0`, `0`, `0`, `0`, "document head ",
895	DECL head_contents, NULL, DECL head_attrs, NULL, NULL
896	},
897	{ "hr", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "horizontal rule " ,
898	EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL
899	},
900	{ "html", `1`, `1`, `0`, `0`, `0`, `0`, `0`, "document root element ",
901	DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
902	},
903	{ "i", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "italic text style",
904	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
905	},
906	{ "iframe", `0`, `0`, `0`, `0`, `0`, `1`, `2`, "inline subwindow ",
907	DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
908	},
909	{ "img", `0`, `2`, `2`, `1`, `0`, `0`, `1`, "embedded image ",
910	EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
911	},
912	{ "input", `0`, `2`, `2`, `1`, `0`, `0`, `1`, "form control ",
913	EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
914	},
915	{ "ins", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "inserted text",
916	DECL html_flow, NULL, DECL edit_attrs, NULL, NULL
917	},
918	{ "isindex", `0`, `2`, `2`, `1`, `1`, `1`, `0`, "single line prompt ",
919	EMPTY, NULL, NULL, DECL prompt_attrs, NULL
920	},
921	{ "kbd", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "text to be entered by the user",
922	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
923	},
924	{ "label", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "form field label text ",
925	DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL
926	},
927	{ "legend", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "fieldset legend ",
928	DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
929	},
930	{ "li", `0`, `1`, `1`, `0`, `0`, `0`, `0`, "list item ",
931	DECL html_flow, NULL, DECL html_attrs, NULL, NULL
932	},
933	{ "link", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "a media-independent link ",
934	EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
935	},
936	{ "map", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "client-side image map ",
937	DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
938	},
939	{ "menu", `0`, `0`, `0`, `0`, `1`, `1`, `0`, "menu list ",
940	DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
941	},
942	{ "meta", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "generic metainformation ",
943	EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
944	},
945	{ "noframes", `0`, `0`, `0`, `0`, `0`, `2`, `0`, "alternate content container for non frame-based rendering ",
946	DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
947	},
948	{ "noscript", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "alternate content container for non script-based rendering ",
949	DECL html_flow, "div", DECL html_attrs, NULL, NULL
950	},
951	{ "object", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "generic embedded object ",
952	DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
953	},
954	{ "ol", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "ordered list ",
955	DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
956	},
957	{ "optgroup", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "option group ",
958	DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
959	},
960	{ "option", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "selectable choice " ,
961	DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
962	},
963	{ "p", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "paragraph ",
964	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
965	},
966	{ "param", `0`, `2`, `2`, `1`, `0`, `0`, `0`, "named property value ",
967	EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
968	},
969	{ "pre", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "preformatted text ",
970	DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
971	},
972	{ "q", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "short inline quotation ",
973	DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
974	},
975	{ "s", `0`, `3`, `0`, `0`, `1`, `1`, `1`, "strike-through text style",
976	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
977	},
978	{ "samp", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "sample program output, scripts, etc.",
979	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
980	},
981	{ "script", `0`, `0`, `0`, `0`, `0`, `0`, `2`, "script statements ",
982	DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
983	},
984	{ "select", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "option selector ",
985	DECL select_content, NULL, DECL select_attrs, NULL, NULL
986	},
987	{ "small", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "small text style",
988	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
989	},
990	{ "span", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "generic language/style container ",
991	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
992	},
993	{ "strike", `0`, `3`, `0`, `0`, `1`, `1`, `1`, "strike-through text",
994	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
995	},
996	{ "strong", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "strong emphasis",
997	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
998	},
999	{ "style", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "style info ",
1000	DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
1001	},
1002	{ "sub", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "subscript",
1003	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1004	},
1005	{ "sup", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "superscript ",
1006	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1007	},
1008	{ "table", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "",
1009	DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
1010	},
1011	{ "tbody", `1`, `0`, `0`, `0`, `0`, `0`, `0`, "table body ",
1012	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1013	},
1014	{ "td", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "table data cell",
1015	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
1016	},
1017	{ "textarea", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "multi-line text field ",
1018	DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
1019	},
1020	{ "tfoot", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "table footer ",
1021	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1022	},
1023	{ "th", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "table header cell",
1024	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
1025	},
1026	{ "thead", `0`, `1`, `0`, `0`, `0`, `0`, `0`, "table header ",
1027	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1028	},
1029	{ "title", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "document title ",
1030	DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
1031	},
1032	{ "tr", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "table row ",
1033	DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
1034	},
1035	{ "tt", `0`, `3`, `0`, `0`, `0`, `0`, `1`, "teletype or monospaced text style",
1036	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1037	},
1038	{ "u", `0`, `3`, `0`, `0`, `1`, `1`, `1`, "underlined text style",
1039	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
1040	},
1041	{ "ul", `0`, `0`, `0`, `0`, `0`, `0`, `0`, "unordered list ",
1042	DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
1043	},
1044	{ "var", `0`, `0`, `0`, `0`, `0`, `0`, `1`, "instance of a variable or program argument",
1045	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1046	}
1047	};
1048
1049	/*
1050	* start tags that imply the end of current element
1051	*/
1052	static const char * const htmlStartClose[] = {
1053	"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
1054	"dl", "ul", "ol", "menu", "dir", "address", "pre",
1055	"listing", "xmp", "head", NULL,
1056	"head", "p", NULL,
1057	"title", "p", NULL,
1058	"body", "head", "style", "link", "title", "p", NULL,
1059	"frameset", "head", "style", "link", "title", "p", NULL,
1060	"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
1061	"pre", "listing", "xmp", "head", "li", NULL,
1062	"hr", "p", "head", NULL,
1063	"h1", "p", "head", NULL,
1064	"h2", "p", "head", NULL,
1065	"h3", "p", "head", NULL,
1066	"h4", "p", "head", NULL,
1067	"h5", "p", "head", NULL,
1068	"h6", "p", "head", NULL,
1069	"dir", "p", "head", NULL,
1070	"address", "p", "head", "ul", NULL,
1071	"pre", "p", "head", "ul", NULL,
1072	"listing", "p", "head", NULL,
1073	"xmp", "p", "head", NULL,
1074	"blockquote", "p", "head", NULL,
1075	"dl", "p", "dt", "menu", "dir", "address", "pre", "listing",
1076	"xmp", "head", NULL,
1077	"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1078	"head", "dd", NULL,
1079	"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1080	"head", "dt", NULL,
1081	"ul", "p", "head", "ol", "menu", "dir", "address", "pre",
1082	"listing", "xmp", NULL,
1083	"ol", "p", "head", "ul", NULL,
1084	"menu", "p", "head", "ul", NULL,
1085	"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
1086	"div", "p", "head", NULL,
1087	"noscript", "p", NULL,
1088	"center", "font", "b", "i", "p", "head", NULL,
1089	"a", "a", "head", NULL,
1090	"caption", "p", NULL,
1091	"colgroup", "caption", "colgroup", "col", "p", NULL,
1092	"col", "caption", "col", "p", NULL,
1093	"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
1094	"listing", "xmp", "a", NULL,
1095	"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
1096	"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
1097	"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
1098	"thead", "caption", "col", "colgroup", NULL,
1099	"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1100	"tbody", "p", NULL,
1101	"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1102	"tfoot", "tbody", "p", NULL,
1103	"optgroup", "option", NULL,
1104	"option", "option", NULL,
1105	"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
1106	"pre", "listing", "xmp", "a", NULL,
1107	/ most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> /
1108	"tt", "head", NULL,
1109	"i", "head", NULL,
1110	"b", "head", NULL,
1111	"u", "head", NULL,
1112	"s", "head", NULL,
1113	"strike", "head", NULL,
1114	"big", "head", NULL,
1115	"small", "head", NULL,
1116
1117	"em", "head", NULL,
1118	"strong", "head", NULL,
1119	"dfn", "head", NULL,
1120	"code", "head", NULL,
1121	"samp", "head", NULL,
1122	"kbd", "head", NULL,
1123	"var", "head", NULL,
1124	"cite", "head", NULL,
1125	"abbr", "head", NULL,
1126	"acronym", "head", NULL,
1127
1128	/ "a" /
1129	"img", "head", NULL,
1130	/ "applet" /
1131	/ "embed" /
1132	/ "object" /
1133	"font", "head", NULL,
1134	/ "basefont" /
1135	"br", "head", NULL,
1136	/ "script" /
1137	"map", "head", NULL,
1138	"q", "head", NULL,
1139	"sub", "head", NULL,
1140	"sup", "head", NULL,
1141	"span", "head", NULL,
1142	"bdo", "head", NULL,
1143	"iframe", "head", NULL,
1144	NULL
1145	};
1146
/*
 * NULL-terminated list of HTML elements which are not supposed to hold
 * CDATA content directly; a p element is implied when character data
 * shows up inside one of them.
 *
 * TODO: extend this list by reading the HTML SGML DTD on implied
 *       paragraphs.
 */
static const char *const htmlNoContentElements[] = { "html", "head", NULL };
1159
/*
 * HTML attributes whose content is of type %Script;.
 *
 * NOTE: htmlIsScriptAttribute() rejects any name that does not start
 * with "on" before consulting this list, so re-check that function
 * when adding an entry here.
 */
static const char *const htmlScriptAttributes[] = {
    /* pointer events */
    "onclick", "ondblclick",
    "onmousedown", "onmouseup", "onmouseover", "onmousemove", "onmouseout",
    /* keyboard events */
    "onkeypress", "onkeydown", "onkeyup",
    /* document and form events */
    "onload", "onunload",
    "onfocus", "onblur",
    "onsubmit", "onreset",
    "onchange", "onselect"
};
1185
/*
 * Priorities used to recover from broken HTML pages: each element name
 * is given a priority, and an end tag is only allowed to close open
 * elements whose priority is lower than or equal to its own. That lets
 * the parser decide what to do with stray end tags.
 *
 * The table ends with a NULL-named entry carrying the default priority
 * returned for every element not listed explicitly.
 */

typedef struct {
    const char *name;   /* element name, NULL for the closing default entry */
    int priority;       /* "endtag" priority of that element */
} elementPriority;

static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }    /* Default priority */
};
1213
/*
 * Lookup index over htmlStartClose: once htmlInitAutoClose() has run,
 * each used slot points at the first string of one group of that table
 * and the unused slots are NULL.
 */
static const char **htmlStartCloseIndex[100];
/* Non-zero once htmlInitAutoClose() has filled htmlStartCloseIndex. */
static int htmlStartCloseIndexinitialized = 0;
1216
1217	/************************************************************************
1218	* *
1219	* functions to handle HTML specific data *
1220	* *
1221	************************************************************************/
1222
1223	/**
1224	* htmlInitAutoClose:
1225	*
1226	* Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
1227	* This is not reentrant. Call xmlInitParser() once before processing in
1228	* case of use in multithreaded programs.
1229	*/
1230	void
1231	htmlInitAutoClose(void) {
1232	int indx, i = `0`;
1233
1234	if (htmlStartCloseIndexinitialized) return;
1235
1236	for (indx = `0`;indx < `100`;indx ++) htmlStartCloseIndex[indx] = NULL;
1237	indx = `0`;
1238	while ((htmlStartClose[i] != NULL) && (indx < `100` - `1`)) {
1239	htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
1240	while (htmlStartClose[i] != NULL) i++;
1241	i++;
1242	}
1243	htmlStartCloseIndexinitialized = `1`;
1244	}
1245
1246	/**
1247	* htmlTagLookup:
1248	* @tag: The tag name in lowercase
1249	*
1250	* Lookup the HTML tag in the ElementTable
1251	*
1252	* Returns the related htmlElemDescPtr or NULL if not found.
1253	*/
1254	const htmlElemDesc *
1255	htmlTagLookup(const xmlChar *tag) {
1256	unsigned int i;
1257
1258	for (i = `0`; i < (sizeof(html40ElementTable) /
1259	sizeof(html40ElementTable[`0`]));i++) {
1260	if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
1261	return((htmlElemDescPtr) &html40ElementTable[i]);
1262	}
1263	return(NULL);
1264	}
1265
1266	/**
1267	* htmlGetEndPriority:
1268	* @name: The name of the element to look up the priority for.
1269	*
1270	* Return value: The "endtag" priority.
1271	**/
1272	static int
1273	htmlGetEndPriority (const xmlChar *name) {
1274	int i = `0`;
1275
1276	while ((htmlEndPriority[i].name != NULL) &&
1277	(!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
1278	i++;
1279
1280	return(htmlEndPriority[i].priority);
1281	}
1282
1283
1284	/**
1285	* htmlCheckAutoClose:
1286	* @newtag: The new tag name
1287	* @oldtag: The old tag name
1288	*
1289	* Checks whether the new tag is one of the registered valid tags for
1290	* closing old.
1291	* Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
1292	*
1293	* Returns 0 if no, 1 if yes.
1294	*/
1295	static int
1296	htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
1297	{
1298	int i, indx;
1299	const char **closed = NULL;
1300
1301	if (htmlStartCloseIndexinitialized == `0`)
1302	htmlInitAutoClose();
1303
1304	/ inefficient, but not a big deal /
1305	for (indx = `0`; indx < `100`; indx++) {
1306	closed = htmlStartCloseIndex[indx];
1307	if (closed == NULL)
1308	return (`0`);
1309	if (xmlStrEqual(BAD_CAST * closed, newtag))
1310	break;
1311	}
1312
1313	i = closed - htmlStartClose;
1314	i++;
1315	while (htmlStartClose[i] != NULL) {
1316	if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
1317	return (`1`);
1318	}
1319	i++;
1320	}
1321	return (`0`);
1322	}
1323
1324	/**
1325	* htmlAutoCloseOnClose:
1326	* @ctxt: an HTML parser context
1327	* @newtag: The new tag name
1328	* @force: force the tag closure
1329	*
1330	* The HTML DTD allows an ending tag to implicitly close other tags.
1331	*/
1332	static void
1333	htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1334	{
1335	const htmlElemDesc *info;
1336	int i, priority;
1337
1338	priority = htmlGetEndPriority(newtag);
1339
1340	for (i = (ctxt->nameNr - `1`); i >= `0`; i--) {
1341
1342	if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1343	break;
1344	/*
1345	* A missplaced endtag can only close elements with lower
1346	* or equal priority, so if we find an element with higher
1347	* priority before we find an element with
1348	* matching name, we just ignore this endtag
1349	*/
1350	if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1351	return;
1352	}
1353	if (i < `0`)
1354	return;
1355
1356	while (!xmlStrEqual(newtag, ctxt->name)) {
1357	info = htmlTagLookup(ctxt->name);
1358	if ((info != NULL) && (info->endTag == `3`)) {
1359	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1360	"Opening and ending tag mismatch: %s and %s\n",
1361	newtag, ctxt->name);
1362	}
1363	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1364	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1365	htmlnamePop(ctxt);
1366	}
1367	}
1368
1369	/**
1370	* htmlAutoCloseOnEnd:
1371	* @ctxt: an HTML parser context
1372	*
1373	* Close all remaining tags at the end of the stream
1374	*/
1375	static void
1376	htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1377	{
1378	int i;
1379
1380	if (ctxt->nameNr == `0`)
1381	return;
1382	for (i = (ctxt->nameNr - `1`); i >= `0`; i--) {
1383	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1384	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1385	htmlnamePop(ctxt);
1386	}
1387	}
1388
1389	/**
1390	* htmlAutoClose:
1391	* @ctxt: an HTML parser context
1392	* @newtag: The new tag name or NULL
1393	*
1394	* The HTML DTD allows a tag to implicitly close other tags.
1395	* The list is kept in htmlStartClose array. This function is
1396	* called when a new tag has been detected and generates the
1397	* appropriates closes if possible/needed.
1398	* If newtag is NULL this mean we are at the end of the resource
1399	* and we should check
1400	*/
1401	static void
1402	htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1403	{
1404	while ((newtag != NULL) && (ctxt->name != NULL) &&
1405	(htmlCheckAutoClose(newtag, ctxt->name))) {
1406	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1407	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1408	htmlnamePop(ctxt);
1409	}
1410	if (newtag == NULL) {
1411	htmlAutoCloseOnEnd(ctxt);
1412	return;
1413	}
1414	while ((newtag == NULL) && (ctxt->name != NULL) &&
1415	((xmlStrEqual(ctxt->name, BAD_CAST "head")) \|\|
1416	(xmlStrEqual(ctxt->name, BAD_CAST "body")) \|\|
1417	(xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1418	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1419	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1420	htmlnamePop(ctxt);
1421	}
1422	}
1423
1424	/**
1425	* htmlAutoCloseTag:
1426	* @doc: the HTML document
1427	* @name: The tag name
1428	* @elem: the HTML element
1429	*
1430	* The HTML DTD allows a tag to implicitly close other tags.
1431	* The list is kept in htmlStartClose array. This function checks
1432	* if the element or one of it's children would autoclose the
1433	* given tag.
1434	*
1435	* Returns 1 if autoclose, 0 otherwise
1436	*/
1437	int
1438	htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
1439	htmlNodePtr child;
1440
1441	if (elem == NULL) return(`1`);
1442	if (xmlStrEqual(name, elem->name)) return(`0`);
1443	if (htmlCheckAutoClose(elem->name, name)) return(`1`);
1444	child = elem->children;
1445	while (child != NULL) {
1446	if (htmlAutoCloseTag(doc, name, child)) return(`1`);
1447	child = child->next;
1448	}
1449	return(`0`);
1450	}
1451
1452	/**
1453	* htmlIsAutoClosed:
1454	* @doc: the HTML document
1455	* @elem: the HTML element
1456	*
1457	* The HTML DTD allows a tag to implicitly close other tags.
1458	* The list is kept in htmlStartClose array. This function checks
1459	* if a tag is autoclosed by one of it's child
1460	*
1461	* Returns 1 if autoclosed, 0 otherwise
1462	*/
1463	int
1464	htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
1465	htmlNodePtr child;
1466
1467	if (elem == NULL) return(`1`);
1468	child = elem->children;
1469	while (child != NULL) {
1470	if (htmlAutoCloseTag(doc, elem->name, child)) return(`1`);
1471	child = child->next;
1472	}
1473	return(`0`);
1474	}
1475
1476	/**
1477	* htmlCheckImplied:
1478	* @ctxt: an HTML parser context
1479	* @newtag: The new tag name
1480	*
1481	* The HTML DTD allows a tag to exists only implicitly
1482	* called when a new tag has been detected and generates the
1483	* appropriates implicit tags if missing
1484	*/
1485	static void
1486	htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1487	int i;
1488
1489	if (ctxt->options & HTML_PARSE_NOIMPLIED)
1490	return;
1491	if (!htmlOmittedDefaultValue)
1492	return;
1493	if (xmlStrEqual(newtag, BAD_CAST"html"))
1494	return;
1495	if (ctxt->nameNr <= `0`) {
1496	htmlnamePush(ctxt, BAD_CAST"html");
1497	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1498	ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1499	}
1500	if ((xmlStrEqual(newtag, BAD_CAST"body")) \|\| (xmlStrEqual(newtag, BAD_CAST"head")))
1501	return;
1502	if ((ctxt->nameNr <= `1`) &&
1503	((xmlStrEqual(newtag, BAD_CAST"script")) \|\|
1504	(xmlStrEqual(newtag, BAD_CAST"style")) \|\|
1505	(xmlStrEqual(newtag, BAD_CAST"meta")) \|\|
1506	(xmlStrEqual(newtag, BAD_CAST"link")) \|\|
1507	(xmlStrEqual(newtag, BAD_CAST"title")) \|\|
1508	(xmlStrEqual(newtag, BAD_CAST"base")))) {
1509	if (ctxt->html >= `3`) {
1510	/ we already saw or generated an <head> before /
1511	return;
1512	}
1513	/*
1514	* dropped OBJECT ... i you put it first BODY will be
1515	* assumed !
1516	*/
1517	htmlnamePush(ctxt, BAD_CAST"head");
1518	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1519	ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1520	} else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
1521	(!xmlStrEqual(newtag, BAD_CAST"frame")) &&
1522	(!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
1523	if (ctxt->html >= `10`) {
1524	/ we already saw or generated a <body> before /
1525	return;
1526	}
1527	for (i = `0`;i < ctxt->nameNr;i++) {
1528	if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1529	return;
1530	}
1531	if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1532	return;
1533	}
1534	}
1535
1536	htmlnamePush(ctxt, BAD_CAST"body");
1537	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1538	ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1539	}
1540	}
1541
1542	/**
1543	* htmlCheckParagraph
1544	* @ctxt: an HTML parser context
1545	*
1546	* Check whether a p element need to be implied before inserting
1547	* characters in the current element.
1548	*
1549	* Returns 1 if a paragraph has been inserted, 0 if not and -1
1550	* in case of error.
1551	*/
1552
1553	static int
1554	htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1555	const xmlChar *tag;
1556	int i;
1557
1558	if (ctxt == NULL)
1559	return(-`1`);
1560	tag = ctxt->name;
1561	if (tag == NULL) {
1562	htmlAutoClose(ctxt, BAD_CAST"p");
1563	htmlCheckImplied(ctxt, BAD_CAST"p");
1564	htmlnamePush(ctxt, BAD_CAST"p");
1565	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1566	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1567	return(`1`);
1568	}
1569	if (!htmlOmittedDefaultValue)
1570	return(`0`);
1571	for (i = `0`; htmlNoContentElements[i] != NULL; i++) {
1572	if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
1573	htmlAutoClose(ctxt, BAD_CAST"p");
1574	htmlCheckImplied(ctxt, BAD_CAST"p");
1575	htmlnamePush(ctxt, BAD_CAST"p");
1576	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1577	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1578	return(`1`);
1579	}
1580	}
1581	return(`0`);
1582	}
1583
1584	/**
1585	* htmlIsScriptAttribute:
1586	* @name: an attribute name
1587	*
1588	* Check if an attribute is of content type Script
1589	*
1590	* Returns 1 is the attribute is a script 0 otherwise
1591	*/
1592	int
1593	htmlIsScriptAttribute(const xmlChar *name) {
1594	unsigned int i;
1595
1596	if (name == NULL)
1597	return(`0`);
1598	/*
1599	* all script attributes start with 'on'
1600	*/
1601	if ((name[`0`] != `'o'`) \|\| (name[`1`] != `'n'`))
1602	return(`0`);
1603	for (i = `0`;
1604	i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[`0`]);
1605	i++) {
1606	if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
1607	return(`1`);
1608	}
1609	return(`0`);
1610	}
1611
1612	/************************************************************************
1613	* *
1614	* The list of HTML predefined entities *
1615	* *
1616	************************************************************************/
1617
1618
1619	static const htmlEntityDesc html40EntitiesTable[] = {
1620	/*
1621	* the 4 absolute ones, plus apostrophe.
1622	*/
1623	{ `34`, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
1624	{ `38`, "amp", "ampersand, U+0026 ISOnum" },
1625	{ `39`, "apos", "single quote" },
1626	{ `60`, "lt", "less-than sign, U+003C ISOnum" },
1627	{ `62`, "gt", "greater-than sign, U+003E ISOnum" },
1628
1629	/*
1630	* A bunch still in the 128-255 range
1631	* Replacing them depend really on the charset used.
1632	*/
1633	{ `160`, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
1634	{ `161`, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
1635	{ `162`, "cent", "cent sign, U+00A2 ISOnum" },
1636	{ `163`, "pound","pound sign, U+00A3 ISOnum" },
1637	{ `164`, "curren","currency sign, U+00A4 ISOnum" },
1638	{ `165`, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
1639	{ `166`, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
1640	{ `167`, "sect", "section sign, U+00A7 ISOnum" },
1641	{ `168`, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1642	{ `169`, "copy", "copyright sign, U+00A9 ISOnum" },
1643	{ `170`, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
1644	{ `171`, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1645	{ `172`, "not", "not sign, U+00AC ISOnum" },
1646	{ `173`, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1647	{ `174`, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
1648	{ `175`, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1649	{ `176`, "deg", "degree sign, U+00B0 ISOnum" },
1650	{ `177`, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1651	{ `178`, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1652	{ `179`, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1653	{ `180`, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
1654	{ `181`, "micro","micro sign, U+00B5 ISOnum" },
1655	{ `182`, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1656	{ `183`, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1657	{ `184`, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
1658	{ `185`, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
1659	{ `186`, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
1660	{ `187`, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1661	{ `188`, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1662	{ `189`, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1663	{ `190`, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1664	{ `191`, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
1665	{ `192`, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1666	{ `193`, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
1667	{ `194`, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
1668	{ `195`, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
1669	{ `196`, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1670	{ `197`, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1671	{ `198`, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1672	{ `199`, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
1673	{ `200`, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
1674	{ `201`, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
1675	{ `202`, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
1676	{ `203`, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
1677	{ `204`, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
1678	{ `205`, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
1679	{ `206`, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
1680	{ `207`, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
1681	{ `208`, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
1682	{ `209`, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
1683	{ `210`, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
1684	{ `211`, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
1685	{ `212`, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
1686	{ `213`, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
1687	{ `214`, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1688	{ `215`, "times","multiplication sign, U+00D7 ISOnum" },
1689	{ `216`, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1690	{ `217`, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
1691	{ `218`, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
1692	{ `219`, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
1693	{ `220`, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
1694	{ `221`, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
1695	{ `222`, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
1696	{ `223`, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1697	{ `224`, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1698	{ `225`, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
1699	{ `226`, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
1700	{ `227`, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
1701	{ `228`, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
1702	{ `229`, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1703	{ `230`, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1704	{ `231`, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
1705	{ `232`, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
1706	{ `233`, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
1707	{ `234`, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
1708	{ `235`, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
1709	{ `236`, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
1710	{ `237`, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
1711	{ `238`, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
1712	{ `239`, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
1713	{ `240`, "eth", "latin small letter eth, U+00F0 ISOlat1" },
1714	{ `241`, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
1715	{ `242`, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
1716	{ `243`, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
1717	{ `244`, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
1718	{ `245`, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
1719	{ `246`, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
1720	{ `247`, "divide","division sign, U+00F7 ISOnum" },
1721	{ `248`, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1722	{ `249`, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
1723	{ `250`, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
1724	{ `251`, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
1725	{ `252`, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
1726	{ `253`, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
1727	{ `254`, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
1728	{ `255`, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
1729
1730	{ `338`, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
1731	{ `339`, "oelig","latin small ligature oe, U+0153 ISOlat2" },
1732	{ `352`, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
1733	{ `353`, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
1734	{ `376`, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1735
1736	/*
1737	* Anything below should really be kept as entities references
1738	*/
1739	{ `402`, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
1740
1741	{ `710`, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
1742	{ `732`, "tilde","small tilde, U+02DC ISOdia" },
1743
1744	{ `913`, "Alpha","greek capital letter alpha, U+0391" },
1745	{ `914`, "Beta", "greek capital letter beta, U+0392" },
1746	{ `915`, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
1747	{ `916`, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
1748	{ `917`, "Epsilon","greek capital letter epsilon, U+0395" },
1749	{ `918`, "Zeta", "greek capital letter zeta, U+0396" },
1750	{ `919`, "Eta", "greek capital letter eta, U+0397" },
1751	{ `920`, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
1752	{ `921`, "Iota", "greek capital letter iota, U+0399" },
1753	{ `922`, "Kappa","greek capital letter kappa, U+039A" },
1754	{ `923`, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
1755	{ `924`, "Mu", "greek capital letter mu, U+039C" },
1756	{ `925`, "Nu", "greek capital letter nu, U+039D" },
1757	{ `926`, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
1758	{ `927`, "Omicron","greek capital letter omicron, U+039F" },
1759	{ `928`, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
1760	{ `929`, "Rho", "greek capital letter rho, U+03A1" },
1761	{ `931`, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
1762	{ `932`, "Tau", "greek capital letter tau, U+03A4" },
1763	{ `933`, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
1764	{ `934`, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
1765	{ `935`, "Chi", "greek capital letter chi, U+03A7" },
1766	{ `936`, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
1767	{ `937`, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
1768
1769	{ `945`, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
1770	{ `946`, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
1771	{ `947`, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
1772	{ `948`, "delta","greek small letter delta, U+03B4 ISOgrk3" },
1773	{ `949`, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
1774	{ `950`, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
1775	{ `951`, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
1776	{ `952`, "theta","greek small letter theta, U+03B8 ISOgrk3" },
1777	{ `953`, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
1778	{ `954`, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
1779	{ `955`, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
1780	{ `956`, "mu", "greek small letter mu, U+03BC ISOgrk3" },
1781	{ `957`, "nu", "greek small letter nu, U+03BD ISOgrk3" },
1782	{ `958`, "xi", "greek small letter xi, U+03BE ISOgrk3" },
1783	{ `959`, "omicron","greek small letter omicron, U+03BF NEW" },
1784	{ `960`, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
1785	{ `961`, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
1786	{ `962`, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
1787	{ `963`, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
1788	{ `964`, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
1789	{ `965`, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
1790	{ `966`, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
1791	{ `967`, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
1792	{ `968`, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
1793	{ `969`, "omega","greek small letter omega, U+03C9 ISOgrk3" },
1794	{ `977`, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
1795	{ `978`, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
1796	{ `982`, "piv", "greek pi symbol, U+03D6 ISOgrk3" },
1797
1798	{ `8194`, "ensp", "en space, U+2002 ISOpub" },
1799	{ `8195`, "emsp", "em space, U+2003 ISOpub" },
1800	{ `8201`, "thinsp","thin space, U+2009 ISOpub" },
1801	{ `8204`, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
1802	{ `8205`, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
1803	{ `8206`, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
1804	{ `8207`, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
1805	{ `8211`, "ndash","en dash, U+2013 ISOpub" },
1806	{ `8212`, "mdash","em dash, U+2014 ISOpub" },
1807	{ `8216`, "lsquo","left single quotation mark, U+2018 ISOnum" },
1808	{ `8217`, "rsquo","right single quotation mark, U+2019 ISOnum" },
1809	{ `8218`, "sbquo","single low-9 quotation mark, U+201A NEW" },
1810	{ `8220`, "ldquo","left double quotation mark, U+201C ISOnum" },
1811	{ `8221`, "rdquo","right double quotation mark, U+201D ISOnum" },
1812	{ `8222`, "bdquo","double low-9 quotation mark, U+201E NEW" },
1813	{ `8224`, "dagger","dagger, U+2020 ISOpub" },
1814	{ `8225`, "Dagger","double dagger, U+2021 ISOpub" },
1815
1816	{ `8226`, "bull", "bullet = black small circle, U+2022 ISOpub" },
1817	{ `8230`, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1818
1819	{ `8240`, "permil","per mille sign, U+2030 ISOtech" },
1820
1821	{ `8242`, "prime","prime = minutes = feet, U+2032 ISOtech" },
1822	{ `8243`, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
1823
1824	{ `8249`, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
1825	{ `8250`, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
1826
1827	{ `8254`, "oline","overline = spacing overscore, U+203E NEW" },
1828	{ `8260`, "frasl","fraction slash, U+2044 NEW" },
1829
1830	{ `8364`, "euro", "euro sign, U+20AC NEW" },
1831
1832	{ `8465`, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
1833	{ `8472`, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
1834	{ `8476`, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
1835	{ `8482`, "trade","trade mark sign, U+2122 ISOnum" },
1836	{ `8501`, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
1837	{ `8592`, "larr", "leftwards arrow, U+2190 ISOnum" },
1838	{ `8593`, "uarr", "upwards arrow, U+2191 ISOnum" },
1839	{ `8594`, "rarr", "rightwards arrow, U+2192 ISOnum" },
1840	{ `8595`, "darr", "downwards arrow, U+2193 ISOnum" },
1841	{ `8596`, "harr", "left right arrow, U+2194 ISOamsa" },
1842	{ `8629`, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
1843	{ `8656`, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
1844	{ `8657`, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
1845	{ `8658`, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
1846	{ `8659`, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
1847	{ `8660`, "hArr", "left right double arrow, U+21D4 ISOamsa" },
1848
1849	{ `8704`, "forall","for all, U+2200 ISOtech" },
1850	{ `8706`, "part", "partial differential, U+2202 ISOtech" },
1851	{ `8707`, "exist","there exists, U+2203 ISOtech" },
1852	{ `8709`, "empty","empty set = null set = diameter, U+2205 ISOamso" },
1853	{ `8711`, "nabla","nabla = backward difference, U+2207 ISOtech" },
1854	{ `8712`, "isin", "element of, U+2208 ISOtech" },
1855	{ `8713`, "notin","not an element of, U+2209 ISOtech" },
1856	{ `8715`, "ni", "contains as member, U+220B ISOtech" },
1857	{ `8719`, "prod", "n-ary product = product sign, U+220F ISOamsb" },
1858	{ `8721`, "sum", "n-ary summation, U+2211 ISOamsb" },
1859	{ `8722`, "minus","minus sign, U+2212 ISOtech" },
1860	{ `8727`, "lowast","asterisk operator, U+2217 ISOtech" },
1861	{ `8730`, "radic","square root = radical sign, U+221A ISOtech" },
1862	{ `8733`, "prop", "proportional to, U+221D ISOtech" },
1863	{ `8734`, "infin","infinity, U+221E ISOtech" },
1864	{ `8736`, "ang", "angle, U+2220 ISOamso" },
1865	{ `8743`, "and", "logical and = wedge, U+2227 ISOtech" },
1866	{ `8744`, "or", "logical or = vee, U+2228 ISOtech" },
1867	{ `8745`, "cap", "intersection = cap, U+2229 ISOtech" },
1868	{ `8746`, "cup", "union = cup, U+222A ISOtech" },
1869	{ `8747`, "int", "integral, U+222B ISOtech" },
1870	{ `8756`, "there4","therefore, U+2234 ISOtech" },
1871	{ `8764`, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
1872	{ `8773`, "cong", "approximately equal to, U+2245 ISOtech" },
1873	{ `8776`, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
1874	{ `8800`, "ne", "not equal to, U+2260 ISOtech" },
1875	{ `8801`, "equiv","identical to, U+2261 ISOtech" },
1876	{ `8804`, "le", "less-than or equal to, U+2264 ISOtech" },
1877	{ `8805`, "ge", "greater-than or equal to, U+2265 ISOtech" },
1878	{ `8834`, "sub", "subset of, U+2282 ISOtech" },
1879	{ `8835`, "sup", "superset of, U+2283 ISOtech" },
1880	{ `8836`, "nsub", "not a subset of, U+2284 ISOamsn" },
1881	{ `8838`, "sube", "subset of or equal to, U+2286 ISOtech" },
1882	{ `8839`, "supe", "superset of or equal to, U+2287 ISOtech" },
1883	{ `8853`, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
1884	{ `8855`, "otimes","circled times = vector product, U+2297 ISOamsb" },
1885	{ `8869`, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
1886	{ `8901`, "sdot", "dot operator, U+22C5 ISOamsb" },
1887	{ `8968`, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
1888	{ `8969`, "rceil","right ceiling, U+2309 ISOamsc" },
1889	{ `8970`, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
1890	{ `8971`, "rfloor","right floor, U+230B ISOamsc" },
1891	{ `9001`, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
1892	{ `9002`, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
1893	{ `9674`, "loz", "lozenge, U+25CA ISOpub" },
1894
1895	{ `9824`, "spades","black spade suit, U+2660 ISOpub" },
1896	{ `9827`, "clubs","black club suit = shamrock, U+2663 ISOpub" },
1897	{ `9829`, "hearts","black heart suit = valentine, U+2665 ISOpub" },
1898	{ `9830`, "diams","black diamond suit, U+2666 ISOpub" },
1899
1900	};
1901
1902	/************************************************************************
1903	* *
1904	* Commodity functions to handle entities *
1905	* *
1906	************************************************************************/
1907
/*
 * growBuffer:
 * Macro used to grow the current buffer.
 *
 * Doubles the variable named <buffer>_size and reallocates <buffer> to
 * that many xmlChar. The macro expects three things from the function it
 * is expanded in: a variable `ctxt` usable with htmlErrMemory(), an
 * integer variable named <buffer>_size, and a pointer return type.
 *
 * If the reallocation fails, the error is reported, the old buffer is
 * freed and the enclosing function returns NULL. On success any pointer
 * into the old buffer must be recomputed by the caller, since the
 * storage may have moved.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp == NULL) {							\
	htmlErrMemory(ctxt, "growing buffer\n");			\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = tmp;							\
}
1922
1923	/**
1924	* htmlEntityLookup:
1925	* @name: the entity name
1926	*
1927	* Lookup the given entity in EntitiesTable
1928	*
1929	* TODO: the linear scan is really ugly, an hash table is really needed.
1930	*
1931	* Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1932	*/
1933	const htmlEntityDesc *
1934	htmlEntityLookup(const xmlChar *name) {
1935	unsigned int i;
1936
1937	for (i = `0`;i < (sizeof(html40EntitiesTable)/
1938	sizeof(html40EntitiesTable[`0`]));i++) {
1939	if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
1940	return((htmlEntityDescPtr) &html40EntitiesTable[i]);
1941	}
1942	}
1943	return(NULL);
1944	}
1945
1946	/**
1947	* htmlEntityValueLookup:
1948	* @value: the entity's unicode value
1949	*
1950	* Lookup the given entity in EntitiesTable
1951	*
1952	* TODO: the linear scan is really ugly, an hash table is really needed.
1953	*
1954	* Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1955	*/
1956	const htmlEntityDesc *
1957	htmlEntityValueLookup(unsigned int value) {
1958	unsigned int i;
1959
1960	for (i = `0`;i < (sizeof(html40EntitiesTable)/
1961	sizeof(html40EntitiesTable[`0`]));i++) {
1962	if (html40EntitiesTable[i].value >= value) {
1963	if (html40EntitiesTable[i].value > value)
1964	break;
1965	return((htmlEntityDescPtr) &html40EntitiesTable[i]);
1966	}
1967	}
1968	return(NULL);
1969	}
1970
1971	/**
1972	* UTF8ToHtml:
1973	* @out: a pointer to an array of bytes to store the result
1974	* @outlen: the length of @out
1975	* @in: a pointer to an array of UTF-8 chars
1976	* @inlen: the length of @in
1977	*
1978	* Take a block of UTF-8 chars in and try to convert it to an ASCII
1979	* plus HTML entities block of chars out.
1980	*
1981	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
1982	* The value of @inlen after return is the number of octets consumed
1983	* as the return value is positive, else unpredictable.
1984	* The value of @outlen after return is the number of octets consumed.
1985	*/
1986	int
1987	UTF8ToHtml(unsigned char* out, int *outlen,
1988	const unsigned char* in, int *inlen) {
1989	const unsigned char* processed = in;
1990	const unsigned char* outend;
1991	const unsigned char* outstart = out;
1992	const unsigned char* instart = in;
1993	const unsigned char* inend;
1994	unsigned int c, d;
1995	int trailing;
1996
1997	if ((out == NULL) \|\| (outlen == NULL) \|\| (inlen == NULL)) return(-`1`);
1998	if (in == NULL) {
1999	/*
2000	* initialization nothing to do
2001	*/
2002	*outlen = `0`;
2003	*inlen = `0`;
2004	return(`0`);
2005	}
2006	inend = in + (*inlen);
2007	outend = out + (*outlen);
2008	while (in < inend) {
2009	d = *in++;
2010	if (d < `0x80`) { c= d; trailing= `0`; }
2011	else if (d < `0xC0`) {
2012	/ trailing byte in leading position /
2013	*outlen = out - outstart;
2014	*inlen = processed - instart;
2015	return(-`2`);
2016	} else if (d < `0xE0`) { c= d & `0x1F`; trailing= `1`; }
2017	else if (d < `0xF0`) { c= d & `0x0F`; trailing= `2`; }
2018	else if (d < `0xF8`) { c= d & `0x07`; trailing= `3`; }
2019	else {
2020	/ no chance for this in Ascii /
2021	*outlen = out - outstart;
2022	*inlen = processed - instart;
2023	return(-`2`);
2024	}
2025
2026	if (inend - in < trailing) {
2027	break;
2028	}
2029
2030	for ( ; trailing; trailing--) {
2031	if ((in >= inend) \|\| (((d= *in++) & `0xC0`) != `0x80`))
2032	break;
2033	c <<= `6`;
2034	c \|= d & `0x3F`;
2035	}
2036
2037	/ assertion: c is a single UTF-4 value /
2038	if (c < `0x80`) {
2039	if (out + `1` >= outend)
2040	break;
2041	*out++ = c;
2042	} else {
2043	int len;
2044	const htmlEntityDesc * ent;
2045	const char *cp;
2046	char nbuf[`16`];
2047
2048	/*
2049	* Try to lookup a predefined HTML entity for it
2050	*/
2051
2052	ent = htmlEntityValueLookup(c);
2053	if (ent == NULL) {
2054	snprintf(nbuf, sizeof(nbuf), "#%u", c);
2055	cp = nbuf;
2056	}
2057	else
2058	cp = ent->name;
2059	len = strlen(cp);
2060	if (out + `2` + len >= outend)
2061	break;
2062	*out++ = `'&'`;
2063	memcpy(out, cp, len);
2064	out += len;
2065	*out++ = `';'`;
2066	}
2067	processed = in;
2068	}
2069	*outlen = out - outstart;
2070	*inlen = processed - instart;
2071	return(`0`);
2072	}
2073
2074	/**
2075	* htmlEncodeEntities:
2076	* @out: a pointer to an array of bytes to store the result
2077	* @outlen: the length of @out
2078	* @in: a pointer to an array of UTF-8 chars
2079	* @inlen: the length of @in
2080	* @quoteChar: the quote character to escape (' or ") or zero.
2081	*
2082	* Take a block of UTF-8 chars in and try to convert it to an ASCII
2083	* plus HTML entities block of chars out.
2084	*
2085	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2086	* The value of @inlen after return is the number of octets consumed
2087	* as the return value is positive, else unpredictable.
2088	* The value of @outlen after return is the number of octets consumed.
2089	*/
2090	int
2091	htmlEncodeEntities(unsigned char* out, int *outlen,
2092	const unsigned char* in, int inlen, int* quoteChar) {
2093	const unsigned char* processed = in;
2094	const unsigned char* outend;
2095	const unsigned char* outstart = out;
2096	const unsigned char* instart = in;
2097	const unsigned char* inend;
2098	unsigned int c, d;
2099	int trailing;
2100
2101	if ((out == NULL) \|\| (outlen == NULL) \|\| (inlen == NULL) \|\| (in == NULL))
2102	return(-`1`);
2103	outend = out + (*outlen);
2104	inend = in + (*inlen);
2105	while (in < inend) {
2106	d = *in++;
2107	if (d < `0x80`) { c= d; trailing= `0`; }
2108	else if (d < `0xC0`) {
2109	/ trailing byte in leading position /
2110	*outlen = out - outstart;
2111	*inlen = processed - instart;
2112	return(-`2`);
2113	} else if (d < `0xE0`) { c= d & `0x1F`; trailing= `1`; }
2114	else if (d < `0xF0`) { c= d & `0x0F`; trailing= `2`; }
2115	else if (d < `0xF8`) { c= d & `0x07`; trailing= `3`; }
2116	else {
2117	/ no chance for this in Ascii /
2118	*outlen = out - outstart;
2119	*inlen = processed - instart;
2120	return(-`2`);
2121	}
2122
2123	if (inend - in < trailing)
2124	break;
2125
2126	while (trailing--) {
2127	if (((d= *in++) & `0xC0`) != `0x80`) {
2128	*outlen = out - outstart;
2129	*inlen = processed - instart;
2130	return(-`2`);
2131	}
2132	c <<= `6`;
2133	c \|= d & `0x3F`;
2134	}
2135
2136	/ assertion: c is a single UTF-4 value /
2137	if ((c < `0x80`) && (c != (unsigned int) quoteChar) &&
2138	(c != `'&'`) && (c != `'<'`) && (c != `'>'`)) {
2139	if (out >= outend)
2140	break;
2141	*out++ = c;
2142	} else {
2143	const htmlEntityDesc * ent;
2144	const char *cp;
2145	char nbuf[`16`];
2146	int len;
2147
2148	/*
2149	* Try to lookup a predefined HTML entity for it
2150	*/
2151	ent = htmlEntityValueLookup(c);
2152	if (ent == NULL) {
2153	snprintf(nbuf, sizeof(nbuf), "#%u", c);
2154	cp = nbuf;
2155	}
2156	else
2157	cp = ent->name;
2158	len = strlen(cp);
2159	if (out + `2` + len > outend)
2160	break;
2161	*out++ = `'&'`;
2162	memcpy(out, cp, len);
2163	out += len;
2164	*out++ = `';'`;
2165	}
2166	processed = in;
2167	}
2168	*outlen = out - outstart;
2169	*inlen = processed - instart;
2170	return(`0`);
2171	}
2172
2173	/************************************************************************
2174	* *
2175	* Commodity functions to handle streams *
2176	* *
2177	************************************************************************/
2178
2179	/**
2180	* htmlNewInputStream:
2181	* @ctxt: an HTML parser context
2182	*
2183	* Create a new input stream structure
2184	* Returns the new input stream or NULL
2185	*/
2186	static htmlParserInputPtr
2187	htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2188	htmlParserInputPtr input;
2189
2190	input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
2191	if (input == NULL) {
2192	htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2193	return(NULL);
2194	}
2195	memset(input, `0`, sizeof(htmlParserInput));
2196	input->filename = NULL;
2197	input->directory = NULL;
2198	input->base = NULL;
2199	input->cur = NULL;
2200	input->buf = NULL;
2201	input->line = `1`;
2202	input->col = `1`;
2203	input->buf = NULL;
2204	input->free = NULL;
2205	input->version = NULL;
2206	input->consumed = `0`;
2207	input->length = `0`;
2208	return(input);
2209	}
2210
2211
2212	/************************************************************************
2213	* *
2214	* Commodity functions, cleanup needed ? *
2215	* *
2216	************************************************************************/
/*
 * all tags allowing pc data from the html 4.01 loose dtd
 * NOTE: it might be more appropriate to integrate this information
 *       into the html40ElementTable array but I don't want to risk any
 *       binary incompatibility
 *
 * The table is read-only: both the strings and the pointers are const.
 */
static const char *const allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",
    "blockquote", "body", "button", "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt", "em", "font", "form", "h1", "h2",
    "h3", "h4", "h5", "h6", "i", "iframe", "ins", "kbd", "label", "legend",
    "li", "noframes", "noscript", "object", "p", "pre", "q", "s", "samp",
    "small", "span", "strike", "strong", "td", "th", "tt", "u", "var"
};
2231
2232	/**
2233	* areBlanks:
2234	* @ctxt: an HTML parser context
2235	* @str: a xmlChar *
2236	* @len: the size of @str
2237	*
2238	* Is this a sequence of blank chars that one can ignore ?
2239	*
2240	* Returns 1 if ignorable 0 otherwise.
2241	*/
2242
2243	static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar str, int* len) {
2244	unsigned int i;
2245	int j;
2246	xmlNodePtr lastChild;
2247	xmlDtdPtr dtd;
2248
2249	for (j = `0`;j < len;j++)
2250	if (!(IS_BLANK_CH(str[j]))) return(`0`);
2251
2252	if (CUR == `0`) return(`1`);
2253	if (CUR != `'<'`) return(`0`);
2254	if (ctxt->name == NULL)
2255	return(`1`);
2256	if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2257	return(`1`);
2258	if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2259	return(`1`);
2260
2261	/ Only strip CDATA children of the body tag for strict HTML DTDs /
2262	if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2263	dtd = xmlGetIntSubset(ctxt->myDoc);
2264	if (dtd != NULL && dtd->ExternalID != NULL) {
2265	if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") \|\|
2266	!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
2267	return(`1`);
2268	}
2269	}
2270
2271	if (ctxt->node == NULL) return(`0`);
2272	lastChild = xmlGetLastChild(ctxt->node);
2273	while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
2274	lastChild = lastChild->prev;
2275	if (lastChild == NULL) {
2276	if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2277	(ctxt->node->content != NULL)) return(`0`);
2278	/ keep ws in constructs like ...<b> </b>...*
2279	for all tags "b" allowing PCDATA /*
2280	for ( i = `0`; i < sizeof(allowPCData)/sizeof(allowPCData[`0`]); i++ ) {
2281	if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2282	return(`0`);
2283	}
2284	}
2285	} else if (xmlNodeIsText(lastChild)) {
2286	return(`0`);
2287	} else {
2288	/ keep ws in constructs like <p><b>xy</b> <i>z</i><p>*
2289	for all tags "p" allowing PCDATA /*
2290	for ( i = `0`; i < sizeof(allowPCData)/sizeof(allowPCData[`0`]); i++ ) {
2291	if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
2292	return(`0`);
2293	}
2294	}
2295	}
2296	return(`1`);
2297	}
2298
2299	/**
2300	* htmlNewDocNoDtD:
2301	* @URI: URI for the dtd, or NULL
2302	* @ExternalID: the external ID of the DTD, or NULL
2303	*
2304	* Creates a new HTML document without a DTD node if @URI and @ExternalID
2305	* are NULL
2306	*
2307	* Returns a new document, do not initialize the DTD if not provided
2308	*/
2309	htmlDocPtr
2310	htmlNewDocNoDtD(const xmlChar URI, const* xmlChar *ExternalID) {
2311	xmlDocPtr cur;
2312
2313	/*
2314	* Allocate a new document and fill the fields.
2315	*/
2316	cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
2317	if (cur == NULL) {
2318	htmlErrMemory(NULL, "HTML document creation failed\n");
2319	return(NULL);
2320	}
2321	memset(cur, `0`, sizeof(xmlDoc));
2322
2323	cur->type = XML_HTML_DOCUMENT_NODE;
2324	cur->version = NULL;
2325	cur->intSubset = NULL;
2326	cur->doc = cur;
2327	cur->name = NULL;
2328	cur->children = NULL;
2329	cur->extSubset = NULL;
2330	cur->oldNs = NULL;
2331	cur->encoding = NULL;
2332	cur->standalone = `1`;
2333	cur->compression = `0`;
2334	cur->ids = NULL;
2335	cur->refs = NULL;
2336	cur->_private = NULL;
2337	cur->charset = XML_CHAR_ENCODING_UTF8;
2338	cur->properties = XML_DOC_HTML \| XML_DOC_USERBUILT;
2339	if ((ExternalID != NULL) \|\|
2340	(URI != NULL))
2341	xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
2342	return(cur);
2343	}
2344
2345	/**
2346	* htmlNewDoc:
2347	* @URI: URI for the dtd, or NULL
2348	* @ExternalID: the external ID of the DTD, or NULL
2349	*
2350	* Creates a new HTML document
2351	*
2352	* Returns a new document
2353	*/
2354	htmlDocPtr
2355	htmlNewDoc(const xmlChar URI, const* xmlChar *ExternalID) {
2356	if ((URI == NULL) && (ExternalID == NULL))
2357	return(htmlNewDocNoDtD(
2358	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
2359	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
2360
2361	return(htmlNewDocNoDtD(URI, ExternalID));
2362	}
2363
2364
2365	/************************************************************************
2366	* *
2367	* The parser itself *
2368	* Relates to http://www.w3.org/TR/html40 *
2369	* *
2370	************************************************************************/
2371
2372	/************************************************************************
2373	* *
2374	* The parser itself *
2375	* *
2376	************************************************************************/
2377
2378	static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2379
2380	/**
2381	* htmlParseHTMLName:
2382	* @ctxt: an HTML parser context
2383	*
2384	* parse an HTML tag or attribute name, note that we convert it to lowercase
2385	* since HTML names are not case-sensitive.
2386	*
2387	* Returns the Tag Name parsed or NULL
2388	*/
2389
2390	static const xmlChar *
2391	htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2392	int i = `0`;
2393	xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2394
2395	if (!IS_ASCII_LETTER(CUR) && (CUR != `'_'`) &&
2396	(CUR != `':'`) && (CUR != `'.'`)) return(NULL);
2397
2398	while ((i < HTML_PARSER_BUFFER_SIZE) &&
2399	((IS_ASCII_LETTER(CUR)) \|\| (IS_ASCII_DIGIT(CUR)) \|\|
2400	(CUR == `':'`) \|\| (CUR == `'-'`) \|\| (CUR == `'_'`) \|\|
2401	(CUR == `'.'`))) {
2402	if ((CUR >= `'A'`) && (CUR <= `'Z'`)) loc[i] = CUR + `0x20`;
2403	else loc[i] = CUR;
2404	i++;
2405
2406	NEXT;
2407	}
2408
2409	return(xmlDictLookup(ctxt->dict, loc, i));
2410	}
2411
2412
2413	/**
2414	* htmlParseHTMLName_nonInvasive:
2415	* @ctxt: an HTML parser context
2416	*
2417	* parse an HTML tag or attribute name, note that we convert it to lowercase
2418	* since HTML names are not case-sensitive, this doesn't consume the data
2419	* from the stream, it's a look-ahead
2420	*
2421	* Returns the Tag Name parsed or NULL
2422	*/
2423
2424	static const xmlChar *
2425	htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2426	int i = `0`;
2427	xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2428
2429	if (!IS_ASCII_LETTER(NXT(`1`)) && (NXT(`1`) != `'_'`) &&
2430	(NXT(`1`) != `':'`)) return(NULL);
2431
2432	while ((i < HTML_PARSER_BUFFER_SIZE) &&
2433	((IS_ASCII_LETTER(NXT(`1`+i))) \|\| (IS_ASCII_DIGIT(NXT(`1`+i))) \|\|
2434	(NXT(`1`+i) == `':'`) \|\| (NXT(`1`+i) == `'-'`) \|\| (NXT(`1`+i) == `'_'`))) {
2435	if ((NXT(`1`+i) >= `'A'`) && (NXT(`1`+i) <= `'Z'`)) loc[i] = NXT(`1`+i) + `0x20`;
2436	else loc[i] = NXT(`1`+i);
2437	i++;
2438	}
2439
2440	return(xmlDictLookup(ctxt->dict, loc, i));
2441	}
2442
2443
2444	/**
2445	* htmlParseName:
2446	* @ctxt: an HTML parser context
2447	*
2448	* parse an HTML name, this routine is case sensitive.
2449	*
2450	* Returns the Name parsed or NULL
2451	*/
2452
2453	static const xmlChar *
2454	htmlParseName(htmlParserCtxtPtr ctxt) {
2455	const xmlChar *in;
2456	const xmlChar *ret;
2457	int count = `0`;
2458
2459	GROW;
2460
2461	/*
2462	* Accelerator for simple ASCII names
2463	*/
2464	in = ctxt->input->cur;
2465	if (((in >= `0x61`) && (in <= `0x7A`)) \|\|
2466	((in >= `0x41`) && (in <= `0x5A`)) \|\|
2467	(in == `'_'`) \|\| (in == `':'`)) {
2468	in++;
2469	while (((in >= `0x61`) && (in <= `0x7A`)) \|\|
2470	((in >= `0x41`) && (in <= `0x5A`)) \|\|
2471	((in >= `0x30`) && (in <= `0x39`)) \|\|
2472	(in == `'_'`) \|\| (in == `'-'`) \|\|
2473	(in == `':'`) \|\| (in == `'.'`))
2474	in++;
2475
2476	if (in == ctxt->input->end)
2477	return(NULL);
2478
2479	if ((in > `0`) && (in < `0x80`)) {
2480	count = in - ctxt->input->cur;
2481	ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2482	ctxt->input->cur = in;
2483	ctxt->nbChars += count;
2484	ctxt->input->col += count;
2485	return(ret);
2486	}
2487	}
2488	return(htmlParseNameComplex(ctxt));
2489	}
2490
2491	static const xmlChar *
2492	htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2493	int len = `0`, l;
2494	int c;
2495	int count = `0`;
2496	const xmlChar *base = ctxt->input->base;
2497
2498	/*
2499	* Handler for more complex cases
2500	*/
2501	GROW;
2502	c = CUR_CHAR(l);
2503	if ((c == `' '`) \|\| (c == `'>'`) \|\| (c == `'/'`) \|\| / accelerators /
2504	(!IS_LETTER(c) && (c != `'_'`) &&
2505	(c != `':'`))) {
2506	return(NULL);
2507	}
2508
2509	while ((c != `' '`) && (c != `'>'`) && (c != `'/'`) && / test bigname.xml /
2510	((IS_LETTER(c)) \|\| (IS_DIGIT(c)) \|\|
2511	(c == `'.'`) \|\| (c == `'-'`) \|\|
2512	(c == `'_'`) \|\| (c == `':'`) \|\|
2513	(IS_COMBINING(c)) \|\|
2514	(IS_EXTENDER(c)))) {
2515	if (count++ > `100`) {
2516	count = `0`;
2517	GROW;
2518	}
2519	len += l;
2520	NEXTL(l);
2521	c = CUR_CHAR(l);
2522	if (ctxt->input->base != base) {
2523	/*
2524	* We changed encoding from an unknown encoding
2525	* Input buffer changed location, so we better start again
2526	*/
2527	return(htmlParseNameComplex(ctxt));
2528	}
2529	}
2530
2531	if (ctxt->input->cur - ctxt->input->base < len) {
2532	/ Sanity check /
2533	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
2534	"unexpected change of input buffer", NULL, NULL);
2535	return (NULL);
2536	}
2537
2538	return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2539	}
2540
2541
2542	/**
2543	* htmlParseHTMLAttribute:
2544	* @ctxt: an HTML parser context
2545	* @stop: a char stop value
2546	*
2547	* parse an HTML attribute value till the stop (quote), if
2548	* stop is 0 then it stops at the first space
2549	*
2550	* Returns the attribute parsed or NULL
2551	*/
2552
2553	static xmlChar *
2554	htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2555	xmlChar *buffer = NULL;
2556	int buffer_size = `0`;
2557	xmlChar *out = NULL;
2558	const xmlChar *name = NULL;
2559	const xmlChar *cur = NULL;
2560	const htmlEntityDesc * ent;
2561
2562	/*
2563	* allocate a translation buffer.
2564	*/
2565	buffer_size = HTML_PARSER_BUFFER_SIZE;
2566	buffer = (xmlChar ) xmlMallocAtomic(buffer_size sizeof(xmlChar));
2567	if (buffer == NULL) {
2568	htmlErrMemory(ctxt, "buffer allocation failed\n");
2569	return(NULL);
2570	}
2571	out = buffer;
2572
2573	/*
2574	* Ok loop until we reach one of the ending chars
2575	*/
2576	while ((CUR != `0`) && (CUR != stop)) {
2577	if ((stop == `0`) && (CUR == `'>'`)) break;
2578	if ((stop == `0`) && (IS_BLANK_CH(CUR))) break;
2579	if (CUR == `'&'`) {
2580	if (NXT(`1`) == `'#'`) {
2581	unsigned int c;
2582	int bits;
2583
2584	c = htmlParseCharRef(ctxt);
2585	if (c < `0x80`)
2586	{ *out++ = c; bits= -`6`; }
2587	else if (c < `0x800`)
2588	{ *out++ =((c >> `6`) & `0x1F`) \| `0xC0`; bits= `0`; }
2589	else if (c < `0x10000`)
2590	{ *out++ =((c >> `12`) & `0x0F`) \| `0xE0`; bits= `6`; }
2591	else
2592	{ *out++ =((c >> `18`) & `0x07`) \| `0xF0`; bits= `12`; }
2593
2594	for ( ; bits >= `0`; bits-= `6`) {
2595	*out++ = ((c >> bits) & `0x3F`) \| `0x80`;
2596	}
2597
2598	if (out - buffer > buffer_size - `100`) {
2599	int indx = out - buffer;
2600
2601	growBuffer(buffer);
2602	out = &buffer[indx];
2603	}
2604	} else {
2605	ent = htmlParseEntityRef(ctxt, &name);
2606	if (name == NULL) {
2607	*out++ = `'&'`;
2608	if (out - buffer > buffer_size - `100`) {
2609	int indx = out - buffer;
2610
2611	growBuffer(buffer);
2612	out = &buffer[indx];
2613	}
2614	} else if (ent == NULL) {
2615	*out++ = `'&'`;
2616	cur = name;
2617	while (*cur != `0`) {
2618	if (out - buffer > buffer_size - `100`) {
2619	int indx = out - buffer;
2620
2621	growBuffer(buffer);
2622	out = &buffer[indx];
2623	}
2624	out++ = cur++;
2625	}
2626	} else {
2627	unsigned int c;
2628	int bits;
2629
2630	if (out - buffer > buffer_size - `100`) {
2631	int indx = out - buffer;
2632
2633	growBuffer(buffer);
2634	out = &buffer[indx];
2635	}
2636	c = ent->value;
2637	if (c < `0x80`)
2638	{ *out++ = c; bits= -`6`; }
2639	else if (c < `0x800`)
2640	{ *out++ =((c >> `6`) & `0x1F`) \| `0xC0`; bits= `0`; }
2641	else if (c < `0x10000`)
2642	{ *out++ =((c >> `12`) & `0x0F`) \| `0xE0`; bits= `6`; }
2643	else
2644	{ *out++ =((c >> `18`) & `0x07`) \| `0xF0`; bits= `12`; }
2645
2646	for ( ; bits >= `0`; bits-= `6`) {
2647	*out++ = ((c >> bits) & `0x3F`) \| `0x80`;
2648	}
2649	}
2650	}
2651	} else {
2652	unsigned int c;
2653	int bits, l;
2654
2655	if (out - buffer > buffer_size - `100`) {
2656	int indx = out - buffer;
2657
2658	growBuffer(buffer);
2659	out = &buffer[indx];
2660	}
2661	c = CUR_CHAR(l);
2662	if (c < `0x80`)
2663	{ *out++ = c; bits= -`6`; }
2664	else if (c < `0x800`)
2665	{ *out++ =((c >> `6`) & `0x1F`) \| `0xC0`; bits= `0`; }
2666	else if (c < `0x10000`)
2667	{ *out++ =((c >> `12`) & `0x0F`) \| `0xE0`; bits= `6`; }
2668	else
2669	{ *out++ =((c >> `18`) & `0x07`) \| `0xF0`; bits= `12`; }
2670
2671	for ( ; bits >= `0`; bits-= `6`) {
2672	*out++ = ((c >> bits) & `0x3F`) \| `0x80`;
2673	}
2674	NEXT;
2675	}
2676	}
2677	*out = `0`;
2678	return(buffer);
2679	}
2680
2681	/**
2682	* htmlParseEntityRef:
2683	* @ctxt: an HTML parser context
2684	* @str: location to store the entity name
2685	*
2686	* parse an HTML ENTITY references
2687	*
2688	* [68] EntityRef ::= '&' Name ';'
2689	*
2690	* Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
2691	* if non-NULL *str will have to be freed by the caller.
2692	*/
2693	const htmlEntityDesc *
2694	htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2695	const xmlChar *name;
2696	const htmlEntityDesc * ent = NULL;
2697
2698	if (str != NULL) *str = NULL;
2699	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) return(NULL);
2700
2701	if (CUR == `'&'`) {
2702	NEXT;
2703	name = htmlParseName(ctxt);
2704	if (name == NULL) {
2705	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2706	"htmlParseEntityRef: no name\n", NULL, NULL);
2707	} else {
2708	GROW;
2709	if (CUR == `';'`) {
2710	if (str != NULL)
2711	*str = name;
2712
2713	/*
2714	* Lookup the entity in the table.
2715	*/
2716	ent = htmlEntityLookup(name);
2717	if (ent != NULL) / OK that's ugly !!! /
2718	NEXT;
2719	} else {
2720	htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2721	"htmlParseEntityRef: expecting ';'\n",
2722	NULL, NULL);
2723	if (str != NULL)
2724	*str = name;
2725	}
2726	}
2727	}
2728	return(ent);
2729	}
2730
2731	/**
2732	* htmlParseAttValue:
2733	* @ctxt: an HTML parser context
2734	*
2735	* parse a value for an attribute
2736	* Note: the parser won't do substitution of entities here, this
2737	* will be handled later in xmlStringGetNodeList, unless it was
2738	* asked for ctxt->replaceEntities != 0
2739	*
2740	* Returns the AttValue parsed or NULL.
2741	*/
2742
2743	static xmlChar *
2744	htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2745	xmlChar *ret = NULL;
2746
2747	if (CUR == `'"'`) {
2748	NEXT;
2749	ret = htmlParseHTMLAttribute(ctxt, `'"'`);
2750	if (CUR != `'"'`) {
2751	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2752	"AttValue: \" expected\n", NULL, NULL);
2753	} else
2754	NEXT;
2755	} else if (CUR == `'\''`) {
2756	NEXT;
2757	ret = htmlParseHTMLAttribute(ctxt, `'\''`);
2758	if (CUR != `'\''`) {
2759	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2760	"AttValue: ' expected\n", NULL, NULL);
2761	} else
2762	NEXT;
2763	} else {
2764	/*
2765	* That's an HTMLism, the attribute value may not be quoted
2766	*/
2767	ret = htmlParseHTMLAttribute(ctxt, `0`);
2768	if (ret == NULL) {
2769	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2770	"AttValue: no value found\n", NULL, NULL);
2771	}
2772	}
2773	return(ret);
2774	}
2775
2776	/**
2777	* htmlParseSystemLiteral:
2778	* @ctxt: an HTML parser context
2779	*
2780	* parse an HTML Literal
2781	*
2782	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
2783	*
2784	* Returns the SystemLiteral parsed or NULL
2785	*/
2786
2787	static xmlChar *
2788	htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2789	size_t len = `0`, startPosition = `0`;
2790	xmlChar *ret = NULL;
2791
2792	if (CUR == `'"'`) {
2793	NEXT;
2794
2795	if (CUR_PTR < BASE_PTR)
2796	return(ret);
2797	startPosition = CUR_PTR - BASE_PTR;
2798
2799	while ((IS_CHAR_CH(CUR)) && (CUR != `'"'`)) {
2800	NEXT;
2801	len++;
2802	}
2803	if (!IS_CHAR_CH(CUR)) {
2804	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2805	"Unfinished SystemLiteral\n", NULL, NULL);
2806	} else {
2807	ret = xmlStrndup((BASE_PTR+startPosition), len);
2808	NEXT;
2809	}
2810	} else if (CUR == `'\''`) {
2811	NEXT;
2812
2813	if (CUR_PTR < BASE_PTR)
2814	return(ret);
2815	startPosition = CUR_PTR - BASE_PTR;
2816
2817	while ((IS_CHAR_CH(CUR)) && (CUR != `'\''`)) {
2818	NEXT;
2819	len++;
2820	}
2821	if (!IS_CHAR_CH(CUR)) {
2822	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2823	"Unfinished SystemLiteral\n", NULL, NULL);
2824	} else {
2825	ret = xmlStrndup((BASE_PTR+startPosition), len);
2826	NEXT;
2827	}
2828	} else {
2829	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2830	" or ' expected\n", NULL, NULL);
2831	}
2832
2833	return(ret);
2834	}
2835
2836	/**
2837	* htmlParsePubidLiteral:
2838	* @ctxt: an HTML parser context
2839	*
2840	* parse an HTML public literal
2841	*
2842	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
2843	*
2844	* Returns the PubidLiteral parsed or NULL.
2845	*/
2846
2847	static xmlChar *
2848	htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2849	size_t len = `0`, startPosition = `0`;
2850	xmlChar *ret = NULL;
2851	/*
2852	* Name ::= (Letter \| '_') (NameChar)*
2853	*/
2854	if (CUR == `'"'`) {
2855	NEXT;
2856
2857	if (CUR_PTR < BASE_PTR)
2858	return(ret);
2859	startPosition = CUR_PTR - BASE_PTR;
2860
2861	while (IS_PUBIDCHAR_CH(CUR)) {
2862	len++;
2863	NEXT;
2864	}
2865
2866	if (CUR != `'"'`) {
2867	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2868	"Unfinished PubidLiteral\n", NULL, NULL);
2869	} else {
2870	ret = xmlStrndup((BASE_PTR + startPosition), len);
2871	NEXT;
2872	}
2873	} else if (CUR == `'\''`) {
2874	NEXT;
2875
2876	if (CUR_PTR < BASE_PTR)
2877	return(ret);
2878	startPosition = CUR_PTR - BASE_PTR;
2879
2880	while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != `'\''`)){
2881	len++;
2882	NEXT;
2883	}
2884
2885	if (CUR != `'\''`) {
2886	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2887	"Unfinished PubidLiteral\n", NULL, NULL);
2888	} else {
2889	ret = xmlStrndup((BASE_PTR + startPosition), len);
2890	NEXT;
2891	}
2892	} else {
2893	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2894	"PubidLiteral \" or ' expected\n", NULL, NULL);
2895	}
2896
2897	return(ret);
2898	}
2899
2900	/**
2901	* htmlParseScript:
2902	* @ctxt: an HTML parser context
2903	*
2904	* parse the content of an HTML SCRIPT or STYLE element
2905	* http://www.w3.org/TR/html4/sgml/dtd.html#Script
2906	* http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
2907	* http://www.w3.org/TR/html4/types.html#type-script
2908	* http://www.w3.org/TR/html4/types.html#h-6.15
2909	* http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
2910	*
2911	* Script data ( %Script; in the DTD) can be the content of the SCRIPT
2912	* element and the value of intrinsic event attributes. User agents must
2913	* not evaluate script data as HTML markup but instead must pass it on as
2914	* data to a script engine.
2915	* NOTES:
2916	* - The content is passed like CDATA
2917	* - the attributes for style and scripting "onXXX" are also described
2918	* as CDATA but SGML allows entities references in attributes so their
2919	* processing is identical as other attributes
2920	*/
2921	static void
2922	htmlParseScript(htmlParserCtxtPtr ctxt) {
2923	xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + `5`];
2924	int nbchar = `0`;
2925	int cur,l;
2926
2927	SHRINK;
2928	cur = CUR_CHAR(l);
2929	while (IS_CHAR_CH(cur)) {
2930	if ((cur == `'<'`) && (NXT(`1`) == `'/'`)) {
2931	/*
2932	* One should break here, the specification is clear:
2933	* Authors should therefore escape "</" within the content.
2934	* Escape mechanisms are specific to each scripting or
2935	* style sheet language.
2936	*
2937	* In recovery mode, only break if end tag match the
2938	* current tag, effectively ignoring all tags inside the
2939	* script/style block and treating the entire block as
2940	* CDATA.
2941	*/
2942	if (ctxt->recovery) {
2943	if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+`2`,
2944	xmlStrlen(ctxt->name)) == `0`)
2945	{
2946	break; / while /
2947	} else {
2948	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2949	"Element %s embeds close tag\n",
2950	ctxt->name, NULL);
2951	}
2952	} else {
2953	if (((NXT(`2`) >= `'A'`) && (NXT(`2`) <= `'Z'`)) \|\|
2954	((NXT(`2`) >= `'a'`) && (NXT(`2`) <= `'z'`)))
2955	{
2956	break; / while /
2957	}
2958	}
2959	}
2960	COPY_BUF(l,buf,nbchar,cur);
2961	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
2962	if (ctxt->sax->cdataBlock!= NULL) {
2963	/*
2964	* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2965	*/
2966	ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2967	} else if (ctxt->sax->characters != NULL) {
2968	ctxt->sax->characters(ctxt->userData, buf, nbchar);
2969	}
2970	nbchar = `0`;
2971	}
2972	GROW;
2973	NEXTL(l);
2974	cur = CUR_CHAR(l);
2975	}
2976
2977	if ((!(IS_CHAR_CH(cur))) && (!((cur == `0`) && (ctxt->progressive)))) {
2978	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2979	"Invalid char in CDATA 0x%X\n", cur);
2980	if (ctxt->input->cur < ctxt->input->end) {
2981	NEXT;
2982	}
2983	}
2984
2985	if ((nbchar != `0`) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2986	if (ctxt->sax->cdataBlock!= NULL) {
2987	/*
2988	* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2989	*/
2990	ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2991	} else if (ctxt->sax->characters != NULL) {
2992	ctxt->sax->characters(ctxt->userData, buf, nbchar);
2993	}
2994	}
2995	}
2996
2997
2998	/**
2999	* htmlParseCharDataInternal:
3000	* @ctxt: an HTML parser context
3001	* @readahead: optional read ahead character in ascii range
3002	*
3003	* parse a CharData section.
3004	* if we are within a CDATA section ']]>' marks an end of section.
3005	*
3006	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3007	*/
3008
3009	static void
3010	htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
3011	xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + `6`];
3012	int nbchar = `0`;
3013	int cur, l;
3014	int chunk = `0`;
3015
3016	if (readahead)
3017	buf[nbchar++] = readahead;
3018
3019	SHRINK;
3020	cur = CUR_CHAR(l);
3021	while (((cur != `'<'`) \|\| (ctxt->token == `'<'`)) &&
3022	((cur != `'&'`) \|\| (ctxt->token == `'&'`)) &&
3023	(cur != `0`)) {
3024	if (!(IS_CHAR(cur))) {
3025	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3026	"Invalid char in CDATA 0x%X\n", cur);
3027	} else {
3028	COPY_BUF(l,buf,nbchar,cur);
3029	}
3030	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3031	/*
3032	* Ok the segment is to be consumed as chars.
3033	*/
3034	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3035	if (areBlanks(ctxt, buf, nbchar)) {
3036	if (ctxt->keepBlanks) {
3037	if (ctxt->sax->characters != NULL)
3038	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3039	} else {
3040	if (ctxt->sax->ignorableWhitespace != NULL)
3041	ctxt->sax->ignorableWhitespace(ctxt->userData,
3042	buf, nbchar);
3043	}
3044	} else {
3045	htmlCheckParagraph(ctxt);
3046	if (ctxt->sax->characters != NULL)
3047	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3048	}
3049	}
3050	nbchar = `0`;
3051	}
3052	NEXTL(l);
3053	chunk++;
3054	if (chunk > HTML_PARSER_BUFFER_SIZE) {
3055	chunk = `0`;
3056	SHRINK;
3057	GROW;
3058	}
3059	cur = CUR_CHAR(l);
3060	if (cur == `0`) {
3061	SHRINK;
3062	GROW;
3063	cur = CUR_CHAR(l);
3064	}
3065	}
3066	if (nbchar != `0`) {
3067	buf[nbchar] = `0`;
3068
3069	/*
3070	* Ok the segment is to be consumed as chars.
3071	*/
3072	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3073	if (areBlanks(ctxt, buf, nbchar)) {
3074	if (ctxt->keepBlanks) {
3075	if (ctxt->sax->characters != NULL)
3076	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3077	} else {
3078	if (ctxt->sax->ignorableWhitespace != NULL)
3079	ctxt->sax->ignorableWhitespace(ctxt->userData,
3080	buf, nbchar);
3081	}
3082	} else {
3083	htmlCheckParagraph(ctxt);
3084	if (ctxt->sax->characters != NULL)
3085	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3086	}
3087	}
3088	} else {
3089	/*
3090	* Loop detection
3091	*/
3092	if (cur == `0`)
3093	ctxt->instate = XML_PARSER_EOF;
3094	}
3095	}
3096
3097	/**
3098	* htmlParseCharData:
3099	* @ctxt: an HTML parser context
3100	*
3101	* parse a CharData section.
3102	* if we are within a CDATA section ']]>' marks an end of section.
3103	*
3104	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3105	*/
3106
3107	static void
3108	htmlParseCharData(htmlParserCtxtPtr ctxt) {
3109	htmlParseCharDataInternal(ctxt, `0`);
3110	}
3111
3112	/**
3113	* htmlParseExternalID:
3114	* @ctxt: an HTML parser context
3115	* @publicID: a xmlChar** receiving PubidLiteral
3116	*
3117	* Parse an External ID or a Public ID
3118	*
3119	* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3120	* \| 'PUBLIC' S PubidLiteral S SystemLiteral
3121	*
3122	* [83] PublicID ::= 'PUBLIC' S PubidLiteral
3123	*
3124	* Returns the function returns SystemLiteral and in the second
3125	* case publicID receives PubidLiteral, is strict is off
3126	* it is possible to return NULL and have publicID set.
3127	*/
3128
3129	static xmlChar *
3130	htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
3131	xmlChar *URI = NULL;
3132
3133	if ((UPPER == `'S'`) && (UPP(`1`) == `'Y'`) &&
3134	(UPP(`2`) == `'S'`) && (UPP(`3`) == `'T'`) &&
3135	(UPP(`4`) == `'E'`) && (UPP(`5`) == `'M'`)) {
3136	SKIP(`6`);
3137	if (!IS_BLANK_CH(CUR)) {
3138	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3139	"Space required after 'SYSTEM'\n", NULL, NULL);
3140	}
3141	SKIP_BLANKS;
3142	URI = htmlParseSystemLiteral(ctxt);
3143	if (URI == NULL) {
3144	htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
3145	"htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
3146	}
3147	} else if ((UPPER == `'P'`) && (UPP(`1`) == `'U'`) &&
3148	(UPP(`2`) == `'B'`) && (UPP(`3`) == `'L'`) &&
3149	(UPP(`4`) == `'I'`) && (UPP(`5`) == `'C'`)) {
3150	SKIP(`6`);
3151	if (!IS_BLANK_CH(CUR)) {
3152	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3153	"Space required after 'PUBLIC'\n", NULL, NULL);
3154	}
3155	SKIP_BLANKS;
3156	*publicID = htmlParsePubidLiteral(ctxt);
3157	if (*publicID == NULL) {
3158	htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
3159	"htmlParseExternalID: PUBLIC, no Public Identifier\n",
3160	NULL, NULL);
3161	}
3162	SKIP_BLANKS;
3163	if ((CUR == `'"'`) \|\| (CUR == `'\''`)) {
3164	URI = htmlParseSystemLiteral(ctxt);
3165	}
3166	}
3167	return(URI);
3168	}
3169
3170	/**
3171	* xmlParsePI:
3172	* @ctxt: an XML parser context
3173	*
3174	* parse an XML Processing Instruction.
3175	*
3176	* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3177	*/
3178	static void
3179	htmlParsePI(htmlParserCtxtPtr ctxt) {
3180	xmlChar *buf = NULL;
3181	int len = `0`;
3182	int size = HTML_PARSER_BUFFER_SIZE;
3183	int cur, l;
3184	const xmlChar *target;
3185	xmlParserInputState state;
3186	int count = `0`;
3187
3188	if ((RAW == `'<'`) && (NXT(`1`) == `'?'`)) {
3189	state = ctxt->instate;
3190	ctxt->instate = XML_PARSER_PI;
3191	/*
3192	* this is a Processing Instruction.
3193	*/
3194	SKIP(`2`);
3195	SHRINK;
3196
3197	/*
3198	* Parse the target name and check for special support like
3199	* namespace.
3200	*/
3201	target = htmlParseName(ctxt);
3202	if (target != NULL) {
3203	if (RAW == `'>'`) {
3204	SKIP(`1`);
3205
3206	/*
3207	* SAX: PI detected.
3208	*/
3209	if ((ctxt->sax) && (!ctxt->disableSAX) &&
3210	(ctxt->sax->processingInstruction != NULL))
3211	ctxt->sax->processingInstruction(ctxt->userData,
3212	target, NULL);
3213	ctxt->instate = state;
3214	return;
3215	}
3216	buf = (xmlChar ) xmlMallocAtomic(size sizeof(xmlChar));
3217	if (buf == NULL) {
3218	htmlErrMemory(ctxt, NULL);
3219	ctxt->instate = state;
3220	return;
3221	}
3222	cur = CUR;
3223	if (!IS_BLANK(cur)) {
3224	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3225	"ParsePI: PI %s space expected\n", target, NULL);
3226	}
3227	SKIP_BLANKS;
3228	cur = CUR_CHAR(l);
3229	while (IS_CHAR(cur) && (cur != `'>'`)) {
3230	if (len + `5` >= size) {
3231	xmlChar *tmp;
3232
3233	size *= `2`;
3234	tmp = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
3235	if (tmp == NULL) {
3236	htmlErrMemory(ctxt, NULL);
3237	xmlFree(buf);
3238	ctxt->instate = state;
3239	return;
3240	}
3241	buf = tmp;
3242	}
3243	count++;
3244	if (count > `50`) {
3245	GROW;
3246	count = `0`;
3247	}
3248	COPY_BUF(l,buf,len,cur);
3249	NEXTL(l);
3250	cur = CUR_CHAR(l);
3251	if (cur == `0`) {
3252	SHRINK;
3253	GROW;
3254	cur = CUR_CHAR(l);
3255	}
3256	}
3257	buf[len] = `0`;
3258	if (cur != `'>'`) {
3259	htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
3260	"ParsePI: PI %s never end ...\n", target, NULL);
3261	} else {
3262	SKIP(`1`);
3263
3264	/*
3265	* SAX: PI detected.
3266	*/
3267	if ((ctxt->sax) && (!ctxt->disableSAX) &&
3268	(ctxt->sax->processingInstruction != NULL))
3269	ctxt->sax->processingInstruction(ctxt->userData,
3270	target, buf);
3271	}
3272	xmlFree(buf);
3273	} else {
3274	htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
3275	"PI is not started correctly", NULL, NULL);
3276	}
3277	ctxt->instate = state;
3278	}
3279	}
3280
3281	/**
3282	* htmlParseComment:
3283	* @ctxt: an HTML parser context
3284	*
3285	* Parse an XML (SGML) comment <!-- .... -->
3286	*
3287	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
3288	*/
3289	static void
3290	htmlParseComment(htmlParserCtxtPtr ctxt) {
3291	xmlChar *buf = NULL;
3292	int len;
3293	int size = HTML_PARSER_BUFFER_SIZE;
3294	int q, ql;
3295	int r, rl;
3296	int cur, l;
3297	xmlParserInputState state;
3298
3299	/*
3300	* Check that there is a comment right here.
3301	*/
3302	if ((RAW != `'<'`) \|\| (NXT(`1`) != `'!'`) \|\|
3303	(NXT(`2`) != `'-'`) \|\| (NXT(`3`) != `'-'`)) return;
3304
3305	state = ctxt->instate;
3306	ctxt->instate = XML_PARSER_COMMENT;
3307	SHRINK;
3308	SKIP(`4`);
3309	buf = (xmlChar ) xmlMallocAtomic(size sizeof(xmlChar));
3310	if (buf == NULL) {
3311	htmlErrMemory(ctxt, "buffer allocation failed\n");
3312	ctxt->instate = state;
3313	return;
3314	}
3315	len = `0`;
3316	buf[len] = `0`;
3317	q = CUR_CHAR(ql);
3318	if (!IS_CHAR(q))
3319	goto unfinished;
3320	NEXTL(ql);
3321	r = CUR_CHAR(rl);
3322	if (!IS_CHAR(r))
3323	goto unfinished;
3324	NEXTL(rl);
3325	cur = CUR_CHAR(l);
3326	while (IS_CHAR(cur) &&
3327	((cur != `'>'`) \|\|
3328	(r != `'-'`) \|\| (q != `'-'`))) {
3329	if (len + `5` >= size) {
3330	xmlChar *tmp;
3331
3332	size *= `2`;
3333	tmp = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
3334	if (tmp == NULL) {
3335	xmlFree(buf);
3336	htmlErrMemory(ctxt, "growing buffer failed\n");
3337	ctxt->instate = state;
3338	return;
3339	}
3340	buf = tmp;
3341	}
3342	COPY_BUF(ql,buf,len,q);
3343	q = r;
3344	ql = rl;
3345	r = cur;
3346	rl = l;
3347	NEXTL(l);
3348	cur = CUR_CHAR(l);
3349	if (cur == `0`) {
3350	SHRINK;
3351	GROW;
3352	cur = CUR_CHAR(l);
3353	}
3354	}
3355	buf[len] = `0`;
3356	if (IS_CHAR(cur)) {
3357	NEXT;
3358	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3359	(!ctxt->disableSAX))
3360	ctxt->sax->comment(ctxt->userData, buf);
3361	xmlFree(buf);
3362	ctxt->instate = state;
3363	return;
3364	}
3365
3366	unfinished:
3367	htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3368	"Comment not terminated \n<!--%.50s\n", buf, NULL);
3369	xmlFree(buf);
3370	}
3371
3372	/**
3373	* htmlParseCharRef:
3374	* @ctxt: an HTML parser context
3375	*
3376	* parse Reference declarations
3377	*
3378	* [66] CharRef ::= '&#' [0-9]+ ';' \|
3379	* '&#x' [0-9a-fA-F]+ ';'
3380	*
3381	* Returns the value parsed (as an int)
3382	*/
3383	int
3384	htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3385	int val = `0`;
3386
3387	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
3388	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3389	"htmlParseCharRef: context error\n",
3390	NULL, NULL);
3391	return(`0`);
3392	}
3393	if ((CUR == `'&'`) && (NXT(`1`) == `'#'`) &&
3394	((NXT(`2`) == `'x'`) \|\| NXT(`2`) == `'X'`)) {
3395	SKIP(`3`);
3396	while (CUR != `';'`) {
3397	if ((CUR >= `'0'`) && (CUR <= `'9'`))
3398	val = val * `16` + (CUR - `'0'`);
3399	else if ((CUR >= `'a'`) && (CUR <= `'f'`))
3400	val = val * `16` + (CUR - `'a'`) + `10`;
3401	else if ((CUR >= `'A'`) && (CUR <= `'F'`))
3402	val = val * `16` + (CUR - `'A'`) + `10`;
3403	else {
3404	htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3405	"htmlParseCharRef: missing semicolon\n",
3406	NULL, NULL);
3407	break;
3408	}
3409	NEXT;
3410	}
3411	if (CUR == `';'`)
3412	NEXT;
3413	} else if ((CUR == `'&'`) && (NXT(`1`) == `'#'`)) {
3414	SKIP(`2`);
3415	while (CUR != `';'`) {
3416	if ((CUR >= `'0'`) && (CUR <= `'9'`))
3417	val = val * `10` + (CUR - `'0'`);
3418	else {
3419	htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3420	"htmlParseCharRef: missing semicolon\n",
3421	NULL, NULL);
3422	break;
3423	}
3424	NEXT;
3425	}
3426	if (CUR == `';'`)
3427	NEXT;
3428	} else {
3429	htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3430	"htmlParseCharRef: invalid value\n", NULL, NULL);
3431	}
3432	/*
3433	* Check the value IS_CHAR ...
3434	*/
3435	if (IS_CHAR(val)) {
3436	return(val);
3437	} else {
3438	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3439	"htmlParseCharRef: invalid xmlChar value %d\n",
3440	val);
3441	}
3442	return(`0`);
3443	}
3444
3445
3446	/**
3447	* htmlParseDocTypeDecl:
3448	* @ctxt: an HTML parser context
3449	*
3450	* parse a DOCTYPE declaration
3451	*
3452	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3453	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
3454	*/
3455
3456	static void
3457	htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3458	const xmlChar *name;
3459	xmlChar *ExternalID = NULL;
3460	xmlChar *URI = NULL;
3461
3462	/*
3463	* We know that '<!DOCTYPE' has been detected.
3464	*/
3465	SKIP(`9`);
3466
3467	SKIP_BLANKS;
3468
3469	/*
3470	* Parse the DOCTYPE name.
3471	*/
3472	name = htmlParseName(ctxt);
3473	if (name == NULL) {
3474	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3475	"htmlParseDocTypeDecl : no DOCTYPE name !\n",
3476	NULL, NULL);
3477	}
3478	/*
3479	* Check that upper(name) == "HTML" !!!!!!!!!!!!!
3480	*/
3481
3482	SKIP_BLANKS;
3483
3484	/*
3485	* Check for SystemID and ExternalID
3486	*/
3487	URI = htmlParseExternalID(ctxt, &ExternalID);
3488	SKIP_BLANKS;
3489
3490	/*
3491	* We should be at the end of the DOCTYPE declaration.
3492	*/
3493	if (CUR != `'>'`) {
3494	htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3495	"DOCTYPE improperly terminated\n", NULL, NULL);
3496	/ We shouldn't try to resynchronize ... /
3497	}
3498	NEXT;
3499
3500	/*
3501	* Create or update the document accordingly to the DOCTYPE
3502	*/
3503	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3504	(!ctxt->disableSAX))
3505	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3506
3507	/*
3508	* Cleanup, since we don't use all those identifiers
3509	*/
3510	if (URI != NULL) xmlFree(URI);
3511	if (ExternalID != NULL) xmlFree(ExternalID);
3512	}
3513
3514	/**
3515	* htmlParseAttribute:
3516	* @ctxt: an HTML parser context
3517	* @value: a xmlChar ** used to store the value of the attribute
3518	*
3519	* parse an attribute
3520	*
3521	* [41] Attribute ::= Name Eq AttValue
3522	*
3523	* [25] Eq ::= S? '=' S?
3524	*
3525	* With namespace:
3526	*
3527	* [NS 11] Attribute ::= QName Eq AttValue
3528	*
3529	* Also the case QName == xmlns:??? is handled independently as a namespace
3530	* definition.
3531	*
3532	* Returns the attribute name, and the value in *value.
3533	*/
3534
3535	static const xmlChar *
3536	htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3537	const xmlChar *name;
3538	xmlChar *val = NULL;
3539
3540	*value = NULL;
3541	name = htmlParseHTMLName(ctxt);
3542	if (name == NULL) {
3543	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3544	"error parsing attribute name\n", NULL, NULL);
3545	return(NULL);
3546	}
3547
3548	/*
3549	* read the value
3550	*/
3551	SKIP_BLANKS;
3552	if (CUR == `'='`) {
3553	NEXT;
3554	SKIP_BLANKS;
3555	val = htmlParseAttValue(ctxt);
3556	}
3557
3558	*value = val;
3559	return(name);
3560	}
3561
3562	/**
3563	* htmlCheckEncodingDirect:
3564	* @ctxt: an HTML parser context
3565	* @attvalue: the attribute value
3566	*
3567	* Checks an attribute value to detect
3568	* the encoding
3569	* If a new encoding is detected the parser is switched to decode
3570	* it and pass UTF8
3571	*/
3572	static void
3573	htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
3574
3575	if ((ctxt == NULL) \|\| (encoding == NULL) \|\|
3576	(ctxt->options & HTML_PARSE_IGNORE_ENC))
3577	return;
3578
3579	/ do not change encoding /
3580	if (ctxt->input->encoding != NULL)
3581	return;
3582
3583	if (encoding != NULL) {
3584	xmlCharEncoding enc;
3585	xmlCharEncodingHandlerPtr handler;
3586
3587	while ((encoding == `' '`) \|\| (encoding == `'\t'`)) encoding++;
3588
3589	if (ctxt->input->encoding != NULL)
3590	xmlFree((xmlChar *) ctxt->input->encoding);
3591	ctxt->input->encoding = xmlStrdup(encoding);
3592
3593	enc = xmlParseCharEncoding((const char *) encoding);
3594	/*
3595	* registered set of known encodings
3596	*/
3597	if (enc != XML_CHAR_ENCODING_ERROR) {
3598	if (((enc == XML_CHAR_ENCODING_UTF16LE) \|\|
3599	(enc == XML_CHAR_ENCODING_UTF16BE) \|\|
3600	(enc == XML_CHAR_ENCODING_UCS4LE) \|\|
3601	(enc == XML_CHAR_ENCODING_UCS4BE)) &&
3602	(ctxt->input->buf != NULL) &&
3603	(ctxt->input->buf->encoder == NULL)) {
3604	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3605	"htmlCheckEncoding: wrong encoding meta\n",
3606	NULL, NULL);
3607	} else {
3608	xmlSwitchEncoding(ctxt, enc);
3609	}
3610	ctxt->charset = XML_CHAR_ENCODING_UTF8;
3611	} else {
3612	/*
3613	* fallback for unknown encodings
3614	*/
3615	handler = xmlFindCharEncodingHandler((const char *) encoding);
3616	if (handler != NULL) {
3617	xmlSwitchToEncoding(ctxt, handler);
3618	ctxt->charset = XML_CHAR_ENCODING_UTF8;
3619	} else {
3620	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
3621	"htmlCheckEncoding: unknown encoding %s\n",
3622	encoding, NULL);
3623	}
3624	}
3625
3626	if ((ctxt->input->buf != NULL) &&
3627	(ctxt->input->buf->encoder != NULL) &&
3628	(ctxt->input->buf->raw != NULL) &&
3629	(ctxt->input->buf->buffer != NULL)) {
3630	int nbchars;
3631	int processed;
3632
3633	/*
3634	* convert as much as possible to the parser reading buffer.
3635	*/
3636	processed = ctxt->input->cur - ctxt->input->base;
3637	xmlBufShrink(ctxt->input->buf->buffer, processed);
3638	nbchars = xmlCharEncInput(ctxt->input->buf, `0`);
3639	if (nbchars < `0`) {
3640	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3641	"htmlCheckEncoding: encoder error\n",
3642	NULL, NULL);
3643	}
3644	xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
3645	}
3646	}
3647	}
3648
3649	/**
3650	* htmlCheckEncoding:
3651	* @ctxt: an HTML parser context
3652	* @attvalue: the attribute value
3653	*
3654	* Checks an http-equiv attribute from a Meta tag to detect
3655	* the encoding
3656	* If a new encoding is detected the parser is switched to decode
3657	* it and pass UTF8
3658	*/
3659	static void
3660	htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3661	const xmlChar *encoding;
3662
3663	if (!attvalue)
3664	return;
3665
3666	encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
3667	if (encoding != NULL) {
3668	encoding += `7`;
3669	}
3670	/*
3671	* skip blank
3672	*/
3673	if (encoding && IS_BLANK_CH(*encoding))
3674	encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
3675	if (encoding && *encoding == `'='`) {
3676	encoding ++;
3677	htmlCheckEncodingDirect(ctxt, encoding);
3678	}
3679	}
3680
3681	/**
3682	* htmlCheckMeta:
3683	* @ctxt: an HTML parser context
3684	* @atts: the attributes values
3685	*
3686	* Checks an attributes from a Meta tag
3687	*/
3688	static void
3689	htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3690	int i;
3691	const xmlChar att, value;
3692	int http = `0`;
3693	const xmlChar *content = NULL;
3694
3695	if ((ctxt == NULL) \|\| (atts == NULL))
3696	return;
3697
3698	i = `0`;
3699	att = atts[i++];
3700	while (att != NULL) {
3701	value = atts[i++];
3702	if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
3703	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
3704	http = `1`;
3705	else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
3706	htmlCheckEncodingDirect(ctxt, value);
3707	else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
3708	content = value;
3709	att = atts[i++];
3710	}
3711	if ((http) && (content != NULL))
3712	htmlCheckEncoding(ctxt, content);
3713
3714	}
3715
3716	/**
3717	* htmlParseStartTag:
3718	* @ctxt: an HTML parser context
3719	*
3720	* parse a start of tag either for rule element or
3721	* EmptyElement. In both case we don't parse the tag closing chars.
3722	*
3723	* [40] STag ::= '<' Name (S Attribute)* S? '>'
3724	*
3725	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3726	*
3727	* With namespace:
3728	*
3729	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3730	*
3731	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3732	*
3733	* Returns 0 in case of success, -1 in case of error and 1 if discarded
3734	*/
3735
3736	static int
3737	htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3738	const xmlChar *name;
3739	const xmlChar *attname;
3740	xmlChar *attvalue;
3741	const xmlChar **atts;
3742	int nbatts = `0`;
3743	int maxatts;
3744	int meta = `0`;
3745	int i;
3746	int discardtag = `0`;
3747
3748	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
3749	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3750	"htmlParseStartTag: context error\n", NULL, NULL);
3751	return -`1`;
3752	}
3753	if (ctxt->instate == XML_PARSER_EOF)
3754	return(-`1`);
3755	if (CUR != `'<'`) return -`1`;
3756	NEXT;
3757
3758	atts = ctxt->atts;
3759	maxatts = ctxt->maxatts;
3760
3761	GROW;
3762	name = htmlParseHTMLName(ctxt);
3763	if (name == NULL) {
3764	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3765	"htmlParseStartTag: invalid element name\n",
3766	NULL, NULL);
3767	/ if recover preserve text on classic misconstructs /
3768	if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) \|\| (CUR == `'<'`) \|\|
3769	(CUR == `'='`) \|\| (CUR == `'>'`) \|\| (((CUR >= `'0'`) && (CUR <= `'9'`))))) {
3770	htmlParseCharDataInternal(ctxt, `'<'`);
3771	return(-`1`);
3772	}
3773
3774
3775	/ Dump the bogus tag like browsers do /
3776	while ((IS_CHAR_CH(CUR)) && (CUR != `'>'`) &&
3777	(ctxt->instate != XML_PARSER_EOF))
3778	NEXT;
3779	return -`1`;
3780	}
3781	if (xmlStrEqual(name, BAD_CAST"meta"))
3782	meta = `1`;
3783
3784	/*
3785	* Check for auto-closure of HTML elements.
3786	*/
3787	htmlAutoClose(ctxt, name);
3788
3789	/*
3790	* Check for implied HTML elements.
3791	*/
3792	htmlCheckImplied(ctxt, name);
3793
3794	/*
3795	* Avoid html at any level > 0, head at any level != 1
3796	* or any attempt to recurse body
3797	*/
3798	if ((ctxt->nameNr > `0`) && (xmlStrEqual(name, BAD_CAST"html"))) {
3799	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3800	"htmlParseStartTag: misplaced <html> tag\n",
3801	name, NULL);
3802	discardtag = `1`;
3803	ctxt->depth++;
3804	}
3805	if ((ctxt->nameNr != `1`) &&
3806	(xmlStrEqual(name, BAD_CAST"head"))) {
3807	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3808	"htmlParseStartTag: misplaced <head> tag\n",
3809	name, NULL);
3810	discardtag = `1`;
3811	ctxt->depth++;
3812	}
3813	if (xmlStrEqual(name, BAD_CAST"body")) {
3814	int indx;
3815	for (indx = `0`;indx < ctxt->nameNr;indx++) {
3816	if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
3817	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3818	"htmlParseStartTag: misplaced <body> tag\n",
3819	name, NULL);
3820	discardtag = `1`;
3821	ctxt->depth++;
3822	}
3823	}
3824	}
3825
3826	/*
3827	* Now parse the attributes, it ends up with the ending
3828	*
3829	* (S Attribute)* S?
3830	*/
3831	SKIP_BLANKS;
3832	while ((IS_CHAR_CH(CUR)) &&
3833	(CUR != `'>'`) &&
3834	((CUR != `'/'`) \|\| (NXT(`1`) != `'>'`))) {
3835	long cons = ctxt->nbChars;
3836
3837	GROW;
3838	attname = htmlParseAttribute(ctxt, &attvalue);
3839	if (attname != NULL) {
3840
3841	/*
3842	* Well formedness requires at most one declaration of an attribute
3843	*/
3844	for (i = `0`; i < nbatts;i += `2`) {
3845	if (xmlStrEqual(atts[i], attname)) {
3846	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
3847	"Attribute %s redefined\n", attname, NULL);
3848	if (attvalue != NULL)
3849	xmlFree(attvalue);
3850	goto failed;
3851	}
3852	}
3853
3854	/*
3855	* Add the pair to atts
3856	*/
3857	if (atts == NULL) {
3858	maxatts = `22`; / allow for 10 attrs by default /
3859	atts = (const xmlChar **)
3860	xmlMalloc(maxatts * sizeof(xmlChar *));
3861	if (atts == NULL) {
3862	htmlErrMemory(ctxt, NULL);
3863	if (attvalue != NULL)
3864	xmlFree(attvalue);
3865	goto failed;
3866	}
3867	ctxt->atts = atts;
3868	ctxt->maxatts = maxatts;
3869	} else if (nbatts + `4` > maxatts) {
3870	const xmlChar **n;
3871
3872	maxatts *= `2`;
3873	n = (const xmlChar *) xmlRealloc((void* *) atts,
3874	maxatts * sizeof(const xmlChar *));
3875	if (n == NULL) {
3876	htmlErrMemory(ctxt, NULL);
3877	if (attvalue != NULL)
3878	xmlFree(attvalue);
3879	goto failed;
3880	}
3881	atts = n;
3882	ctxt->atts = atts;
3883	ctxt->maxatts = maxatts;
3884	}
3885	atts[nbatts++] = attname;
3886	atts[nbatts++] = attvalue;
3887	atts[nbatts] = NULL;
3888	atts[nbatts + `1`] = NULL;
3889	}
3890	else {
3891	if (attvalue != NULL)
3892	xmlFree(attvalue);
3893	/ Dump the bogus attribute string up to the next blank or*
3894	* the end of the tag. */
3895	while ((IS_CHAR_CH(CUR)) &&
3896	!(IS_BLANK_CH(CUR)) && (CUR != `'>'`) &&
3897	((CUR != `'/'`) \|\| (NXT(`1`) != `'>'`)))
3898	NEXT;
3899	}
3900
3901	failed:
3902	SKIP_BLANKS;
3903	if (cons == ctxt->nbChars) {
3904	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3905	"htmlParseStartTag: problem parsing attributes\n",
3906	NULL, NULL);
3907	break;
3908	}
3909	}
3910
3911	/*
3912	* Handle specific association to the META tag
3913	*/
3914	if (meta && (nbatts != `0`))
3915	htmlCheckMeta(ctxt, atts);
3916
3917	/*
3918	* SAX: Start of Element !
3919	*/
3920	if (!discardtag) {
3921	htmlnamePush(ctxt, name);
3922	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
3923	if (nbatts != `0`)
3924	ctxt->sax->startElement(ctxt->userData, name, atts);
3925	else
3926	ctxt->sax->startElement(ctxt->userData, name, NULL);
3927	}
3928	}
3929
3930	if (atts != NULL) {
3931	for (i = `1`;i < nbatts;i += `2`) {
3932	if (atts[i] != NULL)
3933	xmlFree((xmlChar *) atts[i]);
3934	}
3935	}
3936
3937	return(discardtag);
3938	}
3939
3940	/**
3941	* htmlParseEndTag:
3942	* @ctxt: an HTML parser context
3943	*
3944	* parse an end of tag
3945	*
3946	* [42] ETag ::= '</' Name S? '>'
3947	*
3948	* With namespace
3949	*
3950	* [NS 9] ETag ::= '</' QName S? '>'
3951	*
3952	* Returns 1 if the current level should be closed.
3953	*/
3954
3955	static int
3956	htmlParseEndTag(htmlParserCtxtPtr ctxt)
3957	{
3958	const xmlChar *name;
3959	const xmlChar *oldname;
3960	int i, ret;
3961
3962	if ((CUR != `'<'`) \|\| (NXT(`1`) != `'/'`)) {
3963	htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
3964	"htmlParseEndTag: '</' not found\n", NULL, NULL);
3965	return (`0`);
3966	}
3967	SKIP(`2`);
3968
3969	name = htmlParseHTMLName(ctxt);
3970	if (name == NULL)
3971	return (`0`);
3972	/*
3973	* We should definitely be at the ending "S? '>'" part
3974	*/
3975	SKIP_BLANKS;
3976	if ((!IS_CHAR_CH(CUR)) \|\| (CUR != `'>'`)) {
3977	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
3978	"End tag : expected '>'\n", NULL, NULL);
3979	if (ctxt->recovery) {
3980	/*
3981	* We're not at the ending > !!
3982	* Error, unless in recover mode where we search forwards
3983	* until we find a >
3984	*/
3985	while (CUR != `'\0'` && CUR != `'>'`) NEXT;
3986	NEXT;
3987	}
3988	} else
3989	NEXT;
3990
3991	/*
3992	* if we ignored misplaced tags in htmlParseStartTag don't pop them
3993	* out now.
3994	*/
3995	if ((ctxt->depth > `0`) &&
3996	(xmlStrEqual(name, BAD_CAST "html") \|\|
3997	xmlStrEqual(name, BAD_CAST "body") \|\|
3998	xmlStrEqual(name, BAD_CAST "head"))) {
3999	ctxt->depth--;
4000	return (`0`);
4001	}
4002
4003	/*
4004	* If the name read is not one of the element in the parsing stack
4005	* then return, it's just an error.
4006	*/
4007	for (i = (ctxt->nameNr - `1`); i >= `0`; i--) {
4008	if (xmlStrEqual(name, ctxt->nameTab[i]))
4009	break;
4010	}
4011	if (i < `0`) {
4012	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4013	"Unexpected end tag : %s\n", name, NULL);
4014	return (`0`);
4015	}
4016
4017
4018	/*
4019	* Check for auto-closure of HTML elements.
4020	*/
4021
4022	htmlAutoCloseOnClose(ctxt, name);
4023
4024	/*
4025	* Well formedness constraints, opening and closing must match.
4026	* With the exception that the autoclose may have popped stuff out
4027	* of the stack.
4028	*/
4029	if (!xmlStrEqual(name, ctxt->name)) {
4030	if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
4031	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4032	"Opening and ending tag mismatch: %s and %s\n",
4033	name, ctxt->name);
4034	}
4035	}
4036
4037	/*
4038	* SAX: End of Tag
4039	*/
4040	oldname = ctxt->name;
4041	if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
4042	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4043	ctxt->sax->endElement(ctxt->userData, name);
4044	htmlNodeInfoPop(ctxt);
4045	htmlnamePop(ctxt);
4046	ret = `1`;
4047	} else {
4048	ret = `0`;
4049	}
4050
4051	return (ret);
4052	}
4053
4054
4055	/**
4056	* htmlParseReference:
4057	* @ctxt: an HTML parser context
4058	*
4059	* parse and handle entity references in content,
4060	* this will end-up in a call to character() since this is either a
4061	* CharRef, or a predefined entity.
4062	*/
4063	static void
4064	htmlParseReference(htmlParserCtxtPtr ctxt) {
4065	const htmlEntityDesc * ent;
4066	xmlChar out[`6`];
4067	const xmlChar *name;
4068	if (CUR != `'&'`) return;
4069
4070	if (NXT(`1`) == `'#'`) {
4071	unsigned int c;
4072	int bits, i = `0`;
4073
4074	c = htmlParseCharRef(ctxt);
4075	if (c == `0`)
4076	return;
4077
4078	if (c < `0x80`) { out[i++]= c; bits= -`6`; }
4079	else if (c < `0x800`) { out[i++]=((c >> `6`) & `0x1F`) \| `0xC0`; bits= `0`; }
4080	else if (c < `0x10000`) { out[i++]=((c >> `12`) & `0x0F`) \| `0xE0`; bits= `6`; }
4081	else { out[i++]=((c >> `18`) & `0x07`) \| `0xF0`; bits= `12`; }
4082
4083	for ( ; bits >= `0`; bits-= `6`) {
4084	out[i++]= ((c >> bits) & `0x3F`) \| `0x80`;
4085	}
4086	out[i] = `0`;
4087
4088	htmlCheckParagraph(ctxt);
4089	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4090	ctxt->sax->characters(ctxt->userData, out, i);
4091	} else {
4092	ent = htmlParseEntityRef(ctxt, &name);
4093	if (name == NULL) {
4094	htmlCheckParagraph(ctxt);
4095	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4096	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", `1`);
4097	return;
4098	}
4099	if ((ent == NULL) \|\| !(ent->value > `0`)) {
4100	htmlCheckParagraph(ctxt);
4101	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
4102	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", `1`);
4103	ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
4104	/ ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); /
4105	}
4106	} else {
4107	unsigned int c;
4108	int bits, i = `0`;
4109
4110	c = ent->value;
4111	if (c < `0x80`)
4112	{ out[i++]= c; bits= -`6`; }
4113	else if (c < `0x800`)
4114	{ out[i++]=((c >> `6`) & `0x1F`) \| `0xC0`; bits= `0`; }
4115	else if (c < `0x10000`)
4116	{ out[i++]=((c >> `12`) & `0x0F`) \| `0xE0`; bits= `6`; }
4117	else
4118	{ out[i++]=((c >> `18`) & `0x07`) \| `0xF0`; bits= `12`; }
4119
4120	for ( ; bits >= `0`; bits-= `6`) {
4121	out[i++]= ((c >> bits) & `0x3F`) \| `0x80`;
4122	}
4123	out[i] = `0`;
4124
4125	htmlCheckParagraph(ctxt);
4126	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4127	ctxt->sax->characters(ctxt->userData, out, i);
4128	}
4129	}
4130	}
4131
4132	/**
4133	* htmlParseContent:
4134	* @ctxt: an HTML parser context
4135	*
4136	* Parse a content: comment, sub-element, reference or text.
4137	* Kept for compatibility with old code
4138	*/
4139
4140	static void
4141	htmlParseContent(htmlParserCtxtPtr ctxt) {
4142	xmlChar *currentNode;
4143	int depth;
4144	const xmlChar *name;
4145
4146	currentNode = xmlStrdup(ctxt->name);
4147	depth = ctxt->nameNr;
4148	while (`1`) {
4149	long cons = ctxt->nbChars;
4150
4151	GROW;
4152
4153	if (ctxt->instate == XML_PARSER_EOF)
4154	break;
4155
4156	/*
4157	* Our tag or one of it's parent or children is ending.
4158	*/
4159	if ((CUR == `'<'`) && (NXT(`1`) == `'/'`)) {
4160	if (htmlParseEndTag(ctxt) &&
4161	((currentNode != NULL) \|\| (ctxt->nameNr == `0`))) {
4162	if (currentNode != NULL)
4163	xmlFree(currentNode);
4164	return;
4165	}
4166	continue; / while /
4167	}
4168
4169	else if ((CUR == `'<'`) &&
4170	((IS_ASCII_LETTER(NXT(`1`))) \|\|
4171	(NXT(`1`) == `'_'`) \|\| (NXT(`1`) == `':'`))) {
4172	name = htmlParseHTMLName_nonInvasive(ctxt);
4173	if (name == NULL) {
4174	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4175	"htmlParseStartTag: invalid element name\n",
4176	NULL, NULL);
4177	/ Dump the bogus tag like browsers do /
4178	while ((IS_CHAR_CH(CUR)) && (CUR != `'>'`))
4179	NEXT;
4180
4181	if (currentNode != NULL)
4182	xmlFree(currentNode);
4183	return;
4184	}
4185
4186	if (ctxt->name != NULL) {
4187	if (htmlCheckAutoClose(name, ctxt->name) == `1`) {
4188	htmlAutoClose(ctxt, name);
4189	continue;
4190	}
4191	}
4192	}
4193
4194	/*
4195	* Has this node been popped out during parsing of
4196	* the next element
4197	*/
4198	if ((ctxt->nameNr > `0`) && (depth >= ctxt->nameNr) &&
4199	(!xmlStrEqual(currentNode, ctxt->name)))
4200	{
4201	if (currentNode != NULL) xmlFree(currentNode);
4202	return;
4203	}
4204
4205	if ((CUR != `0`) && ((xmlStrEqual(currentNode, BAD_CAST"script")) \|\|
4206	(xmlStrEqual(currentNode, BAD_CAST"style")))) {
4207	/*
4208	* Handle SCRIPT/STYLE separately
4209	*/
4210	htmlParseScript(ctxt);
4211	} else {
4212	/*
4213	* Sometimes DOCTYPE arrives in the middle of the document
4214	*/
4215	if ((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4216	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
4217	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
4218	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
4219	(UPP(`8`) == `'E'`)) {
4220	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4221	"Misplaced DOCTYPE declaration\n",
4222	BAD_CAST "DOCTYPE" , NULL);
4223	htmlParseDocTypeDecl(ctxt);
4224	}
4225
4226	/*
4227	* First case : a comment
4228	*/
4229	if ((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4230	(NXT(`2`) == `'-'`) && (NXT(`3`) == `'-'`)) {
4231	htmlParseComment(ctxt);
4232	}
4233
4234	/*
4235	* Second case : a Processing Instruction.
4236	*/
4237	else if ((CUR == `'<'`) && (NXT(`1`) == `'?'`)) {
4238	htmlParsePI(ctxt);
4239	}
4240
4241	/*
4242	* Third case : a sub-element.
4243	*/
4244	else if (CUR == `'<'`) {
4245	htmlParseElement(ctxt);
4246	}
4247
4248	/*
4249	* Fourth case : a reference. If if has not been resolved,
4250	* parsing returns it's Name, create the node
4251	*/
4252	else if (CUR == `'&'`) {
4253	htmlParseReference(ctxt);
4254	}
4255
4256	/*
4257	* Fifth case : end of the resource
4258	*/
4259	else if (CUR == `0`) {
4260	htmlAutoCloseOnEnd(ctxt);
4261	break;
4262	}
4263
4264	/*
4265	* Last case, text. Note that References are handled directly.
4266	*/
4267	else {
4268	htmlParseCharData(ctxt);
4269	}
4270
4271	if (cons == ctxt->nbChars) {
4272	if (ctxt->node != NULL) {
4273	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4274	"detected an error in element content\n",
4275	NULL, NULL);
4276	}
4277	break;
4278	}
4279	}
4280	GROW;
4281	}
4282	if (currentNode != NULL) xmlFree(currentNode);
4283	}
4284
4285	/**
4286	* htmlParseElement:
4287	* @ctxt: an HTML parser context
4288	*
4289	* parse an HTML element, this is highly recursive
4290	* this is kept for compatibility with previous code versions
4291	*
4292	* [39] element ::= EmptyElemTag \| STag content ETag
4293	*
4294	* [41] Attribute ::= Name Eq AttValue
4295	*/
4296
4297	void
4298	htmlParseElement(htmlParserCtxtPtr ctxt) {
4299	const xmlChar *name;
4300	xmlChar *currentNode = NULL;
4301	const htmlElemDesc * info;
4302	htmlParserNodeInfo node_info;
4303	int failed;
4304	int depth;
4305	const xmlChar *oldptr;
4306
4307	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4308	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4309	"htmlParseElement: context error\n", NULL, NULL);
4310	return;
4311	}
4312
4313	if (ctxt->instate == XML_PARSER_EOF)
4314	return;
4315
4316	/ Capture start position /
4317	if (ctxt->record_info) {
4318	node_info.begin_pos = ctxt->input->consumed +
4319	(CUR_PTR - ctxt->input->base);
4320	node_info.begin_line = ctxt->input->line;
4321	}
4322
4323	failed = htmlParseStartTag(ctxt);
4324	name = ctxt->name;
4325	if ((failed == -`1`) \|\| (name == NULL)) {
4326	if (CUR == `'>'`)
4327	NEXT;
4328	return;
4329	}
4330
4331	/*
4332	* Lookup the info for that element.
4333	*/
4334	info = htmlTagLookup(name);
4335	if (info == NULL) {
4336	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4337	"Tag %s invalid\n", name, NULL);
4338	}
4339
4340	/*
4341	* Check for an Empty Element labeled the XML/SGML way
4342	*/
4343	if ((CUR == `'/'`) && (NXT(`1`) == `'>'`)) {
4344	SKIP(`2`);
4345	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4346	ctxt->sax->endElement(ctxt->userData, name);
4347	htmlnamePop(ctxt);
4348	return;
4349	}
4350
4351	if (CUR == `'>'`) {
4352	NEXT;
4353	} else {
4354	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4355	"Couldn't find end of Start Tag %s\n", name, NULL);
4356
4357	/*
4358	* end of parsing of this node.
4359	*/
4360	if (xmlStrEqual(name, ctxt->name)) {
4361	nodePop(ctxt);
4362	htmlnamePop(ctxt);
4363	}
4364
4365	/*
4366	* Capture end position and add node
4367	*/
4368	if (ctxt->record_info) {
4369	node_info.end_pos = ctxt->input->consumed +
4370	(CUR_PTR - ctxt->input->base);
4371	node_info.end_line = ctxt->input->line;
4372	node_info.node = ctxt->node;
4373	xmlParserAddNodeInfo(ctxt, &node_info);
4374	}
4375	return;
4376	}
4377
4378	/*
4379	* Check for an Empty Element from DTD definition
4380	*/
4381	if ((info != NULL) && (info->empty)) {
4382	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4383	ctxt->sax->endElement(ctxt->userData, name);
4384	htmlnamePop(ctxt);
4385	return;
4386	}
4387
4388	/*
4389	* Parse the content of the element:
4390	*/
4391	currentNode = xmlStrdup(ctxt->name);
4392	depth = ctxt->nameNr;
4393	while (IS_CHAR_CH(CUR)) {
4394	oldptr = ctxt->input->cur;
4395	htmlParseContent(ctxt);
4396	if (oldptr==ctxt->input->cur) break;
4397	if (ctxt->nameNr < depth) break;
4398	}
4399
4400	/*
4401	* Capture end position and add node
4402	*/
4403	if ( currentNode != NULL && ctxt->record_info ) {
4404	node_info.end_pos = ctxt->input->consumed +
4405	(CUR_PTR - ctxt->input->base);
4406	node_info.end_line = ctxt->input->line;
4407	node_info.node = ctxt->node;
4408	xmlParserAddNodeInfo(ctxt, &node_info);
4409	}
4410	if (!IS_CHAR_CH(CUR)) {
4411	htmlAutoCloseOnEnd(ctxt);
4412	}
4413
4414	if (currentNode != NULL)
4415	xmlFree(currentNode);
4416	}
4417
4418	static void
4419	htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4420	/*
4421	* Capture end position and add node
4422	*/
4423	if ( ctxt->node != NULL && ctxt->record_info ) {
4424	ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4425	(CUR_PTR - ctxt->input->base);
4426	ctxt->nodeInfo->end_line = ctxt->input->line;
4427	ctxt->nodeInfo->node = ctxt->node;
4428	xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
4429	htmlNodeInfoPop(ctxt);
4430	}
4431	if (!IS_CHAR_CH(CUR)) {
4432	htmlAutoCloseOnEnd(ctxt);
4433	}
4434	}
4435
4436	/**
4437	* htmlParseElementInternal:
4438	* @ctxt: an HTML parser context
4439	*
4440	* parse an HTML element, new version, non recursive
4441	*
4442	* [39] element ::= EmptyElemTag \| STag content ETag
4443	*
4444	* [41] Attribute ::= Name Eq AttValue
4445	*/
4446
4447	static void
4448	htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4449	const xmlChar *name;
4450	const htmlElemDesc * info;
4451	htmlParserNodeInfo node_info = { NULL, `0`, `0`, `0`, `0` };
4452	int failed;
4453
4454	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4455	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4456	"htmlParseElementInternal: context error\n", NULL, NULL);
4457	return;
4458	}
4459
4460	if (ctxt->instate == XML_PARSER_EOF)
4461	return;
4462
4463	/ Capture start position /
4464	if (ctxt->record_info) {
4465	node_info.begin_pos = ctxt->input->consumed +
4466	(CUR_PTR - ctxt->input->base);
4467	node_info.begin_line = ctxt->input->line;
4468	}
4469
4470	failed = htmlParseStartTag(ctxt);
4471	name = ctxt->name;
4472	if ((failed == -`1`) \|\| (name == NULL)) {
4473	if (CUR == `'>'`)
4474	NEXT;
4475	return;
4476	}
4477
4478	/*
4479	* Lookup the info for that element.
4480	*/
4481	info = htmlTagLookup(name);
4482	if (info == NULL) {
4483	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4484	"Tag %s invalid\n", name, NULL);
4485	}
4486
4487	/*
4488	* Check for an Empty Element labeled the XML/SGML way
4489	*/
4490	if ((CUR == `'/'`) && (NXT(`1`) == `'>'`)) {
4491	SKIP(`2`);
4492	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4493	ctxt->sax->endElement(ctxt->userData, name);
4494	htmlnamePop(ctxt);
4495	return;
4496	}
4497
4498	if (CUR == `'>'`) {
4499	NEXT;
4500	} else {
4501	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4502	"Couldn't find end of Start Tag %s\n", name, NULL);
4503
4504	/*
4505	* end of parsing of this node.
4506	*/
4507	if (xmlStrEqual(name, ctxt->name)) {
4508	nodePop(ctxt);
4509	htmlnamePop(ctxt);
4510	}
4511
4512	if (ctxt->record_info)
4513	htmlNodeInfoPush(ctxt, &node_info);
4514	htmlParserFinishElementParsing(ctxt);
4515	return;
4516	}
4517
4518	/*
4519	* Check for an Empty Element from DTD definition
4520	*/
4521	if ((info != NULL) && (info->empty)) {
4522	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4523	ctxt->sax->endElement(ctxt->userData, name);
4524	htmlnamePop(ctxt);
4525	return;
4526	}
4527
4528	if (ctxt->record_info)
4529	htmlNodeInfoPush(ctxt, &node_info);
4530	}
4531
4532	/**
4533	* htmlParseContentInternal:
4534	* @ctxt: an HTML parser context
4535	*
4536	* Parse a content: comment, sub-element, reference or text.
4537	* New version for non recursive htmlParseElementInternal
4538	*/
4539
4540	static void
4541	htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4542	xmlChar *currentNode;
4543	int depth;
4544	const xmlChar *name;
4545
4546	currentNode = xmlStrdup(ctxt->name);
4547	depth = ctxt->nameNr;
4548	while (`1`) {
4549	long cons = ctxt->nbChars;
4550
4551	GROW;
4552
4553	if (ctxt->instate == XML_PARSER_EOF)
4554	break;
4555
4556	/*
4557	* Our tag or one of it's parent or children is ending.
4558	*/
4559	if ((CUR == `'<'`) && (NXT(`1`) == `'/'`)) {
4560	if (htmlParseEndTag(ctxt) &&
4561	((currentNode != NULL) \|\| (ctxt->nameNr == `0`))) {
4562	if (currentNode != NULL)
4563	xmlFree(currentNode);
4564
4565	currentNode = xmlStrdup(ctxt->name);
4566	depth = ctxt->nameNr;
4567	}
4568	continue; / while /
4569	}
4570
4571	else if ((CUR == `'<'`) &&
4572	((IS_ASCII_LETTER(NXT(`1`))) \|\|
4573	(NXT(`1`) == `'_'`) \|\| (NXT(`1`) == `':'`))) {
4574	name = htmlParseHTMLName_nonInvasive(ctxt);
4575	if (name == NULL) {
4576	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4577	"htmlParseStartTag: invalid element name\n",
4578	NULL, NULL);
4579	/ Dump the bogus tag like browsers do /
4580	while ((IS_CHAR_CH(CUR)) && (CUR != `'>'`))
4581	NEXT;
4582
4583	htmlParserFinishElementParsing(ctxt);
4584	if (currentNode != NULL)
4585	xmlFree(currentNode);
4586
4587	currentNode = xmlStrdup(ctxt->name);
4588	depth = ctxt->nameNr;
4589	continue;
4590	}
4591
4592	if (ctxt->name != NULL) {
4593	if (htmlCheckAutoClose(name, ctxt->name) == `1`) {
4594	htmlAutoClose(ctxt, name);
4595	continue;
4596	}
4597	}
4598	}
4599
4600	/*
4601	* Has this node been popped out during parsing of
4602	* the next element
4603	*/
4604	if ((ctxt->nameNr > `0`) && (depth >= ctxt->nameNr) &&
4605	(!xmlStrEqual(currentNode, ctxt->name)))
4606	{
4607	htmlParserFinishElementParsing(ctxt);
4608	if (currentNode != NULL) xmlFree(currentNode);
4609
4610	currentNode = xmlStrdup(ctxt->name);
4611	depth = ctxt->nameNr;
4612	continue;
4613	}
4614
4615	if ((CUR != `0`) && ((xmlStrEqual(currentNode, BAD_CAST"script")) \|\|
4616	(xmlStrEqual(currentNode, BAD_CAST"style")))) {
4617	/*
4618	* Handle SCRIPT/STYLE separately
4619	*/
4620	htmlParseScript(ctxt);
4621	} else {
4622	/*
4623	* Sometimes DOCTYPE arrives in the middle of the document
4624	*/
4625	if ((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4626	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
4627	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
4628	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
4629	(UPP(`8`) == `'E'`)) {
4630	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4631	"Misplaced DOCTYPE declaration\n",
4632	BAD_CAST "DOCTYPE" , NULL);
4633	htmlParseDocTypeDecl(ctxt);
4634	}
4635
4636	/*
4637	* First case : a comment
4638	*/
4639	if ((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4640	(NXT(`2`) == `'-'`) && (NXT(`3`) == `'-'`)) {
4641	htmlParseComment(ctxt);
4642	}
4643
4644	/*
4645	* Second case : a Processing Instruction.
4646	*/
4647	else if ((CUR == `'<'`) && (NXT(`1`) == `'?'`)) {
4648	htmlParsePI(ctxt);
4649	}
4650
4651	/*
4652	* Third case : a sub-element.
4653	*/
4654	else if (CUR == `'<'`) {
4655	htmlParseElementInternal(ctxt);
4656	if (currentNode != NULL) xmlFree(currentNode);
4657
4658	currentNode = xmlStrdup(ctxt->name);
4659	depth = ctxt->nameNr;
4660	}
4661
4662	/*
4663	* Fourth case : a reference. If if has not been resolved,
4664	* parsing returns it's Name, create the node
4665	*/
4666	else if (CUR == `'&'`) {
4667	htmlParseReference(ctxt);
4668	}
4669
4670	/*
4671	* Fifth case : end of the resource
4672	*/
4673	else if (CUR == `0`) {
4674	htmlAutoCloseOnEnd(ctxt);
4675	break;
4676	}
4677
4678	/*
4679	* Last case, text. Note that References are handled directly.
4680	*/
4681	else {
4682	htmlParseCharData(ctxt);
4683	}
4684
4685	if (cons == ctxt->nbChars) {
4686	if (ctxt->node != NULL) {
4687	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4688	"detected an error in element content\n",
4689	NULL, NULL);
4690	}
4691	break;
4692	}
4693	}
4694	GROW;
4695	}
4696	if (currentNode != NULL) xmlFree(currentNode);
4697	}
4698
4699	/**
4700	* htmlParseContent:
4701	* @ctxt: an HTML parser context
4702	*
4703	* Parse a content: comment, sub-element, reference or text.
4704	* This is the entry point when called from parser.c
4705	*/
4706
4707	void
4708	__htmlParseContent(void *ctxt) {
4709	if (ctxt != NULL)
4710	htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4711	}
4712
4713	/**
4714	* htmlParseDocument:
4715	* @ctxt: an HTML parser context
4716	*
4717	* parse an HTML document (and build a tree if using the standard SAX
4718	* interface).
4719	*
4720	* Returns 0, -1 in case of error. the parser context is augmented
4721	* as a result of the parsing.
4722	*/
4723
4724	int
4725	htmlParseDocument(htmlParserCtxtPtr ctxt) {
4726	xmlChar start[`4`];
4727	xmlCharEncoding enc;
4728	xmlDtdPtr dtd;
4729
4730	xmlInitParser();
4731
4732	htmlDefaultSAXHandlerInit();
4733
4734	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4735	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4736	"htmlParseDocument: context error\n", NULL, NULL);
4737	return(XML_ERR_INTERNAL_ERROR);
4738	}
4739	ctxt->html = `1`;
4740	ctxt->linenumbers = `1`;
4741	GROW;
4742	/*
4743	* SAX: beginning of the document processing.
4744	*/
4745	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4746	ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4747
4748	if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
4749	((ctxt->input->end - ctxt->input->cur) >= `4`)) {
4750	/*
4751	* Get the 4 first bytes and decode the charset
4752	* if enc != XML_CHAR_ENCODING_NONE
4753	* plug some encoding conversion routines.
4754	*/
4755	start[`0`] = RAW;
4756	start[`1`] = NXT(`1`);
4757	start[`2`] = NXT(`2`);
4758	start[`3`] = NXT(`3`);
4759	enc = xmlDetectCharEncoding(&start[`0`], `4`);
4760	if (enc != XML_CHAR_ENCODING_NONE) {
4761	xmlSwitchEncoding(ctxt, enc);
4762	}
4763	}
4764
4765	/*
4766	* Wipe out everything which is before the first '<'
4767	*/
4768	SKIP_BLANKS;
4769	if (CUR == `0`) {
4770	htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4771	"Document is empty\n", NULL, NULL);
4772	}
4773
4774	if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4775	ctxt->sax->startDocument(ctxt->userData);
4776
4777
4778	/*
4779	* Parse possible comments and PIs before any content
4780	*/
4781	while (((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4782	(NXT(`2`) == `'-'`) && (NXT(`3`) == `'-'`)) \|\|
4783	((CUR == `'<'`) && (NXT(`1`) == `'?'`))) {
4784	htmlParseComment(ctxt);
4785	htmlParsePI(ctxt);
4786	SKIP_BLANKS;
4787	}
4788
4789
4790	/*
4791	* Then possibly doc type declaration(s) and more Misc
4792	* (doctypedecl Misc*)?
4793	*/
4794	if ((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4795	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
4796	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
4797	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
4798	(UPP(`8`) == `'E'`)) {
4799	htmlParseDocTypeDecl(ctxt);
4800	}
4801	SKIP_BLANKS;
4802
4803	/*
4804	* Parse possible comments and PIs before any content
4805	*/
4806	while (((CUR == `'<'`) && (NXT(`1`) == `'!'`) &&
4807	(NXT(`2`) == `'-'`) && (NXT(`3`) == `'-'`)) \|\|
4808	((CUR == `'<'`) && (NXT(`1`) == `'?'`))) {
4809	htmlParseComment(ctxt);
4810	htmlParsePI(ctxt);
4811	SKIP_BLANKS;
4812	}
4813
4814	/*
4815	* Time to start parsing the tree itself
4816	*/
4817	htmlParseContentInternal(ctxt);
4818
4819	/*
4820	* autoclose
4821	*/
4822	if (CUR == `0`)
4823	htmlAutoCloseOnEnd(ctxt);
4824
4825
4826	/*
4827	* SAX: end of the document processing.
4828	*/
4829	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4830	ctxt->sax->endDocument(ctxt->userData);
4831
4832	if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
4833	dtd = xmlGetIntSubset(ctxt->myDoc);
4834	if (dtd == NULL)
4835	ctxt->myDoc->intSubset =
4836	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4837	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
4838	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
4839	}
4840	if (! ctxt->wellFormed) return(-`1`);
4841	return(`0`);
4842	}
4843
4844
4845	/************************************************************************
4846	* *
4847	* Parser contexts handling *
4848	* *
4849	************************************************************************/
4850
4851	/**
4852	* htmlInitParserCtxt:
4853	* @ctxt: an HTML parser context
4854	*
4855	* Initialize a parser context
4856	*
4857	* Returns 0 in case of success and -1 in case of error
4858	*/
4859
4860	static int
4861	htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
4862	{
4863	htmlSAXHandler *sax;
4864
4865	if (ctxt == NULL) return(-`1`);
4866	memset(ctxt, `0`, sizeof(htmlParserCtxt));
4867
4868	ctxt->dict = xmlDictCreate();
4869	if (ctxt->dict == NULL) {
4870	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
4871	return(-`1`);
4872	}
4873	sax = (htmlSAXHandler ) xmlMalloc(sizeof*(htmlSAXHandler));
4874	if (sax == NULL) {
4875	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
4876	return(-`1`);
4877	}
4878	else
4879	memset(sax, `0`, sizeof(htmlSAXHandler));
4880
4881	/ Allocate the Input stack /
4882	ctxt->inputTab = (htmlParserInputPtr *)
4883	xmlMalloc(`5` * sizeof(htmlParserInputPtr));
4884	if (ctxt->inputTab == NULL) {
4885	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
4886	ctxt->inputNr = `0`;
4887	ctxt->inputMax = `0`;
4888	ctxt->input = NULL;
4889	return(-`1`);
4890	}
4891	ctxt->inputNr = `0`;
4892	ctxt->inputMax = `5`;
4893	ctxt->input = NULL;
4894	ctxt->version = NULL;
4895	ctxt->encoding = NULL;
4896	ctxt->standalone = -`1`;
4897	ctxt->instate = XML_PARSER_START;
4898
4899	/ Allocate the Node stack /
4900	ctxt->nodeTab = (htmlNodePtr ) xmlMalloc(`10` sizeof(htmlNodePtr));
4901	if (ctxt->nodeTab == NULL) {
4902	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
4903	ctxt->nodeNr = `0`;
4904	ctxt->nodeMax = `0`;
4905	ctxt->node = NULL;
4906	ctxt->inputNr = `0`;
4907	ctxt->inputMax = `0`;
4908	ctxt->input = NULL;
4909	return(-`1`);
4910	}
4911	ctxt->nodeNr = `0`;
4912	ctxt->nodeMax = `10`;
4913	ctxt->node = NULL;
4914
4915	/ Allocate the Name stack /
4916	ctxt->nameTab = (const xmlChar *) xmlMalloc(`10` sizeof(xmlChar *));
4917	if (ctxt->nameTab == NULL) {
4918	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
4919	ctxt->nameNr = `0`;
4920	ctxt->nameMax = `0`;
4921	ctxt->name = NULL;
4922	ctxt->nodeNr = `0`;
4923	ctxt->nodeMax = `0`;
4924	ctxt->node = NULL;
4925	ctxt->inputNr = `0`;
4926	ctxt->inputMax = `0`;
4927	ctxt->input = NULL;
4928	return(-`1`);
4929	}
4930	ctxt->nameNr = `0`;
4931	ctxt->nameMax = `10`;
4932	ctxt->name = NULL;
4933
4934	ctxt->nodeInfoTab = NULL;
4935	ctxt->nodeInfoNr = `0`;
4936	ctxt->nodeInfoMax = `0`;
4937
4938	if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4939	else {
4940	ctxt->sax = sax;
4941	memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
4942	}
4943	ctxt->userData = ctxt;
4944	ctxt->myDoc = NULL;
4945	ctxt->wellFormed = `1`;
4946	ctxt->replaceEntities = `0`;
4947	ctxt->linenumbers = xmlLineNumbersDefaultValue;
4948	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
4949	ctxt->html = `1`;
4950	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
4951	ctxt->vctxt.userData = ctxt;
4952	ctxt->vctxt.error = xmlParserValidityError;
4953	ctxt->vctxt.warning = xmlParserValidityWarning;
4954	ctxt->record_info = `0`;
4955	ctxt->validate = `0`;
4956	ctxt->nbChars = `0`;
4957	ctxt->checkIndex = `0`;
4958	ctxt->catalogs = NULL;
4959	xmlInitNodeInfoSeq(&ctxt->node_seq);
4960	return(`0`);
4961	}
4962
4963	/**
4964	* htmlFreeParserCtxt:
4965	* @ctxt: an HTML parser context
4966	*
4967	* Free all the memory used by a parser context. However the parsed
4968	* document in ctxt->myDoc is not freed.
4969	*/
4970
4971	void
4972	htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
4973	{
4974	xmlFreeParserCtxt(ctxt);
4975	}
4976
4977	/**
4978	* htmlNewParserCtxt:
4979	*
4980	* Allocate and initialize a new parser context.
4981	*
4982	* Returns the htmlParserCtxtPtr or NULL in case of allocation error
4983	*/
4984
4985	htmlParserCtxtPtr
4986	htmlNewParserCtxt(void)
4987	{
4988	xmlParserCtxtPtr ctxt;
4989
4990	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
4991	if (ctxt == NULL) {
4992	htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
4993	return(NULL);
4994	}
4995	memset(ctxt, `0`, sizeof(xmlParserCtxt));
4996	if (htmlInitParserCtxt(ctxt) < `0`) {
4997	htmlFreeParserCtxt(ctxt);
4998	return(NULL);
4999	}
5000	return(ctxt);
5001	}
5002
5003	/**
5004	* htmlCreateMemoryParserCtxt:
5005	* @buffer: a pointer to a char array
5006	* @size: the size of the array
5007	*
5008	* Create a parser context for an HTML in-memory document.
5009	*
5010	* Returns the new parser context or NULL
5011	*/
5012	htmlParserCtxtPtr
5013	htmlCreateMemoryParserCtxt(const char buffer, int* size) {
5014	xmlParserCtxtPtr ctxt;
5015	xmlParserInputPtr input;
5016	xmlParserInputBufferPtr buf;
5017
5018	if (buffer == NULL)
5019	return(NULL);
5020	if (size <= `0`)
5021	return(NULL);
5022
5023	ctxt = htmlNewParserCtxt();
5024	if (ctxt == NULL)
5025	return(NULL);
5026
5027	buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
5028	if (buf == NULL) return(NULL);
5029
5030	input = xmlNewInputStream(ctxt);
5031	if (input == NULL) {
5032	xmlFreeParserCtxt(ctxt);
5033	return(NULL);
5034	}
5035
5036	input->filename = NULL;
5037	input->buf = buf;
5038	xmlBufResetInput(buf->buffer, input);
5039
5040	inputPush(ctxt, input);
5041	return(ctxt);
5042	}
5043
5044	/**
5045	* htmlCreateDocParserCtxt:
5046	* @cur: a pointer to an array of xmlChar
5047	* @encoding: a free form C string describing the HTML document encoding, or NULL
5048	*
5049	* Create a parser context for an HTML document.
5050	*
5051	* TODO: check the need to add encoding handling there
5052	*
5053	* Returns the new parser context or NULL
5054	*/
5055	static htmlParserCtxtPtr
5056	htmlCreateDocParserCtxt(const xmlChar cur, const* char *encoding) {
5057	int len;
5058	htmlParserCtxtPtr ctxt;
5059
5060	if (cur == NULL)
5061	return(NULL);
5062	len = xmlStrlen(cur);
5063	ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
5064	if (ctxt == NULL)
5065	return(NULL);
5066
5067	if (encoding != NULL) {
5068	xmlCharEncoding enc;
5069	xmlCharEncodingHandlerPtr handler;
5070
5071	if (ctxt->input->encoding != NULL)
5072	xmlFree((xmlChar *) ctxt->input->encoding);
5073	ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
5074
5075	enc = xmlParseCharEncoding(encoding);
5076	/*
5077	* registered set of known encodings
5078	*/
5079	if (enc != XML_CHAR_ENCODING_ERROR) {
5080	xmlSwitchEncoding(ctxt, enc);
5081	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5082	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
5083	"Unsupported encoding %s\n",
5084	(const xmlChar *) encoding, NULL);
5085	}
5086	} else {
5087	/*
5088	* fallback for unknown encodings
5089	*/
5090	handler = xmlFindCharEncodingHandler((const char *) encoding);
5091	if (handler != NULL) {
5092	xmlSwitchToEncoding(ctxt, handler);
5093	} else {
5094	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
5095	"Unsupported encoding %s\n",
5096	(const xmlChar *) encoding, NULL);
5097	}
5098	}
5099	}
5100	return(ctxt);
5101	}
5102
5103	#ifdef LIBXML_PUSH_ENABLED
5104	/************************************************************************
5105	* *
5106	* Progressive parsing interfaces *
5107	* *
5108	************************************************************************/
5109
5110	/**
5111	* htmlParseLookupSequence:
5112	* @ctxt: an HTML parser context
5113	* @first: the first char to lookup
5114	* @next: the next char to lookup or zero
5115	* @third: the next char to lookup or zero
5116	* @comment: flag to force checking inside comments
5117	*
5118	* Try to find if a sequence (first, next, third) or just (first next) or
5119	* (first) is available in the input stream.
5120	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
5121	* to avoid rescanning sequences of bytes, it DOES change the state of the
5122	* parser, do not use liberally.
5123	* This is basically similar to xmlParseLookupSequence()
5124	*
5125	* Returns the index to the current parsing point if the full sequence
5126	* is available, -1 otherwise.
5127	*/
5128	static int
5129	htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
5130	xmlChar next, xmlChar third, int iscomment,
5131	int ignoreattrval)
5132	{
5133	int base, len;
5134	htmlParserInputPtr in;
5135	const xmlChar *buf;
5136	int incomment = `0`;
5137	int invalue = `0`;
5138	char valdellim = `0x0`;
5139
5140	in = ctxt->input;
5141	if (in == NULL)
5142	return (-`1`);
5143
5144	base = in->cur - in->base;
5145	if (base < `0`)
5146	return (-`1`);
5147
5148	if (ctxt->checkIndex > base)
5149	base = ctxt->checkIndex;
5150
5151	if (in->buf == NULL) {
5152	buf = in->base;
5153	len = in->length;
5154	} else {
5155	buf = xmlBufContent(in->buf->buffer);
5156	len = xmlBufUse(in->buf->buffer);
5157	}
5158
5159	/ take into account the sequence length /
5160	if (third)
5161	len -= `2`;
5162	else if (next)
5163	len--;
5164	for (; base < len; base++) {
5165	if ((!incomment) && (base + `4` < len) && (!iscomment)) {
5166	if ((buf[base] == `'<'`) && (buf[base + `1`] == `'!'`) &&
5167	(buf[base + `2`] == `'-'`) && (buf[base + `3`] == `'-'`)) {
5168	incomment = `1`;
5169	/ do not increment past <! - some people use <!--> /
5170	base += `2`;
5171	}
5172	}
5173	if (ignoreattrval) {
5174	if (buf[base] == `'"'` \|\| buf[base] == `'\''`) {
5175	if (invalue) {
5176	if (buf[base] == valdellim) {
5177	invalue = `0`;
5178	continue;
5179	}
5180	} else {
5181	valdellim = buf[base];
5182	invalue = `1`;
5183	continue;
5184	}
5185	} else if (invalue) {
5186	continue;
5187	}
5188	}
5189	if (incomment) {
5190	if (base + `3` > len)
5191	return (-`1`);
5192	if ((buf[base] == `'-'`) && (buf[base + `1`] == `'-'`) &&
5193	(buf[base + `2`] == `'>'`)) {
5194	incomment = `0`;
5195	base += `2`;
5196	}
5197	continue;
5198	}
5199	if (buf[base] == first) {
5200	if (third != `0`) {
5201	if ((buf[base + `1`] != next) \|\| (buf[base + `2`] != third))
5202	continue;
5203	} else if (next != `0`) {
5204	if (buf[base + `1`] != next)
5205	continue;
5206	}
5207	ctxt->checkIndex = `0`;
5208	#ifdef DEBUG_PUSH
5209	if (next == `0`)
5210	xmlGenericError(xmlGenericErrorContext,
5211	"HPP: lookup '%c' found at %d\n",
5212	first, base);
5213	else if (third == `0`)
5214	xmlGenericError(xmlGenericErrorContext,
5215	"HPP: lookup '%c%c' found at %d\n",
5216	first, next, base);
5217	else
5218	xmlGenericError(xmlGenericErrorContext,
5219	"HPP: lookup '%c%c%c' found at %d\n",
5220	first, next, third, base);
5221	#endif
5222	return (base - (in->cur - in->base));
5223	}
5224	}
5225	if ((!incomment) && (!invalue))
5226	ctxt->checkIndex = base;
5227	#ifdef DEBUG_PUSH
5228	if (next == `0`)
5229	xmlGenericError(xmlGenericErrorContext,
5230	"HPP: lookup '%c' failed\n", first);
5231	else if (third == `0`)
5232	xmlGenericError(xmlGenericErrorContext,
5233	"HPP: lookup '%c%c' failed\n", first, next);
5234	else
5235	xmlGenericError(xmlGenericErrorContext,
5236	"HPP: lookup '%c%c%c' failed\n", first, next,
5237	third);
5238	#endif
5239	return (-`1`);
5240	}
5241
5242	/**
5243	* htmlParseLookupChars:
5244	* @ctxt: an HTML parser context
5245	* @stop: Array of chars, which stop the lookup.
5246	* @stopLen: Length of stop-Array
5247	*
5248	* Try to find if any char of the stop-Array is available in the input
5249	* stream.
5250	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
5251	* to avoid rescanning sequences of bytes, it DOES change the state of the
5252	* parser, do not use liberally.
5253	*
5254	* Returns the index to the current parsing point if a stopChar
5255	* is available, -1 otherwise.
5256	*/
5257	static int
5258	htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
5259	int stopLen)
5260	{
5261	int base, len;
5262	htmlParserInputPtr in;
5263	const xmlChar *buf;
5264	int incomment = `0`;
5265	int i;
5266
5267	in = ctxt->input;
5268	if (in == NULL)
5269	return (-`1`);
5270
5271	base = in->cur - in->base;
5272	if (base < `0`)
5273	return (-`1`);
5274
5275	if (ctxt->checkIndex > base)
5276	base = ctxt->checkIndex;
5277
5278	if (in->buf == NULL) {
5279	buf = in->base;
5280	len = in->length;
5281	} else {
5282	buf = xmlBufContent(in->buf->buffer);
5283	len = xmlBufUse(in->buf->buffer);
5284	}
5285
5286	for (; base < len; base++) {
5287	if (!incomment && (base + `4` < len)) {
5288	if ((buf[base] == `'<'`) && (buf[base + `1`] == `'!'`) &&
5289	(buf[base + `2`] == `'-'`) && (buf[base + `3`] == `'-'`)) {
5290	incomment = `1`;
5291	/ do not increment past <! - some people use <!--> /
5292	base += `2`;
5293	}
5294	}
5295	if (incomment) {
5296	if (base + `3` > len)
5297	return (-`1`);
5298	if ((buf[base] == `'-'`) && (buf[base + `1`] == `'-'`) &&
5299	(buf[base + `2`] == `'>'`)) {
5300	incomment = `0`;
5301	base += `2`;
5302	}
5303	continue;
5304	}
5305	for (i = `0`; i < stopLen; ++i) {
5306	if (buf[base] == stop[i]) {
5307	ctxt->checkIndex = `0`;
5308	return (base - (in->cur - in->base));
5309	}
5310	}
5311	}
5312	ctxt->checkIndex = base;
5313	return (-`1`);
5314	}
5315
5316	/**
5317	* htmlParseTryOrFinish:
5318	* @ctxt: an HTML parser context
5319	* @terminate: last chunk indicator
5320	*
5321	* Try to progress on parsing
5322	*
5323	* Returns zero if no parsing was possible
5324	*/
5325	static int
5326	htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
5327	int ret = `0`;
5328	htmlParserInputPtr in;
5329	int avail = `0`;
5330	xmlChar cur, next;
5331
5332	htmlParserNodeInfo node_info;
5333
5334	#ifdef DEBUG_PUSH
5335	switch (ctxt->instate) {
5336	case XML_PARSER_EOF:
5337	xmlGenericError(xmlGenericErrorContext,
5338	"HPP: try EOF\n"); break;
5339	case XML_PARSER_START:
5340	xmlGenericError(xmlGenericErrorContext,
5341	"HPP: try START\n"); break;
5342	case XML_PARSER_MISC:
5343	xmlGenericError(xmlGenericErrorContext,
5344	"HPP: try MISC\n");break;
5345	case XML_PARSER_COMMENT:
5346	xmlGenericError(xmlGenericErrorContext,
5347	"HPP: try COMMENT\n");break;
5348	case XML_PARSER_PROLOG:
5349	xmlGenericError(xmlGenericErrorContext,
5350	"HPP: try PROLOG\n");break;
5351	case XML_PARSER_START_TAG:
5352	xmlGenericError(xmlGenericErrorContext,
5353	"HPP: try START_TAG\n");break;
5354	case XML_PARSER_CONTENT:
5355	xmlGenericError(xmlGenericErrorContext,
5356	"HPP: try CONTENT\n");break;
5357	case XML_PARSER_CDATA_SECTION:
5358	xmlGenericError(xmlGenericErrorContext,
5359	"HPP: try CDATA_SECTION\n");break;
5360	case XML_PARSER_END_TAG:
5361	xmlGenericError(xmlGenericErrorContext,
5362	"HPP: try END_TAG\n");break;
5363	case XML_PARSER_ENTITY_DECL:
5364	xmlGenericError(xmlGenericErrorContext,
5365	"HPP: try ENTITY_DECL\n");break;
5366	case XML_PARSER_ENTITY_VALUE:
5367	xmlGenericError(xmlGenericErrorContext,
5368	"HPP: try ENTITY_VALUE\n");break;
5369	case XML_PARSER_ATTRIBUTE_VALUE:
5370	xmlGenericError(xmlGenericErrorContext,
5371	"HPP: try ATTRIBUTE_VALUE\n");break;
5372	case XML_PARSER_DTD:
5373	xmlGenericError(xmlGenericErrorContext,
5374	"HPP: try DTD\n");break;
5375	case XML_PARSER_EPILOG:
5376	xmlGenericError(xmlGenericErrorContext,
5377	"HPP: try EPILOG\n");break;
5378	case XML_PARSER_PI:
5379	xmlGenericError(xmlGenericErrorContext,
5380	"HPP: try PI\n");break;
5381	case XML_PARSER_SYSTEM_LITERAL:
5382	xmlGenericError(xmlGenericErrorContext,
5383	"HPP: try SYSTEM_LITERAL\n");break;
5384	}
5385	#endif
5386
5387	while (`1`) {
5388
5389	in = ctxt->input;
5390	if (in == NULL) break;
5391	if (in->buf == NULL)
5392	avail = in->length - (in->cur - in->base);
5393	else
5394	avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5395	if ((avail == `0`) && (terminate)) {
5396	htmlAutoCloseOnEnd(ctxt);
5397	if ((ctxt->nameNr == `0`) && (ctxt->instate != XML_PARSER_EOF)) {
5398	/*
5399	* SAX: end of the document processing.
5400	*/
5401	ctxt->instate = XML_PARSER_EOF;
5402	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5403	ctxt->sax->endDocument(ctxt->userData);
5404	}
5405	}
5406	if (avail < `1`)
5407	goto done;
5408	cur = in->cur[`0`];
5409	if (cur == `0`) {
5410	SKIP(`1`);
5411	continue;
5412	}
5413
5414	switch (ctxt->instate) {
5415	case XML_PARSER_EOF:
5416	/*
5417	* Document parsing is done !
5418	*/
5419	goto done;
5420	case XML_PARSER_START:
5421	/*
5422	* Very first chars read from the document flow.
5423	*/
5424	cur = in->cur[`0`];
5425	if (IS_BLANK_CH(cur)) {
5426	SKIP_BLANKS;
5427	if (in->buf == NULL)
5428	avail = in->length - (in->cur - in->base);
5429	else
5430	avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5431	}
5432	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5433	ctxt->sax->setDocumentLocator(ctxt->userData,
5434	&xmlDefaultSAXLocator);
5435	if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5436	(!ctxt->disableSAX))
5437	ctxt->sax->startDocument(ctxt->userData);
5438
5439	cur = in->cur[`0`];
5440	next = in->cur[`1`];
5441	if ((cur == `'<'`) && (next == `'!'`) &&
5442	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
5443	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
5444	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
5445	(UPP(`8`) == `'E'`)) {
5446	if ((!terminate) &&
5447	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5448	goto done;
5449	#ifdef DEBUG_PUSH
5450	xmlGenericError(xmlGenericErrorContext,
5451	"HPP: Parsing internal subset\n");
5452	#endif
5453	htmlParseDocTypeDecl(ctxt);
5454	ctxt->instate = XML_PARSER_PROLOG;
5455	#ifdef DEBUG_PUSH
5456	xmlGenericError(xmlGenericErrorContext,
5457	"HPP: entering PROLOG\n");
5458	#endif
5459	} else {
5460	ctxt->instate = XML_PARSER_MISC;
5461	#ifdef DEBUG_PUSH
5462	xmlGenericError(xmlGenericErrorContext,
5463	"HPP: entering MISC\n");
5464	#endif
5465	}
5466	break;
5467	case XML_PARSER_MISC:
5468	SKIP_BLANKS;
5469	if (in->buf == NULL)
5470	avail = in->length - (in->cur - in->base);
5471	else
5472	avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5473	/*
5474	* no chars in buffer
5475	*/
5476	if (avail < `1`)
5477	goto done;
5478	/*
5479	* not enouth chars in buffer
5480	*/
5481	if (avail < `2`) {
5482	if (!terminate)
5483	goto done;
5484	else
5485	next = `' '`;
5486	} else {
5487	next = in->cur[`1`];
5488	}
5489	cur = in->cur[`0`];
5490	if ((cur == `'<'`) && (next == `'!'`) &&
5491	(in->cur[`2`] == `'-'`) && (in->cur[`3`] == `'-'`)) {
5492	if ((!terminate) &&
5493	(htmlParseLookupSequence(ctxt, `'-'`, `'-'`, `'>'`, `1`, `1`) < `0`))
5494	goto done;
5495	#ifdef DEBUG_PUSH
5496	xmlGenericError(xmlGenericErrorContext,
5497	"HPP: Parsing Comment\n");
5498	#endif
5499	htmlParseComment(ctxt);
5500	ctxt->instate = XML_PARSER_MISC;
5501	} else if ((cur == `'<'`) && (next == `'?'`)) {
5502	if ((!terminate) &&
5503	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5504	goto done;
5505	#ifdef DEBUG_PUSH
5506	xmlGenericError(xmlGenericErrorContext,
5507	"HPP: Parsing PI\n");
5508	#endif
5509	htmlParsePI(ctxt);
5510	ctxt->instate = XML_PARSER_MISC;
5511	} else if ((cur == `'<'`) && (next == `'!'`) &&
5512	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
5513	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
5514	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
5515	(UPP(`8`) == `'E'`)) {
5516	if ((!terminate) &&
5517	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5518	goto done;
5519	#ifdef DEBUG_PUSH
5520	xmlGenericError(xmlGenericErrorContext,
5521	"HPP: Parsing internal subset\n");
5522	#endif
5523	htmlParseDocTypeDecl(ctxt);
5524	ctxt->instate = XML_PARSER_PROLOG;
5525	#ifdef DEBUG_PUSH
5526	xmlGenericError(xmlGenericErrorContext,
5527	"HPP: entering PROLOG\n");
5528	#endif
5529	} else if ((cur == `'<'`) && (next == `'!'`) &&
5530	(avail < `9`)) {
5531	goto done;
5532	} else {
5533	ctxt->instate = XML_PARSER_START_TAG;
5534	#ifdef DEBUG_PUSH
5535	xmlGenericError(xmlGenericErrorContext,
5536	"HPP: entering START_TAG\n");
5537	#endif
5538	}
5539	break;
5540	case XML_PARSER_PROLOG:
5541	SKIP_BLANKS;
5542	if (in->buf == NULL)
5543	avail = in->length - (in->cur - in->base);
5544	else
5545	avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5546	if (avail < `2`)
5547	goto done;
5548	cur = in->cur[`0`];
5549	next = in->cur[`1`];
5550	if ((cur == `'<'`) && (next == `'!'`) &&
5551	(in->cur[`2`] == `'-'`) && (in->cur[`3`] == `'-'`)) {
5552	if ((!terminate) &&
5553	(htmlParseLookupSequence(ctxt, `'-'`, `'-'`, `'>'`, `1`, `1`) < `0`))
5554	goto done;
5555	#ifdef DEBUG_PUSH
5556	xmlGenericError(xmlGenericErrorContext,
5557	"HPP: Parsing Comment\n");
5558	#endif
5559	htmlParseComment(ctxt);
5560	ctxt->instate = XML_PARSER_PROLOG;
5561	} else if ((cur == `'<'`) && (next == `'?'`)) {
5562	if ((!terminate) &&
5563	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5564	goto done;
5565	#ifdef DEBUG_PUSH
5566	xmlGenericError(xmlGenericErrorContext,
5567	"HPP: Parsing PI\n");
5568	#endif
5569	htmlParsePI(ctxt);
5570	ctxt->instate = XML_PARSER_PROLOG;
5571	} else if ((cur == `'<'`) && (next == `'!'`) &&
5572	(avail < `4`)) {
5573	goto done;
5574	} else {
5575	ctxt->instate = XML_PARSER_START_TAG;
5576	#ifdef DEBUG_PUSH
5577	xmlGenericError(xmlGenericErrorContext,
5578	"HPP: entering START_TAG\n");
5579	#endif
5580	}
5581	break;
5582	case XML_PARSER_EPILOG:
5583	if (in->buf == NULL)
5584	avail = in->length - (in->cur - in->base);
5585	else
5586	avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5587	if (avail < `1`)
5588	goto done;
5589	cur = in->cur[`0`];
5590	if (IS_BLANK_CH(cur)) {
5591	htmlParseCharData(ctxt);
5592	goto done;
5593	}
5594	if (avail < `2`)
5595	goto done;
5596	next = in->cur[`1`];
5597	if ((cur == `'<'`) && (next == `'!'`) &&
5598	(in->cur[`2`] == `'-'`) && (in->cur[`3`] == `'-'`)) {
5599	if ((!terminate) &&
5600	(htmlParseLookupSequence(ctxt, `'-'`, `'-'`, `'>'`, `1`, `1`) < `0`))
5601	goto done;
5602	#ifdef DEBUG_PUSH
5603	xmlGenericError(xmlGenericErrorContext,
5604	"HPP: Parsing Comment\n");
5605	#endif
5606	htmlParseComment(ctxt);
5607	ctxt->instate = XML_PARSER_EPILOG;
5608	} else if ((cur == `'<'`) && (next == `'?'`)) {
5609	if ((!terminate) &&
5610	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5611	goto done;
5612	#ifdef DEBUG_PUSH
5613	xmlGenericError(xmlGenericErrorContext,
5614	"HPP: Parsing PI\n");
5615	#endif
5616	htmlParsePI(ctxt);
5617	ctxt->instate = XML_PARSER_EPILOG;
5618	} else if ((cur == `'<'`) && (next == `'!'`) &&
5619	(avail < `4`)) {
5620	goto done;
5621	} else {
5622	ctxt->errNo = XML_ERR_DOCUMENT_END;
5623	ctxt->wellFormed = `0`;
5624	ctxt->instate = XML_PARSER_EOF;
5625	#ifdef DEBUG_PUSH
5626	xmlGenericError(xmlGenericErrorContext,
5627	"HPP: entering EOF\n");
5628	#endif
5629	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5630	ctxt->sax->endDocument(ctxt->userData);
5631	goto done;
5632	}
5633	break;
5634	case XML_PARSER_START_TAG: {
5635	const xmlChar *name;
5636	int failed;
5637	const htmlElemDesc * info;
5638
5639	/*
5640	* no chars in buffer
5641	*/
5642	if (avail < `1`)
5643	goto done;
5644	/*
5645	* not enouth chars in buffer
5646	*/
5647	if (avail < `2`) {
5648	if (!terminate)
5649	goto done;
5650	else
5651	next = `' '`;
5652	} else {
5653	next = in->cur[`1`];
5654	}
5655	cur = in->cur[`0`];
5656	if (cur != `'<'`) {
5657	ctxt->instate = XML_PARSER_CONTENT;
5658	#ifdef DEBUG_PUSH
5659	xmlGenericError(xmlGenericErrorContext,
5660	"HPP: entering CONTENT\n");
5661	#endif
5662	break;
5663	}
5664	if (next == `'/'`) {
5665	ctxt->instate = XML_PARSER_END_TAG;
5666	ctxt->checkIndex = `0`;
5667	#ifdef DEBUG_PUSH
5668	xmlGenericError(xmlGenericErrorContext,
5669	"HPP: entering END_TAG\n");
5670	#endif
5671	break;
5672	}
5673	if ((!terminate) &&
5674	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5675	goto done;
5676
5677	/ Capture start position /
5678	if (ctxt->record_info) {
5679	node_info.begin_pos = ctxt->input->consumed +
5680	(CUR_PTR - ctxt->input->base);
5681	node_info.begin_line = ctxt->input->line;
5682	}
5683
5684
5685	failed = htmlParseStartTag(ctxt);
5686	name = ctxt->name;
5687	if ((failed == -`1`) \|\|
5688	(name == NULL)) {
5689	if (CUR == `'>'`)
5690	NEXT;
5691	break;
5692	}
5693
5694	/*
5695	* Lookup the info for that element.
5696	*/
5697	info = htmlTagLookup(name);
5698	if (info == NULL) {
5699	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
5700	"Tag %s invalid\n", name, NULL);
5701	}
5702
5703	/*
5704	* Check for an Empty Element labeled the XML/SGML way
5705	*/
5706	if ((CUR == `'/'`) && (NXT(`1`) == `'>'`)) {
5707	SKIP(`2`);
5708	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5709	ctxt->sax->endElement(ctxt->userData, name);
5710	htmlnamePop(ctxt);
5711	ctxt->instate = XML_PARSER_CONTENT;
5712	#ifdef DEBUG_PUSH
5713	xmlGenericError(xmlGenericErrorContext,
5714	"HPP: entering CONTENT\n");
5715	#endif
5716	break;
5717	}
5718
5719	if (CUR == `'>'`) {
5720	NEXT;
5721	} else {
5722	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
5723	"Couldn't find end of Start Tag %s\n",
5724	name, NULL);
5725
5726	/*
5727	* end of parsing of this node.
5728	*/
5729	if (xmlStrEqual(name, ctxt->name)) {
5730	nodePop(ctxt);
5731	htmlnamePop(ctxt);
5732	}
5733
5734	if (ctxt->record_info)
5735	htmlNodeInfoPush(ctxt, &node_info);
5736
5737	ctxt->instate = XML_PARSER_CONTENT;
5738	#ifdef DEBUG_PUSH
5739	xmlGenericError(xmlGenericErrorContext,
5740	"HPP: entering CONTENT\n");
5741	#endif
5742	break;
5743	}
5744
5745	/*
5746	* Check for an Empty Element from DTD definition
5747	*/
5748	if ((info != NULL) && (info->empty)) {
5749	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5750	ctxt->sax->endElement(ctxt->userData, name);
5751	htmlnamePop(ctxt);
5752	}
5753
5754	if (ctxt->record_info)
5755	htmlNodeInfoPush(ctxt, &node_info);
5756
5757	ctxt->instate = XML_PARSER_CONTENT;
5758	#ifdef DEBUG_PUSH
5759	xmlGenericError(xmlGenericErrorContext,
5760	"HPP: entering CONTENT\n");
5761	#endif
5762	break;
5763	}
5764	case XML_PARSER_CONTENT: {
5765	long cons;
5766	/*
5767	* Handle preparsed entities and charRef
5768	*/
5769	if (ctxt->token != `0`) {
5770	xmlChar chr[`2`] = { `0` , `0` } ;
5771
5772	chr[`0`] = (xmlChar) ctxt->token;
5773	htmlCheckParagraph(ctxt);
5774	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5775	ctxt->sax->characters(ctxt->userData, chr, `1`);
5776	ctxt->token = `0`;
5777	ctxt->checkIndex = `0`;
5778	}
5779	if ((avail == `1`) && (terminate)) {
5780	cur = in->cur[`0`];
5781	if ((cur != `'<'`) && (cur != `'&'`)) {
5782	if (ctxt->sax != NULL) {
5783	if (IS_BLANK_CH(cur)) {
5784	if (ctxt->keepBlanks) {
5785	if (ctxt->sax->characters != NULL)
5786	ctxt->sax->characters(
5787	ctxt->userData, &in->cur[`0`], `1`);
5788	} else {
5789	if (ctxt->sax->ignorableWhitespace != NULL)
5790	ctxt->sax->ignorableWhitespace(
5791	ctxt->userData, &in->cur[`0`], `1`);
5792	}
5793	} else {
5794	htmlCheckParagraph(ctxt);
5795	if (ctxt->sax->characters != NULL)
5796	ctxt->sax->characters(
5797	ctxt->userData, &in->cur[`0`], `1`);
5798	}
5799	}
5800	ctxt->token = `0`;
5801	ctxt->checkIndex = `0`;
5802	in->cur++;
5803	break;
5804	}
5805	}
5806	if (avail < `2`)
5807	goto done;
5808	cur = in->cur[`0`];
5809	next = in->cur[`1`];
5810	cons = ctxt->nbChars;
5811	if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) \|\|
5812	(xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5813	/*
5814	* Handle SCRIPT/STYLE separately
5815	*/
5816	if (!terminate) {
5817	int idx;
5818	xmlChar val;
5819
5820	idx = htmlParseLookupSequence(ctxt, `'<'`, `'/'`, `0`, `0`, `0`);
5821	if (idx < `0`)
5822	goto done;
5823	val = in->cur[idx + `2`];
5824	if (val == `0`) / bad cut of input /
5825	goto done;
5826	}
5827	htmlParseScript(ctxt);
5828	if ((cur == `'<'`) && (next == `'/'`)) {
5829	ctxt->instate = XML_PARSER_END_TAG;
5830	ctxt->checkIndex = `0`;
5831	#ifdef DEBUG_PUSH
5832	xmlGenericError(xmlGenericErrorContext,
5833	"HPP: entering END_TAG\n");
5834	#endif
5835	break;
5836	}
5837	} else {
5838	/*
5839	* Sometimes DOCTYPE arrives in the middle of the document
5840	*/
5841	if ((cur == `'<'`) && (next == `'!'`) &&
5842	(UPP(`2`) == `'D'`) && (UPP(`3`) == `'O'`) &&
5843	(UPP(`4`) == `'C'`) && (UPP(`5`) == `'T'`) &&
5844	(UPP(`6`) == `'Y'`) && (UPP(`7`) == `'P'`) &&
5845	(UPP(`8`) == `'E'`)) {
5846	if ((!terminate) &&
5847	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5848	goto done;
5849	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5850	"Misplaced DOCTYPE declaration\n",
5851	BAD_CAST "DOCTYPE" , NULL);
5852	htmlParseDocTypeDecl(ctxt);
5853	} else if ((cur == `'<'`) && (next == `'!'`) &&
5854	(in->cur[`2`] == `'-'`) && (in->cur[`3`] == `'-'`)) {
5855	if ((!terminate) &&
5856	(htmlParseLookupSequence(
5857	ctxt, `'-'`, `'-'`, `'>'`, `1`, `1`) < `0`))
5858	goto done;
5859	#ifdef DEBUG_PUSH
5860	xmlGenericError(xmlGenericErrorContext,
5861	"HPP: Parsing Comment\n");
5862	#endif
5863	htmlParseComment(ctxt);
5864	ctxt->instate = XML_PARSER_CONTENT;
5865	} else if ((cur == `'<'`) && (next == `'?'`)) {
5866	if ((!terminate) &&
5867	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5868	goto done;
5869	#ifdef DEBUG_PUSH
5870	xmlGenericError(xmlGenericErrorContext,
5871	"HPP: Parsing PI\n");
5872	#endif
5873	htmlParsePI(ctxt);
5874	ctxt->instate = XML_PARSER_CONTENT;
5875	} else if ((cur == `'<'`) && (next == `'!'`) && (avail < `4`)) {
5876	goto done;
5877	} else if ((cur == `'<'`) && (next == `'/'`)) {
5878	ctxt->instate = XML_PARSER_END_TAG;
5879	ctxt->checkIndex = `0`;
5880	#ifdef DEBUG_PUSH
5881	xmlGenericError(xmlGenericErrorContext,
5882	"HPP: entering END_TAG\n");
5883	#endif
5884	break;
5885	} else if (cur == `'<'`) {
5886	ctxt->instate = XML_PARSER_START_TAG;
5887	ctxt->checkIndex = `0`;
5888	#ifdef DEBUG_PUSH
5889	xmlGenericError(xmlGenericErrorContext,
5890	"HPP: entering START_TAG\n");
5891	#endif
5892	break;
5893	} else if (cur == `'&'`) {
5894	if ((!terminate) &&
5895	(htmlParseLookupChars(ctxt,
5896	BAD_CAST "; >/", `4`) < `0`))
5897	goto done;
5898	#ifdef DEBUG_PUSH
5899	xmlGenericError(xmlGenericErrorContext,
5900	"HPP: Parsing Reference\n");
5901	#endif
5902	/ TODO: check generation of subtrees if noent !!! /
5903	htmlParseReference(ctxt);
5904	} else {
5905	/*
5906	* check that the text sequence is complete
5907	* before handing out the data to the parser
5908	* to avoid problems with erroneous end of
5909	* data detection.
5910	*/
5911	if ((!terminate) &&
5912	(htmlParseLookupChars(ctxt, BAD_CAST "<&", `2`) < `0`))
5913	goto done;
5914	ctxt->checkIndex = `0`;
5915	#ifdef DEBUG_PUSH
5916	xmlGenericError(xmlGenericErrorContext,
5917	"HPP: Parsing char data\n");
5918	#endif
5919	htmlParseCharData(ctxt);
5920	}
5921	}
5922	if (cons == ctxt->nbChars) {
5923	if (ctxt->node != NULL) {
5924	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5925	"detected an error in element content\n",
5926	NULL, NULL);
5927	}
5928	NEXT;
5929	break;
5930	}
5931
5932	break;
5933	}
5934	case XML_PARSER_END_TAG:
5935	if (avail < `2`)
5936	goto done;
5937	if ((!terminate) &&
5938	(htmlParseLookupSequence(ctxt, `'>'`, `0`, `0`, `0`, `1`) < `0`))
5939	goto done;
5940	htmlParseEndTag(ctxt);
5941	if (ctxt->nameNr == `0`) {
5942	ctxt->instate = XML_PARSER_EPILOG;
5943	} else {
5944	ctxt->instate = XML_PARSER_CONTENT;
5945	}
5946	ctxt->checkIndex = `0`;
5947	#ifdef DEBUG_PUSH
5948	xmlGenericError(xmlGenericErrorContext,
5949	"HPP: entering CONTENT\n");
5950	#endif
5951	break;
5952	case XML_PARSER_CDATA_SECTION:
5953	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5954	"HPP: internal error, state == CDATA\n",
5955	NULL, NULL);
5956	ctxt->instate = XML_PARSER_CONTENT;
5957	ctxt->checkIndex = `0`;
5958	#ifdef DEBUG_PUSH
5959	xmlGenericError(xmlGenericErrorContext,
5960	"HPP: entering CONTENT\n");
5961	#endif
5962	break;
5963	case XML_PARSER_DTD:
5964	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5965	"HPP: internal error, state == DTD\n",
5966	NULL, NULL);
5967	ctxt->instate = XML_PARSER_CONTENT;
5968	ctxt->checkIndex = `0`;
5969	#ifdef DEBUG_PUSH
5970	xmlGenericError(xmlGenericErrorContext,
5971	"HPP: entering CONTENT\n");
5972	#endif
5973	break;
5974	case XML_PARSER_COMMENT:
5975	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5976	"HPP: internal error, state == COMMENT\n",
5977	NULL, NULL);
5978	ctxt->instate = XML_PARSER_CONTENT;
5979	ctxt->checkIndex = `0`;
5980	#ifdef DEBUG_PUSH
5981	xmlGenericError(xmlGenericErrorContext,
5982	"HPP: entering CONTENT\n");
5983	#endif
5984	break;
5985	case XML_PARSER_PI:
5986	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5987	"HPP: internal error, state == PI\n",
5988	NULL, NULL);
5989	ctxt->instate = XML_PARSER_CONTENT;
5990	ctxt->checkIndex = `0`;
5991	#ifdef DEBUG_PUSH
5992	xmlGenericError(xmlGenericErrorContext,
5993	"HPP: entering CONTENT\n");
5994	#endif
5995	break;
5996	case XML_PARSER_ENTITY_DECL:
5997	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5998	"HPP: internal error, state == ENTITY_DECL\n",
5999	NULL, NULL);
6000	ctxt->instate = XML_PARSER_CONTENT;
6001	ctxt->checkIndex = `0`;
6002	#ifdef DEBUG_PUSH
6003	xmlGenericError(xmlGenericErrorContext,
6004	"HPP: entering CONTENT\n");
6005	#endif
6006	break;
6007	case XML_PARSER_ENTITY_VALUE:
6008	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6009	"HPP: internal error, state == ENTITY_VALUE\n",
6010	NULL, NULL);
6011	ctxt->instate = XML_PARSER_CONTENT;
6012	ctxt->checkIndex = `0`;
6013	#ifdef DEBUG_PUSH
6014	xmlGenericError(xmlGenericErrorContext,
6015	"HPP: entering DTD\n");
6016	#endif
6017	break;
6018	case XML_PARSER_ATTRIBUTE_VALUE:
6019	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6020	"HPP: internal error, state == ATTRIBUTE_VALUE\n",
6021	NULL, NULL);
6022	ctxt->instate = XML_PARSER_START_TAG;
6023	ctxt->checkIndex = `0`;
6024	#ifdef DEBUG_PUSH
6025	xmlGenericError(xmlGenericErrorContext,
6026	"HPP: entering START_TAG\n");
6027	#endif
6028	break;
6029	case XML_PARSER_SYSTEM_LITERAL:
6030	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6031	"HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
6032	NULL, NULL);
6033	ctxt->instate = XML_PARSER_CONTENT;
6034	ctxt->checkIndex = `0`;
6035	#ifdef DEBUG_PUSH
6036	xmlGenericError(xmlGenericErrorContext,
6037	"HPP: entering CONTENT\n");
6038	#endif
6039	break;
6040	case XML_PARSER_IGNORE:
6041	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6042	"HPP: internal error, state == XML_PARSER_IGNORE\n",
6043	NULL, NULL);
6044	ctxt->instate = XML_PARSER_CONTENT;
6045	ctxt->checkIndex = `0`;
6046	#ifdef DEBUG_PUSH
6047	xmlGenericError(xmlGenericErrorContext,
6048	"HPP: entering CONTENT\n");
6049	#endif
6050	break;
6051	case XML_PARSER_PUBLIC_LITERAL:
6052	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6053	"HPP: internal error, state == XML_PARSER_LITERAL\n",
6054	NULL, NULL);
6055	ctxt->instate = XML_PARSER_CONTENT;
6056	ctxt->checkIndex = `0`;
6057	#ifdef DEBUG_PUSH
6058	xmlGenericError(xmlGenericErrorContext,
6059	"HPP: entering CONTENT\n");
6060	#endif
6061	break;
6062
6063	}
6064	}
6065	done:
6066	if ((avail == `0`) && (terminate)) {
6067	htmlAutoCloseOnEnd(ctxt);
6068	if ((ctxt->nameNr == `0`) && (ctxt->instate != XML_PARSER_EOF)) {
6069	/*
6070	* SAX: end of the document processing.
6071	*/
6072	ctxt->instate = XML_PARSER_EOF;
6073	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
6074	ctxt->sax->endDocument(ctxt->userData);
6075	}
6076	}
6077	if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
6078	((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
6079	(ctxt->instate == XML_PARSER_EPILOG))) {
6080	xmlDtdPtr dtd;
6081	dtd = xmlGetIntSubset(ctxt->myDoc);
6082	if (dtd == NULL)
6083	ctxt->myDoc->intSubset =
6084	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
6085	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
6086	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
6087	}
6088	#ifdef DEBUG_PUSH
6089	xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
6090	#endif
6091	return(ret);
6092	}
6093
6094	/**
6095	* htmlParseChunk:
6096	* @ctxt: an HTML parser context
6097	* @chunk: an char array
6098	* @size: the size in byte of the chunk
6099	* @terminate: last chunk indicator
6100	*
6101	* Parse a Chunk of memory
6102	*
6103	* Returns zero if no error, the xmlParserErrors otherwise.
6104	*/
6105	int
6106	htmlParseChunk(htmlParserCtxtPtr ctxt, const char chunk, int* size,
6107	int terminate) {
6108	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
6109	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6110	"htmlParseChunk: context error\n", NULL, NULL);
6111	return(XML_ERR_INTERNAL_ERROR);
6112	}
6113	if ((size > `0`) && (chunk != NULL) && (ctxt->input != NULL) &&
6114	(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
6115	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
6116	size_t cur = ctxt->input->cur - ctxt->input->base;
6117	int res;
6118
6119	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6120	if (res < `0`) {
6121	ctxt->errNo = XML_PARSER_EOF;
6122	ctxt->disableSAX = `1`;
6123	return (XML_PARSER_EOF);
6124	}
6125	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
6126	#ifdef DEBUG_PUSH
6127	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6128	#endif
6129
6130	#if 0
6131	if ((terminate) \|\| (ctxt->input->buf->buffer->use > `80`))
6132	htmlParseTryOrFinish(ctxt, terminate);
6133	#endif
6134	} else if (ctxt->instate != XML_PARSER_EOF) {
6135	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
6136	xmlParserInputBufferPtr in = ctxt->input->buf;
6137	if ((in->encoder != NULL) && (in->buffer != NULL) &&
6138	(in->raw != NULL)) {
6139	int nbchars;
6140	size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
6141	size_t current = ctxt->input->cur - ctxt->input->base;
6142
6143	nbchars = xmlCharEncInput(in, terminate);
6144	if (nbchars < `0`) {
6145	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
6146	"encoder error\n", NULL, NULL);
6147	return(XML_ERR_INVALID_ENCODING);
6148	}
6149	xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
6150	}
6151	}
6152	}
6153	htmlParseTryOrFinish(ctxt, terminate);
6154	if (terminate) {
6155	if ((ctxt->instate != XML_PARSER_EOF) &&
6156	(ctxt->instate != XML_PARSER_EPILOG) &&
6157	(ctxt->instate != XML_PARSER_MISC)) {
6158	ctxt->errNo = XML_ERR_DOCUMENT_END;
6159	ctxt->wellFormed = `0`;
6160	}
6161	if (ctxt->instate != XML_PARSER_EOF) {
6162	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
6163	ctxt->sax->endDocument(ctxt->userData);
6164	}
6165	ctxt->instate = XML_PARSER_EOF;
6166	}
6167	return((xmlParserErrors) ctxt->errNo);
6168	}
6169
6170	/************************************************************************
6171	* *
6172	* User entry points *
6173	* *
6174	************************************************************************/
6175
6176	/**
6177	* htmlCreatePushParserCtxt:
6178	* @sax: a SAX handler
6179	* @user_data: The user data returned on SAX callbacks
6180	* @chunk: a pointer to an array of chars
6181	* @size: number of chars in the array
6182	* @filename: an optional file name or URI
6183	* @enc: an optional encoding
6184	*
6185	* Create a parser context for using the HTML parser in push mode
6186	* The value of @filename is used for fetching external entities
6187	* and error/warning reports.
6188	*
6189	* Returns the new parser context or NULL
6190	*/
6191	htmlParserCtxtPtr
6192	htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
6193	const char chunk, int* size, const char *filename,
6194	xmlCharEncoding enc) {
6195	htmlParserCtxtPtr ctxt;
6196	htmlParserInputPtr inputStream;
6197	xmlParserInputBufferPtr buf;
6198
6199	xmlInitParser();
6200
6201	buf = xmlAllocParserInputBuffer(enc);
6202	if (buf == NULL) return(NULL);
6203
6204	ctxt = htmlNewParserCtxt();
6205	if (ctxt == NULL) {
6206	xmlFreeParserInputBuffer(buf);
6207	return(NULL);
6208	}
6209	if(enc==XML_CHAR_ENCODING_UTF8 \|\| buf->encoder)
6210	ctxt->charset=XML_CHAR_ENCODING_UTF8;
6211	if (sax != NULL) {
6212	if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
6213	xmlFree(ctxt->sax);
6214	ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
6215	if (ctxt->sax == NULL) {
6216	xmlFree(buf);
6217	xmlFree(ctxt);
6218	return(NULL);
6219	}
6220	memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
6221	if (user_data != NULL)
6222	ctxt->userData = user_data;
6223	}
6224	if (filename == NULL) {
6225	ctxt->directory = NULL;
6226	} else {
6227	ctxt->directory = xmlParserGetDirectory(filename);
6228	}
6229
6230	inputStream = htmlNewInputStream(ctxt);
6231	if (inputStream == NULL) {
6232	xmlFreeParserCtxt(ctxt);
6233	xmlFree(buf);
6234	return(NULL);
6235	}
6236
6237	if (filename == NULL)
6238	inputStream->filename = NULL;
6239	else
6240	inputStream->filename = (char *)
6241	xmlCanonicPath((const xmlChar *) filename);
6242	inputStream->buf = buf;
6243	xmlBufResetInput(buf->buffer, inputStream);
6244
6245	inputPush(ctxt, inputStream);
6246
6247	if ((size > `0`) && (chunk != NULL) && (ctxt->input != NULL) &&
6248	(ctxt->input->buf != NULL)) {
6249	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
6250	size_t cur = ctxt->input->cur - ctxt->input->base;
6251
6252	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6253
6254	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
6255	#ifdef DEBUG_PUSH
6256	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6257	#endif
6258	}
6259	ctxt->progressive = `1`;
6260
6261	return(ctxt);
6262	}
6263	#endif /* LIBXML_PUSH_ENABLED */
6264
6265	/**
6266	* htmlSAXParseDoc:
6267	* @cur: a pointer to an array of xmlChar
6268	* @encoding: a free form C string describing the HTML document encoding, or NULL
6269	* @sax: the SAX handler block
6270	* @userData: if using SAX, this pointer will be provided on callbacks.
6271	*
6272	* Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
6273	* to handle parse events. If sax is NULL, fallback to the default DOM
6274	* behavior and return a tree.
6275	*
6276	* Returns the resulting document tree unless SAX is NULL or the document is
6277	* not well formed.
6278	*/
6279
6280	htmlDocPtr
6281	htmlSAXParseDoc(const xmlChar cur, const* char *encoding,
6282	htmlSAXHandlerPtr sax, void *userData) {
6283	htmlDocPtr ret;
6284	htmlParserCtxtPtr ctxt;
6285
6286	xmlInitParser();
6287
6288	if (cur == NULL) return(NULL);
6289
6290
6291	ctxt = htmlCreateDocParserCtxt(cur, encoding);
6292	if (ctxt == NULL) return(NULL);
6293	if (sax != NULL) {
6294	if (ctxt->sax != NULL) xmlFree (ctxt->sax);
6295	ctxt->sax = sax;
6296	ctxt->userData = userData;
6297	}
6298
6299	htmlParseDocument(ctxt);
6300	ret = ctxt->myDoc;
6301	if (sax != NULL) {
6302	ctxt->sax = NULL;
6303	ctxt->userData = NULL;
6304	}
6305	htmlFreeParserCtxt(ctxt);
6306
6307	return(ret);
6308	}
6309
6310	/**
6311	* htmlParseDoc:
6312	* @cur: a pointer to an array of xmlChar
6313	* @encoding: a free form C string describing the HTML document encoding, or NULL
6314	*
6315	* parse an HTML in-memory document and build a tree.
6316	*
6317	* Returns the resulting document tree
6318	*/
6319
6320	htmlDocPtr
6321	htmlParseDoc(const xmlChar cur, const* char *encoding) {
6322	return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
6323	}
6324
6325
6326	/**
6327	* htmlCreateFileParserCtxt:
6328	* @filename: the filename
6329	* @encoding: a free form C string describing the HTML document encoding, or NULL
6330	*
6331	* Create a parser context for a file content.
6332	* Automatic support for ZLIB/Compress compressed document is provided
6333	* by default if found at compile-time.
6334	*
6335	* Returns the new parser context or NULL
6336	*/
6337	htmlParserCtxtPtr
6338	htmlCreateFileParserCtxt(const char filename, const* char *encoding)
6339	{
6340	htmlParserCtxtPtr ctxt;
6341	htmlParserInputPtr inputStream;
6342	char *canonicFilename;
6343	/ htmlCharEncoding enc; /
6344	xmlChar content, content_line = (xmlChar *) "charset=";
6345
6346	if (filename == NULL)
6347	return(NULL);
6348
6349	ctxt = htmlNewParserCtxt();
6350	if (ctxt == NULL) {
6351	return(NULL);
6352	}
6353	canonicFilename = (char ) xmlCanonicPath((const* xmlChar *) filename);
6354	if (canonicFilename == NULL) {
6355	#ifdef LIBXML_SAX1_ENABLED
6356	if (xmlDefaultSAXHandler.error != NULL) {
6357	xmlDefaultSAXHandler.error(NULL, "out of memory\n");
6358	}
6359	#endif
6360	xmlFreeParserCtxt(ctxt);
6361	return(NULL);
6362	}
6363
6364	inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
6365	xmlFree(canonicFilename);
6366	if (inputStream == NULL) {
6367	xmlFreeParserCtxt(ctxt);
6368	return(NULL);
6369	}
6370
6371	inputPush(ctxt, inputStream);
6372
6373	/ set encoding /
6374	if (encoding) {
6375	size_t l = strlen(encoding);
6376
6377	if (l < `1000`) {
6378	content = xmlMallocAtomic (xmlStrlen(content_line) + l + `1`);
6379	if (content) {
6380	strcpy ((char )content, (char* *)content_line);
6381	strcat ((char )content, (char* *)encoding);
6382	htmlCheckEncoding (ctxt, content);
6383	xmlFree (content);
6384	}
6385	}
6386	}
6387
6388	return(ctxt);
6389	}
6390
6391	/**
6392	* htmlSAXParseFile:
6393	* @filename: the filename
6394	* @encoding: a free form C string describing the HTML document encoding, or NULL
6395	* @sax: the SAX handler block
6396	* @userData: if using SAX, this pointer will be provided on callbacks.
6397	*
6398	* parse an HTML file and build a tree. Automatic support for ZLIB/Compress
6399	* compressed document is provided by default if found at compile-time.
6400	* It use the given SAX function block to handle the parsing callback.
6401	* If sax is NULL, fallback to the default DOM tree building routines.
6402	*
6403	* Returns the resulting document tree unless SAX is NULL or the document is
6404	* not well formed.
6405	*/
6406
6407	htmlDocPtr
6408	htmlSAXParseFile(const char filename, const* char *encoding, htmlSAXHandlerPtr sax,
6409	void *userData) {
6410	htmlDocPtr ret;
6411	htmlParserCtxtPtr ctxt;
6412	htmlSAXHandlerPtr oldsax = NULL;
6413
6414	xmlInitParser();
6415
6416	ctxt = htmlCreateFileParserCtxt(filename, encoding);
6417	if (ctxt == NULL) return(NULL);
6418	if (sax != NULL) {
6419	oldsax = ctxt->sax;
6420	ctxt->sax = sax;
6421	ctxt->userData = userData;
6422	}
6423
6424	htmlParseDocument(ctxt);
6425
6426	ret = ctxt->myDoc;
6427	if (sax != NULL) {
6428	ctxt->sax = oldsax;
6429	ctxt->userData = NULL;
6430	}
6431	htmlFreeParserCtxt(ctxt);
6432
6433	return(ret);
6434	}
6435
6436	/**
6437	* htmlParseFile:
6438	* @filename: the filename
6439	* @encoding: a free form C string describing the HTML document encoding, or NULL
6440	*
6441	* parse an HTML file and build a tree. Automatic support for ZLIB/Compress
6442	* compressed document is provided by default if found at compile-time.
6443	*
6444	* Returns the resulting document tree
6445	*/
6446
6447	htmlDocPtr
6448	htmlParseFile(const char filename, const* char *encoding) {
6449	return(htmlSAXParseFile(filename, encoding, NULL, NULL));
6450	}
6451
6452	/**
6453	* htmlHandleOmittedElem:
6454	* @val: int 0 or 1
6455	*
6456	* Set and return the previous value for handling HTML omitted tags.
6457	*
6458	* Returns the last value for 0 for no handling, 1 for auto insertion.
6459	*/
6460
6461	int
6462	htmlHandleOmittedElem(int val) {
6463	int old = htmlOmittedDefaultValue;
6464
6465	htmlOmittedDefaultValue = val;
6466	return(old);
6467	}
6468
6469	/**
6470	* htmlElementAllowedHere:
6471	* @parent: HTML parent element
6472	* @elt: HTML element
6473	*
6474	* Checks whether an HTML element may be a direct child of a parent element.
6475	* Note - doesn't check for deprecated elements
6476	*
6477	* Returns 1 if allowed; 0 otherwise.
6478	*/
6479	int
6480	htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
6481	const char** p ;
6482
6483	if ( ! elt \|\| ! parent \|\| ! parent->subelts )
6484	return `0` ;
6485
6486	for ( p = parent->subelts; *p; ++p )
6487	if ( !xmlStrcmp((const xmlChar )p, elt) )
6488	return `1` ;
6489
6490	return `0` ;
6491	}
6492	/**
6493	* htmlElementStatusHere:
6494	* @parent: HTML parent element
6495	* @elt: HTML element
6496	*
6497	* Checks whether an HTML element may be a direct child of a parent element.
6498	* and if so whether it is valid or deprecated.
6499	*
6500	* Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
6501	*/
6502	htmlStatus
6503	htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
6504	if ( ! parent \|\| ! elt )
6505	return HTML_INVALID ;
6506	if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
6507	return HTML_INVALID ;
6508
6509	return ( elt->dtd == `0` ) ? HTML_VALID : HTML_DEPRECATED ;
6510	}
6511	/**
6512	* htmlAttrAllowed:
6513	* @elt: HTML element
6514	* @attr: HTML attribute
6515	* @legacy: whether to allow deprecated attributes
6516	*
6517	* Checks whether an attribute is valid for an element
6518	* Has full knowledge of Required and Deprecated attributes
6519	*
6520	* Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
6521	*/
6522	htmlStatus
6523	htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
6524	const char** p ;
6525
6526	if ( !elt \|\| ! attr )
6527	return HTML_INVALID ;
6528
6529	if ( elt->attrs_req )
6530	for ( p = elt->attrs_req; *p; ++p)
6531	if ( !xmlStrcmp((const xmlChar)p, attr) )
6532	return HTML_REQUIRED ;
6533
6534	if ( elt->attrs_opt )
6535	for ( p = elt->attrs_opt; *p; ++p)
6536	if ( !xmlStrcmp((const xmlChar)p, attr) )
6537	return HTML_VALID ;
6538
6539	if ( legacy && elt->attrs_depr )
6540	for ( p = elt->attrs_depr; *p; ++p)
6541	if ( !xmlStrcmp((const xmlChar)p, attr) )
6542	return HTML_DEPRECATED ;
6543
6544	return HTML_INVALID ;
6545	}
6546	/**
6547	* htmlNodeStatus:
6548	* @node: an htmlNodePtr in a tree
6549	* @legacy: whether to allow deprecated elements (YES is faster here
6550	* for Element nodes)
6551	*
6552	* Checks whether the tree node is valid. Experimental (the author
6553	* only uses the HTML enhancements in a SAX parser)
6554	*
6555	* Return: for Element nodes, a return from htmlElementAllowedHere (if
6556	* legacy allowed) or htmlElementStatusHere (otherwise).
6557	* for Attribute nodes, a return from htmlAttrAllowed
6558	* for other nodes, HTML_NA (no checks performed)
6559	*/
6560	htmlStatus
6561	htmlNodeStatus(const htmlNodePtr node, int legacy) {
6562	if ( ! node )
6563	return HTML_INVALID ;
6564
6565	switch ( node->type ) {
6566	case XML_ELEMENT_NODE:
6567	return legacy
6568	? ( htmlElementAllowedHere (
6569	htmlTagLookup(node->parent->name) , node->name
6570	) ? HTML_VALID : HTML_INVALID )
6571	: htmlElementStatusHere(
6572	htmlTagLookup(node->parent->name) ,
6573	htmlTagLookup(node->name) )
6574	;
6575	case XML_ATTRIBUTE_NODE:
6576	return htmlAttrAllowed(
6577	htmlTagLookup(node->parent->name) , node->name, legacy) ;
6578	default: return HTML_NA ;
6579	}
6580	}
6581	/************************************************************************
6582	* *
6583	* New set (2.6.0) of simpler and more flexible APIs *
6584	* *
6585	************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * use. Wrapped in do/while(0) so that the macro behaves like a single
 * statement; the caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
6597
6598	/**
6599	* htmlCtxtReset:
6600	* @ctxt: an HTML parser context
6601	*
6602	* Reset a parser context
6603	*/
6604	void
6605	htmlCtxtReset(htmlParserCtxtPtr ctxt)
6606	{
6607	xmlParserInputPtr input;
6608	xmlDictPtr dict;
6609
6610	if (ctxt == NULL)
6611	return;
6612
6613	xmlInitParser();
6614	dict = ctxt->dict;
6615
6616	while ((input = inputPop(ctxt)) != NULL) { / Non consuming /
6617	xmlFreeInputStream(input);
6618	}
6619	ctxt->inputNr = `0`;
6620	ctxt->input = NULL;
6621
6622	ctxt->spaceNr = `0`;
6623	if (ctxt->spaceTab != NULL) {
6624	ctxt->spaceTab[`0`] = -`1`;
6625	ctxt->space = &ctxt->spaceTab[`0`];
6626	} else {
6627	ctxt->space = NULL;
6628	}
6629
6630
6631	ctxt->nodeNr = `0`;
6632	ctxt->node = NULL;
6633
6634	ctxt->nameNr = `0`;
6635	ctxt->name = NULL;
6636
6637	DICT_FREE(ctxt->version);
6638	ctxt->version = NULL;
6639	DICT_FREE(ctxt->encoding);
6640	ctxt->encoding = NULL;
6641	DICT_FREE(ctxt->directory);
6642	ctxt->directory = NULL;
6643	DICT_FREE(ctxt->extSubURI);
6644	ctxt->extSubURI = NULL;
6645	DICT_FREE(ctxt->extSubSystem);
6646	ctxt->extSubSystem = NULL;
6647	if (ctxt->myDoc != NULL)
6648	xmlFreeDoc(ctxt->myDoc);
6649	ctxt->myDoc = NULL;
6650
6651	ctxt->standalone = -`1`;
6652	ctxt->hasExternalSubset = `0`;
6653	ctxt->hasPErefs = `0`;
6654	ctxt->html = `1`;
6655	ctxt->external = `0`;
6656	ctxt->instate = XML_PARSER_START;
6657	ctxt->token = `0`;
6658
6659	ctxt->wellFormed = `1`;
6660	ctxt->nsWellFormed = `1`;
6661	ctxt->disableSAX = `0`;
6662	ctxt->valid = `1`;
6663	ctxt->vctxt.userData = ctxt;
6664	ctxt->vctxt.error = xmlParserValidityError;
6665	ctxt->vctxt.warning = xmlParserValidityWarning;
6666	ctxt->record_info = `0`;
6667	ctxt->nbChars = `0`;
6668	ctxt->checkIndex = `0`;
6669	ctxt->inSubset = `0`;
6670	ctxt->errNo = XML_ERR_OK;
6671	ctxt->depth = `0`;
6672	ctxt->charset = XML_CHAR_ENCODING_NONE;
6673	ctxt->catalogs = NULL;
6674	xmlInitNodeInfoSeq(&ctxt->node_seq);
6675
6676	if (ctxt->attsDefault != NULL) {
6677	xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
6678	ctxt->attsDefault = NULL;
6679	}
6680	if (ctxt->attsSpecial != NULL) {
6681	xmlHashFree(ctxt->attsSpecial, NULL);
6682	ctxt->attsSpecial = NULL;
6683	}
6684	}
6685
6686	/**
6687	* htmlCtxtUseOptions:
6688	* @ctxt: an HTML parser context
6689	* @options: a combination of htmlParserOption(s)
6690	*
6691	* Applies the options to the parser context
6692	*
6693	* Returns 0 in case of success, the set of unknown or unimplemented options
6694	* in case of error.
6695	*/
6696	int
6697	htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
6698	{
6699	if (ctxt == NULL)
6700	return(-`1`);
6701
6702	if (options & HTML_PARSE_NOWARNING) {
6703	ctxt->sax->warning = NULL;
6704	ctxt->vctxt.warning = NULL;
6705	options -= XML_PARSE_NOWARNING;
6706	ctxt->options \|= XML_PARSE_NOWARNING;
6707	}
6708	if (options & HTML_PARSE_NOERROR) {
6709	ctxt->sax->error = NULL;
6710	ctxt->vctxt.error = NULL;
6711	ctxt->sax->fatalError = NULL;
6712	options -= XML_PARSE_NOERROR;
6713	ctxt->options \|= XML_PARSE_NOERROR;
6714	}
6715	if (options & HTML_PARSE_PEDANTIC) {
6716	ctxt->pedantic = `1`;
6717	options -= XML_PARSE_PEDANTIC;
6718	ctxt->options \|= XML_PARSE_PEDANTIC;
6719	} else
6720	ctxt->pedantic = `0`;
6721	if (options & XML_PARSE_NOBLANKS) {
6722	ctxt->keepBlanks = `0`;
6723	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
6724	options -= XML_PARSE_NOBLANKS;
6725	ctxt->options \|= XML_PARSE_NOBLANKS;
6726	} else
6727	ctxt->keepBlanks = `1`;
6728	if (options & HTML_PARSE_RECOVER) {
6729	ctxt->recovery = `1`;
6730	options -= HTML_PARSE_RECOVER;
6731	} else
6732	ctxt->recovery = `0`;
6733	if (options & HTML_PARSE_COMPACT) {
6734	ctxt->options \|= HTML_PARSE_COMPACT;
6735	options -= HTML_PARSE_COMPACT;
6736	}
6737	if (options & XML_PARSE_HUGE) {
6738	ctxt->options \|= XML_PARSE_HUGE;
6739	options -= XML_PARSE_HUGE;
6740	}
6741	if (options & HTML_PARSE_NODEFDTD) {
6742	ctxt->options \|= HTML_PARSE_NODEFDTD;
6743	options -= HTML_PARSE_NODEFDTD;
6744	}
6745	if (options & HTML_PARSE_IGNORE_ENC) {
6746	ctxt->options \|= HTML_PARSE_IGNORE_ENC;
6747	options -= HTML_PARSE_IGNORE_ENC;
6748	}
6749	if (options & HTML_PARSE_NOIMPLIED) {
6750	ctxt->options \|= HTML_PARSE_NOIMPLIED;
6751	options -= HTML_PARSE_NOIMPLIED;
6752	}
6753	ctxt->dictNames = `0`;
6754	return (options);
6755	}
6756
6757	/**
6758	* htmlDoRead:
6759	* @ctxt: an HTML parser context
6760	* @URL: the base URL to use for the document
6761	* @encoding: the document encoding, or NULL
6762	* @options: a combination of htmlParserOption(s)
6763	* @reuse: keep the context for reuse
6764	*
6765	* Common front-end for the htmlRead functions
6766	*
6767	* Returns the resulting document tree or NULL
6768	*/
6769	static htmlDocPtr
6770	htmlDoRead(htmlParserCtxtPtr ctxt, const char URL, const* char *encoding,
6771	int options, int reuse)
6772	{
6773	htmlDocPtr ret;
6774
6775	htmlCtxtUseOptions(ctxt, options);
6776	ctxt->html = `1`;
6777	if (encoding != NULL) {
6778	xmlCharEncodingHandlerPtr hdlr;
6779
6780	hdlr = xmlFindCharEncodingHandler(encoding);
6781	if (hdlr != NULL) {
6782	xmlSwitchToEncoding(ctxt, hdlr);
6783	if (ctxt->input->encoding != NULL)
6784	xmlFree((xmlChar *) ctxt->input->encoding);
6785	ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
6786	}
6787	}
6788	if ((URL != NULL) && (ctxt->input != NULL) &&
6789	(ctxt->input->filename == NULL))
6790	ctxt->input->filename = (char ) xmlStrdup((const* xmlChar *) URL);
6791	htmlParseDocument(ctxt);
6792	ret = ctxt->myDoc;
6793	ctxt->myDoc = NULL;
6794	if (!reuse) {
6795	if ((ctxt->dictNames) &&
6796	(ret != NULL) &&
6797	(ret->dict == ctxt->dict))
6798	ctxt->dict = NULL;
6799	xmlFreeParserCtxt(ctxt);
6800	}
6801	return (ret);
6802	}
6803
6804	/**
6805	* htmlReadDoc:
6806	* @cur: a pointer to a zero terminated string
6807	* @URL: the base URL to use for the document
6808	* @encoding: the document encoding, or NULL
6809	* @options: a combination of htmlParserOption(s)
6810	*
6811	* parse an XML in-memory document and build a tree.
6812	*
6813	* Returns the resulting document tree
6814	*/
6815	htmlDocPtr
6816	htmlReadDoc(const xmlChar * cur, const char URL, const* char encoding, int* options)
6817	{
6818	htmlParserCtxtPtr ctxt;
6819
6820	if (cur == NULL)
6821	return (NULL);
6822
6823	xmlInitParser();
6824	ctxt = htmlCreateDocParserCtxt(cur, NULL);
6825	if (ctxt == NULL)
6826	return (NULL);
6827	return (htmlDoRead(ctxt, URL, encoding, options, `0`));
6828	}
6829
6830	/**
6831	* htmlReadFile:
6832	* @filename: a file or URL
6833	* @encoding: the document encoding, or NULL
6834	* @options: a combination of htmlParserOption(s)
6835	*
6836	* parse an XML file from the filesystem or the network.
6837	*
6838	* Returns the resulting document tree
6839	*/
6840	htmlDocPtr
6841	htmlReadFile(const char filename, const* char encoding, int* options)
6842	{
6843	htmlParserCtxtPtr ctxt;
6844
6845	xmlInitParser();
6846	ctxt = htmlCreateFileParserCtxt(filename, encoding);
6847	if (ctxt == NULL)
6848	return (NULL);
6849	return (htmlDoRead(ctxt, NULL, NULL, options, `0`));
6850	}
6851
6852	/**
6853	* htmlReadMemory:
6854	* @buffer: a pointer to a char array
6855	* @size: the size of the array
6856	* @URL: the base URL to use for the document
6857	* @encoding: the document encoding, or NULL
6858	* @options: a combination of htmlParserOption(s)
6859	*
6860	* parse an XML in-memory document and build a tree.
6861	*
6862	* Returns the resulting document tree
6863	*/
6864	htmlDocPtr
6865	htmlReadMemory(const char buffer, int* size, const char URL, const* char encoding, int* options)
6866	{
6867	htmlParserCtxtPtr ctxt;
6868
6869	xmlInitParser();
6870	ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6871	if (ctxt == NULL)
6872	return (NULL);
6873	htmlDefaultSAXHandlerInit();
6874	if (ctxt->sax != NULL)
6875	memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6876	return (htmlDoRead(ctxt, URL, encoding, options, `0`));
6877	}
6878
6879	/**
6880	* htmlReadFd:
6881	* @fd: an open file descriptor
6882	* @URL: the base URL to use for the document
6883	* @encoding: the document encoding, or NULL
6884	* @options: a combination of htmlParserOption(s)
6885	*
6886	* parse an XML from a file descriptor and build a tree.
6887	*
6888	* Returns the resulting document tree
6889	*/
6890	htmlDocPtr
6891	htmlReadFd(int fd, const char URL, const* char encoding, int* options)
6892	{
6893	htmlParserCtxtPtr ctxt;
6894	xmlParserInputBufferPtr input;
6895	xmlParserInputPtr stream;
6896
6897	if (fd < `0`)
6898	return (NULL);
6899	xmlInitParser();
6900
6901	xmlInitParser();
6902	input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
6903	if (input == NULL)
6904	return (NULL);
6905	ctxt = xmlNewParserCtxt();
6906	if (ctxt == NULL) {
6907	xmlFreeParserInputBuffer(input);
6908	return (NULL);
6909	}
6910	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6911	if (stream == NULL) {
6912	xmlFreeParserInputBuffer(input);
6913	xmlFreeParserCtxt(ctxt);
6914	return (NULL);
6915	}
6916	inputPush(ctxt, stream);
6917	return (htmlDoRead(ctxt, URL, encoding, options, `0`));
6918	}
6919
6920	/**
6921	* htmlReadIO:
6922	* @ioread: an I/O read function
6923	* @ioclose: an I/O close function
6924	* @ioctx: an I/O handler
6925	* @URL: the base URL to use for the document
6926	* @encoding: the document encoding, or NULL
6927	* @options: a combination of htmlParserOption(s)
6928	*
6929	* parse an HTML document from I/O functions and source and build a tree.
6930	*
6931	* Returns the resulting document tree
6932	*/
6933	htmlDocPtr
6934	htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
6935	void ioctx, const* char URL, const* char encoding, int* options)
6936	{
6937	htmlParserCtxtPtr ctxt;
6938	xmlParserInputBufferPtr input;
6939	xmlParserInputPtr stream;
6940
6941	if (ioread == NULL)
6942	return (NULL);
6943	xmlInitParser();
6944
6945	input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
6946	XML_CHAR_ENCODING_NONE);
6947	if (input == NULL) {
6948	if (ioclose != NULL)
6949	ioclose(ioctx);
6950	return (NULL);
6951	}
6952	ctxt = htmlNewParserCtxt();
6953	if (ctxt == NULL) {
6954	xmlFreeParserInputBuffer(input);
6955	return (NULL);
6956	}
6957	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6958	if (stream == NULL) {
6959	xmlFreeParserInputBuffer(input);
6960	xmlFreeParserCtxt(ctxt);
6961	return (NULL);
6962	}
6963	inputPush(ctxt, stream);
6964	return (htmlDoRead(ctxt, URL, encoding, options, `0`));
6965	}
6966
6967	/**
6968	* htmlCtxtReadDoc:
6969	* @ctxt: an HTML parser context
6970	* @cur: a pointer to a zero terminated string
6971	* @URL: the base URL to use for the document
6972	* @encoding: the document encoding, or NULL
6973	* @options: a combination of htmlParserOption(s)
6974	*
6975	* parse an XML in-memory document and build a tree.
6976	* This reuses the existing @ctxt parser context
6977	*
6978	* Returns the resulting document tree
6979	*/
6980	htmlDocPtr
6981	htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6982	const char URL, const* char encoding, int* options)
6983	{
6984	xmlParserInputPtr stream;
6985
6986	if (cur == NULL)
6987	return (NULL);
6988	if (ctxt == NULL)
6989	return (NULL);
6990	xmlInitParser();
6991
6992	htmlCtxtReset(ctxt);
6993
6994	stream = xmlNewStringInputStream(ctxt, cur);
6995	if (stream == NULL) {
6996	return (NULL);
6997	}
6998	inputPush(ctxt, stream);
6999	return (htmlDoRead(ctxt, URL, encoding, options, `1`));
7000	}
7001
7002	/**
7003	* htmlCtxtReadFile:
7004	* @ctxt: an HTML parser context
7005	* @filename: a file or URL
7006	* @encoding: the document encoding, or NULL
7007	* @options: a combination of htmlParserOption(s)
7008	*
7009	* parse an XML file from the filesystem or the network.
7010	* This reuses the existing @ctxt parser context
7011	*
7012	* Returns the resulting document tree
7013	*/
7014	htmlDocPtr
7015	htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
7016	const char encoding, int* options)
7017	{
7018	xmlParserInputPtr stream;
7019
7020	if (filename == NULL)
7021	return (NULL);
7022	if (ctxt == NULL)
7023	return (NULL);
7024	xmlInitParser();
7025
7026	htmlCtxtReset(ctxt);
7027
7028	stream = xmlLoadExternalEntity(filename, NULL, ctxt);
7029	if (stream == NULL) {
7030	return (NULL);
7031	}
7032	inputPush(ctxt, stream);
7033	return (htmlDoRead(ctxt, NULL, encoding, options, `1`));
7034	}
7035
7036	/**
7037	* htmlCtxtReadMemory:
7038	* @ctxt: an HTML parser context
7039	* @buffer: a pointer to a char array
7040	* @size: the size of the array
7041	* @URL: the base URL to use for the document
7042	* @encoding: the document encoding, or NULL
7043	* @options: a combination of htmlParserOption(s)
7044	*
7045	* parse an XML in-memory document and build a tree.
7046	* This reuses the existing @ctxt parser context
7047	*
7048	* Returns the resulting document tree
7049	*/
7050	htmlDocPtr
7051	htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char buffer, int* size,
7052	const char URL, const* char encoding, int* options)
7053	{
7054	xmlParserInputBufferPtr input;
7055	xmlParserInputPtr stream;
7056
7057	if (ctxt == NULL)
7058	return (NULL);
7059	if (buffer == NULL)
7060	return (NULL);
7061	xmlInitParser();
7062
7063	htmlCtxtReset(ctxt);
7064
7065	input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
7066	if (input == NULL) {
7067	return(NULL);
7068	}
7069
7070	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7071	if (stream == NULL) {
7072	xmlFreeParserInputBuffer(input);
7073	return(NULL);
7074	}
7075
7076	inputPush(ctxt, stream);
7077	return (htmlDoRead(ctxt, URL, encoding, options, `1`));
7078	}
7079
7080	/**
7081	* htmlCtxtReadFd:
7082	* @ctxt: an HTML parser context
7083	* @fd: an open file descriptor
7084	* @URL: the base URL to use for the document
7085	* @encoding: the document encoding, or NULL
7086	* @options: a combination of htmlParserOption(s)
7087	*
7088	* parse an XML from a file descriptor and build a tree.
7089	* This reuses the existing @ctxt parser context
7090	*
7091	* Returns the resulting document tree
7092	*/
7093	htmlDocPtr
7094	htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
7095	const char URL, const* char encoding, int* options)
7096	{
7097	xmlParserInputBufferPtr input;
7098	xmlParserInputPtr stream;
7099
7100	if (fd < `0`)
7101	return (NULL);
7102	if (ctxt == NULL)
7103	return (NULL);
7104	xmlInitParser();
7105
7106	htmlCtxtReset(ctxt);
7107
7108
7109	input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
7110	if (input == NULL)
7111	return (NULL);
7112	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7113	if (stream == NULL) {
7114	xmlFreeParserInputBuffer(input);
7115	return (NULL);
7116	}
7117	inputPush(ctxt, stream);
7118	return (htmlDoRead(ctxt, URL, encoding, options, `1`));
7119	}
7120
7121	/**
7122	* htmlCtxtReadIO:
7123	* @ctxt: an HTML parser context
7124	* @ioread: an I/O read function
7125	* @ioclose: an I/O close function
7126	* @ioctx: an I/O handler
7127	* @URL: the base URL to use for the document
7128	* @encoding: the document encoding, or NULL
7129	* @options: a combination of htmlParserOption(s)
7130	*
7131	* parse an HTML document from I/O functions and source and build a tree.
7132	* This reuses the existing @ctxt parser context
7133	*
7134	* Returns the resulting document tree
7135	*/
7136	htmlDocPtr
7137	htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
7138	xmlInputCloseCallback ioclose, void *ioctx,
7139	const char *URL,
7140	const char encoding, int* options)
7141	{
7142	xmlParserInputBufferPtr input;
7143	xmlParserInputPtr stream;
7144
7145	if (ioread == NULL)
7146	return (NULL);
7147	if (ctxt == NULL)
7148	return (NULL);
7149	xmlInitParser();
7150
7151	htmlCtxtReset(ctxt);
7152
7153	input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
7154	XML_CHAR_ENCODING_NONE);
7155	if (input == NULL) {
7156	if (ioclose != NULL)
7157	ioclose(ioctx);
7158	return (NULL);
7159	}
7160	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7161	if (stream == NULL) {
7162	xmlFreeParserInputBuffer(input);
7163	return (NULL);
7164	}
7165	inputPush(ctxt, stream);
7166	return (htmlDoRead(ctxt, URL, encoding, options, `1`));
7167	}
7168
7169	#define bottom_HTMLparser
7170	#include "elfgcchack.h"
7171	#endif /* LIBXML_HTML_ENABLED */
7172

Browse the source code of ClickHouse/contrib/libxml2/HTMLparser.c