tinyxmlparser.cpp source code [Aseprite/third_party/tinyxml/tinyxmlparser.cpp]

1	/*
2	www.sourceforge.net/projects/tinyxml
3	Original code by Lee Thomason (www.grinninglizard.com)
4
5	This software is provided 'as-is', without any express or implied
6	warranty. In no event will the authors be held liable for any
7	damages arising from the use of this software.
8
9	Permission is granted to anyone to use this software for any
10	purpose, including commercial applications, and to alter it and
11	redistribute it freely, subject to the following restrictions:
12
13	1. The origin of this software must not be misrepresented; you must
14	not claim that you wrote the original software. If you use this
15	software in a product, an acknowledgment in the product documentation
16	would be appreciated but is not required.
17
18	2. Altered source versions must be plainly marked as such, and
19	must not be misrepresented as being the original software.
20
21	3. This notice may not be removed or altered from any source
22	distribution.
23	*/
24
25	#include <ctype.h>
26	#include <stddef.h>
27
28	#include "tinyxml.h"
29
// Uncomment the next line to enable the parser's TIXML_LOG trace output.
//#define DEBUG_PARSER
#ifdef DEBUG_PARSER
	// TIXML_LOG is OutputDebugString for MSVC builds that define DEBUG,
	// and printf for every other configuration.
#	if !defined( DEBUG ) || !defined( _MSC_VER )
#		define TIXML_LOG printf
#	else
#		include <windows.h>
#		define TIXML_LOG OutputDebugString
#	endif
#endif
39
40	// Note tha "PutString" hardcodes the same list. This
41	// is less flexible than it appears. Changing the entries
42	// or order will break putstring.
43	TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
44	{
45	{ "&", `5`, `'&'` },
46	{ "<", `4`, `'<'` },
47	{ ">", `4`, `'>'` },
48	{ """, `6`, `'\"'` },
49	{ "'", `6`, `'\''` }
50	};
51
// Bunch of unicode info at:
//		http://www.unicode.org/faq/utf_bom.html
// Including the basis of this table, which determines the #bytes in the
// sequence from the lead byte. 1 placed for invalid sequences --
// although the result will be junk, pass it through as much as possible.
// Beware of the non-characters in UTF-8:
//				ef bb bf (Microsoft "lead bytes")
//				ef bf be
//				ef bf bf
61
// The three bytes of the UTF-8 encoded byte order mark (EF BB BF). The first
// byte also starts the zero-width sequences EF BF BE and EF BF BF that the
// parser treats specially.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
65
66	const int TiXmlBase::utf8ByteTable[`256`] =
67	{
68	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
69	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x00
70	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x10
71	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x20
72	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x30
73	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x40
74	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x50
75	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x60
76	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x70 End of ASCII range
77	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x80 0x80 to 0xc1 invalid
78	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0x90
79	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0xa0
80	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 0xb0
81	`1`, `1`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, // 0xc0 0xc2 to 0xdf 2 byte
82	`2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, `2`, // 0xd0
83	`3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, `3`, // 0xe0 0xe0 to 0xef 3 byte
84	`4`, `4`, `4`, `4`, `4`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1` // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
85	};
86
87
88	void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
89	{
90	const unsigned long BYTE_MASK = `0xBF`;
91	const unsigned long BYTE_MARK = `0x80`;
92	const unsigned long FIRST_BYTE_MARK[`7`] = { `0x00`, `0x00`, `0xC0`, `0xE0`, `0xF0`, `0xF8`, `0xFC` };
93
94	if (input < `0x80`)
95	*length = `1`;
96	else if ( input < `0x800` )
97	*length = `2`;
98	else if ( input < `0x10000` )
99	*length = `3`;
100	else if ( input < `0x200000` )
101	*length = `4`;
102	else
103	{ length = `0`; return; } // This code won't covert this correctly anyway.*
104
105	output += *length;
106
107	// Scary scary fall throughs.
108	switch (*length)
109	{
110	case `4`:
111	--output;
112	output = (char*)((input \| BYTE_MARK) & BYTE_MASK);
113	input >>= `6`;
114	case `3`:
115	--output;
116	output = (char*)((input \| BYTE_MARK) & BYTE_MASK);
117	input >>= `6`;
118	case `2`:
119	--output;
120	output = (char*)((input \| BYTE_MARK) & BYTE_MASK);
121	input >>= `6`;
122	case `1`:
123	--output;
124	output = (char)(input \| FIRST_BYTE_MARK[length]);
125	}
126	}
127
128
129	/static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
130	{
131	// This will only work for low-ascii, everything else is assumed to be a valid
132	// letter. I'm not sure this is the best approach, but it is quite tricky trying
133	// to figure out alhabetical vs. not across encoding. So take a very
134	// conservative approach.
135
136	// if ( encoding == TIXML_ENCODING_UTF8 )
137	// {
138	if ( anyByte < `127` )
139	return isalpha( anyByte );
140	else
141	return `1`; // What else to do? The unicode set is huge...get the english ones right.
142	// }
143	// else
144	// {
145	// return isalpha( anyByte );
146	// }
147	}
148
149
150	/static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
151	{
152	// This will only work for low-ascii, everything else is assumed to be a valid
153	// letter. I'm not sure this is the best approach, but it is quite tricky trying
154	// to figure out alhabetical vs. not across encoding. So take a very
155	// conservative approach.
156
157	// if ( encoding == TIXML_ENCODING_UTF8 )
158	// {
159	if ( anyByte < `127` )
160	return isalnum( anyByte );
161	else
162	return `1`; // What else to do? The unicode set is huge...get the english ones right.
163	// }
164	// else
165	// {
166	// return isalnum( anyByte );
167	// }
168	}
169
170
171	class TiXmlParsingData
172	{
173	friend class TiXmlDocument;
174	public:
175	void Stamp( const char* now, TiXmlEncoding encoding );
176
177	const TiXmlCursor& Cursor() const { return cursor; }
178
179	private:
180	// Only used by the document!
181	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
182	{
183	assert( start );
184	stamp = start;
185	tabsize = _tabsize;
186	cursor.row = row;
187	cursor.col = col;
188	}
189
190	TiXmlCursor cursor;
191	const char* stamp;
192	int tabsize;
193	};
194
195
196	void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197	{
198	assert( now );
199
200	// Do nothing if the tabsize is 0.
201	if ( tabsize < `1` )
202	{
203	return;
204	}
205
206	// Get the current row, column.
207	int row = cursor.row;
208	int col = cursor.col;
209	const char* p = stamp;
210	assert( p );
211
212	while ( p < now )
213	{
214	// Treat p as unsigned, so we have a happy compiler.
215	const unsigned char* pU = (const unsigned char*)p;
216
217	// Code contributed by Fletcher Dunn: (modified by lee)
218	switch (*pU) {
219	case `0`:
220	// We should* never get here, but in case we do, don't*
221	// advance past the terminating null character, ever
222	return;
223
224	case `'\r'`:
225	// bump down to the next line
226	++row;
227	col = `0`;
228	// Eat the character
229	++p;
230
231	// Check for \r\n sequence, and treat this as a single character
232	if (*p == `'\n'`) {
233	++p;
234	}
235	break;
236
237	case `'\n'`:
238	// bump down to the next line
239	++row;
240	col = `0`;
241
242	// Eat the character
243	++p;
244
245	// Check for \n\r sequence, and treat this as a single
246	// character. (Yes, this bizarre thing does occur still
247	// on some arcane platforms...)
248	if (*p == `'\r'`) {
249	++p;
250	}
251	break;
252
253	case `'\t'`:
254	// Eat the character
255	++p;
256
257	// Skip to next tab stop
258	col = (col / tabsize + `1`) * tabsize;
259	break;
260
261	case TIXML_UTF_LEAD_0:
262	if ( encoding == TIXML_ENCODING_UTF8 )
263	{
264	if ( (p+`1`) && (p+`2`) )
265	{
266	// In these cases, don't advance the column. These are
267	// 0-width spaces.
268	if ( (pU+`1`)==TIXML_UTF_LEAD_1 && (pU+`2`)==TIXML_UTF_LEAD_2 )
269	p += `3`;
270	else if ( (pU+`1`)==`0xbfU` && (pU+`2`)==`0xbeU` )
271	p += `3`;
272	else if ( (pU+`1`)==`0xbfU` && (pU+`2`)==`0xbfU` )
273	p += `3`;
274	else
275	{ p +=`3`; ++col; } // A normal character.
276	}
277	}
278	else
279	{
280	++p;
281	++col;
282	}
283	break;
284
285	default:
286	if ( encoding == TIXML_ENCODING_UTF8 )
287	{
288	// Eat the 1 to 4 byte utf8 character.
289	int step = TiXmlBase::utf8ByteTable[((const* unsigned char*)p)];
290	if ( step == `0` )
291	step = `1`; // Error case from bad encoding, but handle gracefully.
292	p += step;
293
294	// Just advance one column, of course.
295	++col;
296	}
297	else
298	{
299	++p;
300	++col;
301	}
302	break;
303	}
304	}
305	cursor.row = row;
306	cursor.col = col;
307	assert( cursor.row >= -`1` );
308	assert( cursor.col >= -`1` );
309	stamp = p;
310	assert( stamp );
311	}
312
313
314	const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315	{
316	if ( !p \|\| !*p )
317	{
318	return `0`;
319	}
320	if ( encoding == TIXML_ENCODING_UTF8 )
321	{
322	while ( *p )
323	{
324	const unsigned char* pU = (const unsigned char*)p;
325
326	// Skip the stupid Microsoft UTF-8 Byte order marks
327	if ( *(pU+`0`)==TIXML_UTF_LEAD_0
328	&& *(pU+`1`)==TIXML_UTF_LEAD_1
329	&& *(pU+`2`)==TIXML_UTF_LEAD_2 )
330	{
331	p += `3`;
332	continue;
333	}
334	else if(*(pU+`0`)==TIXML_UTF_LEAD_0
335	&& *(pU+`1`)==`0xbfU`
336	&& *(pU+`2`)==`0xbeU` )
337	{
338	p += `3`;
339	continue;
340	}
341	else if(*(pU+`0`)==TIXML_UTF_LEAD_0
342	&& *(pU+`1`)==`0xbfU`
343	&& *(pU+`2`)==`0xbfU` )
344	{
345	p += `3`;
346	continue;
347	}
348
349	if ( IsWhiteSpace( p ) ) // Still using old rules for white space.*
350	++p;
351	else
352	break;
353	}
354	}
355	else
356	{
357	while ( p && IsWhiteSpace( p ) )
358	++p;
359	}
360
361	return p;
362	}
363
364	#ifdef TIXML_USE_STL
365	/static/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
366	{
367	for( ;; )
368	{
369	if ( !in->good() ) return false;
370
371	int c = in->peek();
372	// At this scope, we can't get to a document. So fail silently.
373	if ( !IsWhiteSpace( c ) \|\| c <= `0` )
374	return true;
375
376	tag += (char*) in->get();
377	}
378	}
379
380	/static/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
381	{
382	//assert( character > 0 && character < 128 ); // else it won't work in utf-8
383	while ( in->good() )
384	{
385	int c = in->peek();
386	if ( c == character )
387	return true;
388	if ( c <= `0` ) // Silent failure: can't get document at this scope
389	return false;
390
391	in->get();
392	tag += (char*) c;
393	}
394	return false;
395	}
396	#endif
397
398	// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399	// "assign" optimization removes over 10% of the execution time.
400	//
401	const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
402	{
403	// Oddly, not supported on some comilers,
404	//name->clear();
405	// So use this:
406	*name = "";
407	assert( p );
408
409	// Names start with letters or underscores.
410	// Of course, in unicode, tinyxml has no idea what a letter is. The
411	// algorithm is generous.
412	//
413	// After that, they can be letters, underscores, numbers,
414	// hyphens, or colons. (Colons are valid ony for namespaces,
415	// but tinyxml can't tell namespaces from names.)
416	if ( p && *p
417	&& ( IsAlpha( (unsigned char) p, encoding ) \|\| p == `'_'` ) )
418	{
419	const char* start = p;
420	while( p && *p
421	&& ( IsAlphaNum( (unsigned char ) *p, encoding )
422	\|\| *p == `'_'`
423	\|\| *p == `'-'`
424	\|\| *p == `'.'`
425	\|\| *p == `':'` ) )
426	{
427	//(name) += p; // expensive
428	++p;
429	}
430	if ( p-start > `0` ) {
431	name->assign( start, p-start );
432	}
433	return p;
434	}
435	return `0`;
436	}
437
438	const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
439	{
440	// Presume an entity, and pull it out.
441	TIXML_STRING ent;
442	int i;
443	*length = `0`;
444
445	if ( (p+`1`) && (p+`1`) == `'#'` && *(p+`2`) )
446	{
447	unsigned long ucs = `0`;
448	ptrdiff_t delta = `0`;
449	unsigned mult = `1`;
450
451	if ( *(p+`2`) == `'x'` )
452	{
453	// Hexadecimal.
454	if ( !(p+`3`) ) return* `0`;
455
456	const char* q = p+`3`;
457	q = strchr( q, `';'` );
458
459	if ( !q \|\| !q ) return* `0`;
460
461	delta = q-p;
462	--q;
463
464	while ( *q != `'x'` )
465	{
466	if ( q >= `'0'` && q <= `'9'` )
467	ucs += mult * (*q - `'0'`);
468	else if ( q >= `'a'` && q <= `'f'` )
469	ucs += mult * (*q - `'a'` + `10`);
470	else if ( q >= `'A'` && q <= `'F'` )
471	ucs += mult * (*q - `'A'` + `10` );
472	else
473	return `0`;
474	mult *= `16`;
475	--q;
476	}
477	}
478	else
479	{
480	// Decimal.
481	if ( !(p+`2`) ) return* `0`;
482
483	const char* q = p+`2`;
484	q = strchr( q, `';'` );
485
486	if ( !q \|\| !q ) return* `0`;
487
488	delta = q-p;
489	--q;
490
491	while ( *q != `'#'` )
492	{
493	if ( q >= `'0'` && q <= `'9'` )
494	ucs += mult * (*q - `'0'`);
495	else
496	return `0`;
497	mult *= `10`;
498	--q;
499	}
500	}
501	if ( encoding == TIXML_ENCODING_UTF8 )
502	{
503	// convert the UCS to UTF-8
504	ConvertUTF32ToUTF8( ucs, value, length );
505	}
506	else
507	{
508	value = (char*)ucs;
509	*length = `1`;
510	}
511	return p + delta + `1`;
512	}
513
514	// Now try to match it.
515	for( i=`0`; i<NUM_ENTITY; ++i )
516	{
517	if ( strncmp( entity[i].str, p, entity[i].strLength ) == `0` )
518	{
519	assert( strlen( entity[i].str ) == entity[i].strLength );
520	*value = entity[i].chr;
521	*length = `1`;
522	return ( p + entity[i].strLength );
523	}
524	}
525
526	// So it wasn't an entity, its unrecognized, or something like that.
527	value = p; // Don't put back the last one, since we return it!
528	//length = 1; // Leave unrecognized entities - this doesn't really work.*
529	// Just writes strange XML.
530	return p+`1`;
531	}
532
533
534	bool TiXmlBase::StringEqual( const char* p,
535	const char* tag,
536	bool ignoreCase,
537	TiXmlEncoding encoding )
538	{
539	assert( p );
540	assert( tag );
541	if ( !p \|\| !*p )
542	{
543	assert( `0` );
544	return false;
545	}
546
547	const char* q = p;
548
549	if ( ignoreCase )
550	{
551	while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
552	{
553	++q;
554	++tag;
555	}
556
557	if ( *tag == `0` )
558	return true;
559	}
560	else
561	{
562	while ( q && tag && q == tag )
563	{
564	++q;
565	++tag;
566	}
567
568	if ( tag == `0` ) // Have we found the end of the tag, and everything equal?*
569	return true;
570	}
571	return false;
572	}
573
574	const char* TiXmlBase::ReadText( const char* p,
575	TIXML_STRING * text,
576	bool trimWhiteSpace,
577	const char* endTag,
578	bool caseInsensitive,
579	TiXmlEncoding encoding )
580	{
581	*text = "";
582	if ( !trimWhiteSpace // certain tags always keep whitespace
583	\|\| !condenseWhiteSpace ) // if true, whitespace is always kept
584	{
585	// Keep all the white space.
586	while ( p && *p
587	&& !StringEqual( p, endTag, caseInsensitive, encoding )
588	)
589	{
590	int len;
591	char cArr[`4`] = { `0`, `0`, `0`, `0` };
592	p = GetChar( p, cArr, &len, encoding );
593	text->append( cArr, len );
594	}
595	}
596	else
597	{
598	bool whitespace = false;
599
600	// Remove leading white space:
601	p = SkipWhiteSpace( p, encoding );
602	while ( p && *p
603	&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
604	{
605	if ( p == `'\r'` \|\| p == `'\n'` )
606	{
607	whitespace = true;
608	++p;
609	}
610	else if ( IsWhiteSpace( *p ) )
611	{
612	whitespace = true;
613	++p;
614	}
615	else
616	{
617	// If we've found whitespace, add it before the
618	// new character. Any whitespace just becomes a space.
619	if ( whitespace )
620	{
621	(*text) += `' '`;
622	whitespace = false;
623	}
624	int len;
625	char cArr[`4`] = { `0`, `0`, `0`, `0` };
626	p = GetChar( p, cArr, &len, encoding );
627	if ( len == `1` )
628	(text) += cArr[`0`]; // more efficient*
629	else
630	text->append( cArr, len );
631	}
632	}
633	}
634	if ( p && *p )
635	p += strlen( endTag );
636	return ( p && *p ) ? p : `0`;
637	}
638
639	#ifdef TIXML_USE_STL
640
641	void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
642	{
643	// The basic issue with a document is that we don't know what we're
644	// streaming. Read something presumed to be a tag (and hope), then
645	// identify it, and call the appropriate stream method on the tag.
646	//
647	// This "pre-streaming" will never read the closing ">" so the
648	// sub-tag can orient itself.
649
650	if ( !StreamTo( in, `'<'`, tag ) )
651	{
652	SetError( TIXML_ERROR_PARSING_EMPTY, `0`, `0`, TIXML_ENCODING_UNKNOWN );
653	return;
654	}
655
656	while ( in->good() )
657	{
658	int tagIndex = (int) tag->length();
659	while ( in->good() && in->peek() != `'>'` )
660	{
661	int c = in->get();
662	if ( c <= `0` )
663	{
664	SetError( TIXML_ERROR_EMBEDDED_NULL, `0`, `0`, TIXML_ENCODING_UNKNOWN );
665	break;
666	}
667	(tag) += (char*) c;
668	}
669
670	if ( in->good() )
671	{
672	// We now have something we presume to be a node of
673	// some sort. Identify it, and call the node to
674	// continue streaming.
675	TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
676
677	if ( node )
678	{
679	node->StreamIn( in, tag );
680	bool isElement = node->ToElement() != `0`;
681	delete node;
682	node = `0`;
683
684	// If this is the root element, we're done. Parsing will be
685	// done by the >> operator.
686	if ( isElement )
687	{
688	return;
689	}
690	}
691	else
692	{
693	SetError( TIXML_ERROR, `0`, `0`, TIXML_ENCODING_UNKNOWN );
694	return;
695	}
696	}
697	}
698	// We should have returned sooner.
699	SetError( TIXML_ERROR, `0`, `0`, TIXML_ENCODING_UNKNOWN );
700	}
701
702	#endif
703
704	const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
705	{
706	ClearError();
707
708	// Parse away, at the document level. Since a document
709	// contains nothing but other tags, most of what happens
710	// here is skipping white space.
711	if ( !p \|\| !*p )
712	{
713	SetError( TIXML_ERROR_DOCUMENT_EMPTY, `0`, `0`, TIXML_ENCODING_UNKNOWN );
714	return `0`;
715	}
716
717	// Note that, for a document, this needs to come
718	// before the while space skip, so that parsing
719	// starts from the pointer we are given.
720	location.Clear();
721	if ( prevData )
722	{
723	location.row = prevData->cursor.row;
724	location.col = prevData->cursor.col;
725	}
726	else
727	{
728	location.row = `0`;
729	location.col = `0`;
730	}
731	TiXmlParsingData data( p, TabSize(), location.row, location.col );
732	location = data.Cursor();
733
734	if ( encoding == TIXML_ENCODING_UNKNOWN )
735	{
736	// Check for the Microsoft UTF-8 lead bytes.
737	const unsigned char* pU = (const unsigned char*)p;
738	if ( (pU+`0`) && (pU+`0`) == TIXML_UTF_LEAD_0
739	&& (pU+`1`) && (pU+`1`) == TIXML_UTF_LEAD_1
740	&& (pU+`2`) && (pU+`2`) == TIXML_UTF_LEAD_2 )
741	{
742	encoding = TIXML_ENCODING_UTF8;
743	useMicrosoftBOM = true;
744	}
745	}
746
747	p = SkipWhiteSpace( p, encoding );
748	if ( !p )
749	{
750	SetError( TIXML_ERROR_DOCUMENT_EMPTY, `0`, `0`, TIXML_ENCODING_UNKNOWN );
751	return `0`;
752	}
753
754	while ( p && *p )
755	{
756	TiXmlNode* node = Identify( p, encoding );
757	if ( node )
758	{
759	p = node->Parse( p, &data, encoding );
760	LinkEndChild( node );
761	}
762	else
763	{
764	break;
765	}
766
767	// Did we get encoding info?
768	if ( encoding == TIXML_ENCODING_UNKNOWN
769	&& node->ToDeclaration() )
770	{
771	TiXmlDeclaration* dec = node->ToDeclaration();
772	const char* enc = dec->Encoding();
773	assert( enc );
774
775	if ( *enc == `0` )
776	encoding = TIXML_ENCODING_UTF8;
777	else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778	encoding = TIXML_ENCODING_UTF8;
779	else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780	encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
781	else
782	encoding = TIXML_ENCODING_LEGACY;
783	}
784
785	p = SkipWhiteSpace( p, encoding );
786	}
787
788	// Was this empty?
789	if ( !firstChild ) {
790	SetError( TIXML_ERROR_DOCUMENT_EMPTY, `0`, `0`, encoding );
791	return `0`;
792	}
793
794	// All is well.
795	return p;
796	}
797
798	void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
799	{
800	// The first error in a chain is more accurate - don't set again!
801	if ( error )
802	return;
803
804	assert( err > `0` && err < TIXML_ERROR_STRING_COUNT );
805	error = true;
806	errorId = err;
807	errorDesc = errorString[ errorId ];
808
809	errorLocation.Clear();
810	if ( pError && data )
811	{
812	data->Stamp( pError, encoding );
813	errorLocation = data->Cursor();
814	}
815	}
816
817
818	TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
819	{
820	TiXmlNode* returnNode = `0`;
821
822	p = SkipWhiteSpace( p, encoding );
823	if( !p \|\| !p \|\| p != `'<'` )
824	{
825	return `0`;
826	}
827
828	p = SkipWhiteSpace( p, encoding );
829
830	if ( !p \|\| !*p )
831	{
832	return `0`;
833	}
834
835	// What is this thing?
836	// - Elements start with a letter or underscore, but xml is reserved.
837	// - Comments: <!--
838	// - Decleration: <?xml
839	// - Everthing else is unknown to tinyxml.
840	//
841
842	const char* xmlHeader = { "<?xml" };
843	const char* commentHeader = { "<!--" };
844	const char* dtdHeader = { "<!" };
845	const char* cdataHeader = { "<![CDATA[" };
846
847	if ( StringEqual( p, xmlHeader, true, encoding ) )
848	{
849	#ifdef DEBUG_PARSER
850	TIXML_LOG( "XML parsing Declaration\n" );
851	#endif
852	returnNode = new TiXmlDeclaration ();
853	}
854	else if ( StringEqual( p, commentHeader, false, encoding ) )
855	{
856	#ifdef DEBUG_PARSER
857	TIXML_LOG( "XML parsing Comment\n" );
858	#endif
859	returnNode = new TiXmlComment ();
860	}
861	else if ( StringEqual( p, cdataHeader, false, encoding ) )
862	{
863	#ifdef DEBUG_PARSER
864	TIXML_LOG( "XML parsing CDATA\n" );
865	#endif
866	TiXmlText* text = new TiXmlText ( "" );
867	text->SetCDATA( true );
868	returnNode = text;
869	}
870	else if ( StringEqual( p, dtdHeader, false, encoding ) )
871	{
872	#ifdef DEBUG_PARSER
873	TIXML_LOG( "XML parsing Unknown(1)\n" );
874	#endif
875	returnNode = new TiXmlUnknown ();
876	}
877	else if ( IsAlpha( *(p+`1`), encoding )
878	\|\| *(p+`1`) == `'_'` )
879	{
880	#ifdef DEBUG_PARSER
881	TIXML_LOG( "XML parsing Element\n" );
882	#endif
883	returnNode = new TiXmlElement ( "" );
884	}
885	else
886	{
887	#ifdef DEBUG_PARSER
888	TIXML_LOG( "XML parsing Unknown(2)\n" );
889	#endif
890	returnNode = new TiXmlUnknown ();
891	}
892
893	if ( returnNode )
894	{
895	// Set the parent, so it can report errors
896	returnNode->parent = this;
897	}
898	return returnNode;
899	}
900
901	#ifdef TIXML_USE_STL
902
903	void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
904	{
905	// We're called with some amount of pre-parsing. That is, some of "this"
906	// element is in "tag". Go ahead and stream to the closing ">"
907	while( in->good() )
908	{
909	int c = in->get();
910	if ( c <= `0` )
911	{
912	TiXmlDocument* document = GetDocument();
913	if ( document )
914	document->SetError( TIXML_ERROR_EMBEDDED_NULL, `0`, `0`, TIXML_ENCODING_UNKNOWN );
915	return;
916	}
917	(tag) += (char*) c ;
918
919	if ( c == `'>'` )
920	break;
921	}
922
923	if ( tag->length() < `3` ) return;
924
925	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
926	// If not, identify and stream.
927
928	if ( tag->at( tag->length() - `1` ) == `'>'`
929	&& tag->at( tag->length() - `2` ) == `'/'` )
930	{
931	// All good!
932	return;
933	}
934	else if ( tag->at( tag->length() - `1` ) == `'>'` )
935	{
936	// There is more. Could be:
937	// text
938	// cdata text (which looks like another node)
939	// closing tag
940	// another node.
941	for ( ;; )
942	{
943	StreamWhiteSpace( in, tag );
944
945	// Do we have text?
946	if ( in->good() && in->peek() != `'<'` )
947	{
948	// Yep, text.
949	TiXmlText text( "" );
950	text.StreamIn( in, tag );
951
952	// What follows text is a closing tag or another node.
953	// Go around again and figure it out.
954	continue;
955	}
956
957	// We now have either a closing tag...or another node.
958	// We should be at a "<", regardless.
959	if ( !in->good() ) return;
960	assert( in->peek() == `'<'` );
961	int tagIndex = (int) tag->length();
962
963	bool closingTag = false;
964	bool firstCharFound = false;
965
966	for( ;; )
967	{
968	if ( !in->good() )
969	return;
970
971	int c = in->peek();
972	if ( c <= `0` )
973	{
974	TiXmlDocument* document = GetDocument();
975	if ( document )
976	document->SetError( TIXML_ERROR_EMBEDDED_NULL, `0`, `0`, TIXML_ENCODING_UNKNOWN );
977	return;
978	}
979
980	if ( c == `'>'` )
981	break;
982
983	tag += (char*) c;
984	in->get();
985
986	// Early out if we find the CDATA id.
987	if ( c == `'['` && tag->size() >= `9` )
988	{
989	size_t len = tag->size();
990	const char* start = tag->c_str() + len - `9`;
991	if ( strcmp( start, "<![CDATA[" ) == `0` ) {
992	assert( !closingTag );
993	break;
994	}
995	}
996
997	if ( !firstCharFound && c != `'<'` && !IsWhiteSpace( c ) )
998	{
999	firstCharFound = true;
1000	if ( c == `'/'` )
1001	closingTag = true;
1002	}
1003	}
1004	// If it was a closing tag, then read in the closing '>' to clean up the input stream.
1005	// If it was not, the streaming will be done by the tag.
1006	if ( closingTag )
1007	{
1008	if ( !in->good() )
1009	return;
1010
1011	int c = in->get();
1012	if ( c <= `0` )
1013	{
1014	TiXmlDocument* document = GetDocument();
1015	if ( document )
1016	document->SetError( TIXML_ERROR_EMBEDDED_NULL, `0`, `0`, TIXML_ENCODING_UNKNOWN );
1017	return;
1018	}
1019	assert( c == `'>'` );
1020	tag += (char*) c;
1021
1022	// We are done, once we've found our closing tag.
1023	return;
1024	}
1025	else
1026	{
1027	// If not a closing tag, id it, and stream.
1028	const char* tagloc = tag->c_str() + tagIndex;
1029	TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1030	if ( !node )
1031	return;
1032	node->StreamIn( in, tag );
1033	delete node;
1034	node = `0`;
1035
1036	// No return: go around from the beginning: text, closing tag, or node.
1037	}
1038	}
1039	}
1040	}
1041	#endif
1042
1043	const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1044	{
1045	p = SkipWhiteSpace( p, encoding );
1046	TiXmlDocument* document = GetDocument();
1047
1048	if ( !p \|\| !*p )
1049	{
1050	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, `0`, `0`, encoding );
1051	return `0`;
1052	}
1053
1054	if ( data )
1055	{
1056	data->Stamp( p, encoding );
1057	location = data->Cursor();
1058	}
1059
1060	if ( *p != `'<'` )
1061	{
1062	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1063	return `0`;
1064	}
1065
1066	p = SkipWhiteSpace( p+`1`, encoding );
1067
1068	// Read the name.
1069	const char* pErr = p;
1070
1071	p = ReadName( p, &value, encoding );
1072	if ( !p \|\| !*p )
1073	{
1074	if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1075	return `0`;
1076	}
1077
1078	TIXML_STRING endTag ("</");
1079	endTag += value;
1080
1081	// Check for and read attributes. Also look for an empty
1082	// tag or an end tag.
1083	while ( p && *p )
1084	{
1085	pErr = p;
1086	p = SkipWhiteSpace( p, encoding );
1087	if ( !p \|\| !*p )
1088	{
1089	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1090	return `0`;
1091	}
1092	if ( *p == `'/'` )
1093	{
1094	++p;
1095	// Empty tag.
1096	if ( *p != `'>'` )
1097	{
1098	if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1099	return `0`;
1100	}
1101	return (p+`1`);
1102	}
1103	else if ( *p == `'>'` )
1104	{
1105	// Done with attributes (if there were any.)
1106	// Read the value -- which can include other
1107	// elements -- read the end tag, and return.
1108	++p;
1109	p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1110	if ( !p \|\| !*p ) {
1111	// We were looking for the end tag, but found nothing.
1112	// Fix for [ 1663758 ] Failure to report error on bad XML
1113	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1114	return `0`;
1115	}
1116
1117	// We should find the end tag now
1118	// note that:
1119	// </foo > and
1120	// </foo>
1121	// are both valid end tags.
1122	if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1123	{
1124	p += endTag.length();
1125	p = SkipWhiteSpace( p, encoding );
1126	if ( p && p && p == `'>'` ) {
1127	++p;
1128	return p;
1129	}
1130	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1131	return `0`;
1132	}
1133	else
1134	{
1135	if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1136	return `0`;
1137	}
1138	}
1139	else
1140	{
1141	// Try to read an attribute:
1142	TiXmlAttribute* attrib = new TiXmlAttribute ();
1143	if ( !attrib )
1144	{
1145	return `0`;
1146	}
1147
1148	attrib->SetDocument( document );
1149	pErr = p;
1150	p = attrib->Parse( p, data, encoding );
1151
1152	if ( !p \|\| !*p )
1153	{
1154	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1155	delete attrib;
1156	return `0`;
1157	}
1158
1159	// Handle the strange case of double attributes:
1160	#ifdef TIXML_USE_STL
1161	TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1162	#else
1163	TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1164	#endif
1165	if ( node )
1166	{
1167	if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1168	delete attrib;
1169	return `0`;
1170	}
1171
1172	attributeSet.Add( attrib );
1173	}
1174	}
1175	return p;
1176	}
1177
1178
1179	const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1180	{
1181	TiXmlDocument* document = GetDocument();
1182
1183	// Read in text and elements in any order.
1184	const char* pWithWhiteSpace = p;
1185	p = SkipWhiteSpace( p, encoding );
1186
1187	while ( p && *p )
1188	{
1189	if ( *p != `'<'` )
1190	{
1191	// Take what we have, make a text element.
1192	TiXmlText* textNode = new TiXmlText ( "" );
1193
1194	if ( !textNode )
1195	{
1196	return `0`;
1197	}
1198
1199	if ( TiXmlBase::IsWhiteSpaceCondensed() )
1200	{
1201	p = textNode->Parse( p, data, encoding );
1202	}
1203	else
1204	{
1205	// Special case: we want to keep the white space
1206	// so that leading spaces aren't removed.
1207	p = textNode->Parse( pWithWhiteSpace, data, encoding );
1208	}
1209
1210	if ( !textNode->Blank() )
1211	LinkEndChild( textNode );
1212	else
1213	delete textNode;
1214	}
1215	else
1216	{
1217	// We hit a '<'
1218	// Have we hit a new element or an end tag? This could also be
1219	// a TiXmlText in the "CDATA" style.
1220	if ( StringEqual( p, "</", false, encoding ) )
1221	{
1222	return p;
1223	}
1224	else
1225	{
1226	TiXmlNode* node = Identify( p, encoding );
1227	if ( node )
1228	{
1229	p = node->Parse( p, data, encoding );
1230	LinkEndChild( node );
1231	}
1232	else
1233	{
1234	return `0`;
1235	}
1236	}
1237	}
1238	pWithWhiteSpace = p;
1239	p = SkipWhiteSpace( p, encoding );
1240	}
1241
1242	if ( !p )
1243	{
1244	if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, `0`, `0`, encoding );
1245	}
1246	return p;
1247	}
1248
1249
#ifdef TIXML_USE_STL
// Appends characters from `in` to `*tag` up to and including the first '>'.
//
// A value <= 0 from get() (end of stream or an embedded NUL) stops reading
// and records TIXML_ERROR_EMBEDDED_NULL on the owning document, if any.
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += static_cast<char>( c );

		if ( c == '>' )
		{
			// All is well.
			return;
		}
	}
}
#endif
1273
1274
1275	const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1276	{
1277	TiXmlDocument* document = GetDocument();
1278	p = SkipWhiteSpace( p, encoding );
1279
1280	if ( data )
1281	{
1282	data->Stamp( p, encoding );
1283	location = data->Cursor();
1284	}
1285	if ( !p \|\| !p \|\| p != `'<'` )
1286	{
1287	if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1288	return `0`;
1289	}
1290	++p;
1291	value = "";
1292
1293	while ( p && p && p != `'>'` )
1294	{
1295	value += *p;
1296	++p;
1297	}
1298
1299	if ( !p )
1300	{
1301	if ( document )
1302	document->SetError( TIXML_ERROR_PARSING_UNKNOWN, `0`, `0`, encoding );
1303	}
1304	if ( p && *p == `'>'` )
1305	return p+`1`;
1306	return p;
1307	}
1308
#ifdef TIXML_USE_STL
// Appends characters from `in` to `*tag` until the buffer ends with "-->".
//
// A value <= 0 from get() (end of stream or an embedded NUL) stops reading
// and records TIXML_ERROR_EMBEDDED_NULL on the owning document, if any.
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += static_cast<char>( c );

		// The length check keeps len-2 / len-3 from wrapping around (unsigned
		// arithmetic) when fewer than three characters have been collected.
		const size_t len = tag->length();
		if ( c == '>'
			 && len >= 3
			 && tag->at( len - 2 ) == '-'
			 && tag->at( len - 3 ) == '-' )
		{
			// All is well.
			return;
		}
	}
}
#endif
1335
1336
1337	const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1338	{
1339	TiXmlDocument* document = GetDocument();
1340	value = "";
1341
1342	p = SkipWhiteSpace( p, encoding );
1343
1344	if ( data )
1345	{
1346	data->Stamp( p, encoding );
1347	location = data->Cursor();
1348	}
1349	const char* startTag = "<!--";
1350	const char* endTag = "-->";
1351
1352	if ( !StringEqual( p, startTag, false, encoding ) )
1353	{
1354	if ( document )
1355	document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1356	return `0`;
1357	}
1358	p += strlen( startTag );
1359
1360	// [ 1475201 ] TinyXML parses entities in comments
1361	// Oops - ReadText doesn't work, because we don't want to parse the entities.
1362	// p = ReadText( p, &value, false, endTag, false, encoding );
1363	//
1364	// from the XML spec:
1365	/*
1366	[Definition: Comments may appear anywhere in a document outside other markup; in addition,
1367	they may appear within the document type declaration at places allowed by the grammar.
1368	They are not part of the document's character data; an XML processor MAY, but need not,
1369	make it possible for an application to retrieve the text of comments. For compatibility,
1370	the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1371	references MUST NOT be recognized within comments.
1372
1373	An example of a comment:
1374
1375	<!-- declarations for <head> & <body> -->
1376	*/
1377
1378	value = "";
1379	// Keep all the white space.
1380	while ( p && p && !StringEqual( p, endTag, false*, encoding ) )
1381	{
1382	value.append( p, `1` );
1383	++p;
1384	}
1385	if ( p && *p )
1386	p += strlen( endTag );
1387
1388	return p;
1389	}
1390
1391
1392	const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1393	{
1394	p = SkipWhiteSpace( p, encoding );
1395	if ( !p \|\| !p ) return* `0`;
1396
1397	if ( data )
1398	{
1399	data->Stamp( p, encoding );
1400	location = data->Cursor();
1401	}
1402	// Read the name, the '=' and the value.
1403	const char* pErr = p;
1404	p = ReadName( p, &name, encoding );
1405	if ( !p \|\| !*p )
1406	{
1407	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1408	return `0`;
1409	}
1410	p = SkipWhiteSpace( p, encoding );
1411	if ( !p \|\| !p \|\| p != `'='` )
1412	{
1413	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1414	return `0`;
1415	}
1416
1417	++p; // skip '='
1418	p = SkipWhiteSpace( p, encoding );
1419	if ( !p \|\| !*p )
1420	{
1421	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1422	return `0`;
1423	}
1424
1425	const char* end;
1426	const char SINGLE_QUOTE = `'\''`;
1427	const char DOUBLE_QUOTE = `'\"'`;
1428
1429	if ( *p == SINGLE_QUOTE )
1430	{
1431	++p;
1432	end = "\'"; // single quote in string
1433	p = ReadText( p, &value, false, end, false, encoding );
1434	}
1435	else if ( *p == DOUBLE_QUOTE )
1436	{
1437	++p;
1438	end = "\""; // double quote in string
1439	p = ReadText( p, &value, false, end, false, encoding );
1440	}
1441	else
1442	{
1443	// All attribute values should be in single or double quotes.
1444	// But this is such a common error that the parser will try
1445	// its best, even without them.
1446	value = "";
1447	while ( p && p // existence*
1448	&& !IsWhiteSpace( p ) // whitespace*
1449	&& p != `'/'` && p != `'>'` ) // tag end
1450	{
1451	if ( p == SINGLE_QUOTE \|\| p == DOUBLE_QUOTE ) {
1452	// [ 1451649 ] Attribute values with trailing quotes not handled correctly
1453	// We did not have an opening quote but seem to have a
1454	// closing one. Give up and throw an error.
1455	if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1456	return `0`;
1457	}
1458	value += *p;
1459	++p;
1460	}
1461	}
1462	return p;
1463	}
1464
#ifdef TIXML_USE_STL
// Appends text characters from `in` to `*tag`.
//
// For ordinary text, reading stops at (and does not consume) the next '<'.
// For CDATA text, reading stops once the buffer ends with "]]>".
// A peeked value <= 0 (end of stream or an embedded NUL) stops reading and
// records TIXML_ERROR_EMBEDDED_NULL on the owning document, if any.
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		// Peek first so the '<' that ends plain text stays in the stream.
		int c = in->peek();
		if ( !cdata && ( c == '<' ) )
		{
			return;
		}
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += static_cast<char>( c );
		in->get();	// "commits" the peek made above

		if ( cdata && c == '>' && tag->size() >= 3 ) {
			size_t len = tag->size();
			if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
				// terminator of cdata.
				return;
			}
		}
	}
}
#endif
1496
1497	const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1498	{
1499	value = "";
1500	TiXmlDocument* document = GetDocument();
1501
1502	if ( data )
1503	{
1504	data->Stamp( p, encoding );
1505	location = data->Cursor();
1506	}
1507
1508	const char* const startTag = "<![CDATA[";
1509	const char* const endTag = "]]>";
1510
1511	if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
1512	{
1513	cdata = true;
1514
1515	if ( !StringEqual( p, startTag, false, encoding ) )
1516	{
1517	if ( document )
1518	document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1519	return `0`;
1520	}
1521	p += strlen( startTag );
1522
1523	// Keep all the white space, ignore the encoding, etc.
1524	while ( p && *p
1525	&& !StringEqual( p, endTag, false, encoding )
1526	)
1527	{
1528	value += *p;
1529	++p;
1530	}
1531
1532	TIXML_STRING dummy;
1533	p = ReadText( p, &dummy, false, endTag, false, encoding );
1534	return p;
1535	}
1536	else
1537	{
1538	bool ignoreWhite = true;
1539
1540	const char* end = "<";
1541	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1542	if ( p && *p )
1543	return p-`1`; // don't truncate the '<'
1544	return `0`;
1545	}
1546	}
1547
#ifdef TIXML_USE_STL
// Appends characters from `in` to `*tag` up to and including the first '>'.
//
// A value <= 0 from get() (end of stream or an embedded NUL) stops reading
// and records TIXML_ERROR_EMBEDDED_NULL on the owning document, if any.
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += static_cast<char>( c );

		if ( c == '>' )
		{
			// All is well.
			return;
		}
	}
}
#endif
1571
1572	const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1573	{
1574	p = SkipWhiteSpace( p, _encoding );
1575	// Find the beginning, find the end, and look for
1576	// the stuff in-between.
1577	TiXmlDocument* document = GetDocument();
1578	if ( !p \|\| !p \|\| !StringEqual( p, "<?xml", true*, _encoding ) )
1579	{
1580	if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, `0`, `0`, _encoding );
1581	return `0`;
1582	}
1583	if ( data )
1584	{
1585	data->Stamp( p, _encoding );
1586	location = data->Cursor();
1587	}
1588	p += `5`;
1589
1590	version = "";
1591	encoding = "";
1592	standalone = "";
1593
1594	while ( p && *p )
1595	{
1596	if ( *p == `'>'` )
1597	{
1598	++p;
1599	return p;
1600	}
1601
1602	p = SkipWhiteSpace( p, _encoding );
1603	if ( StringEqual( p, "version", true, _encoding ) )
1604	{
1605	TiXmlAttribute attrib;
1606	p = attrib.Parse( p, data, _encoding );
1607	version = attrib.Value();
1608	}
1609	else if ( StringEqual( p, "encoding", true, _encoding ) )
1610	{
1611	TiXmlAttribute attrib;
1612	p = attrib.Parse( p, data, _encoding );
1613	encoding = attrib.Value();
1614	}
1615	else if ( StringEqual( p, "standalone", true, _encoding ) )
1616	{
1617	TiXmlAttribute attrib;
1618	p = attrib.Parse( p, data, _encoding );
1619	standalone = attrib.Value();
1620	}
1621	else
1622	{
1623	// Read over whatever it is.
1624	while( p && p && p != `'>'` && !IsWhiteSpace( *p ) )
1625	++p;
1626	}
1627	}
1628	return `0`;
1629	}
1630
1631	bool TiXmlText::Blank() const
1632	{
1633	for ( unsigned i=`0`; i<value.length(); i++ )
1634	if ( !IsWhiteSpace( value[i] ) )
1635	return false;
1636	return true;
1637	}
1638

Browse the source code of Aseprite/third_party/tinyxml/tinyxmlparser.cpp