xml_parser.cpp source code [Godot/core/io/xml_parser.cpp]

/**************************************************************************/
/*  xml_parser.cpp                                                        */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/
30
31	#include "xml_parser.h"
32
33	#include "core/string/print_string.h"
34
35	//#define DEBUG_XML
36
// Returns true when `c` is one of the four characters the parser treats as
// whitespace: space, horizontal tab, line feed or carriage return.
static inline bool _is_white_space(char c) {
	return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
}
40
41	//! sets the state that text was found. Returns true if set should be set
42	bool XMLParser::_set_text(const char start, const* char *end) {
43	// check if text is more than 2 characters, and if not, check if there is
44	// only white space, so that this text won't be reported
45	if (end - start < `3`) {
46	const char *p = start;
47	for (; p != end; ++p) {
48	if (!_is_white_space(*p)) {
49	break;
50	}
51	}
52
53	if (p == end) {
54	return false;
55	}
56	}
57
58	// set current text to the parsed text, and replace xml special characters
59	String s = String::utf8(start, (int)(end - start));
60	node_name = s.xml_unescape();
61
62	// current XML node type is text
63	node_type = NODE_TEXT;
64
65	return true;
66	}
67
68	void XMLParser::_parse_closing_xml_element() {
69	node_type = NODE_ELEMENT_END;
70	node_empty = false;
71	attributes.clear();
72
73	next_char();
74	const char *pBeginClose = P;
75
76	while (P && P != `'>'`) {
77	next_char();
78	}
79
80	node_name = String::utf8(pBeginClose, (int)(P - pBeginClose));
81	#ifdef DEBUG_XML
82	print_line("XML CLOSE: " + node_name);
83	#endif
84
85	if (*P) {
86	next_char();
87	}
88	}
89
90	void XMLParser::_ignore_definition() {
91	node_type = NODE_UNKNOWN;
92
93	const char *F = P;
94	// move until end marked with '>' reached
95	while (P && P != `'>'`) {
96	next_char();
97	}
98	node_name.parse_utf8(F, P - F);
99
100	if (*P) {
101	next_char();
102	}
103	}
104
105	bool XMLParser::_parse_cdata() {
106	if (*(P + `1`) != `'['`) {
107	return false;
108	}
109
110	node_type = NODE_CDATA;
111
112	// skip '<![CDATA['
113	int count = `0`;
114	while (*P && count < `8`) {
115	next_char();
116	++count;
117	}
118
119	if (!*P) {
120	node_name = "";
121	return true;
122	}
123
124	const char *cDataBegin = P;
125	const char cDataEnd = nullptr*;
126
127	// find end of CDATA
128	while (*P && !cDataEnd) {
129	if (*P == `'>'` &&
130	(*(P - `1`) == `']'`) &&
131	(*(P - `2`) == `']'`)) {
132	cDataEnd = P - `2`;
133	}
134
135	next_char();
136	}
137
138	if (!cDataEnd) {
139	cDataEnd = P;
140	}
141	node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
142	#ifdef DEBUG_XML
143	print_line("XML CDATA: " + node_name);
144	#endif
145
146	return true;
147	}
148
149	void XMLParser::_parse_comment() {
150	node_type = NODE_COMMENT;
151	P += `1`;
152
153	const char *pEndOfInput = data + length;
154	const char *pCommentBegin;
155	const char *pCommentEnd;
156
157	if (P + `1` < pEndOfInput && P[`0`] == `'-'` && P[`1`] == `'-'`) {
158	// Comment, use '-->' as end.
159	pCommentBegin = P + `2`;
160	for (pCommentEnd = pCommentBegin; pCommentEnd + `2` < pEndOfInput; pCommentEnd++) {
161	if (pCommentEnd[`0`] == `'-'` && pCommentEnd[`1`] == `'-'` && pCommentEnd[`2`] == `'>'`) {
162	break;
163	}
164	}
165	if (pCommentEnd + `2` < pEndOfInput) {
166	P = pCommentEnd + `3`;
167	} else {
168	P = pCommentEnd = pEndOfInput;
169	}
170	} else {
171	// Like document type definition, match angle brackets.
172	pCommentBegin = P;
173
174	int count = `1`;
175	while (*P && count) {
176	if (*P == `'>'`) {
177	--count;
178	} else if (*P == `'<'`) {
179	++count;
180	}
181	next_char();
182	}
183
184	if (count) {
185	pCommentEnd = P;
186	} else {
187	pCommentEnd = P - `1`;
188	}
189	}
190
191	node_name = String::utf8(pCommentBegin, (int)(pCommentEnd - pCommentBegin));
192	#ifdef DEBUG_XML
193	print_line("XML COMMENT: " + node_name);
194	#endif
195	}
196
197	void XMLParser::_parse_opening_xml_element() {
198	node_type = NODE_ELEMENT;
199	node_empty = false;
200	attributes.clear();
201
202	// find name
203	const char *startName = P;
204
205	// find end of element
206	while (P && P != `'>'` && !_is_white_space(*P)) {
207	next_char();
208	}
209
210	const char *endName = P;
211
212	// find attributes
213	while (P && P != `'>'`) {
214	if (_is_white_space(*P)) {
215	next_char();
216	} else {
217	if (*P != `'/'`) {
218	// we've got an attribute
219
220	// read the attribute names
221	const char *attributeNameBegin = P;
222
223	while (P && !_is_white_space(P) && *P != `'='`) {
224	next_char();
225	}
226
227	if (!*P) {
228	break;
229	}
230
231	const char *attributeNameEnd = P;
232	next_char();
233
234	// read the attribute value
235	// check for quotes and single quotes, thx to murphy
236	while ((P != `'\"'`) && (P != `'\''`) && *P) {
237	next_char();
238	}
239
240	if (!P) { // malformatted xml file*
241	break;
242	}
243
244	const char attributeQuoteChar = *P;
245
246	next_char();
247	const char *attributeValueBegin = P;
248
249	while (P != attributeQuoteChar && P) {
250	next_char();
251	}
252
253	const char *attributeValueEnd = P;
254	if (*P) {
255	next_char();
256	}
257
258	Attribute attr;
259	attr.name = String::utf8(attributeNameBegin,
260	(int)(attributeNameEnd - attributeNameBegin));
261
262	String s = String::utf8(attributeValueBegin,
263	(int)(attributeValueEnd - attributeValueBegin));
264
265	attr.value = s.xml_unescape();
266	attributes.push_back(attr);
267	} else {
268	// tag is closed directly
269	next_char();
270	node_empty = true;
271	break;
272	}
273	}
274	}
275
276	// check if this tag is closing directly
277	if (endName > startName && *(endName - `1`) == `'/'`) {
278	// directly closing tag
279	node_empty = true;
280	endName--;
281	}
282
283	node_name = String::utf8(startName, (int)(endName - startName));
284	#ifdef DEBUG_XML
285	print_line("XML OPEN: " + node_name);
286	#endif
287
288	if (*P) {
289	next_char();
290	}
291	}
292
293	void XMLParser::_parse_current_node() {
294	const char *start = P;
295	node_offset = P - data;
296
297	// more forward until '<' found
298	while (P != `'<'` && P) {
299	next_char();
300	}
301
302	if (P - start > `0`) {
303	// we found some text, store it
304	if (_set_text(start, P)) {
305	return;
306	}
307	}
308
309	if (!*P) {
310	return;
311	}
312
313	next_char();
314
315	// based on current token, parse and report next element
316	switch (*P) {
317	case `'/'`:
318	_parse_closing_xml_element();
319	break;
320	case `'?'`:
321	_ignore_definition();
322	break;
323	case `'!'`:
324	if (!_parse_cdata()) {
325	_parse_comment();
326	}
327	break;
328	default:
329	_parse_opening_xml_element();
330	break;
331	}
332	}
333
334	uint64_t XMLParser::get_node_offset() const {
335	return node_offset;
336	}
337
338	Error XMLParser::seek(uint64_t p_pos) {
339	ERR_FAIL_NULL_V(data, ERR_FILE_EOF);
340	ERR_FAIL_COND_V(p_pos >= length, ERR_FILE_EOF);
341
342	P = data + p_pos;
343
344	return read();
345	}
346
347	void XMLParser::_bind_methods() {
348	ClassDB::bind_method(D_METHOD("read"), &XMLParser::read);
349	ClassDB::bind_method(D_METHOD("get_node_type"), &XMLParser::get_node_type);
350	ClassDB::bind_method(D_METHOD("get_node_name"), &XMLParser::get_node_name);
351	ClassDB::bind_method(D_METHOD("get_node_data"), &XMLParser::get_node_data);
352	ClassDB::bind_method(D_METHOD("get_node_offset"), &XMLParser::get_node_offset);
353	ClassDB::bind_method(D_METHOD("get_attribute_count"), &XMLParser::get_attribute_count);
354	ClassDB::bind_method(D_METHOD("get_attribute_name", "idx"), &XMLParser::get_attribute_name);
355	ClassDB::bind_method(D_METHOD("get_attribute_value", "idx"), &XMLParser::get_attribute_value);
356	ClassDB::bind_method(D_METHOD("has_attribute", "name"), &XMLParser::has_attribute);
357	ClassDB::bind_method(D_METHOD("get_named_attribute_value", "name"), &XMLParser::get_named_attribute_value);
358	ClassDB::bind_method(D_METHOD("get_named_attribute_value_safe", "name"), &XMLParser::get_named_attribute_value_safe);
359	ClassDB::bind_method(D_METHOD("is_empty"), &XMLParser::is_empty);
360	ClassDB::bind_method(D_METHOD("get_current_line"), &XMLParser::get_current_line);
361	ClassDB::bind_method(D_METHOD("skip_section"), &XMLParser::skip_section);
362	ClassDB::bind_method(D_METHOD("seek", "position"), &XMLParser::seek);
363	ClassDB::bind_method(D_METHOD("open", "file"), &XMLParser::open);
364	ClassDB::bind_method(D_METHOD("open_buffer", "buffer"), &XMLParser::open_buffer);
365
366	BIND_ENUM_CONSTANT(NODE_NONE);
367	BIND_ENUM_CONSTANT(NODE_ELEMENT);
368	BIND_ENUM_CONSTANT(NODE_ELEMENT_END);
369	BIND_ENUM_CONSTANT(NODE_TEXT);
370	BIND_ENUM_CONSTANT(NODE_COMMENT);
371	BIND_ENUM_CONSTANT(NODE_CDATA);
372	BIND_ENUM_CONSTANT(NODE_UNKNOWN);
373	}
374
375	Error XMLParser::read() {
376	// if end not reached, parse the node
377	if (P && (P - data) < (int64_t)length - `1` && *P != `0`) {
378	_parse_current_node();
379	return OK;
380	}
381
382	return ERR_FILE_EOF;
383	}
384
385	XMLParser::NodeType XMLParser::get_node_type() {
386	return node_type;
387	}
388
389	String XMLParser::get_node_data() const {
390	ERR_FAIL_COND_V(node_type != NODE_TEXT, "");
391	return node_name;
392	}
393
394	String XMLParser::get_node_name() const {
395	ERR_FAIL_COND_V(node_type == NODE_TEXT, "");
396	return node_name;
397	}
398
399	int XMLParser::get_attribute_count() const {
400	return attributes.size();
401	}
402
403	String XMLParser::get_attribute_name(int p_idx) const {
404	ERR_FAIL_INDEX_V(p_idx, attributes.size(), "");
405	return attributes [p_idx].name;
406	}
407
408	String XMLParser::get_attribute_value(int p_idx) const {
409	ERR_FAIL_INDEX_V(p_idx, attributes.size(), "");
410	return attributes [p_idx].value;
411	}
412
413	bool XMLParser::has_attribute(const String &p_name) const {
414	for (int i = `0`; i < attributes.size(); i++) {
415	if (attributes [i].name == p_name) {
416	return true;
417	}
418	}
419
420	return false;
421	}
422
423	String XMLParser::get_named_attribute_value(const String &p_name) const {
424	int idx = -`1`;
425	for (int i = `0`; i < attributes.size(); i++) {
426	if (attributes [i].name == p_name) {
427	idx = i;
428	break;
429	}
430	}
431
432	ERR_FAIL_COND_V_MSG(idx < `0`, "", "Attribute not found: " + p_name + ".");
433
434	return attributes [idx].value;
435	}
436
437	String XMLParser::get_named_attribute_value_safe(const String &p_name) const {
438	int idx = -`1`;
439	for (int i = `0`; i < attributes.size(); i++) {
440	if (attributes [i].name == p_name) {
441	idx = i;
442	break;
443	}
444	}
445
446	if (idx < `0`) {
447	return "";
448	}
449	return attributes [idx].value;
450	}
451
452	bool XMLParser::is_empty() const {
453	return node_empty;
454	}
455
456	Error XMLParser::open_buffer(const Vector<uint8_t> &p_buffer) {
457	ERR_FAIL_COND_V(p_buffer.size() == `0`, ERR_INVALID_DATA);
458
459	if (data_copy) {
460	memdelete_arr(data_copy);
461	data_copy = nullptr;
462	}
463
464	length = p_buffer.size();
465	data_copy = memnew_arr(char, length + `1`);
466	memcpy(data_copy, p_buffer.ptr(), length);
467	data_copy[length] = `0`;
468	data = data_copy;
469	P = data;
470	current_line = `0`;
471
472	return OK;
473	}
474
475	Error XMLParser::_open_buffer(const uint8_t *p_buffer, size_t p_size) {
476	ERR_FAIL_COND_V(p_size == `0`, ERR_INVALID_DATA);
477	ERR_FAIL_NULL_V(p_buffer, ERR_INVALID_DATA);
478
479	if (data_copy) {
480	memdelete_arr(data_copy);
481	data_copy = nullptr;
482	}
483
484	length = p_size;
485	data = (const char *)p_buffer;
486	P = data;
487	current_line = `0`;
488
489	return OK;
490	}
491
492	Error XMLParser::open(const String &p_path) {
493	Error err;
494	Ref<FileAccess> file = FileAccess::open(p_path, FileAccess::READ, &err);
495
496	ERR_FAIL_COND_V_MSG(err != OK, err, "Cannot open file '" + p_path + "'.");
497
498	length = file ->get_length();
499	ERR_FAIL_COND_V(length < `1`, ERR_FILE_CORRUPT);
500
501	if (data_copy) {
502	memdelete_arr(data_copy);
503	data_copy = nullptr;
504	}
505
506	data_copy = memnew_arr(char, length + `1`);
507	file ->get_buffer((uint8_t *)data_copy, length);
508	data_copy[length] = `0`;
509	data = data_copy;
510	P = data;
511	current_line = `0`;
512
513	return OK;
514	}
515
516	void XMLParser::skip_section() {
517	// skip if this element is empty anyway.
518	if (is_empty()) {
519	return;
520	}
521
522	// read until we've reached the last element in this section
523	int tagcount = `1`;
524
525	while (tagcount && read() == OK) {
526	if (get_node_type() == XMLParser::NODE_ELEMENT &&
527	!is_empty()) {
528	++tagcount;
529	} else if (get_node_type() == XMLParser::NODE_ELEMENT_END) {
530	--tagcount;
531	}
532	}
533	}
534
535	void XMLParser::close() {
536	if (data_copy) {
537	memdelete_arr(data);
538	data_copy = nullptr;
539	}
540	data = nullptr;
541	length = `0`;
542	P = nullptr;
543	node_empty = false;
544	node_type = NODE_NONE;
545	node_offset = `0`;
546	}
547
548	int XMLParser::get_current_line() const {
549	return current_line;
550	}
551
552	XMLParser::~XMLParser() {
553	if (data_copy) {
554	memdelete_arr(data_copy);
555	data_copy = nullptr;
556	}
557	}
558

Browse the source code of Godot/core/io/xml_parser.cpp