1/**************************************************************************/
2/* xml_parser.cpp */
3/**************************************************************************/
4/* This file is part of: */
5/* GODOT ENGINE */
6/* https://godotengine.org */
7/**************************************************************************/
8/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10/* */
11/* Permission is hereby granted, free of charge, to any person obtaining */
12/* a copy of this software and associated documentation files (the */
13/* "Software"), to deal in the Software without restriction, including */
14/* without limitation the rights to use, copy, modify, merge, publish, */
15/* distribute, sublicense, and/or sell copies of the Software, and to */
16/* permit persons to whom the Software is furnished to do so, subject to */
17/* the following conditions: */
18/* */
19/* The above copyright notice and this permission notice shall be */
20/* included in all copies or substantial portions of the Software. */
21/* */
22/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29/**************************************************************************/
30
31#include "xml_parser.h"
32
33#include "core/string/print_string.h"
34
35//#define DEBUG_XML
36
// Reports whether `c` is one of the four whitespace bytes the parser skips:
// space, horizontal tab, line feed or carriage return.
static inline bool _is_white_space(char c) {
	switch (c) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			return true;
		default:
			return false;
	}
}
40
41//! sets the state that text was found. Returns true if set should be set
42bool XMLParser::_set_text(const char *start, const char *end) {
43 // check if text is more than 2 characters, and if not, check if there is
44 // only white space, so that this text won't be reported
45 if (end - start < 3) {
46 const char *p = start;
47 for (; p != end; ++p) {
48 if (!_is_white_space(*p)) {
49 break;
50 }
51 }
52
53 if (p == end) {
54 return false;
55 }
56 }
57
58 // set current text to the parsed text, and replace xml special characters
59 String s = String::utf8(start, (int)(end - start));
60 node_name = s.xml_unescape();
61
62 // current XML node type is text
63 node_type = NODE_TEXT;
64
65 return true;
66}
67
68void XMLParser::_parse_closing_xml_element() {
69 node_type = NODE_ELEMENT_END;
70 node_empty = false;
71 attributes.clear();
72
73 next_char();
74 const char *pBeginClose = P;
75
76 while (*P && *P != '>') {
77 next_char();
78 }
79
80 node_name = String::utf8(pBeginClose, (int)(P - pBeginClose));
81#ifdef DEBUG_XML
82 print_line("XML CLOSE: " + node_name);
83#endif
84
85 if (*P) {
86 next_char();
87 }
88}
89
90void XMLParser::_ignore_definition() {
91 node_type = NODE_UNKNOWN;
92
93 const char *F = P;
94 // move until end marked with '>' reached
95 while (*P && *P != '>') {
96 next_char();
97 }
98 node_name.parse_utf8(F, P - F);
99
100 if (*P) {
101 next_char();
102 }
103}
104
105bool XMLParser::_parse_cdata() {
106 if (*(P + 1) != '[') {
107 return false;
108 }
109
110 node_type = NODE_CDATA;
111
112 // skip '<![CDATA['
113 int count = 0;
114 while (*P && count < 8) {
115 next_char();
116 ++count;
117 }
118
119 if (!*P) {
120 node_name = "";
121 return true;
122 }
123
124 const char *cDataBegin = P;
125 const char *cDataEnd = nullptr;
126
127 // find end of CDATA
128 while (*P && !cDataEnd) {
129 if (*P == '>' &&
130 (*(P - 1) == ']') &&
131 (*(P - 2) == ']')) {
132 cDataEnd = P - 2;
133 }
134
135 next_char();
136 }
137
138 if (!cDataEnd) {
139 cDataEnd = P;
140 }
141 node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
142#ifdef DEBUG_XML
143 print_line("XML CDATA: " + node_name);
144#endif
145
146 return true;
147}
148
149void XMLParser::_parse_comment() {
150 node_type = NODE_COMMENT;
151 P += 1;
152
153 const char *pEndOfInput = data + length;
154 const char *pCommentBegin;
155 const char *pCommentEnd;
156
157 if (P + 1 < pEndOfInput && P[0] == '-' && P[1] == '-') {
158 // Comment, use '-->' as end.
159 pCommentBegin = P + 2;
160 for (pCommentEnd = pCommentBegin; pCommentEnd + 2 < pEndOfInput; pCommentEnd++) {
161 if (pCommentEnd[0] == '-' && pCommentEnd[1] == '-' && pCommentEnd[2] == '>') {
162 break;
163 }
164 }
165 if (pCommentEnd + 2 < pEndOfInput) {
166 P = pCommentEnd + 3;
167 } else {
168 P = pCommentEnd = pEndOfInput;
169 }
170 } else {
171 // Like document type definition, match angle brackets.
172 pCommentBegin = P;
173
174 int count = 1;
175 while (*P && count) {
176 if (*P == '>') {
177 --count;
178 } else if (*P == '<') {
179 ++count;
180 }
181 next_char();
182 }
183
184 if (count) {
185 pCommentEnd = P;
186 } else {
187 pCommentEnd = P - 1;
188 }
189 }
190
191 node_name = String::utf8(pCommentBegin, (int)(pCommentEnd - pCommentBegin));
192#ifdef DEBUG_XML
193 print_line("XML COMMENT: " + node_name);
194#endif
195}
196
197void XMLParser::_parse_opening_xml_element() {
198 node_type = NODE_ELEMENT;
199 node_empty = false;
200 attributes.clear();
201
202 // find name
203 const char *startName = P;
204
205 // find end of element
206 while (*P && *P != '>' && !_is_white_space(*P)) {
207 next_char();
208 }
209
210 const char *endName = P;
211
212 // find attributes
213 while (*P && *P != '>') {
214 if (_is_white_space(*P)) {
215 next_char();
216 } else {
217 if (*P != '/') {
218 // we've got an attribute
219
220 // read the attribute names
221 const char *attributeNameBegin = P;
222
223 while (*P && !_is_white_space(*P) && *P != '=') {
224 next_char();
225 }
226
227 if (!*P) {
228 break;
229 }
230
231 const char *attributeNameEnd = P;
232 next_char();
233
234 // read the attribute value
235 // check for quotes and single quotes, thx to murphy
236 while ((*P != '\"') && (*P != '\'') && *P) {
237 next_char();
238 }
239
240 if (!*P) { // malformatted xml file
241 break;
242 }
243
244 const char attributeQuoteChar = *P;
245
246 next_char();
247 const char *attributeValueBegin = P;
248
249 while (*P != attributeQuoteChar && *P) {
250 next_char();
251 }
252
253 const char *attributeValueEnd = P;
254 if (*P) {
255 next_char();
256 }
257
258 Attribute attr;
259 attr.name = String::utf8(attributeNameBegin,
260 (int)(attributeNameEnd - attributeNameBegin));
261
262 String s = String::utf8(attributeValueBegin,
263 (int)(attributeValueEnd - attributeValueBegin));
264
265 attr.value = s.xml_unescape();
266 attributes.push_back(attr);
267 } else {
268 // tag is closed directly
269 next_char();
270 node_empty = true;
271 break;
272 }
273 }
274 }
275
276 // check if this tag is closing directly
277 if (endName > startName && *(endName - 1) == '/') {
278 // directly closing tag
279 node_empty = true;
280 endName--;
281 }
282
283 node_name = String::utf8(startName, (int)(endName - startName));
284#ifdef DEBUG_XML
285 print_line("XML OPEN: " + node_name);
286#endif
287
288 if (*P) {
289 next_char();
290 }
291}
292
293void XMLParser::_parse_current_node() {
294 const char *start = P;
295 node_offset = P - data;
296
297 // more forward until '<' found
298 while (*P != '<' && *P) {
299 next_char();
300 }
301
302 if (P - start > 0) {
303 // we found some text, store it
304 if (_set_text(start, P)) {
305 return;
306 }
307 }
308
309 if (!*P) {
310 return;
311 }
312
313 next_char();
314
315 // based on current token, parse and report next element
316 switch (*P) {
317 case '/':
318 _parse_closing_xml_element();
319 break;
320 case '?':
321 _ignore_definition();
322 break;
323 case '!':
324 if (!_parse_cdata()) {
325 _parse_comment();
326 }
327 break;
328 default:
329 _parse_opening_xml_element();
330 break;
331 }
332}
333
334uint64_t XMLParser::get_node_offset() const {
335 return node_offset;
336}
337
338Error XMLParser::seek(uint64_t p_pos) {
339 ERR_FAIL_NULL_V(data, ERR_FILE_EOF);
340 ERR_FAIL_COND_V(p_pos >= length, ERR_FILE_EOF);
341
342 P = data + p_pos;
343
344 return read();
345}
346
347void XMLParser::_bind_methods() {
348 ClassDB::bind_method(D_METHOD("read"), &XMLParser::read);
349 ClassDB::bind_method(D_METHOD("get_node_type"), &XMLParser::get_node_type);
350 ClassDB::bind_method(D_METHOD("get_node_name"), &XMLParser::get_node_name);
351 ClassDB::bind_method(D_METHOD("get_node_data"), &XMLParser::get_node_data);
352 ClassDB::bind_method(D_METHOD("get_node_offset"), &XMLParser::get_node_offset);
353 ClassDB::bind_method(D_METHOD("get_attribute_count"), &XMLParser::get_attribute_count);
354 ClassDB::bind_method(D_METHOD("get_attribute_name", "idx"), &XMLParser::get_attribute_name);
355 ClassDB::bind_method(D_METHOD("get_attribute_value", "idx"), &XMLParser::get_attribute_value);
356 ClassDB::bind_method(D_METHOD("has_attribute", "name"), &XMLParser::has_attribute);
357 ClassDB::bind_method(D_METHOD("get_named_attribute_value", "name"), &XMLParser::get_named_attribute_value);
358 ClassDB::bind_method(D_METHOD("get_named_attribute_value_safe", "name"), &XMLParser::get_named_attribute_value_safe);
359 ClassDB::bind_method(D_METHOD("is_empty"), &XMLParser::is_empty);
360 ClassDB::bind_method(D_METHOD("get_current_line"), &XMLParser::get_current_line);
361 ClassDB::bind_method(D_METHOD("skip_section"), &XMLParser::skip_section);
362 ClassDB::bind_method(D_METHOD("seek", "position"), &XMLParser::seek);
363 ClassDB::bind_method(D_METHOD("open", "file"), &XMLParser::open);
364 ClassDB::bind_method(D_METHOD("open_buffer", "buffer"), &XMLParser::open_buffer);
365
366 BIND_ENUM_CONSTANT(NODE_NONE);
367 BIND_ENUM_CONSTANT(NODE_ELEMENT);
368 BIND_ENUM_CONSTANT(NODE_ELEMENT_END);
369 BIND_ENUM_CONSTANT(NODE_TEXT);
370 BIND_ENUM_CONSTANT(NODE_COMMENT);
371 BIND_ENUM_CONSTANT(NODE_CDATA);
372 BIND_ENUM_CONSTANT(NODE_UNKNOWN);
373}
374
375Error XMLParser::read() {
376 // if end not reached, parse the node
377 if (P && (P - data) < (int64_t)length - 1 && *P != 0) {
378 _parse_current_node();
379 return OK;
380 }
381
382 return ERR_FILE_EOF;
383}
384
385XMLParser::NodeType XMLParser::get_node_type() {
386 return node_type;
387}
388
389String XMLParser::get_node_data() const {
390 ERR_FAIL_COND_V(node_type != NODE_TEXT, "");
391 return node_name;
392}
393
394String XMLParser::get_node_name() const {
395 ERR_FAIL_COND_V(node_type == NODE_TEXT, "");
396 return node_name;
397}
398
399int XMLParser::get_attribute_count() const {
400 return attributes.size();
401}
402
403String XMLParser::get_attribute_name(int p_idx) const {
404 ERR_FAIL_INDEX_V(p_idx, attributes.size(), "");
405 return attributes[p_idx].name;
406}
407
408String XMLParser::get_attribute_value(int p_idx) const {
409 ERR_FAIL_INDEX_V(p_idx, attributes.size(), "");
410 return attributes[p_idx].value;
411}
412
413bool XMLParser::has_attribute(const String &p_name) const {
414 for (int i = 0; i < attributes.size(); i++) {
415 if (attributes[i].name == p_name) {
416 return true;
417 }
418 }
419
420 return false;
421}
422
423String XMLParser::get_named_attribute_value(const String &p_name) const {
424 int idx = -1;
425 for (int i = 0; i < attributes.size(); i++) {
426 if (attributes[i].name == p_name) {
427 idx = i;
428 break;
429 }
430 }
431
432 ERR_FAIL_COND_V_MSG(idx < 0, "", "Attribute not found: " + p_name + ".");
433
434 return attributes[idx].value;
435}
436
437String XMLParser::get_named_attribute_value_safe(const String &p_name) const {
438 int idx = -1;
439 for (int i = 0; i < attributes.size(); i++) {
440 if (attributes[i].name == p_name) {
441 idx = i;
442 break;
443 }
444 }
445
446 if (idx < 0) {
447 return "";
448 }
449 return attributes[idx].value;
450}
451
452bool XMLParser::is_empty() const {
453 return node_empty;
454}
455
456Error XMLParser::open_buffer(const Vector<uint8_t> &p_buffer) {
457 ERR_FAIL_COND_V(p_buffer.size() == 0, ERR_INVALID_DATA);
458
459 if (data_copy) {
460 memdelete_arr(data_copy);
461 data_copy = nullptr;
462 }
463
464 length = p_buffer.size();
465 data_copy = memnew_arr(char, length + 1);
466 memcpy(data_copy, p_buffer.ptr(), length);
467 data_copy[length] = 0;
468 data = data_copy;
469 P = data;
470 current_line = 0;
471
472 return OK;
473}
474
475Error XMLParser::_open_buffer(const uint8_t *p_buffer, size_t p_size) {
476 ERR_FAIL_COND_V(p_size == 0, ERR_INVALID_DATA);
477 ERR_FAIL_NULL_V(p_buffer, ERR_INVALID_DATA);
478
479 if (data_copy) {
480 memdelete_arr(data_copy);
481 data_copy = nullptr;
482 }
483
484 length = p_size;
485 data = (const char *)p_buffer;
486 P = data;
487 current_line = 0;
488
489 return OK;
490}
491
492Error XMLParser::open(const String &p_path) {
493 Error err;
494 Ref<FileAccess> file = FileAccess::open(p_path, FileAccess::READ, &err);
495
496 ERR_FAIL_COND_V_MSG(err != OK, err, "Cannot open file '" + p_path + "'.");
497
498 length = file->get_length();
499 ERR_FAIL_COND_V(length < 1, ERR_FILE_CORRUPT);
500
501 if (data_copy) {
502 memdelete_arr(data_copy);
503 data_copy = nullptr;
504 }
505
506 data_copy = memnew_arr(char, length + 1);
507 file->get_buffer((uint8_t *)data_copy, length);
508 data_copy[length] = 0;
509 data = data_copy;
510 P = data;
511 current_line = 0;
512
513 return OK;
514}
515
516void XMLParser::skip_section() {
517 // skip if this element is empty anyway.
518 if (is_empty()) {
519 return;
520 }
521
522 // read until we've reached the last element in this section
523 int tagcount = 1;
524
525 while (tagcount && read() == OK) {
526 if (get_node_type() == XMLParser::NODE_ELEMENT &&
527 !is_empty()) {
528 ++tagcount;
529 } else if (get_node_type() == XMLParser::NODE_ELEMENT_END) {
530 --tagcount;
531 }
532 }
533}
534
535void XMLParser::close() {
536 if (data_copy) {
537 memdelete_arr(data);
538 data_copy = nullptr;
539 }
540 data = nullptr;
541 length = 0;
542 P = nullptr;
543 node_empty = false;
544 node_type = NODE_NONE;
545 node_offset = 0;
546}
547
548int XMLParser::get_current_line() const {
549 return current_line;
550}
551
552XMLParser::~XMLParser() {
553 if (data_copy) {
554 memdelete_arr(data_copy);
555 data_copy = nullptr;
556 }
557}
558