1/*
2 * Copyright (c) 2020 - 2023 the ThorVG project. All rights reserved.
3
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include <cstring>
24#include <ctype.h>
25#include <string>
26
27#ifdef _WIN32
28 #include <malloc.h>
29#elif defined(__linux__)
30 #include <alloca.h>
31#else
32 #include <stdlib.h>
33#endif
34
35#include "tvgXmlParser.h"
36#include "tvgSvgUtil.h"
37
38/************************************************************************/
39/* Internal Class Implementation */
40/************************************************************************/
41
42bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue)
43{
44#ifdef THORVG_LOG_ENABLED
45 const auto attributesNum = 6;
46 const struct
47 {
48 const char* tag;
49 bool tagWildcard; //If true, it is assumed that a wildcard is used after the tag. (ex: tagName*)
50 const char* value;
51 } attributes[] = {
52 {"id", false, nullptr},
53 {"data-name", false, nullptr},
54 {"overflow", false, "visible"},
55 {"version", false, nullptr},
56 {"xmlns", true, nullptr},
57 {"xml:space", false, nullptr},
58 };
59
60 for (unsigned int i = 0; i < attributesNum; ++i) {
61 if (!strncmp(tagAttribute, attributes[i].tag, attributes[i].tagWildcard ? strlen(attributes[i].tag) : strlen(tagAttribute))) {
62 if (attributes[i].value && tagValue) {
63 if (!strncmp(tagValue, attributes[i].value, strlen(tagValue))) {
64 return true;
65 } else continue;
66 }
67 return true;
68 }
69 }
70 return false;
71#endif
72 return true;
73}
74
75
//Returns the first whitespace character in [itr, itrEnd), or itrEnd when there is none.
static const char* _simpleXmlFindWhiteSpace(const char* itr, const char* itrEnd)
{
    const char* cur = itr;
    while (cur < itrEnd && !isspace((unsigned char)*cur)) ++cur;
    return cur;
}
83
84
//Returns the first non-whitespace character in [itr, itrEnd), or itrEnd when everything is whitespace.
static const char* _simpleXmlSkipWhiteSpace(const char* itr, const char* itrEnd)
{
    const char* cur = itr;
    while (cur < itrEnd && isspace((unsigned char)*cur)) ++cur;
    return cur;
}
92
93
/*
 * Walks backwards from itr over trailing whitespace and returns the new end of the range.
 * The character at itrStart itself is never examined, so the result is at least
 * itrStart + 1 whenever itr > itrStart.
 */
static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrStart)
{
    const char* last = itr - 1;
    while (last > itrStart && isspace((unsigned char)*last)) --last;
    return last + 1;
}
101
102
103static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd)
104{
105 auto p = itr;
106 while (itr < itrEnd && *itr == '&') {
107 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
108 if (strncmp(itr, xmlEntity[i], xmlEntityLength[i]) == 0) {
109 itr += xmlEntityLength[i];
110 break;
111 }
112 }
113 if (itr == p) break;
114 p = itr;
115 }
116 return itr;
117}
118
119
120static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart)
121{
122 auto p = itr;
123 while (itr > itrStart && *(itr - 1) == ';') {
124 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
125 if (itr - xmlEntityLength[i] > itrStart &&
126 strncmp(itr - xmlEntityLength[i], xmlEntity[i], xmlEntityLength[i]) == 0) {
127 itr -= xmlEntityLength[i];
128 break;
129 }
130 }
131 if (itr == p) break;
132 p = itr;
133 }
134 return itr;
135}
136
137
138static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd)
139{
140 itr = _simpleXmlSkipWhiteSpace(itr, itrEnd);
141 auto p = itr;
142 while (true) {
143 if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr;
144 else break;
145 if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr;
146 else break;
147 }
148 return itr;
149}
150
151
152static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char* itrStart)
153{
154 itr = _simpleXmlUnskipWhiteSpace(itr, itrStart);
155 auto p = itr;
156 while (true) {
157 if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr;
158 else break;
159 if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr;
160 else break;
161 }
162 return itr;
163}
164
165
//Returns the first '<' in [itr, itrEnd), or a null pointer when there is none.
static const char* _simpleXmlFindStartTag(const char* itr, const char* itrEnd)
{
    const void* hit = memchr(itr, '<', (size_t)(itrEnd - itr));
    return (const char*)hit;
}
170
171
/*
 * Returns the first '>' or '<' in [itr, itrEnd) that is not inside a quoted
 * attribute value, or nullptr when there is none.
 *
 * Both "..." and '...' delimit values; a quote character of the other kind
 * inside a value is ordinary text and does not open or close anything.
 */
static const char* _simpleXmlFindEndTag(const char* itr, const char* itrEnd)
{
    char quote = '\0';   //the quote character currently open, '\0' when outside of a value
    for (; itr < itrEnd; itr++) {
        const char c = *itr;
        if (quote) {
            if (c == quote) quote = '\0';
            continue;
        }
        if ((c == '"') || (c == '\'')) quote = c;
        else if ((c == '>') || (c == '<')) return itr;
    }
    return nullptr;
}
184
185
//Finds the first "-->" lying completely inside [itr, itrEnd) and returns the position of its '>', or nullptr.
static const char* _simpleXmlFindEndCommentTag(const char* itr, const char* itrEnd)
{
    //The terminator is three bytes long, so stop as soon as fewer remain.
    while (itrEnd - itr >= 3) {
        if (!memcmp(itr, "-->", 3)) return itr + 2;
        ++itr;
    }
    return nullptr;
}
193
194
//Finds the first "]]>" lying completely inside [itr, itrEnd) and returns the position of its '>', or nullptr.
static const char* _simpleXmlFindEndCdataTag(const char* itr, const char* itrEnd)
{
    //The terminator is three bytes long, so stop as soon as fewer remain.
    while (itrEnd - itr >= 3) {
        if (!memcmp(itr, "]]>", 3)) return itr + 2;
        ++itr;
    }
    return nullptr;
}
202
203
//Returns the first '>' in [itr, itrEnd), or nullptr when there is none.
static const char* _simpleXmlFindDoctypeChildEndTag(const char* itr, const char* itrEnd)
{
    //An empty (or inverted) range has nothing to search.
    if (itr >= itrEnd) return nullptr;
    return (const char*)memchr(itr, '>', (size_t)(itrEnd - itr));
}
211
212
213static SimpleXMLType _getXMLType(const char* itr, const char* itrEnd, size_t &toff)
214{
215 toff = 0;
216 if (itr[1] == '/') {
217 toff = 1;
218 return SimpleXMLType::Close;
219 } else if (itr[1] == '?') {
220 toff = 1;
221 return SimpleXMLType::Processing;
222 } else if (itr[1] == '!') {
223 if ((itr + sizeof("<!DOCTYPE>") - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE", sizeof("DOCTYPE") - 1)) && ((itr[2 + sizeof("DOCTYPE") - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE") - 1])))) {
224 toff = sizeof("!DOCTYPE") - 1;
225 return SimpleXMLType::Doctype;
226 } else if ((itr + sizeof("<![CDATA[]]>") - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[", sizeof("[CDATA[") - 1))) {
227 toff = sizeof("![CDATA[") - 1;
228 return SimpleXMLType::CData;
229 } else if ((itr + sizeof("<!---->") - 1 < itrEnd) && (!memcmp(itr + 2, "--", sizeof("--") - 1))) {
230 toff = sizeof("!--") - 1;
231 return SimpleXMLType::Comment;
232 } else if (itr + sizeof("<!>") - 1 < itrEnd) {
233 toff = sizeof("!") - 1;
234 return SimpleXMLType::DoctypeChild;
235 }
236 return SimpleXMLType::Open;
237 }
238 return SimpleXMLType::Open;
239}
240
241
242/************************************************************************/
243/* External Class Implementation */
244/************************************************************************/
245
246const char* simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type)
247{
248#ifdef THORVG_LOG_ENABLED
249 static const char* TYPE_NAMES[] = {
250 "Svg",
251 "G",
252 "Defs",
253 "Animation",
254 "Arc",
255 "Circle",
256 "Ellipse",
257 "Image",
258 "Line",
259 "Path",
260 "Polygon",
261 "Polyline",
262 "Rect",
263 "Text",
264 "TextArea",
265 "Tspan",
266 "Use",
267 "Video",
268 "ClipPath",
269 "Mask",
270 "Symbol",
271 "Unknown",
272 };
273 return TYPE_NAMES[(int) type];
274#endif
275 return nullptr;
276}
277
278
279bool isIgnoreUnsupportedLogElements(TVG_UNUSED const char* tagName)
280{
281#ifdef THORVG_LOG_ENABLED
282 const auto elementsNum = 1;
283 const char* const elements[] = { "title" };
284
285 for (unsigned int i = 0; i < elementsNum; ++i) {
286 if (!strncmp(tagName, elements[i], strlen(tagName))) {
287 return true;
288 }
289 }
290 return false;
291#else
292 return true;
293#endif
294}
295
296
297bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
298{
299 const char *itr = buf, *itrEnd = buf + bufLength;
300 char* tmpBuf = (char*)malloc(bufLength + 1);
301
302 if (!buf || !func || !tmpBuf) goto error;
303
304 while (itr < itrEnd) {
305 const char* p = _skipWhiteSpacesAndXmlEntities(itr, itrEnd);
306 const char *key, *keyEnd, *value, *valueEnd;
307 char* tval;
308
309 if (p == itrEnd) goto success;
310
311 key = p;
312 for (keyEnd = key; keyEnd < itrEnd; keyEnd++) {
313 if ((*keyEnd == '=') || (isspace((unsigned char)*keyEnd))) break;
314 }
315 if (keyEnd == itrEnd) goto error;
316 if (keyEnd == key) continue;
317
318 if (*keyEnd == '=') value = keyEnd + 1;
319 else {
320 value = (const char*)memchr(keyEnd, '=', itrEnd - keyEnd);
321 if (!value) goto error;
322 value++;
323 }
324 keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key);
325
326 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
327 if (value == itrEnd) goto error;
328
329 if ((*value == '"') || (*value == '\'')) {
330 valueEnd = (const char*)memchr(value + 1, *value, itrEnd - value);
331 if (!valueEnd) goto error;
332 value++;
333 } else {
334 valueEnd = _simpleXmlFindWhiteSpace(value, itrEnd);
335 }
336
337 itr = valueEnd + 1;
338
339 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
340 valueEnd = _unskipWhiteSpacesAndXmlEntities(valueEnd, value);
341
342 memcpy(tmpBuf, key, keyEnd - key);
343 tmpBuf[keyEnd - key] = '\0';
344
345 tval = tmpBuf + (keyEnd - key) + 1;
346 int i = 0;
347 while (value < valueEnd) {
348 value = _simpleXmlSkipXmlEntities(value, valueEnd);
349 tval[i++] = *value;
350 value++;
351 }
352 tval[i] = '\0';
353
354 if (!func((void*)data, tmpBuf, tval)) {
355 if (!_isIgnoreUnsupportedLogAttributes(tmpBuf, tval)) {
356 TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", tmpBuf, tval ? tval : "NONE");
357 }
358 }
359 }
360
361success:
362 free(tmpBuf);
363 return true;
364
365error:
366 free(tmpBuf);
367 return false;
368}
369
370
371bool simpleXmlParse(const char* buf, unsigned bufLength, bool strip, simpleXMLCb func, const void* data)
372{
373 const char *itr = buf, *itrEnd = buf + bufLength;
374
375 if (!buf || !func) return false;
376
377 while (itr < itrEnd) {
378 if (itr[0] == '<') {
379 //Invalid case
380 if (itr + 1 >= itrEnd) return false;
381
382 size_t toff = 0;
383 SimpleXMLType type = _getXMLType(itr, itrEnd, toff);
384
385 const char* p;
386 if (type == SimpleXMLType::CData) p = _simpleXmlFindEndCdataTag(itr + 1 + toff, itrEnd);
387 else if (type == SimpleXMLType::DoctypeChild) p = _simpleXmlFindDoctypeChildEndTag(itr + 1 + toff, itrEnd);
388 else if (type == SimpleXMLType::Comment) p = _simpleXmlFindEndCommentTag(itr + 1 + toff, itrEnd);
389 else p = _simpleXmlFindEndTag(itr + 1 + toff, itrEnd);
390
391 if (p) {
392 //Invalid case: '<' nested
393 if (*p == '<' && type != SimpleXMLType::Doctype) return false;
394 const char *start, *end;
395
396 start = itr + 1 + toff;
397 end = p;
398
399 switch (type) {
400 case SimpleXMLType::Open: {
401 if (p[-1] == '/') {
402 type = SimpleXMLType::OpenEmpty;
403 end--;
404 }
405 break;
406 }
407 case SimpleXMLType::CData: {
408 if (!memcmp(p - 2, "]]", 2)) end -= 2;
409 break;
410 }
411 case SimpleXMLType::Processing: {
412 if (p[-1] == '?') end--;
413 break;
414 }
415 case SimpleXMLType::Comment: {
416 if (!memcmp(p - 2, "--", 2)) end -= 2;
417 break;
418 }
419 default: {
420 break;
421 }
422 }
423
424 if (strip && (type != SimpleXMLType::CData)) {
425 start = _skipWhiteSpacesAndXmlEntities(start, end);
426 end = _unskipWhiteSpacesAndXmlEntities(end, start);
427 }
428
429 if (!func((void*)data, type, start, (unsigned int)(end - start))) return false;
430
431 itr = p + 1;
432 } else {
433 return false;
434 }
435 } else {
436 const char *p, *end;
437
438 if (strip) {
439 p = itr;
440 p = _skipWhiteSpacesAndXmlEntities(p, itrEnd);
441 if (p) {
442 if (!func((void*)data, SimpleXMLType::Ignored, itr, (unsigned int)(p - itr))) return false;
443 itr = p;
444 }
445 }
446
447 p = _simpleXmlFindStartTag(itr, itrEnd);
448 if (!p) p = itrEnd;
449
450 end = p;
451 if (strip) end = _unskipWhiteSpacesAndXmlEntities(end, itr);
452
453 if (itr != end && !func((void*)data, SimpleXMLType::Data, itr, (unsigned int)(end - itr))) return false;
454
455 if (strip && (end < p) && !func((void*)data, SimpleXMLType::Ignored, end, (unsigned int)(p - end))) return false;
456
457 itr = p;
458 }
459 }
460 return true;
461}
462
463
464bool simpleXmlParseW3CAttribute(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
465{
466 const char* end;
467 char* key;
468 char* val;
469 char* next;
470
471 if (!buf) return false;
472
473 end = buf + bufLength;
474 key = (char*)alloca(end - buf + 1);
475 val = (char*)alloca(end - buf + 1);
476
477 if (buf == end) return true;
478
479 do {
480 char* sep = (char*)strchr(buf, ':');
481 next = (char*)strchr(buf, ';');
482 if (sep >= end) {
483 next = nullptr;
484 sep = nullptr;
485 }
486 if (next >= end) next = nullptr;
487
488 key[0] = '\0';
489 val[0] = '\0';
490
491 if (next == nullptr && sep != nullptr) {
492 memcpy(key, buf, sep - buf);
493 key[sep - buf] = '\0';
494
495 memcpy(val, sep + 1, end - sep - 1);
496 val[end - sep - 1] = '\0';
497 } else if (sep < next && sep != nullptr) {
498 memcpy(key, buf, sep - buf);
499 key[sep - buf] = '\0';
500
501 memcpy(val, sep + 1, next - sep - 1);
502 val[next - sep - 1] = '\0';
503 } else if (next) {
504 memcpy(key, buf, next - buf);
505 key[next - buf] = '\0';
506 }
507
508 if (key[0]) {
509 key = const_cast<char*>(_simpleXmlSkipWhiteSpace(key, key + strlen(key)));
510 key[_simpleXmlUnskipWhiteSpace(key + strlen(key) , key) - key] = '\0';
511 val = const_cast<char*>(_simpleXmlSkipWhiteSpace(val, val + strlen(val)));
512 val[_simpleXmlUnskipWhiteSpace(val + strlen(val) , val) - val] = '\0';
513
514 if (!func((void*)data, key, val)) {
515 if (!_isIgnoreUnsupportedLogAttributes(key, val)) {
516 TVGLOG("SVG", "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID", key, val ? val : "NONE");
517 }
518 }
519 }
520
521 buf = next + 1;
522 } while (next != nullptr);
523
524 return true;
525}
526
527
528/*
529 * Supported formats:
530 * tag {}, .name {}, tag.name{}
531 */
532const char* simpleXmlParseCSSAttribute(const char* buf, unsigned bufLength, char** tag, char** name, const char** attrs, unsigned* attrsLength)
533{
534 if (!buf) return nullptr;
535
536 *tag = *name = nullptr;
537 *attrsLength = 0;
538
539 auto itr = _simpleXmlSkipWhiteSpace(buf, buf + bufLength);
540 auto itrEnd = (const char*)memchr(buf, '{', bufLength);
541
542 if (!itrEnd || itr == itrEnd) return nullptr;
543
544 auto nextElement = (const char*)memchr(itrEnd, '}', bufLength - (itrEnd - buf));
545 if (!nextElement) return nullptr;
546
547 *attrs = itrEnd + 1;
548 *attrsLength = nextElement - *attrs;
549
550 const char *p;
551
552 itrEnd = _simpleXmlUnskipWhiteSpace(itrEnd, itr);
553 if (*(itrEnd - 1) == '.') return nullptr;
554
555 for (p = itr; p < itrEnd; p++) {
556 if (*p == '.') break;
557 }
558
559 if (p == itr) *tag = strdup("all");
560 else *tag = svgUtilStrndup(itr, p - itr);
561
562 if (p == itrEnd) *name = nullptr;
563 else *name = svgUtilStrndup(p + 1, itrEnd - p - 1);
564
565 return (nextElement ? nextElement + 1 : nullptr);
566}
567
568
569const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength)
570{
571 const char *itr = buf, *itrEnd = buf + bufLength;
572
573 for (; itr < itrEnd; itr++) {
574 if (!isspace((unsigned char)*itr)) {
575 //User skip tagname and already gave it the attributes.
576 if (*itr == '=') return buf;
577 } else {
578 itr = _simpleXmlUnskipXmlEntities(itr, buf);
579 if (itr == itrEnd) return nullptr;
580 return itr;
581 }
582 }
583
584 return nullptr;
585}
586