1 | /* |
2 | * Copyright (c) 2020 - 2023 the ThorVG project. All rights reserved. |
3 | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | * of this software and associated documentation files (the "Software"), to deal |
6 | * in the Software without restriction, including without limitation the rights |
7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | * copies of the Software, and to permit persons to whom the Software is |
9 | * furnished to do so, subject to the following conditions: |
10 | |
11 | * The above copyright notice and this permission notice shall be included in all |
12 | * copies or substantial portions of the Software. |
13 | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
20 | * SOFTWARE. |
21 | */ |
22 | |
23 | #include <cstring> |
24 | #include <ctype.h> |
25 | #include <string> |
26 | |
27 | #ifdef _WIN32 |
28 | #include <malloc.h> |
29 | #elif defined(__linux__) |
30 | #include <alloca.h> |
31 | #else |
32 | #include <stdlib.h> |
33 | #endif |
34 | |
35 | #include "tvgXmlParser.h" |
36 | #include "tvgSvgUtil.h" |
37 | |
38 | /************************************************************************/ |
39 | /* Internal Class Implementation */ |
40 | /************************************************************************/ |
41 | |
42 | bool _isIgnoreUnsupportedLogAttributes(TVG_UNUSED const char* tagAttribute, TVG_UNUSED const char* tagValue) |
43 | { |
44 | #ifdef THORVG_LOG_ENABLED |
45 | const auto attributesNum = 6; |
46 | const struct |
47 | { |
48 | const char* tag; |
49 | bool tagWildcard; //If true, it is assumed that a wildcard is used after the tag. (ex: tagName*) |
50 | const char* value; |
51 | } attributes[] = { |
52 | {"id" , false, nullptr}, |
53 | {"data-name" , false, nullptr}, |
54 | {"overflow" , false, "visible" }, |
55 | {"version" , false, nullptr}, |
56 | {"xmlns" , true, nullptr}, |
57 | {"xml:space" , false, nullptr}, |
58 | }; |
59 | |
60 | for (unsigned int i = 0; i < attributesNum; ++i) { |
61 | if (!strncmp(tagAttribute, attributes[i].tag, attributes[i].tagWildcard ? strlen(attributes[i].tag) : strlen(tagAttribute))) { |
62 | if (attributes[i].value && tagValue) { |
63 | if (!strncmp(tagValue, attributes[i].value, strlen(tagValue))) { |
64 | return true; |
65 | } else continue; |
66 | } |
67 | return true; |
68 | } |
69 | } |
70 | return false; |
71 | #endif |
72 | return true; |
73 | } |
74 | |
75 | |
/*
 * Returns the position of the first whitespace character in [itr, itrEnd),
 * or the end of the scan when the range holds none.
 */
static const char* _simpleXmlFindWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd && !isspace((unsigned char)*itr)) ++itr;
    return itr;
}
83 | |
84 | |
/*
 * Returns the position of the first non-whitespace character in [itr, itrEnd),
 * or the end of the scan when only whitespace is left.
 */
static const char* _simpleXmlSkipWhiteSpace(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd) {
        if (!isspace((unsigned char)*itr)) return itr;
        ++itr;
    }
    return itr;
}
92 | |
93 | |
/*
 * Walks backwards from itr (exclusive) over trailing whitespace and returns
 * the position just past the last character that is kept.
 *
 * The character at itrStart itself is never examined, so the result is never
 * lower than itrStart + 1 when itr is above itrStart.
 */
static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrStart)
{
    //Nothing lies in front of itr: hand it back untouched.
    if (itr <= itrStart) return itr;

    const char* last = itr - 1;
    while (last > itrStart && isspace((unsigned char)*last)) --last;
    return last + 1;
}
101 | |
102 | |
103 | static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd) |
104 | { |
105 | auto p = itr; |
106 | while (itr < itrEnd && *itr == '&') { |
107 | for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) { |
108 | if (strncmp(itr, xmlEntity[i], xmlEntityLength[i]) == 0) { |
109 | itr += xmlEntityLength[i]; |
110 | break; |
111 | } |
112 | } |
113 | if (itr == p) break; |
114 | p = itr; |
115 | } |
116 | return itr; |
117 | } |
118 | |
119 | |
120 | static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart) |
121 | { |
122 | auto p = itr; |
123 | while (itr > itrStart && *(itr - 1) == ';') { |
124 | for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) { |
125 | if (itr - xmlEntityLength[i] > itrStart && |
126 | strncmp(itr - xmlEntityLength[i], xmlEntity[i], xmlEntityLength[i]) == 0) { |
127 | itr -= xmlEntityLength[i]; |
128 | break; |
129 | } |
130 | } |
131 | if (itr == p) break; |
132 | p = itr; |
133 | } |
134 | return itr; |
135 | } |
136 | |
137 | |
138 | static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd) |
139 | { |
140 | itr = _simpleXmlSkipWhiteSpace(itr, itrEnd); |
141 | auto p = itr; |
142 | while (true) { |
143 | if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr; |
144 | else break; |
145 | if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr; |
146 | else break; |
147 | } |
148 | return itr; |
149 | } |
150 | |
151 | |
152 | static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char* itrStart) |
153 | { |
154 | itr = _simpleXmlUnskipWhiteSpace(itr, itrStart); |
155 | auto p = itr; |
156 | while (true) { |
157 | if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr; |
158 | else break; |
159 | if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr; |
160 | else break; |
161 | } |
162 | return itr; |
163 | } |
164 | |
165 | |
/*
 * Returns the position of the first '<' in [itr, itrEnd), or a null pointer
 * when the range holds none.
 */
static const char* _simpleXmlFindStartTag(const char* itr, const char* itrEnd)
{
    const void* hit = memchr(itr, '<', itrEnd - itr);
    return (const char*)hit;
}
170 | |
171 | |
/*
 * Returns the position of the first '>' or '<' in [itr, itrEnd) that is not
 * enclosed in double quotes, or nullptr when there is none.
 *
 * Only '"' opens or closes a quoted section.
 */
static const char* _simpleXmlFindEndTag(const char* itr, const char* itrEnd)
{
    bool quoted = false;

    while (itr < itrEnd) {
        const char c = *itr;
        if (c == '"') quoted = !quoted;
        if (!quoted && (c == '>' || c == '<')) return itr;
        ++itr;
    }
    return nullptr;
}
184 | |
185 | |
/*
 * Looks for the comment terminator "-->" in [itr, itrEnd).
 *
 * Returns the position of the closing '>' or nullptr when the terminator
 * does not lie completely inside the range.
 */
static const char* _simpleXmlFindEndCommentTag(const char* itr, const char* itrEnd)
{
    for (; itr < itrEnd; itr++) {
        if ((*itr == '-') && ((itr + 1 < itrEnd) && (*(itr + 1) == '-')) && ((itr + 2 < itrEnd) && (*(itr + 2) == '>'))) return itr + 2;
    }
    return nullptr;
}
193 | |
194 | |
/*
 * Looks for the CDATA terminator "]]>" in [itr, itrEnd).
 *
 * Returns the position of the closing '>' or nullptr when the terminator
 * does not lie completely inside the range.
 */
static const char* _simpleXmlFindEndCdataTag(const char* itr, const char* itrEnd)
{
    //At least three characters must be left for the terminator to fit.
    while (itrEnd - itr > 2) {
        if (memcmp(itr, "]]>", 3) == 0) return itr + 2;
        ++itr;
    }
    return nullptr;
}
202 | |
203 | |
/*
 * Returns the position of the first '>' in [itr, itrEnd), or nullptr when the
 * range is empty or holds none.
 */
static const char* _simpleXmlFindDoctypeChildEndTag(const char* itr, const char* itrEnd)
{
    if (itr >= itrEnd) return nullptr;
    return (const char*)memchr(itr, '>', itrEnd - itr);
}
211 | |
212 | |
213 | static SimpleXMLType _getXMLType(const char* itr, const char* itrEnd, size_t &toff) |
214 | { |
215 | toff = 0; |
216 | if (itr[1] == '/') { |
217 | toff = 1; |
218 | return SimpleXMLType::Close; |
219 | } else if (itr[1] == '?') { |
220 | toff = 1; |
221 | return SimpleXMLType::Processing; |
222 | } else if (itr[1] == '!') { |
223 | if ((itr + sizeof("<!DOCTYPE>" ) - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE" , sizeof("DOCTYPE" ) - 1)) && ((itr[2 + sizeof("DOCTYPE" ) - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE" ) - 1])))) { |
224 | toff = sizeof("!DOCTYPE" ) - 1; |
225 | return SimpleXMLType::Doctype; |
226 | } else if ((itr + sizeof("<![CDATA[]]>" ) - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[" , sizeof("[CDATA[" ) - 1))) { |
227 | toff = sizeof("![CDATA[" ) - 1; |
228 | return SimpleXMLType::CData; |
229 | } else if ((itr + sizeof("<!---->" ) - 1 < itrEnd) && (!memcmp(itr + 2, "--" , sizeof("--" ) - 1))) { |
230 | toff = sizeof("!--" ) - 1; |
231 | return SimpleXMLType::Comment; |
232 | } else if (itr + sizeof("<!>" ) - 1 < itrEnd) { |
233 | toff = sizeof("!" ) - 1; |
234 | return SimpleXMLType::DoctypeChild; |
235 | } |
236 | return SimpleXMLType::Open; |
237 | } |
238 | return SimpleXMLType::Open; |
239 | } |
240 | |
241 | |
242 | /************************************************************************/ |
243 | /* External Class Implementation */ |
244 | /************************************************************************/ |
245 | |
246 | const char* simpleXmlNodeTypeToString(TVG_UNUSED SvgNodeType type) |
247 | { |
248 | #ifdef THORVG_LOG_ENABLED |
249 | static const char* TYPE_NAMES[] = { |
250 | "Svg" , |
251 | "G" , |
252 | "Defs" , |
253 | "Animation" , |
254 | "Arc" , |
255 | "Circle" , |
256 | "Ellipse" , |
257 | "Image" , |
258 | "Line" , |
259 | "Path" , |
260 | "Polygon" , |
261 | "Polyline" , |
262 | "Rect" , |
263 | "Text" , |
264 | "TextArea" , |
265 | "Tspan" , |
266 | "Use" , |
267 | "Video" , |
268 | "ClipPath" , |
269 | "Mask" , |
270 | "Symbol" , |
271 | "Unknown" , |
272 | }; |
273 | return TYPE_NAMES[(int) type]; |
274 | #endif |
275 | return nullptr; |
276 | } |
277 | |
278 | |
279 | bool isIgnoreUnsupportedLogElements(TVG_UNUSED const char* tagName) |
280 | { |
281 | #ifdef THORVG_LOG_ENABLED |
282 | const auto elementsNum = 1; |
283 | const char* const elements[] = { "title" }; |
284 | |
285 | for (unsigned int i = 0; i < elementsNum; ++i) { |
286 | if (!strncmp(tagName, elements[i], strlen(tagName))) { |
287 | return true; |
288 | } |
289 | } |
290 | return false; |
291 | #else |
292 | return true; |
293 | #endif |
294 | } |
295 | |
296 | |
297 | bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data) |
298 | { |
299 | const char *itr = buf, *itrEnd = buf + bufLength; |
300 | char* tmpBuf = (char*)malloc(bufLength + 1); |
301 | |
302 | if (!buf || !func || !tmpBuf) goto error; |
303 | |
304 | while (itr < itrEnd) { |
305 | const char* p = _skipWhiteSpacesAndXmlEntities(itr, itrEnd); |
306 | const char *key, *keyEnd, *value, *valueEnd; |
307 | char* tval; |
308 | |
309 | if (p == itrEnd) goto success; |
310 | |
311 | key = p; |
312 | for (keyEnd = key; keyEnd < itrEnd; keyEnd++) { |
313 | if ((*keyEnd == '=') || (isspace((unsigned char)*keyEnd))) break; |
314 | } |
315 | if (keyEnd == itrEnd) goto error; |
316 | if (keyEnd == key) continue; |
317 | |
318 | if (*keyEnd == '=') value = keyEnd + 1; |
319 | else { |
320 | value = (const char*)memchr(keyEnd, '=', itrEnd - keyEnd); |
321 | if (!value) goto error; |
322 | value++; |
323 | } |
324 | keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key); |
325 | |
326 | value = _skipWhiteSpacesAndXmlEntities(value, itrEnd); |
327 | if (value == itrEnd) goto error; |
328 | |
329 | if ((*value == '"') || (*value == '\'')) { |
330 | valueEnd = (const char*)memchr(value + 1, *value, itrEnd - value); |
331 | if (!valueEnd) goto error; |
332 | value++; |
333 | } else { |
334 | valueEnd = _simpleXmlFindWhiteSpace(value, itrEnd); |
335 | } |
336 | |
337 | itr = valueEnd + 1; |
338 | |
339 | value = _skipWhiteSpacesAndXmlEntities(value, itrEnd); |
340 | valueEnd = _unskipWhiteSpacesAndXmlEntities(valueEnd, value); |
341 | |
342 | memcpy(tmpBuf, key, keyEnd - key); |
343 | tmpBuf[keyEnd - key] = '\0'; |
344 | |
345 | tval = tmpBuf + (keyEnd - key) + 1; |
346 | int i = 0; |
347 | while (value < valueEnd) { |
348 | value = _simpleXmlSkipXmlEntities(value, valueEnd); |
349 | tval[i++] = *value; |
350 | value++; |
351 | } |
352 | tval[i] = '\0'; |
353 | |
354 | if (!func((void*)data, tmpBuf, tval)) { |
355 | if (!_isIgnoreUnsupportedLogAttributes(tmpBuf, tval)) { |
356 | TVGLOG("SVG" , "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]" , simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID" , tmpBuf, tval ? tval : "NONE" ); |
357 | } |
358 | } |
359 | } |
360 | |
361 | success: |
362 | free(tmpBuf); |
363 | return true; |
364 | |
365 | error: |
366 | free(tmpBuf); |
367 | return false; |
368 | } |
369 | |
370 | |
371 | bool simpleXmlParse(const char* buf, unsigned bufLength, bool strip, simpleXMLCb func, const void* data) |
372 | { |
373 | const char *itr = buf, *itrEnd = buf + bufLength; |
374 | |
375 | if (!buf || !func) return false; |
376 | |
377 | while (itr < itrEnd) { |
378 | if (itr[0] == '<') { |
379 | //Invalid case |
380 | if (itr + 1 >= itrEnd) return false; |
381 | |
382 | size_t toff = 0; |
383 | SimpleXMLType type = _getXMLType(itr, itrEnd, toff); |
384 | |
385 | const char* p; |
386 | if (type == SimpleXMLType::CData) p = _simpleXmlFindEndCdataTag(itr + 1 + toff, itrEnd); |
387 | else if (type == SimpleXMLType::DoctypeChild) p = _simpleXmlFindDoctypeChildEndTag(itr + 1 + toff, itrEnd); |
388 | else if (type == SimpleXMLType::Comment) p = _simpleXmlFindEndCommentTag(itr + 1 + toff, itrEnd); |
389 | else p = _simpleXmlFindEndTag(itr + 1 + toff, itrEnd); |
390 | |
391 | if (p) { |
392 | //Invalid case: '<' nested |
393 | if (*p == '<' && type != SimpleXMLType::Doctype) return false; |
394 | const char *start, *end; |
395 | |
396 | start = itr + 1 + toff; |
397 | end = p; |
398 | |
399 | switch (type) { |
400 | case SimpleXMLType::Open: { |
401 | if (p[-1] == '/') { |
402 | type = SimpleXMLType::OpenEmpty; |
403 | end--; |
404 | } |
405 | break; |
406 | } |
407 | case SimpleXMLType::CData: { |
408 | if (!memcmp(p - 2, "]]" , 2)) end -= 2; |
409 | break; |
410 | } |
411 | case SimpleXMLType::Processing: { |
412 | if (p[-1] == '?') end--; |
413 | break; |
414 | } |
415 | case SimpleXMLType::Comment: { |
416 | if (!memcmp(p - 2, "--" , 2)) end -= 2; |
417 | break; |
418 | } |
419 | default: { |
420 | break; |
421 | } |
422 | } |
423 | |
424 | if (strip && (type != SimpleXMLType::CData)) { |
425 | start = _skipWhiteSpacesAndXmlEntities(start, end); |
426 | end = _unskipWhiteSpacesAndXmlEntities(end, start); |
427 | } |
428 | |
429 | if (!func((void*)data, type, start, (unsigned int)(end - start))) return false; |
430 | |
431 | itr = p + 1; |
432 | } else { |
433 | return false; |
434 | } |
435 | } else { |
436 | const char *p, *end; |
437 | |
438 | if (strip) { |
439 | p = itr; |
440 | p = _skipWhiteSpacesAndXmlEntities(p, itrEnd); |
441 | if (p) { |
442 | if (!func((void*)data, SimpleXMLType::Ignored, itr, (unsigned int)(p - itr))) return false; |
443 | itr = p; |
444 | } |
445 | } |
446 | |
447 | p = _simpleXmlFindStartTag(itr, itrEnd); |
448 | if (!p) p = itrEnd; |
449 | |
450 | end = p; |
451 | if (strip) end = _unskipWhiteSpacesAndXmlEntities(end, itr); |
452 | |
453 | if (itr != end && !func((void*)data, SimpleXMLType::Data, itr, (unsigned int)(end - itr))) return false; |
454 | |
455 | if (strip && (end < p) && !func((void*)data, SimpleXMLType::Ignored, end, (unsigned int)(p - end))) return false; |
456 | |
457 | itr = p; |
458 | } |
459 | } |
460 | return true; |
461 | } |
462 | |
463 | |
464 | bool simpleXmlParseW3CAttribute(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data) |
465 | { |
466 | const char* end; |
467 | char* key; |
468 | char* val; |
469 | char* next; |
470 | |
471 | if (!buf) return false; |
472 | |
473 | end = buf + bufLength; |
474 | key = (char*)alloca(end - buf + 1); |
475 | val = (char*)alloca(end - buf + 1); |
476 | |
477 | if (buf == end) return true; |
478 | |
479 | do { |
480 | char* sep = (char*)strchr(buf, ':'); |
481 | next = (char*)strchr(buf, ';'); |
482 | if (sep >= end) { |
483 | next = nullptr; |
484 | sep = nullptr; |
485 | } |
486 | if (next >= end) next = nullptr; |
487 | |
488 | key[0] = '\0'; |
489 | val[0] = '\0'; |
490 | |
491 | if (next == nullptr && sep != nullptr) { |
492 | memcpy(key, buf, sep - buf); |
493 | key[sep - buf] = '\0'; |
494 | |
495 | memcpy(val, sep + 1, end - sep - 1); |
496 | val[end - sep - 1] = '\0'; |
497 | } else if (sep < next && sep != nullptr) { |
498 | memcpy(key, buf, sep - buf); |
499 | key[sep - buf] = '\0'; |
500 | |
501 | memcpy(val, sep + 1, next - sep - 1); |
502 | val[next - sep - 1] = '\0'; |
503 | } else if (next) { |
504 | memcpy(key, buf, next - buf); |
505 | key[next - buf] = '\0'; |
506 | } |
507 | |
508 | if (key[0]) { |
509 | key = const_cast<char*>(_simpleXmlSkipWhiteSpace(key, key + strlen(key))); |
510 | key[_simpleXmlUnskipWhiteSpace(key + strlen(key) , key) - key] = '\0'; |
511 | val = const_cast<char*>(_simpleXmlSkipWhiteSpace(val, val + strlen(val))); |
512 | val[_simpleXmlUnskipWhiteSpace(val + strlen(val) , val) - val] = '\0'; |
513 | |
514 | if (!func((void*)data, key, val)) { |
515 | if (!_isIgnoreUnsupportedLogAttributes(key, val)) { |
516 | TVGLOG("SVG" , "Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]" , simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id : "NO_ID" , key, val ? val : "NONE" ); |
517 | } |
518 | } |
519 | } |
520 | |
521 | buf = next + 1; |
522 | } while (next != nullptr); |
523 | |
524 | return true; |
525 | } |
526 | |
527 | |
528 | /* |
529 | * Supported formats: |
530 | * tag {}, .name {}, tag.name{} |
531 | */ |
532 | const char* simpleXmlParseCSSAttribute(const char* buf, unsigned bufLength, char** tag, char** name, const char** attrs, unsigned* attrsLength) |
533 | { |
534 | if (!buf) return nullptr; |
535 | |
536 | *tag = *name = nullptr; |
537 | *attrsLength = 0; |
538 | |
539 | auto itr = _simpleXmlSkipWhiteSpace(buf, buf + bufLength); |
540 | auto itrEnd = (const char*)memchr(buf, '{', bufLength); |
541 | |
542 | if (!itrEnd || itr == itrEnd) return nullptr; |
543 | |
544 | auto nextElement = (const char*)memchr(itrEnd, '}', bufLength - (itrEnd - buf)); |
545 | if (!nextElement) return nullptr; |
546 | |
547 | *attrs = itrEnd + 1; |
548 | *attrsLength = nextElement - *attrs; |
549 | |
550 | const char *p; |
551 | |
552 | itrEnd = _simpleXmlUnskipWhiteSpace(itrEnd, itr); |
553 | if (*(itrEnd - 1) == '.') return nullptr; |
554 | |
555 | for (p = itr; p < itrEnd; p++) { |
556 | if (*p == '.') break; |
557 | } |
558 | |
559 | if (p == itr) *tag = strdup("all" ); |
560 | else *tag = svgUtilStrndup(itr, p - itr); |
561 | |
562 | if (p == itrEnd) *name = nullptr; |
563 | else *name = svgUtilStrndup(p + 1, itrEnd - p - 1); |
564 | |
565 | return (nextElement ? nextElement + 1 : nullptr); |
566 | } |
567 | |
568 | |
569 | const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength) |
570 | { |
571 | const char *itr = buf, *itrEnd = buf + bufLength; |
572 | |
573 | for (; itr < itrEnd; itr++) { |
574 | if (!isspace((unsigned char)*itr)) { |
575 | //User skip tagname and already gave it the attributes. |
576 | if (*itr == '=') return buf; |
577 | } else { |
578 | itr = _simpleXmlUnskipXmlEntities(itr, buf); |
579 | if (itr == itrEnd) return nullptr; |
580 | return itr; |
581 | } |
582 | } |
583 | |
584 | return nullptr; |
585 | } |
586 | |