1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | // |
41 | // W A R N I N G |
42 | // ------------- |
43 | // |
44 | // This file is not part of the Qt API. It exists for the convenience |
45 | // of other Qt classes. This header file may change from version to |
46 | // version without notice, or even be removed. |
47 | // |
48 | // We mean it. |
49 | // |
50 | |
51 | #include <QtCore/private/qglobal_p.h> |
52 | #include <qstringconverter.h> |
53 | #include <qxmlstream.h> |
54 | #include "qxmlstreamgrammar_p.h" |
55 | |
56 | #include <memory> |
57 | |
58 | #ifndef QXMLSTREAM_P_H |
59 | #define QXMLSTREAM_P_H |
60 | |
61 | QT_BEGIN_NAMESPACE |
62 | |
63 | namespace QtPrivate { |
64 | |
65 | class XmlStringRef |
66 | { |
67 | public: |
68 | const QString *m_string = nullptr; |
69 | qsizetype m_pos = 0; |
70 | qsizetype m_size = 0; |
71 | |
72 | constexpr XmlStringRef() = default; |
73 | constexpr inline XmlStringRef(const QString *string, int pos, int length) |
74 | : m_string(string), m_pos(pos), m_size(length) |
75 | { |
76 | } |
77 | XmlStringRef(const QString *string) |
78 | : XmlStringRef(string, 0, string->length()) |
79 | { |
80 | } |
81 | |
82 | operator QXmlString() const { |
83 | if (!m_string) |
84 | return QXmlString(); |
85 | QStringPrivate d = m_string->data_ptr(); |
86 | d.setBegin(d.data() + m_pos); |
87 | d.size = m_size; |
88 | return QXmlString(std::move(d)); |
89 | } |
90 | operator QStringView() const { return view(); } |
91 | |
92 | void clear() { m_string = nullptr; m_pos = 0; m_size= 0; } |
93 | QStringView view() const { return m_string ? QStringView(m_string->data() + m_pos, m_size) : QStringView(); } |
94 | bool isEmpty() const { return m_size == 0; } |
95 | bool isNull() const { return !m_string; } |
96 | QString toString() const { return view().toString(); } |
97 | |
98 | #define MAKE_OP(op) \ |
99 | friend auto operator op(const XmlStringRef &lhs, const XmlStringRef &rhs) noexcept { return lhs.view() op rhs.view(); } \ |
100 | /*end*/ |
101 | MAKE_OP(==) |
102 | MAKE_OP(!=) |
103 | MAKE_OP(<=) |
104 | MAKE_OP(>=) |
105 | MAKE_OP(<) |
106 | MAKE_OP(>) |
107 | #ifdef __cpp_impl_three_way_comparison |
108 | MAKE_OP(<=>) |
109 | #endif |
110 | #undef MAKE_OP |
111 | #define MAKE_OP(op) \ |
112 | friend auto operator op(const XmlStringRef &lhs, QStringView rhs) noexcept { return lhs.view() op rhs; } \ |
113 | friend auto operator op(QStringView lhs, const XmlStringRef &rhs) noexcept { return lhs op rhs.view(); } \ |
114 | /*end*/ |
115 | MAKE_OP(==) |
116 | MAKE_OP(!=) |
117 | MAKE_OP(<=) |
118 | MAKE_OP(>=) |
119 | MAKE_OP(<) |
120 | MAKE_OP(>) |
121 | #ifdef __cpp_impl_three_way_comparison |
122 | MAKE_OP(<=>) |
123 | #endif |
124 | #undef MAKE_OP |
125 | }; |
126 | |
127 | } |
128 | |
129 | using namespace QtPrivate; |
130 | |
131 | template <typename T> class QXmlStreamSimpleStack |
132 | { |
133 | T *data; |
134 | qsizetype tos, cap; |
135 | public: |
136 | inline QXmlStreamSimpleStack() |
137 | : data(nullptr), tos(-1), cap(0) |
138 | {} |
139 | inline ~QXmlStreamSimpleStack() |
140 | { |
141 | if (data) { |
142 | std::destroy_n(data, size()); |
143 | free(data); |
144 | } |
145 | } |
146 | |
147 | inline void reserve(qsizetype ) |
148 | { |
149 | if (tos + extraCapacity + 1 > cap) { |
150 | cap = qMax(tos + extraCapacity + 1, cap << 1 ); |
151 | void *ptr = realloc(static_cast<void *>(data), cap * sizeof(T)); |
152 | data = reinterpret_cast<T *>(ptr); |
153 | Q_CHECK_PTR(data); |
154 | } |
155 | } |
156 | |
157 | inline T &push() { reserve(1); return rawPush(); } |
158 | inline T &rawPush() { return *new (data + (++tos)) T; } |
159 | inline const T &top() const { return data[tos]; } |
160 | inline T &top() { return data[tos]; } |
161 | inline T pop() { T t = std::move(data[tos]); std::destroy_at(data + tos); --tos; return t; } |
162 | inline T &operator[](qsizetype index) { return data[index]; } |
163 | inline const T &at(qsizetype index) const { return data[index]; } |
164 | inline qsizetype size() const { return tos + 1; } |
165 | inline void resize(qsizetype s) { tos = s - 1; } |
166 | inline bool isEmpty() const { return tos < 0; } |
167 | inline void clear() { tos = -1; } |
168 | |
169 | using const_iterator = const T*; |
170 | using iterator = T*; |
171 | T *begin() { return data; } |
172 | const T *begin() const { return data; } |
173 | const T *cbegin() const { return begin(); } |
174 | T *end() { return data + size(); } |
175 | const T *end() const { return data + size(); } |
176 | const T *cend() const { return end(); } |
177 | }; |
178 | |
179 | |
180 | class QXmlStream |
181 | { |
182 | Q_DECLARE_TR_FUNCTIONS(QXmlStream) |
183 | }; |
184 | |
185 | class QXmlStreamPrivateTagStack { |
186 | public: |
187 | struct NamespaceDeclaration |
188 | { |
189 | XmlStringRef prefix; |
190 | XmlStringRef namespaceUri; |
191 | }; |
192 | |
193 | struct Tag |
194 | { |
195 | XmlStringRef name; |
196 | XmlStringRef qualifiedName; |
197 | NamespaceDeclaration namespaceDeclaration; |
198 | int tagStackStringStorageSize; |
199 | qsizetype namespaceDeclarationsSize; |
200 | }; |
201 | |
202 | |
203 | QXmlStreamPrivateTagStack(); |
204 | QXmlStreamSimpleStack<NamespaceDeclaration> namespaceDeclarations; |
205 | QString tagStackStringStorage; |
206 | int tagStackStringStorageSize; |
207 | int initialTagStackStringStorageSize; |
208 | bool tagsDone; |
209 | |
210 | XmlStringRef addToStringStorage(QStringView s) |
211 | { |
212 | int pos = tagStackStringStorageSize; |
213 | int sz = s.size(); |
214 | if (pos != tagStackStringStorage.size()) |
215 | tagStackStringStorage.resize(pos); |
216 | tagStackStringStorage.append(s.data(), sz); |
217 | tagStackStringStorageSize += sz; |
218 | return XmlStringRef(&tagStackStringStorage, pos, sz); |
219 | } |
220 | |
221 | QXmlStreamSimpleStack<Tag> tagStack; |
222 | |
223 | |
224 | inline Tag tagStack_pop() { |
225 | Tag tag = tagStack.pop(); |
226 | tagStackStringStorageSize = tag.tagStackStringStorageSize; |
227 | namespaceDeclarations.resize(tag.namespaceDeclarationsSize); |
228 | tagsDone = tagStack.isEmpty(); |
229 | return tag; |
230 | } |
231 | inline Tag &tagStack_push() { |
232 | Tag &tag = tagStack.push(); |
233 | tag.tagStackStringStorageSize = tagStackStringStorageSize; |
234 | tag.namespaceDeclarationsSize = namespaceDeclarations.size(); |
235 | return tag; |
236 | } |
237 | }; |
238 | |
239 | |
240 | class QXmlStreamEntityResolver; |
241 | class QXmlStreamReaderPrivate : public QXmlStreamGrammar, public QXmlStreamPrivateTagStack |
242 | { |
243 | QXmlStreamReader *q_ptr; |
244 | Q_DECLARE_PUBLIC(QXmlStreamReader) |
245 | public: |
246 | QXmlStreamReaderPrivate(QXmlStreamReader *q); |
247 | ~QXmlStreamReaderPrivate(); |
248 | void init(); |
249 | |
250 | QByteArray rawReadBuffer; |
251 | QByteArray dataBuffer; |
252 | uchar firstByte; |
253 | qint64 nbytesread; |
254 | QString readBuffer; |
255 | int readBufferPos; |
256 | QXmlStreamSimpleStack<uint> putStack; |
257 | struct Entity { |
258 | Entity() = default; |
259 | Entity(const QString &name, const QString &value) |
260 | : name(name), value(value), external(false), unparsed(false), literal(false), |
261 | hasBeenParsed(false), isCurrentlyReferenced(false){} |
262 | static inline Entity createLiteral(QLatin1String name, QLatin1String value) |
263 | { Entity result(name, value); result.literal = result.hasBeenParsed = true; return result; } |
264 | QString name, value; |
265 | uint external : 1; |
266 | uint unparsed : 1; |
267 | uint literal : 1; |
268 | uint hasBeenParsed : 1; |
269 | uint isCurrentlyReferenced : 1; |
270 | }; |
271 | // these hash tables use a QStringView as a key to avoid creating QStrings |
272 | // just for lookup. The keys are usually views into Entity::name and thus |
273 | // are guaranteed to have the same lifetime as the referenced data: |
274 | QHash<QStringView, Entity> entityHash; |
275 | QHash<QStringView, Entity> parameterEntityHash; |
276 | QXmlStreamSimpleStack<Entity *>entityReferenceStack; |
277 | int entityExpansionLimit = 4096; |
278 | int entityLength = 0; |
279 | inline bool referenceEntity(Entity &entity) { |
280 | if (entity.isCurrentlyReferenced) { |
281 | raiseWellFormedError(QXmlStream::tr("Self-referencing entity detected." )); |
282 | return false; |
283 | } |
284 | // entityLength represents the amount of additional characters the |
285 | // entity expands into (can be negative for e.g. &). It's used to |
286 | // avoid DoS attacks through recursive entity expansions |
287 | entityLength += entity.value.size() - entity.name.size() - 2; |
288 | if (entityLength > entityExpansionLimit) { |
289 | raiseWellFormedError(QXmlStream::tr("Entity expands to more characters than the entity expansion limit." )); |
290 | return false; |
291 | } |
292 | entity.isCurrentlyReferenced = true; |
293 | entityReferenceStack.push() = &entity; |
294 | injectToken(ENTITY_DONE); |
295 | return true; |
296 | } |
297 | |
298 | |
299 | QIODevice *device; |
300 | bool deleteDevice; |
301 | QStringDecoder decoder; |
302 | bool atEnd; |
303 | |
304 | /*! |
305 | \sa setType() |
306 | */ |
307 | QXmlStreamReader::TokenType type; |
308 | QXmlStreamReader::Error error; |
309 | QString errorString; |
310 | QString unresolvedEntity; |
311 | |
312 | qint64 lineNumber, lastLineStart, characterOffset; |
313 | |
314 | |
315 | void write(const QString &); |
316 | void write(const char *); |
317 | |
318 | |
319 | QXmlStreamAttributes attributes; |
320 | XmlStringRef namespaceForPrefix(QStringView prefix); |
321 | void resolveTag(); |
322 | void resolvePublicNamespaces(); |
323 | void resolveDtd(); |
324 | uint resolveCharRef(int symbolIndex); |
325 | bool checkStartDocument(); |
326 | void startDocument(); |
327 | void parseError(); |
328 | void checkPublicLiteral(QStringView publicId); |
329 | |
330 | bool scanDtd; |
331 | XmlStringRef lastAttributeValue; |
332 | bool lastAttributeIsCData; |
333 | struct DtdAttribute { |
334 | XmlStringRef tagName; |
335 | XmlStringRef attributeQualifiedName; |
336 | XmlStringRef attributePrefix; |
337 | XmlStringRef attributeName; |
338 | XmlStringRef defaultValue; |
339 | bool isCDATA; |
340 | bool isNamespaceAttribute; |
341 | }; |
342 | QXmlStreamSimpleStack<DtdAttribute> dtdAttributes; |
343 | struct NotationDeclaration { |
344 | XmlStringRef name; |
345 | XmlStringRef publicId; |
346 | XmlStringRef systemId; |
347 | }; |
348 | QXmlStreamSimpleStack<NotationDeclaration> notationDeclarations; |
349 | QXmlStreamNotationDeclarations publicNotationDeclarations; |
350 | QXmlStreamNamespaceDeclarations publicNamespaceDeclarations; |
351 | |
352 | struct EntityDeclaration { |
353 | XmlStringRef name; |
354 | XmlStringRef notationName; |
355 | XmlStringRef publicId; |
356 | XmlStringRef systemId; |
357 | XmlStringRef value; |
358 | bool parameter; |
359 | bool external; |
360 | inline void clear() { |
361 | name.clear(); |
362 | notationName.clear(); |
363 | publicId.clear(); |
364 | systemId.clear(); |
365 | value.clear(); |
366 | parameter = external = false; |
367 | } |
368 | }; |
369 | QXmlStreamSimpleStack<EntityDeclaration> entityDeclarations; |
370 | QXmlStreamEntityDeclarations publicEntityDeclarations; |
371 | |
372 | XmlStringRef text; |
373 | |
374 | XmlStringRef prefix, namespaceUri, qualifiedName, name; |
375 | XmlStringRef processingInstructionTarget, processingInstructionData; |
376 | XmlStringRef dtdName, dtdPublicId, dtdSystemId; |
377 | XmlStringRef documentVersion, documentEncoding; |
378 | uint isEmptyElement : 1; |
379 | uint isWhitespace : 1; |
380 | uint isCDATA : 1; |
381 | uint standalone : 1; |
382 | uint hasCheckedStartDocument : 1; |
383 | uint normalizeLiterals : 1; |
384 | uint hasSeenTag : 1; |
385 | uint inParseEntity : 1; |
386 | uint referenceToUnparsedEntityDetected : 1; |
387 | uint referenceToParameterEntityDetected : 1; |
388 | uint hasExternalDtdSubset : 1; |
389 | uint lockEncoding : 1; |
390 | uint namespaceProcessing : 1; |
391 | |
392 | int resumeReduction; |
393 | void resume(int rule); |
394 | |
395 | inline bool entitiesMustBeDeclared() const { |
396 | return (!inParseEntity |
397 | && (standalone |
398 | || (!referenceToUnparsedEntityDetected |
399 | && !referenceToParameterEntityDetected // Errata 13 as of 2006-04-25 |
400 | && !hasExternalDtdSubset))); |
401 | } |
402 | |
403 | // qlalr parser |
404 | int tos; |
405 | int stack_size; |
406 | struct Value { |
407 | int pos; |
408 | int len; |
409 | int prefix; |
410 | ushort c; |
411 | }; |
412 | |
413 | Value *sym_stack; |
414 | int *state_stack; |
415 | inline void reallocateStack(); |
416 | inline Value &sym(int index) const |
417 | { return sym_stack[tos + index - 1]; } |
418 | QString textBuffer; |
419 | inline void clearTextBuffer() { |
420 | if (!scanDtd) { |
421 | textBuffer.resize(0); |
422 | textBuffer.reserve(256); |
423 | } |
424 | } |
425 | struct Attribute { |
426 | Value key; |
427 | Value value; |
428 | }; |
429 | QXmlStreamSimpleStack<Attribute> attributeStack; |
430 | |
431 | inline XmlStringRef symString(int index) { |
432 | const Value &symbol = sym(index); |
433 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix); |
434 | } |
435 | QStringView symView(int index) const |
436 | { |
437 | const Value &symbol = sym(index); |
438 | return QStringView(textBuffer.data() + symbol.pos, symbol.len).mid(symbol.prefix); |
439 | } |
440 | inline XmlStringRef symName(int index) { |
441 | const Value &symbol = sym(index); |
442 | return XmlStringRef(&textBuffer, symbol.pos, symbol.len); |
443 | } |
444 | inline XmlStringRef symString(int index, int offset) { |
445 | const Value &symbol = sym(index); |
446 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix + offset, symbol.len - symbol.prefix - offset); |
447 | } |
448 | inline XmlStringRef symPrefix(int index) { |
449 | const Value &symbol = sym(index); |
450 | if (symbol.prefix) |
451 | return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1); |
452 | return XmlStringRef(); |
453 | } |
454 | inline XmlStringRef symString(const Value &symbol) { |
455 | return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix); |
456 | } |
457 | inline XmlStringRef symName(const Value &symbol) { |
458 | return XmlStringRef(&textBuffer, symbol.pos, symbol.len); |
459 | } |
460 | inline XmlStringRef symPrefix(const Value &symbol) { |
461 | if (symbol.prefix) |
462 | return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1); |
463 | return XmlStringRef(); |
464 | } |
465 | |
466 | inline void clearSym() { Value &val = sym(1); val.pos = textBuffer.size(); val.len = 0; } |
467 | |
468 | |
469 | short token; |
470 | uint token_char; |
471 | |
472 | uint filterCarriageReturn(); |
473 | inline uint getChar(); |
474 | inline uint peekChar(); |
475 | inline void putChar(uint c) { putStack.push() = c; } |
476 | inline void putChar(QChar c) { putStack.push() = c.unicode(); } |
477 | void putString(QStringView s, qsizetype from = 0); |
478 | void putStringLiteral(QStringView s); |
479 | void putReplacement(QStringView s); |
480 | void putReplacementInAttributeValue(QStringView s); |
481 | uint getChar_helper(); |
482 | |
483 | bool scanUntil(const char *str, short tokenToInject = -1); |
484 | bool scanString(const char *str, short tokenToInject, bool requireSpace = true); |
485 | inline void injectToken(ushort tokenToInject) { |
486 | putChar(int(tokenToInject) << 16); |
487 | } |
488 | |
489 | QString resolveUndeclaredEntity(const QString &name); |
490 | void parseEntity(const QString &value); |
491 | std::unique_ptr<QXmlStreamReaderPrivate> entityParser; |
492 | |
493 | bool scanAfterLangleBang(); |
494 | bool scanPublicOrSystem(); |
495 | bool scanNData(); |
496 | bool scanAfterDefaultDecl(); |
497 | bool scanAttType(); |
498 | |
499 | |
500 | // scan optimization functions. Not strictly necessary but LALR is |
501 | // not very well suited for scanning fast |
502 | int fastScanLiteralContent(); |
503 | int fastScanSpace(); |
504 | int fastScanContentCharList(); |
505 | int fastScanName(int *prefix = nullptr); |
506 | inline int fastScanNMTOKEN(); |
507 | |
508 | |
509 | bool parse(); |
510 | inline void consumeRule(int); |
511 | |
512 | void raiseError(QXmlStreamReader::Error error, const QString& message = QString()); |
513 | void raiseWellFormedError(const QString &message); |
514 | |
515 | QXmlStreamEntityResolver *entityResolver; |
516 | |
517 | private: |
518 | /*! \internal |
519 | Never assign to variable type directly. Instead use this function. |
520 | |
521 | This prevents errors from being ignored. |
522 | */ |
523 | inline void setType(const QXmlStreamReader::TokenType t) |
524 | { |
525 | if(type != QXmlStreamReader::Invalid) |
526 | type = t; |
527 | } |
528 | }; |
529 | |
530 | QT_END_NAMESPACE |
531 | |
532 | #endif // QXMLSTREAM_P_H |
533 | |
534 | |