1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qtexthtmlparser_p.h"
41
42#include <qbytearray.h>
43#include <qstack.h>
44#include <qdebug.h>
45#include <qthread.h>
46#include <qguiapplication.h>
47
48#include "qtextdocument.h"
49#include "qtextformat_p.h"
50#include "qtextdocument_p.h"
51#include "qtextcursor.h"
52#include "qfont_p.h"
53
54#include <algorithm>
55
56#ifndef QT_NO_TEXTHTMLPARSER
57
58QT_BEGIN_NAMESPACE
59
// Named character references understood by the parser, each mapped to a single
// UTF-16 code unit. resolveEntity() runs std::lower_bound over this table, so
// the entries MUST stay sorted by name (uppercase letters sort before lowercase
// ones). name[9] is sized for the longest name ("thetasym") plus its terminator.
// MAX_ENTITY must equal the number of entries; the static_assert below checks it.
// see also tst_qtextdocumentfragment.cpp
// NOTE(review): "and"/"or" were corrected to the HTML 4.01 values (U+2227 and
// U+2228); if the autotest referenced above mirrors this table, update it too.
#define MAX_ENTITY 258
static const struct QTextHtmlEntity { const char name[9]; char16_t code; } entities[]= {
    { "AElig", 0x00c6 },
    { "AMP", 38 },
    { "Aacute", 0x00c1 },
    { "Acirc", 0x00c2 },
    { "Agrave", 0x00c0 },
    { "Alpha", 0x0391 },
    { "Aring", 0x00c5 },
    { "Atilde", 0x00c3 },
    { "Auml", 0x00c4 },
    { "Beta", 0x0392 },
    { "Ccedil", 0x00c7 },
    { "Chi", 0x03a7 },
    { "Dagger", 0x2021 },
    { "Delta", 0x0394 },
    { "ETH", 0x00d0 },
    { "Eacute", 0x00c9 },
    { "Ecirc", 0x00ca },
    { "Egrave", 0x00c8 },
    { "Epsilon", 0x0395 },
    { "Eta", 0x0397 },
    { "Euml", 0x00cb },
    { "GT", 62 },
    { "Gamma", 0x0393 },
    { "Iacute", 0x00cd },
    { "Icirc", 0x00ce },
    { "Igrave", 0x00cc },
    { "Iota", 0x0399 },
    { "Iuml", 0x00cf },
    { "Kappa", 0x039a },
    { "LT", 60 },
    { "Lambda", 0x039b },
    { "Mu", 0x039c },
    { "Ntilde", 0x00d1 },
    { "Nu", 0x039d },
    { "OElig", 0x0152 },
    { "Oacute", 0x00d3 },
    { "Ocirc", 0x00d4 },
    { "Ograve", 0x00d2 },
    { "Omega", 0x03a9 },
    { "Omicron", 0x039f },
    { "Oslash", 0x00d8 },
    { "Otilde", 0x00d5 },
    { "Ouml", 0x00d6 },
    { "Phi", 0x03a6 },
    { "Pi", 0x03a0 },
    { "Prime", 0x2033 },
    { "Psi", 0x03a8 },
    { "QUOT", 34 },
    { "Rho", 0x03a1 },
    { "Scaron", 0x0160 },
    { "Sigma", 0x03a3 },
    { "THORN", 0x00de },
    { "Tau", 0x03a4 },
    { "Theta", 0x0398 },
    { "Uacute", 0x00da },
    { "Ucirc", 0x00db },
    { "Ugrave", 0x00d9 },
    { "Upsilon", 0x03a5 },
    { "Uuml", 0x00dc },
    { "Xi", 0x039e },
    { "Yacute", 0x00dd },
    { "Yuml", 0x0178 },
    { "Zeta", 0x0396 },
    { "aacute", 0x00e1 },
    { "acirc", 0x00e2 },
    { "acute", 0x00b4 },
    { "aelig", 0x00e6 },
    { "agrave", 0x00e0 },
    { "alefsym", 0x2135 },
    { "alpha", 0x03b1 },
    { "amp", 38 },
    { "and", 0x2227 }, // logical and (wedge); 0x22a5 is "perp", listed below
    { "ang", 0x2220 },
    { "apos", 0x0027 },
    { "aring", 0x00e5 },
    { "asymp", 0x2248 },
    { "atilde", 0x00e3 },
    { "auml", 0x00e4 },
    { "bdquo", 0x201e },
    { "beta", 0x03b2 },
    { "brvbar", 0x00a6 },
    { "bull", 0x2022 },
    { "cap", 0x2229 },
    { "ccedil", 0x00e7 },
    { "cedil", 0x00b8 },
    { "cent", 0x00a2 },
    { "chi", 0x03c7 },
    { "circ", 0x02c6 },
    { "clubs", 0x2663 },
    { "cong", 0x2245 },
    { "copy", 0x00a9 },
    { "crarr", 0x21b5 },
    { "cup", 0x222a },
    { "curren", 0x00a4 },
    { "dArr", 0x21d3 },
    { "dagger", 0x2020 },
    { "darr", 0x2193 },
    { "deg", 0x00b0 },
    { "delta", 0x03b4 },
    { "diams", 0x2666 },
    { "divide", 0x00f7 },
    { "eacute", 0x00e9 },
    { "ecirc", 0x00ea },
    { "egrave", 0x00e8 },
    { "empty", 0x2205 },
    { "emsp", 0x2003 },
    { "ensp", 0x2002 },
    { "epsilon", 0x03b5 },
    { "equiv", 0x2261 },
    { "eta", 0x03b7 },
    { "eth", 0x00f0 },
    { "euml", 0x00eb },
    { "euro", 0x20ac },
    { "exist", 0x2203 },
    { "fnof", 0x0192 },
    { "forall", 0x2200 },
    { "frac12", 0x00bd },
    { "frac14", 0x00bc },
    { "frac34", 0x00be },
    { "frasl", 0x2044 },
    { "gamma", 0x03b3 },
    { "ge", 0x2265 },
    { "gt", 62 },
    { "hArr", 0x21d4 },
    { "harr", 0x2194 },
    { "hearts", 0x2665 },
    { "hellip", 0x2026 },
    { "iacute", 0x00ed },
    { "icirc", 0x00ee },
    { "iexcl", 0x00a1 },
    { "igrave", 0x00ec },
    { "image", 0x2111 },
    { "infin", 0x221e },
    { "int", 0x222b },
    { "iota", 0x03b9 },
    { "iquest", 0x00bf },
    { "isin", 0x2208 },
    { "iuml", 0x00ef },
    { "kappa", 0x03ba },
    { "lArr", 0x21d0 },
    { "lambda", 0x03bb },
    { "lang", 0x2329 },
    { "laquo", 0x00ab },
    { "larr", 0x2190 },
    { "lceil", 0x2308 },
    { "ldquo", 0x201c },
    { "le", 0x2264 },
    { "lfloor", 0x230a },
    { "lowast", 0x2217 },
    { "loz", 0x25ca },
    { "lrm", 0x200e },
    { "lsaquo", 0x2039 },
    { "lsquo", 0x2018 },
    { "lt", 60 },
    { "macr", 0x00af },
    { "mdash", 0x2014 },
    { "micro", 0x00b5 },
    { "middot", 0x00b7 },
    { "minus", 0x2212 },
    { "mu", 0x03bc },
    { "nabla", 0x2207 },
    { "nbsp", 0x00a0 },
    { "ndash", 0x2013 },
    { "ne", 0x2260 },
    { "ni", 0x220b },
    { "not", 0x00ac },
    { "notin", 0x2209 },
    { "nsub", 0x2284 },
    { "ntilde", 0x00f1 },
    { "nu", 0x03bd },
    { "oacute", 0x00f3 },
    { "ocirc", 0x00f4 },
    { "oelig", 0x0153 },
    { "ograve", 0x00f2 },
    { "oline", 0x203e },
    { "omega", 0x03c9 },
    { "omicron", 0x03bf },
    { "oplus", 0x2295 },
    { "or", 0x2228 }, // logical or (vee)
    { "ordf", 0x00aa },
    { "ordm", 0x00ba },
    { "oslash", 0x00f8 },
    { "otilde", 0x00f5 },
    { "otimes", 0x2297 },
    { "ouml", 0x00f6 },
    { "para", 0x00b6 },
    { "part", 0x2202 },
    { "percnt", 0x0025 },
    { "permil", 0x2030 },
    { "perp", 0x22a5 },
    { "phi", 0x03c6 },
    { "pi", 0x03c0 },
    { "piv", 0x03d6 },
    { "plusmn", 0x00b1 },
    { "pound", 0x00a3 },
    { "prime", 0x2032 },
    { "prod", 0x220f },
    { "prop", 0x221d },
    { "psi", 0x03c8 },
    { "quot", 34 },
    { "rArr", 0x21d2 },
    { "radic", 0x221a },
    { "rang", 0x232a },
    { "raquo", 0x00bb },
    { "rarr", 0x2192 },
    { "rceil", 0x2309 },
    { "rdquo", 0x201d },
    { "real", 0x211c },
    { "reg", 0x00ae },
    { "rfloor", 0x230b },
    { "rho", 0x03c1 },
    { "rlm", 0x200f },
    { "rsaquo", 0x203a },
    { "rsquo", 0x2019 },
    { "sbquo", 0x201a },
    { "scaron", 0x0161 },
    { "sdot", 0x22c5 },
    { "sect", 0x00a7 },
    { "shy", 0x00ad },
    { "sigma", 0x03c3 },
    { "sigmaf", 0x03c2 },
    { "sim", 0x223c },
    { "spades", 0x2660 },
    { "sub", 0x2282 },
    { "sube", 0x2286 },
    { "sum", 0x2211 },
    { "sup", 0x2283 },
    { "sup1", 0x00b9 },
    { "sup2", 0x00b2 },
    { "sup3", 0x00b3 },
    { "supe", 0x2287 },
    { "szlig", 0x00df },
    { "tau", 0x03c4 },
    { "there4", 0x2234 },
    { "theta", 0x03b8 },
    { "thetasym", 0x03d1 },
    { "thinsp", 0x2009 },
    { "thorn", 0x00fe },
    { "tilde", 0x02dc },
    { "times", 0x00d7 },
    { "trade", 0x2122 },
    { "uArr", 0x21d1 },
    { "uacute", 0x00fa },
    { "uarr", 0x2191 },
    { "ucirc", 0x00fb },
    { "ugrave", 0x00f9 },
    { "uml", 0x00a8 },
    { "upsih", 0x03d2 },
    { "upsilon", 0x03c5 },
    { "uuml", 0x00fc },
    { "weierp", 0x2118 },
    { "xi", 0x03be },
    { "yacute", 0x00fd },
    { "yen", 0x00a5 },
    { "yuml", 0x00ff },
    { "zeta", 0x03b6 },
    { "zwj", 0x200d },
    { "zwnj", 0x200c }
};
static_assert(MAX_ENTITY == sizeof entities / sizeof *entities);
323
#if defined(Q_CC_MSVC) && _MSC_VER < 1600
// Orders two table entries by name. Only compiled for MSVC with
// _MSC_VER below 1600.
// NOTE(review): presumably that compiler's std::lower_bound also compares
// elements with each other - confirm before removing.
bool operator<(const QTextHtmlEntity &entity1, const QTextHtmlEntity &entity2)
{
    const QLatin1String firstName(entity1.name);
    const QLatin1String secondName(entity2.name);
    return firstName < secondName;
}
#endif
330
331static bool operator<(QStringView entityStr, const QTextHtmlEntity &entity)
332{
333 return entityStr < QLatin1String(entity.name);
334}
335
336static bool operator<(const QTextHtmlEntity &entity, QStringView entityStr)
337{
338 return QLatin1String(entity.name) < entityStr;
339}
340
341static QChar resolveEntity(QStringView entity)
342{
343 const QTextHtmlEntity *start = &entities[0];
344 const QTextHtmlEntity *end = &entities[MAX_ENTITY];
345 const QTextHtmlEntity *e = std::lower_bound(start, end, entity);
346 if (e == end || (entity < *e))
347 return QChar();
348 return e->code;
349}
350
351static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {
352 0x20ac, // 0x80
353 0x0081, // 0x81 direct mapping
354 0x201a, // 0x82
355 0x0192, // 0x83
356 0x201e, // 0x84
357 0x2026, // 0x85
358 0x2020, // 0x86
359 0x2021, // 0x87
360 0x02C6, // 0x88
361 0x2030, // 0x89
362 0x0160, // 0x8A
363 0x2039, // 0x8B
364 0x0152, // 0x8C
365 0x008D, // 0x8D direct mapping
366 0x017D, // 0x8E
367 0x008F, // 0x8F directmapping
368 0x0090, // 0x90 directmapping
369 0x2018, // 0x91
370 0x2019, // 0x92
371 0x201C, // 0x93
372 0X201D, // 0x94
373 0x2022, // 0x95
374 0x2013, // 0x96
375 0x2014, // 0x97
376 0x02DC, // 0x98
377 0x2122, // 0x99
378 0x0161, // 0x9A
379 0x203A, // 0x9B
380 0x0153, // 0x9C
381 0x009D, // 0x9D direct mapping
382 0x017E, // 0x9E
383 0x0178 // 0x9F
384};
385
386// the displayMode value is according to the what are blocks in the piecetable, not
387// what the w3c defines.
388static const QTextHtmlElement elements[Html_NumElements]= {
389 { "a", Html_a, QTextHtmlElement::DisplayInline },
390 { "address", Html_address, QTextHtmlElement::DisplayInline },
391 { "b", Html_b, QTextHtmlElement::DisplayInline },
392 { "big", Html_big, QTextHtmlElement::DisplayInline },
393 { "blockquote", Html_blockquote, QTextHtmlElement::DisplayBlock },
394 { "body", Html_body, QTextHtmlElement::DisplayBlock },
395 { "br", Html_br, QTextHtmlElement::DisplayInline },
396 { "caption", Html_caption, QTextHtmlElement::DisplayBlock },
397 { "center", Html_center, QTextHtmlElement::DisplayBlock },
398 { "cite", Html_cite, QTextHtmlElement::DisplayInline },
399 { "code", Html_code, QTextHtmlElement::DisplayInline },
400 { "dd", Html_dd, QTextHtmlElement::DisplayBlock },
401 { "dfn", Html_dfn, QTextHtmlElement::DisplayInline },
402 { "div", Html_div, QTextHtmlElement::DisplayBlock },
403 { "dl", Html_dl, QTextHtmlElement::DisplayBlock },
404 { "dt", Html_dt, QTextHtmlElement::DisplayBlock },
405 { "em", Html_em, QTextHtmlElement::DisplayInline },
406 { "font", Html_font, QTextHtmlElement::DisplayInline },
407 { "h1", Html_h1, QTextHtmlElement::DisplayBlock },
408 { "h2", Html_h2, QTextHtmlElement::DisplayBlock },
409 { "h3", Html_h3, QTextHtmlElement::DisplayBlock },
410 { "h4", Html_h4, QTextHtmlElement::DisplayBlock },
411 { "h5", Html_h5, QTextHtmlElement::DisplayBlock },
412 { "h6", Html_h6, QTextHtmlElement::DisplayBlock },
413 { "head", Html_head, QTextHtmlElement::DisplayNone },
414 { "hr", Html_hr, QTextHtmlElement::DisplayBlock },
415 { "html", Html_html, QTextHtmlElement::DisplayInline },
416 { "i", Html_i, QTextHtmlElement::DisplayInline },
417 { "img", Html_img, QTextHtmlElement::DisplayInline },
418 { "kbd", Html_kbd, QTextHtmlElement::DisplayInline },
419 { "li", Html_li, QTextHtmlElement::DisplayBlock },
420 { "link", Html_link, QTextHtmlElement::DisplayNone },
421 { "meta", Html_meta, QTextHtmlElement::DisplayNone },
422 { "nobr", Html_nobr, QTextHtmlElement::DisplayInline },
423 { "ol", Html_ol, QTextHtmlElement::DisplayBlock },
424 { "p", Html_p, QTextHtmlElement::DisplayBlock },
425 { "pre", Html_pre, QTextHtmlElement::DisplayBlock },
426 { "qt", Html_body /*deliberate mapping*/, QTextHtmlElement::DisplayBlock },
427 { "s", Html_s, QTextHtmlElement::DisplayInline },
428 { "samp", Html_samp, QTextHtmlElement::DisplayInline },
429 { "script", Html_script, QTextHtmlElement::DisplayNone },
430 { "small", Html_small, QTextHtmlElement::DisplayInline },
431 { "span", Html_span, QTextHtmlElement::DisplayInline },
432 { "strong", Html_strong, QTextHtmlElement::DisplayInline },
433 { "style", Html_style, QTextHtmlElement::DisplayNone },
434 { "sub", Html_sub, QTextHtmlElement::DisplayInline },
435 { "sup", Html_sup, QTextHtmlElement::DisplayInline },
436 { "table", Html_table, QTextHtmlElement::DisplayTable },
437 { "tbody", Html_tbody, QTextHtmlElement::DisplayTable },
438 { "td", Html_td, QTextHtmlElement::DisplayBlock },
439 { "tfoot", Html_tfoot, QTextHtmlElement::DisplayTable },
440 { "th", Html_th, QTextHtmlElement::DisplayBlock },
441 { "thead", Html_thead, QTextHtmlElement::DisplayTable },
442 { "title", Html_title, QTextHtmlElement::DisplayNone },
443 { "tr", Html_tr, QTextHtmlElement::DisplayTable },
444 { "tt", Html_tt, QTextHtmlElement::DisplayInline },
445 { "u", Html_u, QTextHtmlElement::DisplayInline },
446 { "ul", Html_ul, QTextHtmlElement::DisplayBlock },
447 { "var", Html_var, QTextHtmlElement::DisplayInline },
448};
449
450static bool operator<(const QString &str, const QTextHtmlElement &e)
451{
452 return str < QLatin1String(e.name);
453}
454
455static bool operator<(const QTextHtmlElement &e, const QString &str)
456{
457 return QLatin1String(e.name) < str;
458}
459
460static const QTextHtmlElement *lookupElementHelper(const QString &element)
461{
462 const QTextHtmlElement *start = &elements[0];
463 const QTextHtmlElement *end = &elements[Html_NumElements];
464 const QTextHtmlElement *e = std::lower_bound(start, end, element);
465 if ((e == end) || (element < *e))
466 return nullptr;
467 return e;
468}
469
470int QTextHtmlParser::lookupElement(const QString &element)
471{
472 const QTextHtmlElement *e = lookupElementHelper(element);
473 if (!e)
474 return -1;
475 return e->id;
476}
477
478// quotes newlines as "\\n"
479static QString quoteNewline(const QString &s)
480{
481 QString n = s;
482 n.replace(QLatin1Char('\n'), QLatin1String("\\n"));
483 return n;
484}
485
486QTextHtmlParserNode::QTextHtmlParserNode()
487 : parent(0), id(Html_unknown),
488 cssFloat(QTextFrameFormat::InFlow), hasOwnListStyle(false), hasOwnLineHeightType(false), hasLineHeightMultiplier(false),
489 hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false),
490 displayMode(QTextHtmlElement::DisplayInline), hasHref(false),
491 listStyle(QTextListFormat::ListStyleUndefined), imageWidth(-1), imageHeight(-1), tableBorder(0),
492 tableCellRowSpan(1), tableCellColSpan(1), tableCellSpacing(2), tableCellPadding(0),
493 borderBrush(Qt::darkGray), borderStyle(QTextFrameFormat::BorderStyle_Outset),
494 borderCollapse(false),
495 userState(-1), cssListIndent(0), wsm(WhiteSpaceModeUndefined)
496{
497 margin[QTextHtmlParser::MarginLeft] = 0;
498 margin[QTextHtmlParser::MarginRight] = 0;
499 margin[QTextHtmlParser::MarginTop] = 0;
500 margin[QTextHtmlParser::MarginBottom] = 0;
501
502 for (int i = 0; i < 4; ++i) {
503 tableCellBorderStyle[i] = QTextFrameFormat::BorderStyle_None;
504 tableCellBorder[i] = 0;
505 tableCellBorderBrush[i] = Qt::NoBrush;
506 }
507}
508
509void QTextHtmlParser::dumpHtml()
510{
511 for (int i = 0; i < count(); ++i) {
512 qDebug().nospace() << qPrintable(QString(depth(i)*4, QLatin1Char(' ')))
513 << qPrintable(at(i).tag) << ':'
514 << quoteNewline(at(i).text);
515 ;
516 }
517}
518
519QTextHtmlParserNode *QTextHtmlParser::newNode(int parent)
520{
521 QTextHtmlParserNode *lastNode = nodes.last();
522 QTextHtmlParserNode *newNode = nullptr;
523
524 bool reuseLastNode = true;
525
526 if (nodes.count() == 1) {
527 reuseLastNode = false;
528 } else if (lastNode->tag.isEmpty()) {
529
530 if (lastNode->text.isEmpty()) {
531 reuseLastNode = true;
532 } else { // last node is a text node (empty tag) with some text
533
534 if (lastNode->text.length() == 1 && lastNode->text.at(0).isSpace()) {
535
536 int lastSibling = count() - 2;
537 while (lastSibling
538 && at(lastSibling).parent != lastNode->parent
539 && at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
540 lastSibling = at(lastSibling).parent;
541 }
542
543 if (at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
544 reuseLastNode = false;
545 } else {
546 reuseLastNode = true;
547 }
548 } else {
549 // text node with real (non-whitespace) text -> nothing to re-use
550 reuseLastNode = false;
551 }
552
553 }
554
555 } else {
556 // last node had a proper tag -> nothing to re-use
557 reuseLastNode = false;
558 }
559
560 if (reuseLastNode) {
561 newNode = lastNode;
562 newNode->tag.clear();
563 newNode->text.clear();
564 newNode->id = Html_unknown;
565 } else {
566 nodes.append(new QTextHtmlParserNode);
567 newNode = nodes.last();
568 }
569
570 newNode->parent = parent;
571 return newNode;
572}
573
574void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider)
575{
576 qDeleteAll(nodes);
577 nodes.clear();
578 nodes.append(new QTextHtmlParserNode);
579 txt = text;
580 pos = 0;
581 len = txt.length();
582 textEditMode = false;
583 resourceProvider = _resourceProvider;
584 parse();
585 //dumpHtml();
586}
587
588int QTextHtmlParser::depth(int i) const
589{
590 int depth = 0;
591 while (i) {
592 i = at(i).parent;
593 ++depth;
594 }
595 return depth;
596}
597
598int QTextHtmlParser::margin(int i, int mar) const {
599 int m = 0;
600 const QTextHtmlParserNode *node;
601 if (mar == MarginLeft
602 || mar == MarginRight) {
603 while (i) {
604 node = &at(i);
605 if (!node->isBlock() && node->id != Html_table)
606 break;
607 if (node->isTableCell())
608 break;
609 m += node->margin[mar];
610 i = node->parent;
611 }
612 }
613 return m;
614}
615
616int QTextHtmlParser::topMargin(int i) const
617{
618 if (!i)
619 return 0;
620 return at(i).margin[MarginTop];
621}
622
623int QTextHtmlParser::bottomMargin(int i) const
624{
625 if (!i)
626 return 0;
627 return at(i).margin[MarginBottom];
628}
629
630void QTextHtmlParser::eatSpace()
631{
632 while (pos < len && txt.at(pos).isSpace() && txt.at(pos) != QChar::ParagraphSeparator)
633 pos++;
634}
635
636void QTextHtmlParser::parse()
637{
638 while (pos < len) {
639 QChar c = txt.at(pos++);
640 if (c == QLatin1Char('<')) {
641 parseTag();
642 } else if (c == QLatin1Char('&')) {
643 nodes.last()->text += parseEntity();
644 } else {
645 nodes.last()->text += c;
646 }
647 }
648}
649
650// parses a tag after "<"
651void QTextHtmlParser::parseTag()
652{
653 eatSpace();
654
655 // handle comments and other exclamation mark declarations
656 if (hasPrefix(QLatin1Char('!'))) {
657 parseExclamationTag();
658 if (nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePre
659 && nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePreWrap
660 && !textEditMode)
661 eatSpace();
662 return;
663 }
664
665 // if close tag just close
666 if (hasPrefix(QLatin1Char('/'))) {
667 if (nodes.last()->id == Html_style) {
668#ifndef QT_NO_CSSPARSER
669 QCss::Parser parser(nodes.constLast()->text);
670 QCss::StyleSheet sheet;
671 sheet.origin = QCss::StyleSheetOrigin_Author;
672 parser.parse(&sheet, Qt::CaseInsensitive);
673 inlineStyleSheets.append(sheet);
674 resolveStyleSheetImports(sheet);
675#endif
676 }
677 parseCloseTag();
678 return;
679 }
680
681 int p = last();
682 while (p && at(p).tag.size() == 0)
683 p = at(p).parent;
684
685 QTextHtmlParserNode *node = newNode(p);
686
687 // parse tag name
688 node->tag = parseWord().toLower();
689
690 const QTextHtmlElement *elem = lookupElementHelper(node->tag);
691 if (elem) {
692 node->id = elem->id;
693 node->displayMode = elem->displayMode;
694 } else {
695 node->id = Html_unknown;
696 }
697
698 node->attributes.clear();
699 // _need_ at least one space after the tag name, otherwise there can't be attributes
700 if (pos < len && txt.at(pos).isSpace())
701 node->attributes = parseAttributes();
702
703 // resolveParent() may have to change the order in the tree and
704 // insert intermediate nodes for buggy HTML, so re-initialize the 'node'
705 // pointer through the return value
706 node = resolveParent();
707 resolveNode();
708
709#ifndef QT_NO_CSSPARSER
710 const int nodeIndex = nodes.count() - 1; // this new node is always the last
711 node->applyCssDeclarations(declarationsForNode(nodeIndex), resourceProvider);
712#endif
713 applyAttributes(node->attributes);
714
715 // finish tag
716 bool tagClosed = false;
717 while (pos < len && txt.at(pos) != QLatin1Char('>')) {
718 if (txt.at(pos) == QLatin1Char('/'))
719 tagClosed = true;
720
721 pos++;
722 }
723 pos++;
724
725 // in a white-space preserving environment strip off a initial newline
726 // since the element itself already generates a newline
727 if ((node->wsm == QTextHtmlParserNode::WhiteSpacePre
728 || node->wsm == QTextHtmlParserNode::WhiteSpacePreWrap
729 || node->wsm == QTextHtmlParserNode::WhiteSpacePreLine)
730 && node->isBlock()) {
731 if (pos < len - 1 && txt.at(pos) == QLatin1Char('\n'))
732 ++pos;
733 }
734
735 if (node->mayNotHaveChildren() || tagClosed) {
736 newNode(node->parent);
737 resolveNode();
738 }
739}
740
741// parses a tag beginning with "/"
742void QTextHtmlParser::parseCloseTag()
743{
744 ++pos;
745 QString tag = parseWord().toLower().trimmed();
746 while (pos < len) {
747 QChar c = txt.at(pos++);
748 if (c == QLatin1Char('>'))
749 break;
750 }
751
752 // find corresponding open node
753 int p = last();
754 if (p > 0
755 && at(p - 1).tag == tag
756 && at(p - 1).mayNotHaveChildren())
757 p--;
758
759 while (p && at(p).tag != tag)
760 p = at(p).parent;
761
762 // simply ignore the tag if we can't find
763 // a corresponding open node, for broken
764 // html such as <font>blah</font></font>
765 if (!p)
766 return;
767
768 // in a white-space preserving environment strip off a trailing newline
769 // since the closing of the opening block element will automatically result
770 // in a new block for elements following the <pre>
771 // ...foo\n</pre><p>blah -> foo</pre><p>blah
772 if ((at(p).wsm == QTextHtmlParserNode::WhiteSpacePre
773 || at(p).wsm == QTextHtmlParserNode::WhiteSpacePreWrap
774 || at(p).wsm == QTextHtmlParserNode::WhiteSpacePreLine)
775 && at(p).isBlock()) {
776 if (at(last()).text.endsWith(QLatin1Char('\n')))
777 nodes[last()]->text.chop(1);
778 }
779
780 newNode(at(p).parent);
781 resolveNode();
782}
783
784// parses a tag beginning with "!"
785void QTextHtmlParser::parseExclamationTag()
786{
787 ++pos;
788 if (hasPrefix(QLatin1Char('-'),1) && hasPrefix(QLatin1Char('-'),2)) {
789 pos += 3;
790 // eat comments
791 int end = txt.indexOf(QLatin1String("-->"), pos);
792 pos = (end >= 0 ? end + 3 : len);
793 } else {
794 // eat internal tags
795 while (pos < len) {
796 QChar c = txt.at(pos++);
797 if (c == QLatin1Char('>'))
798 break;
799 }
800 }
801}
802
803// parses an entity after "&", and returns it
804QString QTextHtmlParser::parseEntity()
805{
806 const int recover = pos;
807 int entityLen = 0;
808 QStringView entity;
809 while (pos < len) {
810 QChar c = txt.at(pos++);
811 if (c.isSpace() || pos - recover > 9) {
812 goto error;
813 }
814 if (c == QLatin1Char(';'))
815 break;
816 ++entityLen;
817 }
818 if (entityLen) {
819 entity = QStringView(txt).mid(recover, entityLen);
820 QChar resolved = resolveEntity(entity);
821 if (!resolved.isNull())
822 return QString(resolved);
823
824 if (entityLen > 1 && entity.at(0) == QLatin1Char('#')) {
825 entity = entity.mid(1); // removing leading #
826
827 int base = 10;
828 bool ok = false;
829
830 if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity?
831 entity = entity.mid(1);
832 base = 16;
833 }
834
835 uint uc = entity.toUInt(&ok, base);
836 if (ok) {
837 if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0])))
838 uc = windowsLatin1ExtendedCharacters[uc - 0x80];
839 return QStringView{QChar::fromUcs4(uc)}.toString();
840 }
841 }
842 }
843error:
844 pos = recover;
845 return QLatin1String("&");
846}
847
848// parses one word, possibly quoted, and returns it
849QString QTextHtmlParser::parseWord()
850{
851 QString word;
852 if (hasPrefix(QLatin1Char('\"'))) { // double quotes
853 ++pos;
854 while (pos < len) {
855 QChar c = txt.at(pos++);
856 if (c == QLatin1Char('\"'))
857 break;
858 else if (c == QLatin1Char('&'))
859 word += parseEntity();
860 else
861 word += c;
862 }
863 } else if (hasPrefix(QLatin1Char('\''))) { // single quotes
864 ++pos;
865 while (pos < len) {
866 QChar c = txt.at(pos++);
867 // Allow for escaped single quotes as they may be part of the string
868 if (c == QLatin1Char('\'') && (txt.length() > 1 && txt.at(pos - 2) != QLatin1Char('\\')))
869 break;
870 else
871 word += c;
872 }
873 } else { // normal text
874 while (pos < len) {
875 QChar c = txt.at(pos++);
876 if (c == QLatin1Char('>')
877 || (c == QLatin1Char('/') && hasPrefix(QLatin1Char('>'), 1))
878 || c == QLatin1Char('<')
879 || c == QLatin1Char('=')
880 || c.isSpace()) {
881 --pos;
882 break;
883 }
884 if (c == QLatin1Char('&'))
885 word += parseEntity();
886 else
887 word += c;
888 }
889 }
890 return word;
891}
892
893// gives the new node the right parent
894QTextHtmlParserNode *QTextHtmlParser::resolveParent()
895{
896 QTextHtmlParserNode *node = nodes.last();
897
898 int p = node->parent;
899
900 // Excel gives us buggy HTML with just tr without surrounding table tags
901 // or with just td tags
902
903 if (node->id == Html_td) {
904 int n = p;
905 while (n && at(n).id != Html_tr)
906 n = at(n).parent;
907
908 if (!n) {
909 nodes.insert(nodes.count() - 1, new QTextHtmlParserNode);
910 nodes.insert(nodes.count() - 1, new QTextHtmlParserNode);
911
912 QTextHtmlParserNode *table = nodes[nodes.count() - 3];
913 table->parent = p;
914 table->id = Html_table;
915 table->tag = QLatin1String("table");
916 table->children.append(nodes.count() - 2); // add row as child
917
918 QTextHtmlParserNode *row = nodes[nodes.count() - 2];
919 row->parent = nodes.count() - 3; // table as parent
920 row->id = Html_tr;
921 row->tag = QLatin1String("tr");
922
923 p = nodes.count() - 2;
924 node = nodes.last(); // re-initialize pointer
925 }
926 }
927
928 if (node->id == Html_tr) {
929 int n = p;
930 while (n && at(n).id != Html_table)
931 n = at(n).parent;
932
933 if (!n) {
934 nodes.insert(nodes.count() - 1, new QTextHtmlParserNode);
935 QTextHtmlParserNode *table = nodes[nodes.count() - 2];
936 table->parent = p;
937 table->id = Html_table;
938 table->tag = QLatin1String("table");
939 p = nodes.count() - 2;
940 node = nodes.last(); // re-initialize pointer
941 }
942 }
943
944 // permit invalid html by letting block elements be children
945 // of inline elements with the exception of paragraphs:
946 //
947 // a new paragraph closes parent inline elements (while loop),
948 // unless they themselves are children of a non-paragraph block
949 // element (if statement)
950 //
951 // For example:
952 //
953 // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that
954 // belongs to the first <p>. The self-nesting
955 // check further down prevents the second <p>
956 // from nesting into the first one then.
957 // so Bar is not bold.
958 //
959 // <body><b><p>Foo <-- Foo should be bold.
960 //
961 // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold.
962 //
963 if (node->id == Html_p) {
964 while (p && !at(p).isBlock())
965 p = at(p).parent;
966
967 if (!p || at(p).id != Html_p)
968 p = node->parent;
969 }
970
971 // some elements are not self nesting
972 if (node->id == at(p).id
973 && node->isNotSelfNesting())
974 p = at(p).parent;
975
976 // some elements are not allowed in certain contexts
977 while ((p && !node->allowedInContext(at(p).id))
978 // ### make new styles aware of empty tags
979 || at(p).mayNotHaveChildren()
980 ) {
981 p = at(p).parent;
982 }
983
984 node->parent = p;
985
986 // makes it easier to traverse the tree, later
987 nodes[p]->children.append(nodes.count() - 1);
988 return node;
989}
990
991// sets all properties on the new node
992void QTextHtmlParser::resolveNode()
993{
994 QTextHtmlParserNode *node = nodes.last();
995 const QTextHtmlParserNode *parent = nodes.at(node->parent);
996 node->initializeProperties(parent, this);
997}
998
999bool QTextHtmlParserNode::isNestedList(const QTextHtmlParser *parser) const
1000{
1001 if (!isListStart())
1002 return false;
1003
1004 int p = parent;
1005 while (p) {
1006 if (parser->at(p).isListStart())
1007 return true;
1008 p = parser->at(p).parent;
1009 }
1010 return false;
1011}
1012
1013void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser)
1014{
1015 // inherit properties from parent element
1016 charFormat = parent->charFormat;
1017
1018 if (id == Html_html)
1019 blockFormat.setLayoutDirection(Qt::LeftToRight); // HTML default
1020 else if (parent->blockFormat.hasProperty(QTextFormat::LayoutDirection))
1021 blockFormat.setLayoutDirection(parent->blockFormat.layoutDirection());
1022
1023 if (parent->displayMode == QTextHtmlElement::DisplayNone)
1024 displayMode = QTextHtmlElement::DisplayNone;
1025
1026 if (parent->id != Html_table || id == Html_caption) {
1027 if (parent->blockFormat.hasProperty(QTextFormat::BlockAlignment))
1028 blockFormat.setAlignment(parent->blockFormat.alignment());
1029 else
1030 blockFormat.clearProperty(QTextFormat::BlockAlignment);
1031 }
1032 // we don't paint per-row background colors, yet. so as an
1033 // exception inherit the background color here
1034 // we also inherit the background between inline elements
1035 if ((parent->id != Html_tr || !isTableCell())
1036 && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)) {
1037 charFormat.clearProperty(QTextFormat::BackgroundBrush);
1038 }
1039
1040 listStyle = parent->listStyle;
1041 // makes no sense to inherit that property, a named anchor is a single point
1042 // in the document, which is set by the DocumentFragment
1043 charFormat.clearProperty(QTextFormat::AnchorName);
1044 wsm = parent->wsm;
1045
1046 // initialize remaining properties
1047 margin[QTextHtmlParser::MarginLeft] = 0;
1048 margin[QTextHtmlParser::MarginRight] = 0;
1049 margin[QTextHtmlParser::MarginTop] = 0;
1050 margin[QTextHtmlParser::MarginBottom] = 0;
1051 cssFloat = QTextFrameFormat::InFlow;
1052
1053 for (int i = 0; i < 4; ++i)
1054 padding[i] = -1;
1055
1056 // set element specific attributes
1057 switch (id) {
1058 case Html_a:
1059 for (int i = 0; i < attributes.count(); i += 2) {
1060 const QString key = attributes.at(i);
1061 if (key.compare(QLatin1String("href"), Qt::CaseInsensitive) == 0
1062 && !attributes.at(i + 1).isEmpty()) {
1063 hasHref = true;
1064 }
1065 }
1066 charFormat.setAnchor(true);
1067 break;
1068 case Html_big:
1069 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1070 break;
1071 case Html_small:
1072 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1073 break;
1074 case Html_h1:
1075 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(3));
1076 margin[QTextHtmlParser::MarginTop] = 18;
1077 margin[QTextHtmlParser::MarginBottom] = 12;
1078 break;
1079 case Html_h2:
1080 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(2));
1081 margin[QTextHtmlParser::MarginTop] = 16;
1082 margin[QTextHtmlParser::MarginBottom] = 12;
1083 break;
1084 case Html_h3:
1085 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1086 margin[QTextHtmlParser::MarginTop] = 14;
1087 margin[QTextHtmlParser::MarginBottom] = 12;
1088 break;
1089 case Html_h4:
1090 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(0));
1091 margin[QTextHtmlParser::MarginTop] = 12;
1092 margin[QTextHtmlParser::MarginBottom] = 12;
1093 break;
1094 case Html_h5:
1095 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1096 margin[QTextHtmlParser::MarginTop] = 12;
1097 margin[QTextHtmlParser::MarginBottom] = 4;
1098 break;
1099 case Html_p:
1100 margin[QTextHtmlParser::MarginTop] = 12;
1101 margin[QTextHtmlParser::MarginBottom] = 12;
1102 break;
1103 case Html_ul:
1104 // nested lists don't have margins, except for the toplevel one
1105 if (!isNestedList(parser)) {
1106 margin[QTextHtmlParser::MarginTop] = 12;
1107 margin[QTextHtmlParser::MarginBottom] = 12;
1108 }
1109 // no left margin as we use indenting instead
1110 break;
1111 case Html_ol:
1112 // nested lists don't have margins, except for the toplevel one
1113 if (!isNestedList(parser)) {
1114 margin[QTextHtmlParser::MarginTop] = 12;
1115 margin[QTextHtmlParser::MarginBottom] = 12;
1116 }
1117 // no left margin as we use indenting instead
1118 break;
1119 case Html_br:
1120 text = QChar(QChar::LineSeparator);
1121 break;
1122 case Html_pre:
1123 margin[QTextHtmlParser::MarginTop] = 12;
1124 margin[QTextHtmlParser::MarginBottom] = 12;
1125 break;
1126 case Html_blockquote:
1127 margin[QTextHtmlParser::MarginTop] = 12;
1128 margin[QTextHtmlParser::MarginBottom] = 12;
1129 margin[QTextHtmlParser::MarginLeft] = 40;
1130 margin[QTextHtmlParser::MarginRight] = 40;
1131 blockFormat.setProperty(QTextFormat::BlockQuoteLevel, 1);
1132 break;
1133 case Html_dl:
1134 margin[QTextHtmlParser::MarginTop] = 8;
1135 margin[QTextHtmlParser::MarginBottom] = 8;
1136 break;
1137 case Html_dd:
1138 margin[QTextHtmlParser::MarginLeft] = 30;
1139 break;
1140 default: break;
1141 }
1142}
1143
1144#ifndef QT_NO_CSSPARSER
1145void QTextHtmlParserNode::setListStyle(const QList<QCss::Value> &cssValues)
1146{
1147 for (int i = 0; i < cssValues.count(); ++i) {
1148 if (cssValues.at(i).type == QCss::Value::KnownIdentifier) {
1149 switch (static_cast<QCss::KnownValue>(cssValues.at(i).variant.toInt())) {
1150 case QCss::Value_None: hasOwnListStyle = true; listStyle = QTextListFormat::ListStyleUndefined; break;
1151 case QCss::Value_Disc: hasOwnListStyle = true; listStyle = QTextListFormat::ListDisc; break;
1152 case QCss::Value_Square: hasOwnListStyle = true; listStyle = QTextListFormat::ListSquare; break;
1153 case QCss::Value_Circle: hasOwnListStyle = true; listStyle = QTextListFormat::ListCircle; break;
1154 case QCss::Value_Decimal: hasOwnListStyle = true; listStyle = QTextListFormat::ListDecimal; break;
1155 case QCss::Value_LowerAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerAlpha; break;
1156 case QCss::Value_UpperAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperAlpha; break;
1157 case QCss::Value_LowerRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerRoman; break;
1158 case QCss::Value_UpperRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperRoman; break;
1159 default: break;
1160 }
1161 }
1162 }
1163 // allow individual list items to override the style
1164 if (id == Html_li && hasOwnListStyle)
1165 blockFormat.setProperty(QTextFormat::ListStyle, listStyle);
1166}
1167
1168void QTextHtmlParserNode::applyCssDeclarations(const QList<QCss::Declaration> &declarations, const QTextDocument *resourceProvider)
1169{
1170 QCss::ValueExtractor extractor(declarations);
1171 extractor.extractBox(margin, padding);
1172
1173 if (id == Html_td || id == Html_th) {
1174 QCss::BorderStyle cssStyles[4];
1175 int cssBorder[4];
1176 QSize cssRadii[4]; // unused
1177 for (int i = 0; i < 4; ++i) {
1178 cssStyles[i] = QCss::BorderStyle_None;
1179 cssBorder[i] = 0;
1180 }
1181 // this will parse (and cache) "border-width" as a list so the
1182 // QCss::BorderWidth parsing below which expects a single value
1183 // will not work as expected - which in this case does not matter
1184 // because tableBorder is not relevant for cells.
1185 extractor.extractBorder(cssBorder, tableCellBorderBrush, cssStyles, cssRadii);
1186 for (int i = 0; i < 4; ++i) {
1187 tableCellBorderStyle[i] = static_cast<QTextFrameFormat::BorderStyle>(cssStyles[i] - 1);
1188 tableCellBorder[i] = static_cast<qreal>(cssBorder[i]);
1189 }
1190 }
1191
1192 for (int i = 0; i < declarations.count(); ++i) {
1193 const QCss::Declaration &decl = declarations.at(i);
1194 if (decl.d->values.isEmpty()) continue;
1195
1196 QCss::KnownValue identifier = QCss::UnknownValue;
1197 if (decl.d->values.first().type == QCss::Value::KnownIdentifier)
1198 identifier = static_cast<QCss::KnownValue>(decl.d->values.first().variant.toInt());
1199
1200 switch (decl.d->propertyId) {
1201 case QCss::BorderColor: borderBrush = QBrush(decl.colorValue()); break;
1202 case QCss::BorderStyles:
1203 if (decl.styleValue() != QCss::BorderStyle_Unknown && decl.styleValue() != QCss::BorderStyle_Native)
1204 borderStyle = static_cast<QTextFrameFormat::BorderStyle>(decl.styleValue() - 1);
1205 break;
1206 case QCss::BorderWidth: {
1207 int borders[4];
1208 extractor.lengthValues(decl, borders);
1209 tableBorder = borders[0];
1210 }
1211 break;
1212 case QCss::BorderCollapse:
1213 borderCollapse = decl.borderCollapseValue();
1214 break;
1215 case QCss::Color: charFormat.setForeground(decl.colorValue()); break;
1216 case QCss::Float:
1217 cssFloat = QTextFrameFormat::InFlow;
1218 switch (identifier) {
1219 case QCss::Value_Left: cssFloat = QTextFrameFormat::FloatLeft; break;
1220 case QCss::Value_Right: cssFloat = QTextFrameFormat::FloatRight; break;
1221 default: break;
1222 }
1223 break;
1224 case QCss::QtBlockIndent:
1225 blockFormat.setIndent(decl.d->values.first().variant.toInt());
1226 break;
1227 case QCss::QtLineHeightType: {
1228 QString lineHeightTypeName = decl.d->values.first().variant.toString();
1229 QTextBlockFormat::LineHeightTypes lineHeightType;
1230 if (lineHeightTypeName.compare(QLatin1String("proportional"), Qt::CaseInsensitive) == 0)
1231 lineHeightType = QTextBlockFormat::ProportionalHeight;
1232 else if (lineHeightTypeName.compare(QLatin1String("fixed"), Qt::CaseInsensitive) == 0)
1233 lineHeightType = QTextBlockFormat::FixedHeight;
1234 else if (lineHeightTypeName.compare(QLatin1String("minimum"), Qt::CaseInsensitive) == 0)
1235 lineHeightType = QTextBlockFormat::MinimumHeight;
1236 else if (lineHeightTypeName.compare(QLatin1String("line-distance"), Qt::CaseInsensitive) == 0)
1237 lineHeightType = QTextBlockFormat::LineDistanceHeight;
1238 else
1239 lineHeightType = QTextBlockFormat::SingleHeight;
1240
1241 if (hasLineHeightMultiplier) {
1242 qreal lineHeight = blockFormat.lineHeight() / 100.0;
1243 blockFormat.setProperty(QTextBlockFormat::LineHeight, lineHeight);
1244 }
1245
1246 blockFormat.setProperty(QTextBlockFormat::LineHeightType, lineHeightType);
1247 hasOwnLineHeightType = true;
1248 }
1249 break;
1250 case QCss::LineHeight: {
1251 qreal lineHeight;
1252 QTextBlockFormat::LineHeightTypes lineHeightType;
1253 if (decl.realValue(&lineHeight, "px")) {
1254 lineHeightType = QTextBlockFormat::MinimumHeight;
1255 } else {
1256 bool ok;
1257 QCss::Value cssValue = decl.d->values.first();
1258 QString value = cssValue.toString();
1259 lineHeight = value.toDouble(&ok);
1260 if (ok) {
1261 if (!hasOwnLineHeightType && cssValue.type == QCss::Value::Number) {
1262 lineHeight *= 100.0;
1263 hasLineHeightMultiplier = true;
1264 }
1265 lineHeightType = QTextBlockFormat::ProportionalHeight;
1266 } else {
1267 lineHeight = 0.0;
1268 lineHeightType = QTextBlockFormat::SingleHeight;
1269 }
1270 }
1271
1272 // Only override line height type if specified in same node
1273 if (hasOwnLineHeightType)
1274 lineHeightType = QTextBlockFormat::LineHeightTypes(blockFormat.lineHeightType());
1275
1276 blockFormat.setLineHeight(lineHeight, lineHeightType);
1277 break;
1278 }
1279 case QCss::TextIndent: {
1280 qreal indent = 0;
1281 if (decl.realValue(&indent, "px"))
1282 blockFormat.setTextIndent(indent);
1283 break; }
1284 case QCss::QtListIndent:
1285 if (decl.intValue(&cssListIndent))
1286 hasCssListIndent = true;
1287 break;
1288 case QCss::QtParagraphType:
1289 if (decl.d->values.first().variant.toString().compare(QLatin1String("empty"), Qt::CaseInsensitive) == 0)
1290 isEmptyParagraph = true;
1291 break;
1292 case QCss::QtTableType:
1293 if (decl.d->values.first().variant.toString().compare(QLatin1String("frame"), Qt::CaseInsensitive) == 0)
1294 isTextFrame = true;
1295 else if (decl.d->values.first().variant.toString().compare(QLatin1String("root"), Qt::CaseInsensitive) == 0) {
1296 isTextFrame = true;
1297 isRootFrame = true;
1298 }
1299 break;
1300 case QCss::QtUserState:
1301 userState = decl.d->values.first().variant.toInt();
1302 break;
1303 case QCss::Whitespace:
1304 switch (identifier) {
1305 case QCss::Value_Normal: wsm = QTextHtmlParserNode::WhiteSpaceNormal; break;
1306 case QCss::Value_Pre: wsm = QTextHtmlParserNode::WhiteSpacePre; break;
1307 case QCss::Value_NoWrap: wsm = QTextHtmlParserNode::WhiteSpaceNoWrap; break;
1308 case QCss::Value_PreWrap: wsm = QTextHtmlParserNode::WhiteSpacePreWrap; break;
1309 case QCss::Value_PreLine: wsm = QTextHtmlParserNode::WhiteSpacePreLine; break;
1310 default: break;
1311 }
1312 break;
1313 case QCss::VerticalAlignment:
1314 switch (identifier) {
1315 case QCss::Value_Sub: charFormat.setVerticalAlignment(QTextCharFormat::AlignSubScript); break;
1316 case QCss::Value_Super: charFormat.setVerticalAlignment(QTextCharFormat::AlignSuperScript); break;
1317 case QCss::Value_Middle: charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle); break;
1318 case QCss::Value_Top: charFormat.setVerticalAlignment(QTextCharFormat::AlignTop); break;
1319 case QCss::Value_Bottom: charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom); break;
1320 default: charFormat.setVerticalAlignment(QTextCharFormat::AlignNormal); break;
1321 }
1322 break;
1323 case QCss::PageBreakBefore:
1324 switch (identifier) {
1325 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysBefore); break;
1326 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysBefore); break;
1327 default: break;
1328 }
1329 break;
1330 case QCss::PageBreakAfter:
1331 switch (identifier) {
1332 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysAfter); break;
1333 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysAfter); break;
1334 default: break;
1335 }
1336 break;
1337 case QCss::TextUnderlineStyle:
1338 switch (identifier) {
1339 case QCss::Value_None: charFormat.setUnderlineStyle(QTextCharFormat::NoUnderline); break;
1340 case QCss::Value_Solid: charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); break;
1341 case QCss::Value_Dashed: charFormat.setUnderlineStyle(QTextCharFormat::DashUnderline); break;
1342 case QCss::Value_Dotted: charFormat.setUnderlineStyle(QTextCharFormat::DotLine); break;
1343 case QCss::Value_DotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotLine); break;
1344 case QCss::Value_DotDotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotDotLine); break;
1345 case QCss::Value_Wave: charFormat.setUnderlineStyle(QTextCharFormat::WaveUnderline); break;
1346 default: break;
1347 }
1348 break;
1349 case QCss::ListStyleType:
1350 case QCss::ListStyle:
1351 setListStyle(decl.d->values);
1352 break;
1353 case QCss::QtListNumberPrefix:
1354 textListNumberPrefix = decl.d->values.first().variant.toString();
1355 break;
1356 case QCss::QtListNumberSuffix:
1357 textListNumberSuffix = decl.d->values.first().variant.toString();
1358 break;
1359 case QCss::TextAlignment:
1360 switch (identifier) {
1361 case QCss::Value_Left: blockFormat.setAlignment(Qt::AlignLeft); break;
1362 case QCss::Value_Center: blockFormat.setAlignment(Qt::AlignCenter); break;
1363 case QCss::Value_Right: blockFormat.setAlignment(Qt::AlignRight); break;
1364 default: break;
1365 }
1366 break;
1367
1368 case QCss::QtForegroundTextureCacheKey:
1369 {
1370 if (resourceProvider != nullptr && QTextDocumentPrivate::get(resourceProvider) != nullptr) {
1371 bool ok;
1372 qint64 searchKey = decl.d->values.first().variant.toLongLong(&ok);
1373 if (ok)
1374 applyForegroundImage(searchKey, resourceProvider);
1375 }
1376 break;
1377 }
1378 default: break;
1379 }
1380 }
1381
1382 QFont f;
1383 int adjustment = -255;
1384 extractor.extractFont(&f, &adjustment);
1385 if (f.pixelSize() > INT32_MAX / 2)
1386 f.setPixelSize(INT32_MAX / 2); // avoid even more extreme values
1387 charFormat.setFont(f, QTextCharFormat::FontPropertiesSpecifiedOnly);
1388
1389 if (adjustment >= -1)
1390 charFormat.setProperty(QTextFormat::FontSizeAdjustment, adjustment);
1391
1392 {
1393 Qt::Alignment ignoredAlignment;
1394 QCss::Repeat ignoredRepeat;
1395 QString bgImage;
1396 QBrush bgBrush;
1397 QCss::Origin ignoredOrigin, ignoredClip;
1398 QCss::Attachment ignoredAttachment;
1399 extractor.extractBackground(&bgBrush, &bgImage, &ignoredRepeat, &ignoredAlignment,
1400 &ignoredOrigin, &ignoredAttachment, &ignoredClip);
1401
1402 if (!bgImage.isEmpty() && resourceProvider) {
1403 applyBackgroundImage(bgImage, resourceProvider);
1404 } else if (bgBrush.style() != Qt::NoBrush) {
1405 charFormat.setBackground(bgBrush);
1406 }
1407 }
1408}
1409
1410#endif // QT_NO_CSSPARSER
1411
1412void QTextHtmlParserNode::applyForegroundImage(qint64 searchKey, const QTextDocument *resourceProvider)
1413{
1414 const QTextDocumentPrivate *priv = QTextDocumentPrivate::get(resourceProvider);
1415 for (int i = 0; i < priv->formats.numFormats(); ++i) {
1416 QTextCharFormat format = priv->formats.charFormat(i);
1417 if (format.isValid()) {
1418 QBrush brush = format.foreground();
1419 if (brush.style() == Qt::TexturePattern) {
1420 const bool isPixmap = qHasPixmapTexture(brush);
1421
1422 if (isPixmap && QCoreApplication::instance()->thread() != QThread::currentThread()) {
1423 qWarning("Can't apply QPixmap outside of GUI thread");
1424 return;
1425 }
1426
1427 const qint64 cacheKey = isPixmap ? brush.texture().cacheKey() : brush.textureImage().cacheKey();
1428 if (cacheKey == searchKey) {
1429 QBrush b;
1430 if (isPixmap)
1431 b.setTexture(brush.texture());
1432 else
1433 b.setTextureImage(brush.textureImage());
1434 b.setStyle(Qt::TexturePattern);
1435 charFormat.setForeground(b);
1436 }
1437 }
1438 }
1439 }
1440
1441}
1442
1443void QTextHtmlParserNode::applyBackgroundImage(const QString &url, const QTextDocument *resourceProvider)
1444{
1445 if (!url.isEmpty() && resourceProvider) {
1446 QVariant val = resourceProvider->resource(QTextDocument::ImageResource, url);
1447
1448 if (QCoreApplication::instance()->thread() != QThread::currentThread()) {
1449 // must use images in non-GUI threads
1450 if (val.userType() == QMetaType::QImage) {
1451 QImage image = qvariant_cast<QImage>(val);
1452 charFormat.setBackground(image);
1453 } else if (val.userType() == QMetaType::QByteArray) {
1454 QImage image;
1455 if (image.loadFromData(val.toByteArray())) {
1456 charFormat.setBackground(image);
1457 }
1458 }
1459 } else {
1460 if (val.userType() == QMetaType::QImage || val.userType() == QMetaType::QPixmap) {
1461 charFormat.setBackground(qvariant_cast<QPixmap>(val));
1462 } else if (val.userType() == QMetaType::QByteArray) {
1463 QPixmap pm;
1464 if (pm.loadFromData(val.toByteArray())) {
1465 charFormat.setBackground(pm);
1466 }
1467 }
1468 }
1469 }
1470 if (!url.isEmpty())
1471 charFormat.setProperty(QTextFormat::BackgroundImageUrl, url);
1472}
1473
1474bool QTextHtmlParserNode::hasOnlyWhitespace() const
1475{
1476 for (int i = 0; i < text.count(); ++i)
1477 if (!text.at(i).isSpace() || text.at(i) == QChar::LineSeparator)
1478 return false;
1479 return true;
1480}
1481
1482static bool setIntAttribute(int *destination, const QString &value)
1483{
1484 bool ok = false;
1485 int val = value.toInt(&ok);
1486 if (ok)
1487 *destination = val;
1488
1489 return ok;
1490}
1491
1492static bool setFloatAttribute(qreal *destination, const QString &value)
1493{
1494 bool ok = false;
1495 qreal val = value.toDouble(&ok);
1496 if (ok)
1497 *destination = val;
1498
1499 return ok;
1500}
1501
1502static void setWidthAttribute(QTextLength *width, const QString &valueStr)
1503{
1504 bool ok = false;
1505 qreal realVal = valueStr.toDouble(&ok);
1506 if (ok) {
1507 *width = QTextLength(QTextLength::FixedLength, realVal);
1508 } else {
1509 auto value = QStringView(valueStr).trimmed();
1510 if (!value.isEmpty() && value.endsWith(QLatin1Char('%'))) {
1511 value.truncate(value.size() - 1);
1512 realVal = value.toDouble(&ok);
1513 if (ok)
1514 *width = QTextLength(QTextLength::PercentageLength, realVal);
1515 }
1516 }
1517}
1518
1519#ifndef QT_NO_CSSPARSER
1520void QTextHtmlParserNode::parseStyleAttribute(const QString &value, const QTextDocument *resourceProvider)
1521{
1522 const QString css = QLatin1String("* {") + value + QLatin1Char('}');
1523 QCss::Parser parser(css);
1524 QCss::StyleSheet sheet;
1525 parser.parse(&sheet, Qt::CaseInsensitive);
1526 if (sheet.styleRules.count() != 1) return;
1527 applyCssDeclarations(sheet.styleRules.at(0).declarations, resourceProvider);
1528}
1529#endif
1530
1531QStringList QTextHtmlParser::parseAttributes()
1532{
1533 QStringList attrs;
1534
1535 while (pos < len) {
1536 eatSpace();
1537 if (hasPrefix(QLatin1Char('>')) || hasPrefix(QLatin1Char('/')))
1538 break;
1539 QString key = parseWord().toLower();
1540 QString value = QLatin1String("1");
1541 if (key.size() == 0)
1542 break;
1543 eatSpace();
1544 if (hasPrefix(QLatin1Char('='))){
1545 pos++;
1546 eatSpace();
1547 value = parseWord();
1548 }
1549 if (value.size() == 0)
1550 continue;
1551 attrs << key << value;
1552 }
1553
1554 return attrs;
1555}
1556
1557void QTextHtmlParser::applyAttributes(const QStringList &attributes)
1558{
1559 // local state variable for qt3 textedit mode
1560 bool seenQt3Richtext = false;
1561 QString linkHref;
1562 QString linkType;
1563
1564 if (attributes.count() % 2 == 1)
1565 return;
1566
1567 QTextHtmlParserNode *node = nodes.last();
1568
1569 for (int i = 0; i < attributes.count(); i += 2) {
1570 QString key = attributes.at(i);
1571 QString value = attributes.at(i + 1);
1572
1573 switch (node->id) {
1574 case Html_font:
1575 // the infamous font tag
1576 if (key == QLatin1String("size") && value.size()) {
1577 int n = value.toInt();
1578 if (value.at(0) != QLatin1Char('+') && value.at(0) != QLatin1Char('-'))
1579 n -= 3;
1580 node->charFormat.setProperty(QTextFormat::FontSizeAdjustment, n);
1581 } else if (key == QLatin1String("face")) {
1582 if (value.contains(QLatin1Char(','))) {
1583 const QStringList values = value.split(QLatin1Char(','));
1584 QStringList families;
1585 for (const QString &family : values)
1586 families << family.trimmed();
1587 node->charFormat.setFontFamilies(families);
1588 node->charFormat.setFontFamily(families.at(0));
1589 } else {
1590 node->charFormat.setFontFamily(value);
1591 }
1592 } else if (key == QLatin1String("color")) {
1593 QColor c; c.setNamedColor(value);
1594 if (!c.isValid())
1595 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1596 node->charFormat.setForeground(c);
1597 }
1598 break;
1599 case Html_ol:
1600 case Html_ul:
1601 if (key == QLatin1String("type")) {
1602 node->hasOwnListStyle = true;
1603 if (value == QLatin1String("1")) {
1604 node->listStyle = QTextListFormat::ListDecimal;
1605 } else if (value == QLatin1String("a")) {
1606 node->listStyle = QTextListFormat::ListLowerAlpha;
1607 } else if (value == QLatin1String("A")) {
1608 node->listStyle = QTextListFormat::ListUpperAlpha;
1609 } else if (value == QLatin1String("i")) {
1610 node->listStyle = QTextListFormat::ListLowerRoman;
1611 } else if (value == QLatin1String("I")) {
1612 node->listStyle = QTextListFormat::ListUpperRoman;
1613 } else {
1614 value = std::move(value).toLower();
1615 if (value == QLatin1String("square"))
1616 node->listStyle = QTextListFormat::ListSquare;
1617 else if (value == QLatin1String("disc"))
1618 node->listStyle = QTextListFormat::ListDisc;
1619 else if (value == QLatin1String("circle"))
1620 node->listStyle = QTextListFormat::ListCircle;
1621 else if (value == QLatin1String("none"))
1622 node->listStyle = QTextListFormat::ListStyleUndefined;
1623 }
1624 }
1625 break;
1626 case Html_a:
1627 if (key == QLatin1String("href"))
1628 node->charFormat.setAnchorHref(value);
1629 else if (key == QLatin1String("name"))
1630 node->charFormat.setAnchorNames({value});
1631 break;
1632 case Html_img:
1633 if (key == QLatin1String("src") || key == QLatin1String("source")) {
1634 node->imageName = value;
1635 } else if (key == QLatin1String("width")) {
1636 node->imageWidth = -2; // register that there is a value for it.
1637 setFloatAttribute(&node->imageWidth, value);
1638 } else if (key == QLatin1String("height")) {
1639 node->imageHeight = -2; // register that there is a value for it.
1640 setFloatAttribute(&node->imageHeight, value);
1641 } else if (key == QLatin1String("alt")) {
1642 node->imageAlt = value;
1643 } else if (key == QLatin1String("title")) {
1644 node->text = value;
1645 }
1646 break;
1647 case Html_tr:
1648 case Html_body:
1649 if (key == QLatin1String("bgcolor")) {
1650 QColor c; c.setNamedColor(value);
1651 if (!c.isValid())
1652 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1653 node->charFormat.setBackground(c);
1654 } else if (key == QLatin1String("background")) {
1655 node->applyBackgroundImage(value, resourceProvider);
1656 }
1657 break;
1658 case Html_th:
1659 case Html_td:
1660 if (key == QLatin1String("width")) {
1661 setWidthAttribute(&node->width, value);
1662 } else if (key == QLatin1String("bgcolor")) {
1663 QColor c; c.setNamedColor(value);
1664 if (!c.isValid())
1665 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1666 node->charFormat.setBackground(c);
1667 } else if (key == QLatin1String("background")) {
1668 node->applyBackgroundImage(value, resourceProvider);
1669 } else if (key == QLatin1String("rowspan")) {
1670 if (setIntAttribute(&node->tableCellRowSpan, value))
1671 node->tableCellRowSpan = qMax(1, node->tableCellRowSpan);
1672 } else if (key == QLatin1String("colspan")) {
1673 if (setIntAttribute(&node->tableCellColSpan, value))
1674 node->tableCellColSpan = qMax(1, node->tableCellColSpan);
1675 }
1676 break;
1677 case Html_table:
1678 if (key == QLatin1String("border")) {
1679 setFloatAttribute(&node->tableBorder, value);
1680 } else if (key == QLatin1String("bgcolor")) {
1681 QColor c; c.setNamedColor(value);
1682 if (!c.isValid())
1683 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1684 node->charFormat.setBackground(c);
1685 } else if (key == QLatin1String("bordercolor")) {
1686 QColor c; c.setNamedColor(value);
1687 if (!c.isValid())
1688 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1689 node->borderBrush = c;
1690 } else if (key == QLatin1String("background")) {
1691 node->applyBackgroundImage(value, resourceProvider);
1692 } else if (key == QLatin1String("cellspacing")) {
1693 setFloatAttribute(&node->tableCellSpacing, value);
1694 } else if (key == QLatin1String("cellpadding")) {
1695 setFloatAttribute(&node->tableCellPadding, value);
1696 } else if (key == QLatin1String("width")) {
1697 setWidthAttribute(&node->width, value);
1698 } else if (key == QLatin1String("height")) {
1699 setWidthAttribute(&node->height, value);
1700 }
1701 break;
1702 case Html_meta:
1703 if (key == QLatin1String("name")
1704 && value == QLatin1String("qrichtext")) {
1705 seenQt3Richtext = true;
1706 }
1707
1708 if (key == QLatin1String("content")
1709 && value == QLatin1String("1")
1710 && seenQt3Richtext) {
1711
1712 textEditMode = true;
1713 }
1714 break;
1715 case Html_hr:
1716 if (key == QLatin1String("width"))
1717 setWidthAttribute(&node->width, value);
1718 break;
1719 case Html_link:
1720 if (key == QLatin1String("href"))
1721 linkHref = value;
1722 else if (key == QLatin1String("type"))
1723 linkType = value;
1724 break;
1725 case Html_pre:
1726 if (key == QLatin1String("class") && value.startsWith(QLatin1String("language-")))
1727 node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
1728 break;
1729 default:
1730 break;
1731 }
1732
1733 if (key == QLatin1String("style")) {
1734#ifndef QT_NO_CSSPARSER
1735 node->parseStyleAttribute(value, resourceProvider);
1736#endif
1737 } else if (key == QLatin1String("align")) {
1738 value = std::move(value).toLower();
1739 bool alignmentSet = true;
1740
1741 if (value == QLatin1String("left"))
1742 node->blockFormat.setAlignment(Qt::AlignLeft|Qt::AlignAbsolute);
1743 else if (value == QLatin1String("right"))
1744 node->blockFormat.setAlignment(Qt::AlignRight|Qt::AlignAbsolute);
1745 else if (value == QLatin1String("center"))
1746 node->blockFormat.setAlignment(Qt::AlignHCenter);
1747 else if (value == QLatin1String("justify"))
1748 node->blockFormat.setAlignment(Qt::AlignJustify);
1749 else
1750 alignmentSet = false;
1751
1752 if (node->id == Html_img) {
1753 // HTML4 compat
1754 if (alignmentSet) {
1755 if (node->blockFormat.alignment() & Qt::AlignLeft)
1756 node->cssFloat = QTextFrameFormat::FloatLeft;
1757 else if (node->blockFormat.alignment() & Qt::AlignRight)
1758 node->cssFloat = QTextFrameFormat::FloatRight;
1759 } else if (value == QLatin1String("middle")) {
1760 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1761 } else if (value == QLatin1String("top")) {
1762 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1763 }
1764 }
1765 } else if (key == QLatin1String("valign")) {
1766 value = std::move(value).toLower();
1767 if (value == QLatin1String("top"))
1768 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1769 else if (value == QLatin1String("middle"))
1770 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1771 else if (value == QLatin1String("bottom"))
1772 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom);
1773 } else if (key == QLatin1String("dir")) {
1774 value = std::move(value).toLower();
1775 if (value == QLatin1String("ltr"))
1776 node->blockFormat.setLayoutDirection(Qt::LeftToRight);
1777 else if (value == QLatin1String("rtl"))
1778 node->blockFormat.setLayoutDirection(Qt::RightToLeft);
1779 } else if (key == QLatin1String("title")) {
1780 node->charFormat.setToolTip(value);
1781 } else if (key == QLatin1String("id")) {
1782 node->charFormat.setAnchor(true);
1783 node->charFormat.setAnchorNames({value});
1784 }
1785 }
1786
1787#ifndef QT_NO_CSSPARSER
1788 if (resourceProvider && !linkHref.isEmpty() && linkType == QLatin1String("text/css"))
1789 importStyleSheet(linkHref);
1790#endif
1791}
1792
1793#ifndef QT_NO_CSSPARSER
1794class QTextHtmlStyleSelector : public QCss::StyleSelector
1795{
1796public:
1797 inline QTextHtmlStyleSelector(const QTextHtmlParser *parser)
1798 : parser(parser) { nameCaseSensitivity = Qt::CaseInsensitive; }
1799
1800 virtual QStringList nodeNames(NodePtr node) const override;
1801 virtual QString attribute(NodePtr node, const QString &name) const override;
1802 virtual bool hasAttributes(NodePtr node) const override;
1803 virtual bool isNullNode(NodePtr node) const override;
1804 virtual NodePtr parentNode(NodePtr node) const override;
1805 virtual NodePtr previousSiblingNode(NodePtr node) const override;
1806 virtual NodePtr duplicateNode(NodePtr node) const override;
1807 virtual void freeNode(NodePtr node) const override;
1808
1809private:
1810 const QTextHtmlParser *parser;
1811};
1812
1813QStringList QTextHtmlStyleSelector::nodeNames(NodePtr node) const
1814{
1815 return QStringList(parser->at(node.id).tag.toLower());
1816}
1817
1818#endif // QT_NO_CSSPARSER
1819
1820#ifndef QT_NO_CSSPARSER
1821
1822static inline int findAttribute(const QStringList &attributes, const QString &name)
1823{
1824 int idx = -1;
1825 do {
1826 idx = attributes.indexOf(name, idx + 1);
1827 } while (idx != -1 && (idx % 2 == 1));
1828 return idx;
1829}
1830
1831QString QTextHtmlStyleSelector::attribute(NodePtr node, const QString &name) const
1832{
1833 const QStringList &attributes = parser->at(node.id).attributes;
1834 const int idx = findAttribute(attributes, name);
1835 if (idx == -1)
1836 return QString();
1837 return attributes.at(idx + 1);
1838}
1839
1840bool QTextHtmlStyleSelector::hasAttributes(NodePtr node) const
1841{
1842 const QStringList &attributes = parser->at(node.id).attributes;
1843 return !attributes.isEmpty();
1844}
1845
1846bool QTextHtmlStyleSelector::isNullNode(NodePtr node) const
1847{
1848 return node.id == 0;
1849}
1850
1851QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::parentNode(NodePtr node) const
1852{
1853 NodePtr parent;
1854 parent.id = 0;
1855 if (node.id) {
1856 parent.id = parser->at(node.id).parent;
1857 }
1858 return parent;
1859}
1860
1861QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::duplicateNode(NodePtr node) const
1862{
1863 return node;
1864}
1865
1866QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::previousSiblingNode(NodePtr node) const
1867{
1868 NodePtr sibling;
1869 sibling.id = 0;
1870 if (!node.id)
1871 return sibling;
1872 int parent = parser->at(node.id).parent;
1873 if (!parent)
1874 return sibling;
1875 const int childIdx = parser->at(parent).children.indexOf(node.id);
1876 if (childIdx <= 0)
1877 return sibling;
1878 sibling.id = parser->at(parent).children.at(childIdx - 1);
1879 return sibling;
1880}
1881
1882void QTextHtmlStyleSelector::freeNode(NodePtr) const
1883{
1884}
1885
1886void QTextHtmlParser::resolveStyleSheetImports(const QCss::StyleSheet &sheet)
1887{
1888 for (int i = 0; i < sheet.importRules.count(); ++i) {
1889 const QCss::ImportRule &rule = sheet.importRules.at(i);
1890 if (rule.media.isEmpty()
1891 || rule.media.contains(QLatin1String("screen"), Qt::CaseInsensitive))
1892 importStyleSheet(rule.href);
1893 }
1894}
1895
1896void QTextHtmlParser::importStyleSheet(const QString &href)
1897{
1898 if (!resourceProvider)
1899 return;
1900 for (int i = 0; i < externalStyleSheets.count(); ++i)
1901 if (externalStyleSheets.at(i).url == href)
1902 return;
1903
1904 QVariant res = resourceProvider->resource(QTextDocument::StyleSheetResource, href);
1905 QString css;
1906 if (res.userType() == QMetaType::QString) {
1907 css = res.toString();
1908 } else if (res.userType() == QMetaType::QByteArray) {
1909 // #### detect @charset
1910 css = QString::fromUtf8(res.toByteArray());
1911 }
1912 if (!css.isEmpty()) {
1913 QCss::Parser parser(css);
1914 QCss::StyleSheet sheet;
1915 parser.parse(&sheet, Qt::CaseInsensitive);
1916 externalStyleSheets.append(ExternalStyleSheet(href, sheet));
1917 resolveStyleSheetImports(sheet);
1918 }
1919}
1920
1921QList<QCss::Declaration> standardDeclarationForNode(const QTextHtmlParserNode &node)
1922{
1923 QList<QCss::Declaration> decls;
1924 QCss::Declaration decl;
1925 QCss::Value val;
1926 switch (node.id) {
1927 case Html_a:
1928 case Html_u: {
1929 bool needsUnderline = (node.id == Html_u) ? true : false;
1930 if (node.id == Html_a) {
1931 for (int i = 0; i < node.attributes.count(); i += 2) {
1932 const QString key = node.attributes.at(i);
1933 if (key.compare(QLatin1String("href"), Qt::CaseInsensitive) == 0
1934 && !node.attributes.at(i + 1).isEmpty()) {
1935 needsUnderline = true;
1936 decl.d->property = QLatin1String("color");
1937 decl.d->propertyId = QCss::Color;
1938 val.type = QCss::Value::Function;
1939 val.variant = QStringList() << QLatin1String("palette") << QLatin1String("link");
1940 decl.d->values = QList<QCss::Value> { val };
1941 decl.d->inheritable = true;
1942 decls << decl;
1943 break;
1944 }
1945 }
1946 }
1947 if (needsUnderline) {
1948 decl = QCss::Declaration();
1949 decl.d->property = QLatin1String("text-decoration");
1950 decl.d->propertyId = QCss::TextDecoration;
1951 val.type = QCss::Value::KnownIdentifier;
1952 val.variant = QVariant(QCss::Value_Underline);
1953 decl.d->values = QList<QCss::Value> { val };
1954 decl.d->inheritable = true;
1955 decls << decl;
1956 }
1957 break;
1958 }
1959 case Html_b:
1960 case Html_strong:
1961 case Html_h1:
1962 case Html_h2:
1963 case Html_h3:
1964 case Html_h4:
1965 case Html_h5:
1966 case Html_th:
1967 decl = QCss::Declaration();
1968 decl.d->property = QLatin1String("font-weight");
1969 decl.d->propertyId = QCss::FontWeight;
1970 val.type = QCss::Value::KnownIdentifier;
1971 val.variant = QVariant(QCss::Value_Bold);
1972 decl.d->values = QList<QCss::Value> { val };
1973 decl.d->inheritable = true;
1974 decls << decl;
1975 if (node.id == Html_b || node.id == Html_strong)
1976 break;
1977 Q_FALLTHROUGH();
1978 case Html_big:
1979 case Html_small:
1980 if (node.id != Html_th) {
1981 decl = QCss::Declaration();
1982 decl.d->property = QLatin1String("font-size");
1983 decl.d->propertyId = QCss::FontSize;
1984 decl.d->inheritable = false;
1985 val.type = QCss::Value::KnownIdentifier;
1986 switch (node.id) {
1987 case Html_h1: val.variant = QVariant(QCss::Value_XXLarge); break;
1988 case Html_h2: val.variant = QVariant(QCss::Value_XLarge); break;
1989 case Html_h3: case Html_big: val.variant = QVariant(QCss::Value_Large); break;
1990 case Html_h4: val.variant = QVariant(QCss::Value_Medium); break;
1991 case Html_h5: case Html_small: val.variant = QVariant(QCss::Value_Small); break;
1992 default: break;
1993 }
1994 decl.d->values = QList<QCss::Value> { val };
1995 decls << decl;
1996 break;
1997 }
1998 Q_FALLTHROUGH();
1999 case Html_center:
2000 case Html_td:
2001 decl = QCss::Declaration();
2002 decl.d->property = QLatin1String("text-align");
2003 decl.d->propertyId = QCss::TextAlignment;
2004 val.type = QCss::Value::KnownIdentifier;
2005 val.variant = (node.id == Html_td) ? QVariant(QCss::Value_Left) : QVariant(QCss::Value_Center);
2006 decl.d->values = QList<QCss::Value> { val };
2007 decl.d->inheritable = true;
2008 decls << decl;
2009 break;
2010 case Html_s:
2011 decl = QCss::Declaration();
2012 decl.d->property = QLatin1String("text-decoration");
2013 decl.d->propertyId = QCss::TextDecoration;
2014 val.type = QCss::Value::KnownIdentifier;
2015 val.variant = QVariant(QCss::Value_LineThrough);
2016 decl.d->values = QList<QCss::Value> { val };
2017 decl.d->inheritable = true;
2018 decls << decl;
2019 break;
2020 case Html_em:
2021 case Html_i:
2022 case Html_cite:
2023 case Html_address:
2024 case Html_var:
2025 case Html_dfn:
2026 decl = QCss::Declaration();
2027 decl.d->property = QLatin1String("font-style");
2028 decl.d->propertyId = QCss::FontStyle;
2029 val.type = QCss::Value::KnownIdentifier;
2030 val.variant = QVariant(QCss::Value_Italic);
2031 decl.d->values = QList<QCss::Value> { val };
2032 decl.d->inheritable = true;
2033 decls << decl;
2034 break;
2035 case Html_sub:
2036 case Html_sup:
2037 decl = QCss::Declaration();
2038 decl.d->property = QLatin1String("vertical-align");
2039 decl.d->propertyId = QCss::VerticalAlignment;
2040 val.type = QCss::Value::KnownIdentifier;
2041 val.variant = (node.id == Html_sub) ? QVariant(QCss::Value_Sub) : QVariant(QCss::Value_Super);
2042 decl.d->values = QList<QCss::Value> { val };
2043 decl.d->inheritable = true;
2044 decls << decl;
2045 break;
2046 case Html_ul:
2047 case Html_ol:
2048 decl = QCss::Declaration();
2049 decl.d->property = QLatin1String("list-style");
2050 decl.d->propertyId = QCss::ListStyle;
2051 val.type = QCss::Value::KnownIdentifier;
2052 val.variant = (node.id == Html_ul) ? QVariant(QCss::Value_Disc) : QVariant(QCss::Value_Decimal);
2053 decl.d->values = QList<QCss::Value> { val };
2054 decl.d->inheritable = true;
2055 decls << decl;
2056 break;
2057 case Html_code:
2058 case Html_tt:
2059 case Html_kbd:
2060 case Html_samp:
2061 case Html_pre: {
2062 decl = QCss::Declaration();
2063 decl.d->property = QLatin1String("font-family");
2064 decl.d->propertyId = QCss::FontFamily;
2065 QList<QCss::Value> values;
2066 val.type = QCss::Value::String;
2067 val.variant = QFontDatabase::systemFont(QFontDatabase::FixedFont).family();
2068 values << val;
2069 decl.d->values = values;
2070 decl.d->inheritable = true;
2071 decls << decl;
2072 }
2073 if (node.id != Html_pre)
2074 break;
2075 Q_FALLTHROUGH();
2076 case Html_br:
2077 case Html_nobr:
2078 decl = QCss::Declaration();
2079 decl.d->property = QLatin1String("whitespace");
2080 decl.d->propertyId = QCss::Whitespace;
2081 val.type = QCss::Value::KnownIdentifier;
2082 switch (node.id) {
2083 case Html_br: val.variant = QVariant(QCss::Value_PreWrap); break;
2084 case Html_nobr: val.variant = QVariant(QCss::Value_NoWrap); break;
2085 case Html_pre: val.variant = QVariant(QCss::Value_Pre); break;
2086 default: break;
2087 }
2088 decl.d->values = QList<QCss::Value> { val };
2089 decl.d->inheritable = true;
2090 decls << decl;
2091 break;
2092 default:
2093 break;
2094 }
2095 return decls;
2096}
2097
2098QList<QCss::Declaration> QTextHtmlParser::declarationsForNode(int node) const
2099{
2100 QList<QCss::Declaration> decls;
2101
2102 QTextHtmlStyleSelector selector(this);
2103
2104 int idx = 0;
2105 selector.styleSheets.resize((resourceProvider ? 1 : 0)
2106 + externalStyleSheets.count()
2107 + inlineStyleSheets.count());
2108 if (resourceProvider)
2109 selector.styleSheets[idx++] = QTextDocumentPrivate::get(resourceProvider)->parsedDefaultStyleSheet;
2110
2111 for (int i = 0; i < externalStyleSheets.count(); ++i, ++idx)
2112 selector.styleSheets[idx] = externalStyleSheets.at(i).sheet;
2113
2114 for (int i = 0; i < inlineStyleSheets.count(); ++i, ++idx)
2115 selector.styleSheets[idx] = inlineStyleSheets.at(i);
2116
2117 selector.medium = QLatin1String("screen");
2118
2119 QCss::StyleSelector::NodePtr n;
2120 n.id = node;
2121
2122 const char *extraPseudo = nullptr;
2123 if (nodes.at(node)->id == Html_a && nodes.at(node)->hasHref)
2124 extraPseudo = "link";
2125 // Ensure that our own style is taken into consideration
2126 decls = standardDeclarationForNode(*nodes.at(node));
2127 decls += selector.declarationsForNode(n, extraPseudo);
2128 n = selector.parentNode(n);
2129 while (!selector.isNullNode(n)) {
2130 QList<QCss::Declaration> inheritedDecls;
2131 inheritedDecls = selector.declarationsForNode(n, extraPseudo);
2132 for (int i = 0; i < inheritedDecls.size(); ++i) {
2133 const QCss::Declaration &decl = inheritedDecls.at(i);
2134 if (decl.d->inheritable)
2135 decls.prepend(decl);
2136 }
2137 n = selector.parentNode(n);
2138 }
2139 return decls;
2140}
2141
2142bool QTextHtmlParser::nodeIsChildOf(int i, QTextHTMLElements id) const
2143{
2144 while (i) {
2145 if (at(i).id == id)
2146 return true;
2147 i = at(i).parent;
2148 }
2149 return false;
2150}
2151
2152QT_END_NAMESPACE
2153#endif // QT_NO_CSSPARSER
2154
2155#endif // QT_NO_TEXTHTMLPARSER
2156