1/****************************************************************************
2**
3** Copyright (C) 2019 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qtextmarkdownwriter_p.h"
41#include "qtextdocumentlayout_p.h"
42#include "qfontinfo.h"
43#include "qfontmetrics.h"
44#include "qtextdocument_p.h"
45#include "qtextlist.h"
46#include "qtexttable.h"
47#include "qtextcursor.h"
48#include "qtextimagehandler_p.h"
49#include "qloggingcategory.h"
50#if QT_CONFIG(itemmodel)
51#include "qabstractitemmodel.h"
52#endif
53
54QT_BEGIN_NAMESPACE
55
56Q_LOGGING_CATEGORY(lcMDW, "qt.text.markdown.writer")
57
// Frequently used characters, kept as file-local QChar constants so they can be
// compared against and streamed without repeated conversions.
static const QChar Space = QLatin1Char(' ');
static const QChar Tab = QLatin1Char('\t');
static const QChar Newline = QLatin1Char('\n');
static const QChar CarriageReturn = QLatin1Char('\r');
// U+2028 LINE SEPARATOR; writeBlock() replaces it with a real newline on output.
static const QChar LineBreak = u'\x2028';
static const QChar DoubleQuote = QLatin1Char('"');
static const QChar Backtick = QLatin1Char('`');
static const QChar Backslash = QLatin1Char('\\');
static const QChar Period = QLatin1Char('.');
67
// Constructs a writer that sends its output to \a stream. The stream is held
// by reference, and \a features is stored in m_features.
QTextMarkdownWriter::QTextMarkdownWriter(QTextStream &stream, QTextDocument::MarkdownFeatures features)
    : m_stream(stream), m_features(features)
{
}
72
73bool QTextMarkdownWriter::writeAll(const QTextDocument *document)
74{
75 writeFrame(document->rootFrame());
76 return true;
77}
78
79#if QT_CONFIG(itemmodel)
80void QTextMarkdownWriter::writeTable(const QAbstractItemModel *table)
81{
82 QList<int> tableColumnWidths(table->columnCount());
83 for (int col = 0; col < table->columnCount(); ++col) {
84 tableColumnWidths[col] = table->headerData(col, Qt::Horizontal).toString().length();
85 for (int row = 0; row < table->rowCount(); ++row) {
86 tableColumnWidths[col] = qMax(tableColumnWidths[col],
87 table->data(table->index(row, col)).toString().length());
88 }
89 }
90
91 // write the header and separator
92 for (int col = 0; col < table->columnCount(); ++col) {
93 QString s = table->headerData(col, Qt::Horizontal).toString();
94 m_stream << "|" << s << QString(tableColumnWidths[col] - s.length(), Space);
95 }
96 m_stream << "|" << Qt::endl;
97 for (int col = 0; col < tableColumnWidths.length(); ++col)
98 m_stream << '|' << QString(tableColumnWidths[col], QLatin1Char('-'));
99 m_stream << '|'<< Qt::endl;
100
101 // write the body
102 for (int row = 0; row < table->rowCount(); ++row) {
103 for (int col = 0; col < table->columnCount(); ++col) {
104 QString s = table->data(table->index(row, col)).toString();
105 m_stream << "|" << s << QString(tableColumnWidths[col] - s.length(), Space);
106 }
107 m_stream << '|'<< Qt::endl;
108 }
109 m_listInfo.clear();
110}
111#endif
112
// Writes \a frame to the stream: child frames are handled by recursion and
// blocks are delegated to writeBlock(). If \a frame is a QTextTable, its blocks
// are laid out as pipe-delimited table rows, padded to precomputed column
// widths, with a dashed separator row written after the first row.
// Clears the cached list info when done.
void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
{
    Q_ASSERT(frame);
    // Null unless the frame is actually a table.
    const QTextTable *table = qobject_cast<const QTextTable*> (frame);
    QTextFrame::iterator iterator = frame->begin();
    // The frame seen on the previous iteration; a frame is only recursed into
    // when it differs from this one.
    QTextFrame *child = nullptr;
    // Row of the most recently started table row; -1 until the first row begins.
    int tableRow = -1;
    bool lastWasList = false;
    QList<int> tableColumnWidths;
    if (table) {
        // Pre-compute the width of each column as the largest total text length
        // of any cell in it. Cells spanning several columns are not counted.
        tableColumnWidths.resize(table->columns());
        for (int col = 0; col < table->columns(); ++col) {
            for (int row = 0; row < table->rows(); ++ row) {
                QTextTableCell cell = table->cellAt(row, col);
                int cellTextLen = 0;
                auto it = cell.begin();
                while (it != cell.end()) {
                    QTextBlock block = it.currentBlock();
                    if (block.isValid())
                        cellTextLen += block.text().length();
                    ++it;
                }
                if (cell.columnSpan() == 1 && tableColumnWidths[col] < cellTextLen)
                    tableColumnWidths[col] = cellTextLen;
            }
        }
    }
    while (!iterator.atEnd()) {
        if (iterator.currentFrame() && child != iterator.currentFrame())
            writeFrame(iterator.currentFrame());
        else { // no frame, it's a block
            QTextBlock block = iterator.currentBlock();
            // Look ahead and detect some cases when we should
            // suppress needless blank lines, when there will be a big change in block format
            bool nextIsDifferent = false;
            bool ending = false;
            {
                QTextFrame::iterator next = iterator;
                ++next;
                if (next.atEnd()) {
                    // This is the last item in the frame.
                    nextIsDifferent = true;
                    ending = true;
                } else {
                    // A change of indent or of code language counts as "different".
                    QTextBlockFormat format = iterator.currentBlock().blockFormat();
                    QTextBlockFormat nextFormat = next.currentBlock().blockFormat();
                    if (nextFormat.indent() != format.indent() ||
                            nextFormat.property(QTextFormat::BlockCodeLanguage) != format.property(QTextFormat::BlockCodeLanguage))
                        nextIsDifferent = true;
                }
            }
            if (table) {
                QTextTableCell cell = table->cellAt(block.position());
                if (tableRow < cell.row()) {
                    // This block starts a new table row.
                    if (tableRow == 0) {
                        // The first row has just been completed: write the
                        // dashed separator row underneath it.
                        m_stream << Newline;
                        for (int col = 0; col < tableColumnWidths.length(); ++col)
                            m_stream << '|' << QString(tableColumnWidths[col], QLatin1Char('-'));
                        m_stream << '|';
                    }
                    m_stream << Newline << "|";
                    tableRow = cell.row();
                }
            } else if (!block.textList()) {
                // Separate a non-list block from the list that preceded it.
                if (lastWasList)
                    m_stream << Newline;
            }
            // Outside tables, lines are wrapped; in the first table row, inline
            // formatting is ignored; empty blocks are skipped when the next
            // block differs and this one is not a list item.
            int endingCol = writeBlock(block, !table, table && tableRow == 0,
                                       nextIsDifferent && !block.textList());
            m_doubleNewlineWritten = false;
            if (table) {
                // Pad the cell to the combined width of the columns it spans,
                // then write one closing pipe per spanned column.
                QTextTableCell cell = table->cellAt(block.position());
                int paddingLen = -endingCol;
                int spanEndCol = cell.column() + cell.columnSpan();
                for (int col = cell.column(); col < spanEndCol; ++col)
                    paddingLen += tableColumnWidths[col];
                if (paddingLen > 0)
                    m_stream << QString(paddingLen, Space);
                for (int col = cell.column(); col < spanEndCol; ++col)
                    m_stream << "|";
            } else if (m_fencedCodeBlock && ending) {
                // The frame ends inside a fenced code block: close the fence.
                m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
                         << m_codeBlockFence << Newline << Newline;
                m_codeBlockFence.clear();
            } else if (m_indentedCodeBlock && nextIsDifferent) {
                m_stream << Newline;
            } else if (endingCol > 0) {
                // Something was written: list items and code-language blocks end
                // with a single newline, anything else with a blank line.
                if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
                    m_stream << Newline;
                } else {
                    m_stream << Newline << Newline;
                    m_doubleNewlineWritten = true;
                }
            }
            lastWasList = block.textList();
        }
        child = iterator.currentFrame();
        ++iterator;
    }
    if (table) {
        // Finish the last table row and leave a blank line after the table.
        m_stream << Newline << Newline;
        m_doubleNewlineWritten = true;
    }
    m_listInfo.clear();
}
217
218QTextMarkdownWriter::ListInfo QTextMarkdownWriter::listInfo(QTextList *list)
219{
220 if (!m_listInfo.contains(list)) {
221 // decide whether this list is loose or tight
222 ListInfo info;
223 info.loose = false;
224 if (list->count() > 1) {
225 QTextBlock first = list->item(0);
226 QTextBlock last = list->item(list->count() - 1);
227 QTextBlock next = first.next();
228 while (next.isValid()) {
229 if (next == last)
230 break;
231 qCDebug(lcMDW) << "next block in list" << list << next.text() << "part of list?" << next.textList();
232 if (!next.textList()) {
233 // If we find a continuation paragraph, this list is "loose"
234 // because it will need a blank line to separate that paragraph.
235 qCDebug(lcMDW) << "decided list beginning with" << first.text() << "is loose after" << next.text();
236 info.loose = true;
237 break;
238 }
239 next = next.next();
240 }
241 }
242 m_listInfo.insert(list, info);
243 return info;
244 }
245 return m_listInfo.value(list);
246}
247
248static int nearestWordWrapIndex(const QString &s, int before)
249{
250 before = qMin(before, s.length());
251 int fragBegin = qMax(before - 15, 0);
252 if (lcMDW().isDebugEnabled()) {
253 QString frag = s.mid(fragBegin, 30);
254 qCDebug(lcMDW) << frag << before;
255 qCDebug(lcMDW) << QString(before - fragBegin, Period) + QLatin1Char('<');
256 }
257 for (int i = before - 1; i >= 0; --i) {
258 if (s.at(i).isSpace()) {
259 qCDebug(lcMDW) << QString(i - fragBegin, Period) + QLatin1Char('^') << i;
260 return i;
261 }
262 }
263 qCDebug(lcMDW, "not possible");
264 return -1;
265}
266
267static int adjacentBackticksCount(const QString &s)
268{
269 int start = -1, len = s.length();
270 int ret = 0;
271 for (int i = 0; i < len; ++i) {
272 if (s.at(i) == Backtick) {
273 if (start < 0)
274 start = i;
275 } else if (start >= 0) {
276 ret = qMax(ret, i - start);
277 start = -1;
278 }
279 }
280 if (s.at(len - 1) == Backtick)
281 ret = qMax(ret, len - start);
282 return ret;
283}
284
285static void maybeEscapeFirstChar(QString &s)
286{
287 QString sTrimmed = s.trimmed();
288 if (sTrimmed.isEmpty())
289 return;
290 char firstChar = sTrimmed.at(0).toLatin1();
291 if (firstChar == '*' || firstChar == '+' || firstChar == '-') {
292 int i = s.indexOf(QLatin1Char(firstChar));
293 s.insert(i, QLatin1Char('\\'));
294 }
295}
296
// Result of findLineEnd(): describes where one line stops and the next starts.
struct LineEndPositions {
    // Points at the line terminator, or at the end of the text if there is none.
    const QChar *lineEnd;
    // Points just past the line terminator ("\n", "\r" or "\r\n"),
    // or at the end of the text if there is none.
    const QChar *nextLineBegin;
};
301
302static LineEndPositions findLineEnd(const QChar *begin, const QChar *end)
303{
304 LineEndPositions result{ end, end };
305
306 while (begin < end) {
307 if (*begin == Newline) {
308 result.lineEnd = begin;
309 result.nextLineBegin = begin + 1;
310 break;
311 } else if (*begin == CarriageReturn) {
312 result.lineEnd = begin;
313 result.nextLineBegin = begin + 1;
314 if (((begin + 1) < end) && begin[1] == Newline)
315 ++result.nextLineBegin;
316 break;
317 }
318
319 ++begin;
320 }
321
322 return result;
323}
324
325static bool isBlankLine(const QChar *begin, const QChar *end)
326{
327 while (begin < end) {
328 if (*begin != Space && *begin != Tab)
329 return false;
330 ++begin;
331 }
332 return true;
333}
334
335static QString createLinkTitle(const QString &title)
336{
337 QString result;
338 result.reserve(title.size() + 2);
339 result += DoubleQuote;
340
341 const QChar *data = title.data();
342 const QChar *end = data + title.size();
343
344 while (data < end) {
345 const auto lineEndPositions = findLineEnd(data, end);
346
347 if (!isBlankLine(data, lineEndPositions.lineEnd)) {
348 while (data < lineEndPositions.nextLineBegin) {
349 if (*data == DoubleQuote)
350 result += Backslash;
351 result += *data;
352 ++data;
353 }
354 }
355
356 data = lineEndPositions.nextLineBegin;
357 }
358
359 result += DoubleQuote;
360 return result;
361}
362
363int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat, bool ignoreEmpty)
364{
365 if (block.text().isEmpty() && ignoreEmpty)
366 return 0;
367 const int ColumnLimit = 80;
368 QTextBlockFormat blockFmt = block.blockFormat();
369 bool missedBlankCodeBlockLine = false;
370 const bool codeBlock = blockFmt.hasProperty(QTextFormat::BlockCodeFence) ||
371 blockFmt.stringProperty(QTextFormat::BlockCodeLanguage).length() > 0;
372 if (m_fencedCodeBlock && !codeBlock) {
373 m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
374 << m_codeBlockFence << Newline;
375 m_fencedCodeBlock = false;
376 m_codeBlockFence.clear();
377 }
378 if (block.textList()) { // it's a list-item
379 auto fmt = block.textList()->format();
380 const int listLevel = fmt.indent();
381 const int number = block.textList()->itemNumber(block) + 1;
382 QByteArray bullet = " ";
383 bool numeric = false;
384 switch (fmt.style()) {
385 case QTextListFormat::ListDisc:
386 bullet = "-";
387 m_wrappedLineIndent = 2;
388 break;
389 case QTextListFormat::ListCircle:
390 bullet = "*";
391 m_wrappedLineIndent = 2;
392 break;
393 case QTextListFormat::ListSquare:
394 bullet = "+";
395 m_wrappedLineIndent = 2;
396 break;
397 case QTextListFormat::ListStyleUndefined: break;
398 case QTextListFormat::ListDecimal:
399 case QTextListFormat::ListLowerAlpha:
400 case QTextListFormat::ListUpperAlpha:
401 case QTextListFormat::ListLowerRoman:
402 case QTextListFormat::ListUpperRoman:
403 numeric = true;
404 m_wrappedLineIndent = 4;
405 break;
406 }
407 switch (blockFmt.marker()) {
408 case QTextBlockFormat::MarkerType::Checked:
409 bullet += " [x]";
410 break;
411 case QTextBlockFormat::MarkerType::Unchecked:
412 bullet += " [ ]";
413 break;
414 default:
415 break;
416 }
417 int indentFirstLine = (listLevel - 1) * (numeric ? 4 : 2);
418 m_wrappedLineIndent += indentFirstLine;
419 if (m_lastListIndent != listLevel && !m_doubleNewlineWritten && listInfo(block.textList()).loose)
420 m_stream << Newline;
421 m_lastListIndent = listLevel;
422 QString prefix(indentFirstLine, Space);
423 if (numeric) {
424 QString suffix = fmt.numberSuffix();
425 if (suffix.isEmpty())
426 suffix = QString(Period);
427 QString numberStr = QString::number(number) + suffix + Space;
428 if (numberStr.length() == 3)
429 numberStr += Space;
430 prefix += numberStr;
431 } else {
432 prefix += QLatin1String(bullet) + Space;
433 }
434 m_stream << prefix;
435 } else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) {
436 m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading
437 return 0;
438 } else if (codeBlock) {
439 // It's important to preserve blank lines in code blocks. But blank lines in code blocks
440 // inside block quotes are getting preserved anyway (along with the "> " prefix).
441 if (!blockFmt.hasProperty(QTextFormat::BlockQuoteLevel))
442 missedBlankCodeBlockLine = true; // only if we don't get any fragments below
443 if (!m_fencedCodeBlock) {
444 QString fenceChar = blockFmt.stringProperty(QTextFormat::BlockCodeFence);
445 if (fenceChar.isEmpty())
446 fenceChar = QLatin1String("`");
447 m_codeBlockFence = QString(3, fenceChar.at(0));
448 // A block quote can contain an indented code block, but not vice-versa.
449 m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space) << m_codeBlockFence
450 << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) << Newline;
451 m_fencedCodeBlock = true;
452 }
453 wrap = false;
454 } else if (!blockFmt.indent()) {
455 m_wrappedLineIndent = 0;
456 m_linePrefix.clear();
457 if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) {
458 int level = blockFmt.intProperty(QTextFormat::BlockQuoteLevel);
459 QString quoteMarker = QStringLiteral("> ");
460 m_linePrefix.reserve(level * 2);
461 for (int i = 0; i < level; ++i)
462 m_linePrefix += quoteMarker;
463 }
464 if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) {
465 // A block quote can contain an indented code block, but not vice-versa.
466 m_linePrefix += QString(4, Space);
467 m_indentedCodeBlock = true;
468 }
469 }
470 if (blockFmt.headingLevel())
471 m_stream << QByteArray(blockFmt.headingLevel(), '#') << ' ';
472 else
473 m_stream << m_linePrefix;
474
475 QString wrapIndentString = m_linePrefix + QString(m_wrappedLineIndent, Space);
476 // It would be convenient if QTextStream had a lineCharPos() accessor,
477 // to keep track of how many characters (not bytes) have been written on the current line,
478 // but it doesn't. So we have to keep track with this col variable.
479 int col = wrapIndentString.length();
480 bool mono = false;
481 bool startsOrEndsWithBacktick = false;
482 bool bold = false;
483 bool italic = false;
484 bool underline = false;
485 bool strikeOut = false;
486 QString backticks(Backtick);
487 for (QTextBlock::Iterator frag = block.begin(); !frag.atEnd(); ++frag) {
488 missedBlankCodeBlockLine = false;
489 QString fragmentText = frag.fragment().text();
490 while (fragmentText.endsWith(Newline))
491 fragmentText.chop(1);
492 if (block.textList()) { // <li>first line</br>continuation</li>
493 QString newlineIndent = QString(Newline) + QString(m_wrappedLineIndent, Space);
494 fragmentText.replace(QString(LineBreak), newlineIndent);
495 } else if (blockFmt.indent() > 0) { // <li>first line<p>continuation</p></li>
496 m_stream << QString(m_wrappedLineIndent, Space);
497 } else {
498 fragmentText.replace(LineBreak, Newline);
499 }
500 startsOrEndsWithBacktick |= fragmentText.startsWith(Backtick) || fragmentText.endsWith(Backtick);
501 QTextCharFormat fmt = frag.fragment().charFormat();
502 if (fmt.isImageFormat()) {
503 QTextImageFormat ifmt = fmt.toImageFormat();
504 QString desc = ifmt.stringProperty(QTextFormat::ImageAltText);
505 if (desc.isEmpty())
506 desc = QLatin1String("image");
507 QString s = QLatin1String("![") + desc + QLatin1String("](") + ifmt.name();
508 QString title = ifmt.stringProperty(QTextFormat::ImageTitle);
509 if (!title.isEmpty())
510 s += Space + DoubleQuote + title + DoubleQuote;
511 s += QLatin1Char(')');
512 if (wrap && col + s.length() > ColumnLimit) {
513 m_stream << Newline << wrapIndentString;
514 col = m_wrappedLineIndent;
515 }
516 m_stream << s;
517 col += s.length();
518 } else if (fmt.hasProperty(QTextFormat::AnchorHref)) {
519 QString s = QLatin1Char('[') + fragmentText + QLatin1String("](") +
520 fmt.property(QTextFormat::AnchorHref).toString();
521 if (fmt.hasProperty(QTextFormat::TextToolTip)) {
522 s += Space;
523 s += createLinkTitle(fmt.property(QTextFormat::TextToolTip).toString());
524 }
525 s += QLatin1Char(')');
526 if (wrap && col + s.length() > ColumnLimit) {
527 m_stream << Newline << wrapIndentString;
528 col = m_wrappedLineIndent;
529 }
530 m_stream << s;
531 col += s.length();
532 } else {
533 QFontInfo fontInfo(fmt.font());
534 bool monoFrag = fontInfo.fixedPitch();
535 QString markers;
536 if (!ignoreFormat) {
537 if (monoFrag != mono && !m_indentedCodeBlock && !m_fencedCodeBlock) {
538 if (monoFrag)
539 backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick);
540 markers += backticks;
541 if (startsOrEndsWithBacktick)
542 markers += Space;
543 mono = monoFrag;
544 }
545 if (!blockFmt.headingLevel() && !mono) {
546 if (fontInfo.bold() != bold) {
547 markers += QLatin1String("**");
548 bold = fontInfo.bold();
549 }
550 if (fontInfo.italic() != italic) {
551 markers += QLatin1Char('*');
552 italic = fontInfo.italic();
553 }
554 if (fontInfo.strikeOut() != strikeOut) {
555 markers += QLatin1String("~~");
556 strikeOut = fontInfo.strikeOut();
557 }
558 if (fontInfo.underline() != underline) {
559 // Markdown doesn't support underline, but the parser will treat a single underline
560 // the same as a single asterisk, and the marked fragment will be rendered in italics.
561 // That will have to do.
562 markers += QLatin1Char('_');
563 underline = fontInfo.underline();
564 }
565 }
566 }
567 if (wrap && col + markers.length() * 2 + fragmentText.length() > ColumnLimit) {
568 int i = 0;
569 int fragLen = fragmentText.length();
570 bool breakingLine = false;
571 while (i < fragLen) {
572 if (col >= ColumnLimit) {
573 m_stream << Newline << wrapIndentString;
574 col = m_wrappedLineIndent;
575 while (fragmentText[i].isSpace())
576 ++i;
577 }
578 int j = i + ColumnLimit - col;
579 if (j < fragLen) {
580 int wi = nearestWordWrapIndex(fragmentText, j);
581 if (wi < 0) {
582 j = fragLen;
583 } else if (wi >= i) {
584 j = wi;
585 breakingLine = true;
586 }
587 } else {
588 j = fragLen;
589 breakingLine = false;
590 }
591 QString subfrag = fragmentText.mid(i, j - i);
592 if (!i) {
593 m_stream << markers;
594 col += markers.length();
595 }
596 if (col == m_wrappedLineIndent)
597 maybeEscapeFirstChar(subfrag);
598 m_stream << subfrag;
599 if (breakingLine) {
600 m_stream << Newline << wrapIndentString;
601 col = m_wrappedLineIndent;
602 } else {
603 col += subfrag.length();
604 }
605 i = j + 1;
606 }
607 } else {
608 m_stream << markers << fragmentText;
609 col += markers.length() + fragmentText.length();
610 }
611 }
612 }
613 if (mono) {
614 if (startsOrEndsWithBacktick) {
615 m_stream << Space;
616 col += 1;
617 }
618 m_stream << backticks;
619 col += backticks.size();
620 }
621 if (bold) {
622 m_stream << "**";
623 col += 2;
624 }
625 if (italic) {
626 m_stream << "*";
627 col += 1;
628 }
629 if (underline) {
630 m_stream << "_";
631 col += 1;
632 }
633 if (strikeOut) {
634 m_stream << "~~";
635 col += 2;
636 }
637 if (missedBlankCodeBlockLine)
638 m_stream << Newline;
639 return col;
640}
641
642QT_END_NAMESPACE
643