1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/pdf/SkPDFDocumentPriv.h"
9#include "src/pdf/SkPDFTag.h"
10
11// Table 333 in PDF 32000-1:2008
12static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
13 switch (type) {
14 #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
15 M(Document);
16 M(Part);
17 M(Art);
18 M(Sect);
19 M(Div);
20 M(BlockQuote);
21 M(Caption);
22 M(TOC);
23 M(TOCI);
24 M(Index);
25 M(NonStruct);
26 M(Private);
27 M(H);
28 M(H1);
29 M(H2);
30 M(H3);
31 M(H4);
32 M(H5);
33 M(H6);
34 M(P);
35 M(L);
36 M(LI);
37 M(Lbl);
38 M(LBody);
39 M(Table);
40 M(TR);
41 M(TH);
42 M(TD);
43 M(THead);
44 M(TBody);
45 M(TFoot);
46 M(Span);
47 M(Quote);
48 M(Note);
49 M(Reference);
50 M(BibEntry);
51 M(Code);
52 M(Link);
53 M(Annot);
54 M(Ruby);
55 M(RB);
56 M(RT);
57 M(RP);
58 M(Warichu);
59 M(WT);
60 M(WP);
61 M(Figure);
62 M(Formula);
63 M(Form);
64 #undef M
65 }
66 SK_ABORT("bad tag");
67}
68
69SkPDF::AttributeList::AttributeList() = default;
70
71SkPDF::AttributeList::~AttributeList() = default;
72
73void SkPDF::AttributeList::appendInt(
74 const char* owner, const char* name, int value) {
75 if (!fAttrs)
76 fAttrs = SkPDFMakeArray();
77 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
78 attrDict->insertName("O", owner);
79 attrDict->insertInt(name, value);
80 fAttrs->appendObject(std::move(attrDict));
81}
82
83void SkPDF::AttributeList::appendFloat(
84 const char* owner, const char* name, float value) {
85 if (!fAttrs)
86 fAttrs = SkPDFMakeArray();
87 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
88 attrDict->insertName("O", owner);
89 attrDict->insertScalar(name, value);
90 fAttrs->appendObject(std::move(attrDict));
91}
92
93void SkPDF::AttributeList::appendString(
94 const char* owner, const char* name, const char* value) {
95 if (!fAttrs)
96 fAttrs = SkPDFMakeArray();
97 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
98 attrDict->insertName("O", owner);
99 attrDict->insertName(name, value);
100 fAttrs->appendObject(std::move(attrDict));
101}
102
103void SkPDF::AttributeList::appendFloatArray(
104 const char* owner, const char* name, const std::vector<float>& value) {
105 if (!fAttrs)
106 fAttrs = SkPDFMakeArray();
107 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
108 attrDict->insertName("O", owner);
109 std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
110 for (float element : value) {
111 pdfArray->appendScalar(element);
112 }
113 attrDict->insertObject(name, std::move(pdfArray));
114 fAttrs->appendObject(std::move(attrDict));
115}
116
117void SkPDF::AttributeList::appendStringArray(
118 const char* owner,
119 const char* name,
120 const std::vector<SkString>& value) {
121 if (!fAttrs)
122 fAttrs = SkPDFMakeArray();
123 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
124 attrDict->insertName("O", owner);
125 std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
126 for (SkString element : value) {
127 pdfArray->appendName(element);
128 }
129 attrDict->insertObject(name, std::move(pdfArray));
130 fAttrs->appendObject(std::move(attrDict));
131}
132
133struct SkPDFTagNode {
134 SkPDFTagNode* fChildren = nullptr;
135 size_t fChildCount = 0;
136 struct MarkedContentInfo {
137 unsigned fPageIndex;
138 int fMarkId;
139 };
140 SkTArray<MarkedContentInfo> fMarkedContent;
141 int fNodeId;
142 SkPDF::DocumentStructureType fType;
143 SkString fTypeString;
144 SkString fAlt;
145 SkString fLang;
146 SkPDFIndirectReference fRef;
147 enum State {
148 kUnknown,
149 kYes,
150 kNo,
151 } fCanDiscard = kUnknown;
152 std::unique_ptr<SkPDFArray> fAttributes;
153 std::vector<SkPDFIndirectReference> fAnnotations;
154};
155
156SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
157
158SkPDFTagTree::~SkPDFTagTree() = default;
159
160// static
161void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
162 SkPDFTagNode* dst,
163 SkArenaAlloc* arena,
164 SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
165 nodeMap->set(node.fNodeId, dst);
166 for (int nodeId : node.fAdditionalNodeIds) {
167 SkASSERT(!nodeMap->find(nodeId));
168 nodeMap->set(nodeId, dst);
169 }
170 dst->fNodeId = node.fNodeId;
171 dst->fType = node.fType;
172 dst->fTypeString = node.fTypeString;
173 dst->fAlt = node.fAlt;
174 dst->fLang = node.fLang;
175
176 // Temporarily support both raw fChildren and fChildVector.
177 if (node.fChildren) {
178 size_t childCount = node.fChildCount;
179 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
180 dst->fChildCount = childCount;
181 dst->fChildren = children;
182 for (size_t i = 0; i < childCount; ++i) {
183 Copy(node.fChildren[i], &children[i], arena, nodeMap);
184 }
185 } else {
186 size_t childCount = node.fChildVector.size();
187 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
188 dst->fChildCount = childCount;
189 dst->fChildren = children;
190 for (size_t i = 0; i < childCount; ++i) {
191 Copy(*node.fChildVector[i], &children[i], arena, nodeMap);
192 }
193 }
194
195 dst->fAttributes = std::move(node.fAttributes.fAttrs);
196}
197
198void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
199 if (node) {
200 fRoot = fArena.make<SkPDFTagNode>();
201 Copy(*node, fRoot, &fArena, &fNodeMap);
202 }
203}
204
205void SkPDFTagTree::reset() {
206 fArena.reset();
207 fNodeMap.reset();
208 fMarksPerPage.reset();
209 fRoot = nullptr;
210}
211
212int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
213 if (!fRoot) {
214 return -1;
215 }
216 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
217 if (!tagPtr) {
218 return -1;
219 }
220 SkPDFTagNode* tag = *tagPtr;
221 SkASSERT(tag);
222 while (fMarksPerPage.size() < pageIndex + 1) {
223 fMarksPerPage.push_back();
224 }
225 SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
226 int markId = pageMarks.count();
227 tag->fMarkedContent.push_back({pageIndex, markId});
228 pageMarks.push_back(tag);
229 return markId;
230}
231
232static bool can_discard(SkPDFTagNode* node) {
233 if (node->fCanDiscard == SkPDFTagNode::kYes) {
234 return true;
235 }
236 if (node->fCanDiscard == SkPDFTagNode::kNo) {
237 return false;
238 }
239 if (!node->fMarkedContent.empty()) {
240 node->fCanDiscard = SkPDFTagNode::kNo;
241 return false;
242 }
243 for (size_t i = 0; i < node->fChildCount; ++i) {
244 if (!can_discard(&node->fChildren[i])) {
245 node->fCanDiscard = SkPDFTagNode::kNo;
246 return false;
247 }
248 }
249 node->fCanDiscard = SkPDFTagNode::kYes;
250 return true;
251}
252
253
254SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
255 SkPDFTagNode* node,
256 SkPDFDocument* doc) {
257 SkPDFIndirectReference ref = doc->reserveRef();
258 std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
259 SkPDFTagNode* children = node->fChildren;
260 size_t childCount = node->fChildCount;
261 for (size_t i = 0; i < childCount; ++i) {
262 SkPDFTagNode* child = &children[i];
263 if (!(can_discard(child))) {
264 kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
265 }
266 }
267 for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
268 std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
269 mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
270 mcr->insertInt("MCID", info.fMarkId);
271 kids->appendObject(std::move(mcr));
272 }
273 for (SkPDFIndirectReference annotationRef : node->fAnnotations) {
274 std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR");
275 annotationDict->insertRef("Obj", annotationRef);
276 kids->appendObject(std::move(annotationDict));
277 }
278 node->fRef = ref;
279 SkPDFDict dict("StructElem");
280 if (!node->fTypeString.isEmpty()) {
281 dict.insertName("S", node->fTypeString.c_str());
282 } else {
283 dict.insertName("S", tag_name_from_type(node->fType));
284 }
285 if (!node->fAlt.isEmpty()) {
286 dict.insertString("Alt", node->fAlt);
287 }
288 if (!node->fLang.isEmpty()) {
289 dict.insertString("Lang", node->fLang);
290 }
291 dict.insertRef("P", parent);
292 dict.insertObject("K", std::move(kids));
293 SkString idString;
294 idString.printf("%d", node->fNodeId);
295 dict.insertName("ID", idString.c_str());
296 if (node->fAttributes) {
297 dict.insertObject("A", std::move(node->fAttributes));
298 }
299
300 return doc->emit(dict, ref);
301}
302
303void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef) {
304 if (!fRoot) {
305 return;
306 }
307 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
308 if (!tagPtr) {
309 return;
310 }
311 SkPDFTagNode* tag = *tagPtr;
312 SkASSERT(tag);
313 tag->fAnnotations.push_back(annotationRef);
314}
315
316
317SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
318 if (!fRoot) {
319 return SkPDFIndirectReference();
320 }
321 if (can_discard(fRoot)) {
322 SkDEBUGFAIL("PDF has tag tree but no marked content.");
323 }
324 SkPDFIndirectReference ref = doc->reserveRef();
325
326 unsigned pageCount = SkToUInt(doc->pageCount());
327
328 // Build the StructTreeRoot.
329 SkPDFDict structTreeRoot("StructTreeRoot");
330 structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
331 structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
332
333 // Build the parent tree, which is a mapping from the marked
334 // content IDs on each page to their corressponding tags.
335 SkPDFDict parentTree("ParentTree");
336 auto parentTreeNums = SkPDFMakeArray();
337
338 SkASSERT(fMarksPerPage.size() <= pageCount);
339 for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
340 const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
341 SkPDFArray markToTagArray;
342 for (SkPDFTagNode* mark : pageMarks) {
343 SkASSERT(mark->fRef);
344 markToTagArray.appendRef(mark->fRef);
345 }
346 parentTreeNums->appendInt(j);
347 parentTreeNums->appendRef(doc->emit(markToTagArray));
348 }
349 parentTree.insertObject("Nums", std::move(parentTreeNums));
350 structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
351 return doc->emit(structTreeRoot, ref);
352}
353