1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "src/pdf/SkPDFDocumentPriv.h" |
9 | #include "src/pdf/SkPDFTag.h" |
10 | |
11 | // Table 333 in PDF 32000-1:2008 |
12 | static const char* tag_name_from_type(SkPDF::DocumentStructureType type) { |
13 | switch (type) { |
14 | #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X |
15 | M(Document); |
16 | M(Part); |
17 | M(Art); |
18 | M(Sect); |
19 | M(Div); |
20 | M(BlockQuote); |
21 | M(Caption); |
22 | M(TOC); |
23 | M(TOCI); |
24 | M(Index); |
25 | M(NonStruct); |
26 | M(Private); |
27 | M(H); |
28 | M(H1); |
29 | M(H2); |
30 | M(H3); |
31 | M(H4); |
32 | M(H5); |
33 | M(H6); |
34 | M(P); |
35 | M(L); |
36 | M(LI); |
37 | M(Lbl); |
38 | M(LBody); |
39 | M(Table); |
40 | M(TR); |
41 | M(TH); |
42 | M(TD); |
43 | M(THead); |
44 | M(TBody); |
45 | M(TFoot); |
46 | M(Span); |
47 | M(Quote); |
48 | M(Note); |
49 | M(Reference); |
50 | M(BibEntry); |
51 | M(Code); |
52 | M(Link); |
53 | M(Annot); |
54 | M(Ruby); |
55 | M(RB); |
56 | M(RT); |
57 | M(RP); |
58 | M(Warichu); |
59 | M(WT); |
60 | M(WP); |
61 | M(Figure); |
62 | M(Formula); |
63 | M(Form); |
64 | #undef M |
65 | } |
66 | SK_ABORT("bad tag" ); |
67 | } |
68 | |
// Defaulted constructor, defined out of line in this translation unit.
SkPDF::AttributeList::AttributeList() = default;

// Defaulted destructor, defined out of line in this translation unit.
SkPDF::AttributeList::~AttributeList() = default;
72 | |
73 | void SkPDF::AttributeList::appendInt( |
74 | const char* owner, const char* name, int value) { |
75 | if (!fAttrs) |
76 | fAttrs = SkPDFMakeArray(); |
77 | std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
78 | attrDict->insertName("O" , owner); |
79 | attrDict->insertInt(name, value); |
80 | fAttrs->appendObject(std::move(attrDict)); |
81 | } |
82 | |
83 | void SkPDF::AttributeList::appendFloat( |
84 | const char* owner, const char* name, float value) { |
85 | if (!fAttrs) |
86 | fAttrs = SkPDFMakeArray(); |
87 | std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
88 | attrDict->insertName("O" , owner); |
89 | attrDict->insertScalar(name, value); |
90 | fAttrs->appendObject(std::move(attrDict)); |
91 | } |
92 | |
93 | void SkPDF::AttributeList::appendString( |
94 | const char* owner, const char* name, const char* value) { |
95 | if (!fAttrs) |
96 | fAttrs = SkPDFMakeArray(); |
97 | std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
98 | attrDict->insertName("O" , owner); |
99 | attrDict->insertName(name, value); |
100 | fAttrs->appendObject(std::move(attrDict)); |
101 | } |
102 | |
103 | void SkPDF::AttributeList::appendFloatArray( |
104 | const char* owner, const char* name, const std::vector<float>& value) { |
105 | if (!fAttrs) |
106 | fAttrs = SkPDFMakeArray(); |
107 | std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
108 | attrDict->insertName("O" , owner); |
109 | std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray(); |
110 | for (float element : value) { |
111 | pdfArray->appendScalar(element); |
112 | } |
113 | attrDict->insertObject(name, std::move(pdfArray)); |
114 | fAttrs->appendObject(std::move(attrDict)); |
115 | } |
116 | |
117 | void SkPDF::AttributeList::appendStringArray( |
118 | const char* owner, |
119 | const char* name, |
120 | const std::vector<SkString>& value) { |
121 | if (!fAttrs) |
122 | fAttrs = SkPDFMakeArray(); |
123 | std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict(); |
124 | attrDict->insertName("O" , owner); |
125 | std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray(); |
126 | for (SkString element : value) { |
127 | pdfArray->appendName(element); |
128 | } |
129 | attrDict->insertObject(name, std::move(pdfArray)); |
130 | fAttrs->appendObject(std::move(attrDict)); |
131 | } |
132 | |
133 | struct SkPDFTagNode { |
134 | SkPDFTagNode* fChildren = nullptr; |
135 | size_t fChildCount = 0; |
136 | struct MarkedContentInfo { |
137 | unsigned fPageIndex; |
138 | int fMarkId; |
139 | }; |
140 | SkTArray<MarkedContentInfo> fMarkedContent; |
141 | int fNodeId; |
142 | SkPDF::DocumentStructureType fType; |
143 | SkString fTypeString; |
144 | SkString fAlt; |
145 | SkString fLang; |
146 | SkPDFIndirectReference fRef; |
147 | enum State { |
148 | kUnknown, |
149 | kYes, |
150 | kNo, |
151 | } fCanDiscard = kUnknown; |
152 | std::unique_ptr<SkPDFArray> fAttributes; |
153 | std::vector<SkPDFIndirectReference> fAnnotations; |
154 | }; |
155 | |
// Constructs an empty tag tree. The arena is constructed with an argument of
// four node sizes.
// NOTE(review): presumably this argument is the arena's initial block size —
// confirm against SkArenaAlloc.
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}

// Defaulted destructor, defined out of line in this translation unit.
SkPDFTagTree::~SkPDFTagTree() = default;
159 | |
160 | // static |
161 | void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node, |
162 | SkPDFTagNode* dst, |
163 | SkArenaAlloc* arena, |
164 | SkTHashMap<int, SkPDFTagNode*>* nodeMap) { |
165 | nodeMap->set(node.fNodeId, dst); |
166 | for (int nodeId : node.fAdditionalNodeIds) { |
167 | SkASSERT(!nodeMap->find(nodeId)); |
168 | nodeMap->set(nodeId, dst); |
169 | } |
170 | dst->fNodeId = node.fNodeId; |
171 | dst->fType = node.fType; |
172 | dst->fTypeString = node.fTypeString; |
173 | dst->fAlt = node.fAlt; |
174 | dst->fLang = node.fLang; |
175 | |
176 | // Temporarily support both raw fChildren and fChildVector. |
177 | if (node.fChildren) { |
178 | size_t childCount = node.fChildCount; |
179 | SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount); |
180 | dst->fChildCount = childCount; |
181 | dst->fChildren = children; |
182 | for (size_t i = 0; i < childCount; ++i) { |
183 | Copy(node.fChildren[i], &children[i], arena, nodeMap); |
184 | } |
185 | } else { |
186 | size_t childCount = node.fChildVector.size(); |
187 | SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount); |
188 | dst->fChildCount = childCount; |
189 | dst->fChildren = children; |
190 | for (size_t i = 0; i < childCount; ++i) { |
191 | Copy(*node.fChildVector[i], &children[i], arena, nodeMap); |
192 | } |
193 | } |
194 | |
195 | dst->fAttributes = std::move(node.fAttributes.fAttrs); |
196 | } |
197 | |
198 | void SkPDFTagTree::init(SkPDF::StructureElementNode* node) { |
199 | if (node) { |
200 | fRoot = fArena.make<SkPDFTagNode>(); |
201 | Copy(*node, fRoot, &fArena, &fNodeMap); |
202 | } |
203 | } |
204 | |
205 | void SkPDFTagTree::reset() { |
206 | fArena.reset(); |
207 | fNodeMap.reset(); |
208 | fMarksPerPage.reset(); |
209 | fRoot = nullptr; |
210 | } |
211 | |
212 | int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) { |
213 | if (!fRoot) { |
214 | return -1; |
215 | } |
216 | SkPDFTagNode** tagPtr = fNodeMap.find(nodeId); |
217 | if (!tagPtr) { |
218 | return -1; |
219 | } |
220 | SkPDFTagNode* tag = *tagPtr; |
221 | SkASSERT(tag); |
222 | while (fMarksPerPage.size() < pageIndex + 1) { |
223 | fMarksPerPage.push_back(); |
224 | } |
225 | SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex]; |
226 | int markId = pageMarks.count(); |
227 | tag->fMarkedContent.push_back({pageIndex, markId}); |
228 | pageMarks.push_back(tag); |
229 | return markId; |
230 | } |
231 | |
232 | static bool can_discard(SkPDFTagNode* node) { |
233 | if (node->fCanDiscard == SkPDFTagNode::kYes) { |
234 | return true; |
235 | } |
236 | if (node->fCanDiscard == SkPDFTagNode::kNo) { |
237 | return false; |
238 | } |
239 | if (!node->fMarkedContent.empty()) { |
240 | node->fCanDiscard = SkPDFTagNode::kNo; |
241 | return false; |
242 | } |
243 | for (size_t i = 0; i < node->fChildCount; ++i) { |
244 | if (!can_discard(&node->fChildren[i])) { |
245 | node->fCanDiscard = SkPDFTagNode::kNo; |
246 | return false; |
247 | } |
248 | } |
249 | node->fCanDiscard = SkPDFTagNode::kYes; |
250 | return true; |
251 | } |
252 | |
253 | |
254 | SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent, |
255 | SkPDFTagNode* node, |
256 | SkPDFDocument* doc) { |
257 | SkPDFIndirectReference ref = doc->reserveRef(); |
258 | std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray(); |
259 | SkPDFTagNode* children = node->fChildren; |
260 | size_t childCount = node->fChildCount; |
261 | for (size_t i = 0; i < childCount; ++i) { |
262 | SkPDFTagNode* child = &children[i]; |
263 | if (!(can_discard(child))) { |
264 | kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc)); |
265 | } |
266 | } |
267 | for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) { |
268 | std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR" ); |
269 | mcr->insertRef("Pg" , doc->getPage(info.fPageIndex)); |
270 | mcr->insertInt("MCID" , info.fMarkId); |
271 | kids->appendObject(std::move(mcr)); |
272 | } |
273 | for (SkPDFIndirectReference annotationRef : node->fAnnotations) { |
274 | std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR" ); |
275 | annotationDict->insertRef("Obj" , annotationRef); |
276 | kids->appendObject(std::move(annotationDict)); |
277 | } |
278 | node->fRef = ref; |
279 | SkPDFDict dict("StructElem" ); |
280 | if (!node->fTypeString.isEmpty()) { |
281 | dict.insertName("S" , node->fTypeString.c_str()); |
282 | } else { |
283 | dict.insertName("S" , tag_name_from_type(node->fType)); |
284 | } |
285 | if (!node->fAlt.isEmpty()) { |
286 | dict.insertString("Alt" , node->fAlt); |
287 | } |
288 | if (!node->fLang.isEmpty()) { |
289 | dict.insertString("Lang" , node->fLang); |
290 | } |
291 | dict.insertRef("P" , parent); |
292 | dict.insertObject("K" , std::move(kids)); |
293 | SkString idString; |
294 | idString.printf("%d" , node->fNodeId); |
295 | dict.insertName("ID" , idString.c_str()); |
296 | if (node->fAttributes) { |
297 | dict.insertObject("A" , std::move(node->fAttributes)); |
298 | } |
299 | |
300 | return doc->emit(dict, ref); |
301 | } |
302 | |
303 | void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef) { |
304 | if (!fRoot) { |
305 | return; |
306 | } |
307 | SkPDFTagNode** tagPtr = fNodeMap.find(nodeId); |
308 | if (!tagPtr) { |
309 | return; |
310 | } |
311 | SkPDFTagNode* tag = *tagPtr; |
312 | SkASSERT(tag); |
313 | tag->fAnnotations.push_back(annotationRef); |
314 | } |
315 | |
316 | |
317 | SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) { |
318 | if (!fRoot) { |
319 | return SkPDFIndirectReference(); |
320 | } |
321 | if (can_discard(fRoot)) { |
322 | SkDEBUGFAIL("PDF has tag tree but no marked content." ); |
323 | } |
324 | SkPDFIndirectReference ref = doc->reserveRef(); |
325 | |
326 | unsigned pageCount = SkToUInt(doc->pageCount()); |
327 | |
328 | // Build the StructTreeRoot. |
329 | SkPDFDict structTreeRoot("StructTreeRoot" ); |
330 | structTreeRoot.insertRef("K" , prepare_tag_tree_to_emit(ref, fRoot, doc)); |
331 | structTreeRoot.insertInt("ParentTreeNextKey" , SkToInt(pageCount)); |
332 | |
333 | // Build the parent tree, which is a mapping from the marked |
334 | // content IDs on each page to their corressponding tags. |
335 | SkPDFDict parentTree("ParentTree" ); |
336 | auto parentTreeNums = SkPDFMakeArray(); |
337 | |
338 | SkASSERT(fMarksPerPage.size() <= pageCount); |
339 | for (size_t j = 0; j < fMarksPerPage.size(); ++j) { |
340 | const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j]; |
341 | SkPDFArray markToTagArray; |
342 | for (SkPDFTagNode* mark : pageMarks) { |
343 | SkASSERT(mark->fRef); |
344 | markToTagArray.appendRef(mark->fRef); |
345 | } |
346 | parentTreeNums->appendInt(j); |
347 | parentTreeNums->appendRef(doc->emit(markToTagArray)); |
348 | } |
349 | parentTree.insertObject("Nums" , std::move(parentTreeNums)); |
350 | structTreeRoot.insertRef("ParentTree" , doc->emit(parentTree)); |
351 | return doc->emit(structTreeRoot, ref); |
352 | } |
353 | |