1 | // |
2 | // ContentHandler.h |
3 | // |
4 | // Library: XML |
5 | // Package: SAX |
6 | // Module: SAX |
7 | // |
8 | // SAX2 ContentHandler Interface. |
9 | // |
10 | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef SAX_ContentHandler_INCLUDED |
18 | #define SAX_ContentHandler_INCLUDED |
19 | |
20 | |
21 | #include "Poco/XML/XML.h" |
22 | #include "Poco/XML/XMLString.h" |
23 | |
24 | |
25 | namespace Poco { |
26 | namespace XML { |
27 | |
28 | |
29 | class Locator; |
30 | class Attributes; |
31 | |
32 | |
33 | class XML_API ContentHandler |
34 | /// Receive notification of the logical content of a document. |
35 | /// |
36 | /// This is the main interface that most SAX applications implement: if the |
37 | /// application needs to be informed of basic parsing events, it implements |
38 | /// this interface and registers an instance with the SAX parser using the setContentHandler |
39 | /// method. The parser uses the instance to report basic document-related events |
40 | /// like the start and end of elements and character data. |
41 | /// |
42 | /// The order of events in this interface is very important, and mirrors the |
43 | /// order of information in the document itself. For example, all of an element's |
44 | /// content (character data, processing instructions, and/or subelements) will |
45 | /// appear, in order, between the startElement event and the corresponding endElement |
46 | /// event. |
47 | /// |
48 | /// This interface is similar to the now-deprecated SAX 1.0 DocumentHandler |
49 | /// interface, but it adds support for Namespaces and for reporting skipped |
50 | /// entities (in non-validating XML processors). |
51 | /// Receive notification of the logical content of a document. |
52 | { |
53 | public: |
54 | virtual void setDocumentLocator(const Locator* loc) = 0; |
55 | /// Receive an object for locating the origin of SAX document events. |
56 | /// |
57 | /// SAX parsers are strongly encouraged (though not absolutely required) to |
58 | /// supply a locator: if it does so, it must supply the locator to the application |
59 | /// by invoking this method before invoking any of the other methods in the |
60 | /// ContentHandler interface. |
61 | /// |
62 | /// The locator allows the application to determine the end position of any |
63 | /// document-related event, even if the parser is not reporting an error. Typically, |
64 | /// the application will use this information for reporting its own errors (such |
65 | /// as character content that does not match an application's business rules). |
66 | /// The information returned by the locator is probably not sufficient for use |
67 | /// with a search engine. |
68 | /// |
69 | /// Note that the locator will return correct information only during the invocation |
70 | /// SAX event callbacks after startDocument returns and before endDocument is |
71 | /// called. The application should not attempt to use it at any other time. |
72 | |
73 | virtual void startDocument() = 0; |
74 | /// Receive notification of the beginning of a document. |
75 | /// |
76 | /// The SAX parser calls this function one time before calling all other |
77 | /// functions of this class (except SetDocumentLocator). |
78 | |
79 | virtual void endDocument() = 0; |
80 | /// Receive notification of the end of a document. |
81 | /// |
82 | /// The SAX parser will invoke this method only once, and it will be the last |
83 | /// method invoked during the parse. The parser shall not invoke this method |
84 | /// until it has either abandoned parsing (because of an unrecoverable error) |
85 | /// or reached the end of input. |
86 | |
87 | virtual void startElement(const XMLString& uri, const XMLString& localName, const XMLString& qname, const Attributes& attrList) = 0; |
88 | /// Receive notification of the beginning of an element. |
89 | /// |
90 | /// The Parser will invoke this method at the beginning of every element in |
91 | /// the XML document; there will be a corresponding endElement event for every |
92 | /// startElement event (even when the element is empty). All of the element's |
93 | /// content will be reported, in order, before the corresponding endElement |
94 | /// event. |
95 | /// |
96 | /// This event allows up to three name components for each element: |
97 | /// 1. the Namespace URI; |
98 | /// 2. the local name; and |
99 | /// 3. the qualified (prefixed) name. |
100 | /// |
101 | /// Any or all of these may be provided, depending on the values of the http://xml.org/sax/features/namespaces |
102 | /// and the http://xml.org/sax/features/namespace-prefixes properties: |
103 | /// * the Namespace URI and local name are required when the namespaces |
104 | /// property is true (the default), and are optional when the namespaces property |
105 | /// is false (if one is specified, both must be); |
106 | /// * the qualified name is required when the namespace-prefixes property |
107 | /// is true, and is optional when the namespace-prefixes property is false (the |
108 | /// default). |
109 | /// |
110 | /// Note that the attribute list provided will contain only attributes with |
111 | /// explicit values (specified or defaulted): #IMPLIED attributes will be omitted. |
112 | /// The attribute list will contain attributes used for Namespace declarations |
113 | /// (xmlns* attributes) only if the http://xml.org/sax/features/namespace-prefixes |
114 | /// property is true (it is false by default, and support for a true value is |
115 | /// optional). |
116 | /// |
117 | /// Like characters(), attribute values may have characters that need more than |
118 | /// one char value. |
119 | |
120 | virtual void endElement(const XMLString& uri, const XMLString& localName, const XMLString& qname) = 0; |
121 | /// Receive notification of the end of an element. |
122 | /// |
123 | /// The SAX parser will invoke this method at the end of every element in the |
124 | /// XML document; there will be a corresponding startElement event for every |
125 | /// endElement event (even when the element is empty). |
126 | /// |
127 | /// For information on the names, see startElement. |
128 | |
129 | virtual void characters(const XMLChar ch[], int start, int length) = 0; |
130 | /// Receive notification of character data. |
131 | /// |
132 | /// The Parser will call this method to report each chunk of character data. |
133 | /// SAX parsers may return all contiguous character data in a single chunk, |
134 | /// or they may split it into several chunks; however, all of the characters |
135 | /// in any single event must come from the same external entity so that the |
136 | /// Locator provides useful information. |
137 | /// |
138 | /// The application must not attempt to read from the array outside of the specified |
139 | /// range. |
140 | /// |
141 | /// Individual characters may consist of more than one XMLChar value. There |
142 | /// are three important cases where this happens, because characters can't be |
143 | /// represented in just sixteen bits. In one case, characters are represented |
144 | /// in a Surrogate Pair, using two special Unicode values. Such characters are |
145 | /// in the so-called "Astral Planes", with a code point above U+FFFF. A second |
146 | /// case involves composite characters, such as a base character combining with |
147 | /// one or more accent characters. And most important, if XMLChar is a plain |
148 | /// char, characters are encoded in UTF-8. |
149 | /// |
150 | /// Your code should not assume that algorithms using char-at-a-time idioms |
151 | /// will be working in character units; in some cases they will split characters. |
152 | /// This is relevant wherever XML permits arbitrary characters, such as attribute |
153 | /// values, processing instruction data, and comments as well as in data reported |
154 | /// from this method. It's also generally relevant whenever C++ code manipulates |
155 | /// internationalized text; the issue isn't unique to XML. |
156 | /// |
157 | /// Note that some parsers will report whitespace in element content using the |
158 | /// ignorableWhitespace method rather than this one (validating parsers must |
159 | /// do so). |
160 | |
161 | virtual void ignorableWhitespace(const XMLChar ch[], int start, int length) = 0; |
162 | /// Receive notification of ignorable whitespace in element content. |
163 | /// |
164 | /// Validating Parsers must use this method to report each chunk of whitespace |
165 | /// in element content (see the W3C XML 1.0 recommendation, section 2.10): non-validating |
166 | /// parsers may also use this method if they are capable of parsing and using |
167 | /// content models. |
168 | /// |
169 | /// SAX parsers may return all contiguous whitespace in a single chunk, or they |
170 | /// may split it into several chunks; however, all of the characters in any |
171 | /// single event must come from the same external entity, so that the Locator |
172 | /// provides useful information. |
173 | /// |
174 | /// The application must not attempt to read from the array outside of the specified |
175 | /// range. |
176 | |
177 | virtual void processingInstruction(const XMLString& target, const XMLString& data) = 0; |
178 | /// Receive notification of a processing instruction. |
179 | /// |
180 | /// The Parser will invoke this method once for each processing instruction |
181 | /// found: note that processing instructions may occur before or after the main |
182 | /// document element. |
183 | /// |
184 | /// A SAX parser must never report an XML declaration (XML 1.0, section 2.8) |
185 | /// or a text declaration (XML 1.0, section 4.3.1) using this method. |
186 | /// |
187 | /// Like characters(), processing instruction data may have characters that |
188 | /// need more than one char value. |
189 | |
190 | virtual void startPrefixMapping(const XMLString& prefix, const XMLString& uri) = 0; |
191 | /// Begin the scope of a prefix-URI Namespace mapping. |
192 | /// |
193 | /// The information from this event is not necessary for normal Namespace processing: |
194 | /// the SAX XML reader will automatically replace prefixes for element and attribute |
195 | /// names when the http://xml.org/sax/features/namespaces feature is true (the |
196 | /// default). |
197 | /// |
198 | /// There are cases, however, when applications need to use prefixes in character |
199 | /// data or in attribute values, where they cannot safely be expanded automatically; |
200 | /// the start/endPrefixMapping event supplies the information to the application |
201 | /// to expand prefixes in those contexts itself, if necessary. |
202 | /// |
203 | /// Note that start/endPrefixMapping events are not guaranteed to be properly |
204 | /// nested relative to each other: all startPrefixMapping events will occur |
205 | /// immediately before the corresponding startElement event, and all endPrefixMapping |
206 | /// events will occur immediately after the corresponding endElement event, |
207 | /// but their order is not otherwise guaranteed. |
208 | /// |
209 | /// There should never be start/endPrefixMapping events for the "xml" prefix, |
210 | /// since it is predeclared and immutable. |
211 | |
212 | virtual void endPrefixMapping(const XMLString& prefix) = 0; |
213 | /// End the scope of a prefix-URI mapping. |
214 | /// |
215 | /// See startPrefixMapping for details. These events will always occur immediately |
216 | /// after the corresponding endElement event, but the order of endPrefixMapping |
217 | /// events is not otherwise guaranteed. |
218 | |
219 | virtual void skippedEntity(const XMLString& name) = 0; |
220 | /// Receive notification of a skipped entity. This is not called for entity |
221 | /// references within markup constructs such as element start tags or markup |
222 | /// declarations. (The XML recommendation requires reporting skipped external |
223 | /// entities. SAX also reports internal entity expansion/non-expansion, except |
224 | /// within markup constructs.) |
225 | /// |
226 | /// The Parser will invoke this method each time the entity is skipped. Non-validating |
227 | /// processors may skip entities if they have not seen the declarations (because, |
228 | /// for example, the entity was declared in an external DTD subset). All processors |
229 | /// may skip external entities, depending on the values of the http://xml.org/sax/features/external-general-entities |
230 | /// and the http://xml.org/sax/features/external-parameter-entities properties. |
231 | |
232 | protected: |
233 | virtual ~ContentHandler(); |
234 | }; |
235 | |
236 | |
237 | } } // namespace Poco::XML |
238 | |
239 | |
240 | #endif // SAX_ContentHandler_INCLUDED |
241 | |