1//
2// ContentHandler.h
3//
4// Library: XML
5// Package: SAX
6// Module: SAX
7//
8// SAX2 ContentHandler Interface.
9//
10// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
11// and Contributors.
12//
13// SPDX-License-Identifier: BSL-1.0
14//
15
16
17#ifndef SAX_ContentHandler_INCLUDED
18#define SAX_ContentHandler_INCLUDED
19
20
21#include "Poco/XML/XML.h"
22#include "Poco/XML/XMLString.h"
23
24
25namespace Poco {
26namespace XML {
27
28
29class Locator; // forward declaration: used in this header only as const Locator* (setDocumentLocator)
30class Attributes; // forward declaration: used in this header only as const Attributes& (startElement)
31
32
33class XML_API ContentHandler
34 /// Receive notification of the logical content of a document.
35 ///
36 /// This is the main interface that most SAX applications implement: if the
37 /// application needs to be informed of basic parsing events, it implements
38 /// this interface and registers an instance with the SAX parser using the setContentHandler
39 /// method. The parser uses the instance to report basic document-related events
40 /// like the start and end of elements and character data.
41 ///
42 /// The order of events in this interface is very important, and mirrors the
43 /// order of information in the document itself. For example, all of an element's
44 /// content (character data, processing instructions, and/or subelements) will
45 /// appear, in order, between the startElement event and the corresponding endElement
46 /// event.
47 ///
48 /// This interface is similar to the now-deprecated SAX 1.0 DocumentHandler
49 /// interface, but it adds support for Namespaces and for reporting skipped
50 /// entities (in non-validating XML processors).
51 /// All callbacks declared here are pure virtual, so a concrete handler must implement each one.
52{
53public:
54 virtual void setDocumentLocator(const Locator* loc) = 0;
55 /// Receive an object for locating the origin of SAX document events.
56 ///
57 /// SAX parsers are strongly encouraged (though not absolutely required) to
58 /// supply a locator: if it does so, it must supply the locator to the application
59 /// by invoking this method before invoking any of the other methods in the
60 /// ContentHandler interface.
61 ///
62 /// The locator allows the application to determine the end position of any
63 /// document-related event, even if the parser is not reporting an error. Typically,
64 /// the application will use this information for reporting its own errors (such
65 /// as character content that does not match an application's business rules).
66 /// The information returned by the locator is probably not sufficient for use
67 /// with a search engine.
68 ///
69 /// Note that the locator will return correct information only during the invocation of
70 /// SAX event callbacks after startDocument returns and before endDocument is
71 /// called. The application should not attempt to use it at any other time.
72
73 virtual void startDocument() = 0;
74 /// Receive notification of the beginning of a document.
75 ///
76 /// The SAX parser calls this function exactly once, before calling any other
77 /// function of this class (except setDocumentLocator).
78
79 virtual void endDocument() = 0;
80 /// Receive notification of the end of a document.
81 ///
82 /// The SAX parser will invoke this method only once, and it will be the last
83 /// method invoked during the parse. The parser shall not invoke this method
84 /// until it has either abandoned parsing (because of an unrecoverable error)
85 /// or reached the end of input.
86
87 virtual void startElement(const XMLString& uri, const XMLString& localName, const XMLString& qname, const Attributes& attrList) = 0;
88 /// Receive notification of the beginning of an element.
89 ///
90 /// The Parser will invoke this method at the beginning of every element in
91 /// the XML document; there will be a corresponding endElement event for every
92 /// startElement event (even when the element is empty). All of the element's
93 /// content will be reported, in order, before the corresponding endElement
94 /// event.
95 ///
96 /// This event allows up to three name components for each element:
97 /// 1. the Namespace URI;
98 /// 2. the local name; and
99 /// 3. the qualified (prefixed) name.
100 ///
101 /// Any or all of these may be provided, depending on the values of the http://xml.org/sax/features/namespaces
102 /// and the http://xml.org/sax/features/namespace-prefixes properties:
103 /// * the Namespace URI and local name are required when the namespaces
104 /// property is true (the default), and are optional when the namespaces property
105 /// is false (if one is specified, both must be);
106 /// * the qualified name is required when the namespace-prefixes property
107 /// is true, and is optional when the namespace-prefixes property is false (the
108 /// default).
109 ///
110 /// Note that the attribute list provided will contain only attributes with
111 /// explicit values (specified or defaulted): #IMPLIED attributes will be omitted.
112 /// The attribute list will contain attributes used for Namespace declarations
113 /// (xmlns* attributes) only if the http://xml.org/sax/features/namespace-prefixes
114 /// property is true (it is false by default, and support for a true value is
115 /// optional).
116 ///
117 /// Like characters(), attribute values may have characters that need more than
118 /// one XMLChar value.
119
120 virtual void endElement(const XMLString& uri, const XMLString& localName, const XMLString& qname) = 0;
121 /// Receive notification of the end of an element.
122 ///
123 /// The SAX parser will invoke this method at the end of every element in the
124 /// XML document; there will be a corresponding startElement event for every
125 /// endElement event (even when the element is empty).
126 ///
127 /// For information on the names, see startElement.
128
129 virtual void characters(const XMLChar ch[], int start, int length) = 0;
130 /// Receive notification of character data.
131 ///
132 /// The Parser will call this method to report each chunk of character data.
133 /// SAX parsers may return all contiguous character data in a single chunk,
134 /// or they may split it into several chunks; however, all of the characters
135 /// in any single event must come from the same external entity so that the
136 /// Locator provides useful information.
137 ///
138 /// The application must not attempt to read from the array ch outside of the
139 /// range specified by start and length.
140 ///
141 /// Individual characters may consist of more than one XMLChar value. There
142 /// are three important cases where this happens, because characters can't be
143 /// represented in just sixteen bits. In one case, characters are represented
144 /// in a Surrogate Pair, using two special Unicode values. Such characters are
145 /// in the so-called "Astral Planes", with a code point above U+FFFF. A second
146 /// case involves composite characters, such as a base character combining with
147 /// one or more accent characters. And most important, if XMLChar is a plain
148 /// char, characters are encoded in UTF-8.
149 ///
150 /// Your code should not assume that algorithms using char-at-a-time idioms
151 /// will be working in character units; in some cases they will split characters.
152 /// This is relevant wherever XML permits arbitrary characters, such as attribute
153 /// values, processing instruction data, and comments as well as in data reported
154 /// from this method. It's also generally relevant whenever C++ code manipulates
155 /// internationalized text; the issue isn't unique to XML.
156 ///
157 /// Note that some parsers will report whitespace in element content using the
158 /// ignorableWhitespace method rather than this one (validating parsers must
159 /// do so).
160
161 virtual void ignorableWhitespace(const XMLChar ch[], int start, int length) = 0;
162 /// Receive notification of ignorable whitespace in element content.
163 ///
164 /// Validating Parsers must use this method to report each chunk of whitespace
165 /// in element content (see the W3C XML 1.0 recommendation, section 2.10): non-validating
166 /// parsers may also use this method if they are capable of parsing and using
167 /// content models.
168 ///
169 /// SAX parsers may return all contiguous whitespace in a single chunk, or they
170 /// may split it into several chunks; however, all of the characters in any
171 /// single event must come from the same external entity, so that the Locator
172 /// provides useful information.
173 ///
174 /// The application must not attempt to read from the array ch outside of the
175 /// range specified by start and length.
176
177 virtual void processingInstruction(const XMLString& target, const XMLString& data) = 0;
178 /// Receive notification of a processing instruction.
179 ///
180 /// The Parser will invoke this method once for each processing instruction
181 /// found: note that processing instructions may occur before or after the main
182 /// document element.
183 ///
184 /// A SAX parser must never report an XML declaration (XML 1.0, section 2.8)
185 /// or a text declaration (XML 1.0, section 4.3.1) using this method.
186 ///
187 /// Like characters(), processing instruction data may have characters that
188 /// need more than one XMLChar value.
189
190 virtual void startPrefixMapping(const XMLString& prefix, const XMLString& uri) = 0;
191 /// Begin the scope of a prefix-URI Namespace mapping.
192 ///
193 /// The information from this event is not necessary for normal Namespace processing:
194 /// the SAX XML reader will automatically replace prefixes for element and attribute
195 /// names when the http://xml.org/sax/features/namespaces feature is true (the
196 /// default).
197 ///
198 /// There are cases, however, when applications need to use prefixes in character
199 /// data or in attribute values, where they cannot safely be expanded automatically;
200 /// the start/endPrefixMapping event supplies the information to the application
201 /// to expand prefixes in those contexts itself, if necessary.
202 ///
203 /// Note that start/endPrefixMapping events are not guaranteed to be properly
204 /// nested relative to each other: all startPrefixMapping events will occur
205 /// immediately before the corresponding startElement event, and all endPrefixMapping
206 /// events will occur immediately after the corresponding endElement event,
207 /// but their order is not otherwise guaranteed.
208 ///
209 /// There should never be start/endPrefixMapping events for the "xml" prefix,
210 /// since it is predeclared and immutable.
211
212 virtual void endPrefixMapping(const XMLString& prefix) = 0;
213 /// End the scope of a prefix-URI mapping.
214 ///
215 /// See startPrefixMapping for details. These events will always occur immediately
216 /// after the corresponding endElement event, but the order of endPrefixMapping
217 /// events is not otherwise guaranteed.
218
219 virtual void skippedEntity(const XMLString& name) = 0;
220 /// Receive notification of a skipped entity. This is not called for entity
221 /// references within markup constructs such as element start tags or markup
222 /// declarations. (The XML recommendation requires reporting skipped external
223 /// entities. SAX also reports internal entity expansion/non-expansion, except
224 /// within markup constructs.)
225 ///
226 /// The Parser will invoke this method each time an entity is skipped. Non-validating
227 /// processors may skip entities if they have not seen the declarations (because,
228 /// for example, the entity was declared in an external DTD subset). All processors
229 /// may skip external entities, depending on the values of the http://xml.org/sax/features/external-general-entities
230 /// and the http://xml.org/sax/features/external-parameter-entities properties.
231
232protected:
233 virtual ~ContentHandler(); /// Protected: code outside the class hierarchy cannot delete a handler through a ContentHandler pointer. Declared only; the definition is not in this header.
234};
235
236
237} } // namespace Poco::XML
238
239
240#endif // SAX_ContentHandler_INCLUDED
241