| 1 | // |
| 2 | // ParserEngine.cpp |
| 3 | // |
| 4 | // Library: XML |
| 5 | // Package: XML |
| 6 | // Module: ParserEngine |
| 7 | // |
| 8 | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. |
| 9 | // and Contributors. |
| 10 | // |
| 11 | // SPDX-License-Identifier: BSL-1.0 |
| 12 | // |
| 13 | |
| 14 | |
| 15 | #include "Poco/XML/ParserEngine.h" |
| 16 | #include "Poco/XML/NamespaceStrategy.h" |
| 17 | #include "Poco/XML/XMLException.h" |
| 18 | #include "Poco/SAX/EntityResolver.h" |
| 19 | #include "Poco/SAX/EntityResolverImpl.h" |
| 20 | #include "Poco/SAX/DTDHandler.h" |
| 21 | #include "Poco/SAX/DeclHandler.h" |
| 22 | #include "Poco/SAX/ContentHandler.h" |
| 23 | #include "Poco/SAX/LexicalHandler.h" |
| 24 | #include "Poco/SAX/ErrorHandler.h" |
| 25 | #include "Poco/SAX/InputSource.h" |
| 26 | #include "Poco/SAX/Locator.h" |
| 27 | #include "Poco/SAX/LocatorImpl.h" |
| 28 | #include "Poco/SAX/SAXException.h" |
| 29 | #include "Poco/URI.h" |
| 30 | #include <cstring> |
| 31 | |
| 32 | |
| 33 | using Poco::URI; |
| 34 | using Poco::TextEncoding; |
| 35 | |
| 36 | |
| 37 | namespace Poco { |
| 38 | namespace XML { |
| 39 | |
| 40 | |
| 41 | class ContextLocator: public Locator |
| 42 | { |
| 43 | public: |
| 44 | ContextLocator(XML_Parser parser, const XMLString& publicId, const XMLString& systemId): |
| 45 | _parser(parser), |
| 46 | _publicId(publicId), |
| 47 | _systemId(systemId) |
| 48 | { |
| 49 | } |
| 50 | |
| 51 | ~ContextLocator() |
| 52 | { |
| 53 | } |
| 54 | |
| 55 | XMLString getPublicId() const |
| 56 | { |
| 57 | return _publicId; |
| 58 | } |
| 59 | |
| 60 | XMLString getSystemId() const |
| 61 | { |
| 62 | return _systemId; |
| 63 | } |
| 64 | |
| 65 | int getLineNumber() const |
| 66 | { |
| 67 | return XML_GetCurrentLineNumber(_parser); |
| 68 | } |
| 69 | |
| 70 | int getColumnNumber() const |
| 71 | { |
| 72 | return XML_GetCurrentColumnNumber(_parser); |
| 73 | } |
| 74 | |
| 75 | private: |
| 76 | XML_Parser _parser; |
| 77 | XMLString _publicId; |
| 78 | XMLString _systemId; |
| 79 | }; |
| 80 | |
| 81 | |
// Size, in bytes, of the buffers used to feed input to XML_Parse() chunk by chunk.
const int ParserEngine::PARSE_BUFFER_SIZE = 4096;
// Empty string substituted where Expat reports a null prefix, URI or identifier.
const XMLString ParserEngine::EMPTY_STRING;
| 84 | |
| 85 | |
// Creates an engine without an explicitly specified input encoding.
// Neither the Expat parser nor the read buffer is created here; init() does that.
// Defaults: internal entities are expanded, external general and parameter
// entities are disabled, partial reads are disabled, a NoNamespacesStrategy
// is installed and no handlers or entity resolver are set.
ParserEngine::ParserEngine():
	_parser(0),
	_pBuffer(0),
	_encodingSpecified(false),
	_expandInternalEntities(true),
	_externalGeneralEntities(false),
	_externalParameterEntities(false),
	_enablePartialReads(false),
	_pNamespaceStrategy(new NoNamespacesStrategy()),
	_pEntityResolver(0),
	_pDTDHandler(0),
	_pDeclHandler(0),
	_pContentHandler(0),
	_pLexicalHandler(0),
	_pErrorHandler(0)
{
}
| 103 | |
| 104 | |
// Creates an engine that passes the given encoding name to Expat when the
// parser is created in init(). All other settings have the same defaults
// as the default constructor.
ParserEngine::ParserEngine(const XMLString& encoding):
	_parser(0),
	_pBuffer(0),
	_encodingSpecified(true),
	_encoding(encoding),
	_expandInternalEntities(true),
	_externalGeneralEntities(false),
	_externalParameterEntities(false),
	_enablePartialReads(false),
	_pNamespaceStrategy(new NoNamespacesStrategy()),
	_pEntityResolver(0),
	_pDTDHandler(0),
	_pDeclHandler(0),
	_pContentHandler(0),
	_pLexicalHandler(0),
	_pErrorHandler(0)
{
}
| 123 | |
| 124 | |
| 125 | ParserEngine::~ParserEngine() |
| 126 | { |
| 127 | resetContext(); |
| 128 | if (_parser) XML_ParserFree(_parser); |
| 129 | delete [] _pBuffer; |
| 130 | delete _pNamespaceStrategy; |
| 131 | } |
| 132 | |
| 133 | |
| 134 | void ParserEngine::setEncoding(const XMLString& encoding) |
| 135 | { |
| 136 | _encoding = encoding; |
| 137 | _encodingSpecified = true; |
| 138 | } |
| 139 | |
| 140 | |
| 141 | void ParserEngine::addEncoding(const XMLString& name, TextEncoding* pEncoding) |
| 142 | { |
| 143 | poco_check_ptr (pEncoding); |
| 144 | |
| 145 | if (_encodings.find(name) == _encodings.end()) |
| 146 | _encodings[name] = pEncoding; |
| 147 | else |
| 148 | throw XMLException("Encoding already defined" ); |
| 149 | } |
| 150 | |
| 151 | |
| 152 | void ParserEngine::setNamespaceStrategy(NamespaceStrategy* pStrategy) |
| 153 | { |
| 154 | poco_check_ptr (pStrategy); |
| 155 | |
| 156 | delete _pNamespaceStrategy; |
| 157 | _pNamespaceStrategy = pStrategy; |
| 158 | } |
| 159 | |
| 160 | |
| 161 | void ParserEngine::setExpandInternalEntities(bool flag) |
| 162 | { |
| 163 | _expandInternalEntities = flag; |
| 164 | } |
| 165 | |
| 166 | |
| 167 | void ParserEngine::setExternalGeneralEntities(bool flag) |
| 168 | { |
| 169 | _externalGeneralEntities = flag; |
| 170 | } |
| 171 | |
| 172 | |
| 173 | void ParserEngine::setExternalParameterEntities(bool flag) |
| 174 | { |
| 175 | _externalParameterEntities = flag; |
| 176 | } |
| 177 | |
| 178 | |
| 179 | void ParserEngine::setEntityResolver(EntityResolver* pResolver) |
| 180 | { |
| 181 | _pEntityResolver = pResolver; |
| 182 | } |
| 183 | |
| 184 | |
| 185 | void ParserEngine::setDTDHandler(DTDHandler* pDTDHandler) |
| 186 | { |
| 187 | _pDTDHandler = pDTDHandler; |
| 188 | } |
| 189 | |
| 190 | |
| 191 | void ParserEngine::setDeclHandler(DeclHandler* pDeclHandler) |
| 192 | { |
| 193 | _pDeclHandler = pDeclHandler; |
| 194 | } |
| 195 | |
| 196 | |
| 197 | void ParserEngine::setContentHandler(ContentHandler* pContentHandler) |
| 198 | { |
| 199 | _pContentHandler = pContentHandler; |
| 200 | } |
| 201 | |
| 202 | |
| 203 | void ParserEngine::setLexicalHandler(LexicalHandler* pLexicalHandler) |
| 204 | { |
| 205 | _pLexicalHandler = pLexicalHandler; |
| 206 | } |
| 207 | |
| 208 | |
| 209 | void ParserEngine::setErrorHandler(ErrorHandler* pErrorHandler) |
| 210 | { |
| 211 | _pErrorHandler = pErrorHandler; |
| 212 | } |
| 213 | |
| 214 | |
| 215 | void ParserEngine::setEnablePartialReads(bool flag) |
| 216 | { |
| 217 | _enablePartialReads = flag; |
| 218 | } |
| 219 | |
| 220 | |
| 221 | void ParserEngine::parse(InputSource* pInputSource) |
| 222 | { |
| 223 | init(); |
| 224 | resetContext(); |
| 225 | pushContext(_parser, pInputSource); |
| 226 | if (_pContentHandler) _pContentHandler->setDocumentLocator(this); |
| 227 | if (_pContentHandler) _pContentHandler->startDocument(); |
| 228 | if (pInputSource->getCharacterStream()) |
| 229 | parseCharInputStream(*pInputSource->getCharacterStream()); |
| 230 | else if (pInputSource->getByteStream()) |
| 231 | parseByteInputStream(*pInputSource->getByteStream()); |
| 232 | else throw XMLException("Input source has no stream" ); |
| 233 | if (_pContentHandler) _pContentHandler->endDocument(); |
| 234 | popContext(); |
| 235 | } |
| 236 | |
| 237 | |
| 238 | void ParserEngine::parse(const char* pBuffer, std::size_t size) |
| 239 | { |
| 240 | init(); |
| 241 | resetContext(); |
| 242 | InputSource src; |
| 243 | pushContext(_parser, &src); |
| 244 | if (_pContentHandler) _pContentHandler->setDocumentLocator(this); |
| 245 | if (_pContentHandler) _pContentHandler->startDocument(); |
| 246 | std::size_t processed = 0; |
| 247 | while (processed < size) |
| 248 | { |
| 249 | const int bufferSize = processed + PARSE_BUFFER_SIZE < size ? PARSE_BUFFER_SIZE : static_cast<int>(size - processed); |
| 250 | if (!XML_Parse(_parser, pBuffer + processed, bufferSize, 0)) |
| 251 | handleError(XML_GetErrorCode(_parser)); |
| 252 | processed += bufferSize; |
| 253 | } |
| 254 | if (!XML_Parse(_parser, pBuffer+processed, 0, 1)) |
| 255 | handleError(XML_GetErrorCode(_parser)); |
| 256 | if (_pContentHandler) _pContentHandler->endDocument(); |
| 257 | popContext(); |
| 258 | } |
| 259 | |
| 260 | |
| 261 | void ParserEngine::parseByteInputStream(XMLByteInputStream& istr) |
| 262 | { |
| 263 | std::streamsize n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE); |
| 264 | while (n > 0) |
| 265 | { |
| 266 | if (!XML_Parse(_parser, _pBuffer, static_cast<int>(n), 0)) |
| 267 | handleError(XML_GetErrorCode(_parser)); |
| 268 | if (istr.good()) |
| 269 | n = readBytes(istr, _pBuffer, PARSE_BUFFER_SIZE); |
| 270 | else |
| 271 | n = 0; |
| 272 | } |
| 273 | if (!XML_Parse(_parser, _pBuffer, 0, 1)) |
| 274 | handleError(XML_GetErrorCode(_parser)); |
| 275 | } |
| 276 | |
| 277 | |
| 278 | void ParserEngine::parseCharInputStream(XMLCharInputStream& istr) |
| 279 | { |
| 280 | std::streamsize n = readChars(istr, reinterpret_cast<XMLChar*>(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
| 281 | while (n > 0) |
| 282 | { |
| 283 | if (!XML_Parse(_parser, _pBuffer, static_cast<int>(n*sizeof(XMLChar)), 0)) |
| 284 | handleError(XML_GetErrorCode(_parser)); |
| 285 | if (istr.good()) |
| 286 | n = readChars(istr, reinterpret_cast<XMLChar*>(_pBuffer), PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
| 287 | else |
| 288 | n = 0; |
| 289 | } |
| 290 | if (!XML_Parse(_parser, _pBuffer, 0, 1)) |
| 291 | handleError(XML_GetErrorCode(_parser)); |
| 292 | } |
| 293 | |
| 294 | |
| 295 | void ParserEngine::parseExternal(XML_Parser extParser, InputSource* pInputSource) |
| 296 | { |
| 297 | pushContext(extParser, pInputSource); |
| 298 | if (pInputSource->getCharacterStream()) |
| 299 | parseExternalCharInputStream(extParser, *pInputSource->getCharacterStream()); |
| 300 | else if (pInputSource->getByteStream()) |
| 301 | parseExternalByteInputStream(extParser, *pInputSource->getByteStream()); |
| 302 | else throw XMLException("Input source has no stream" ); |
| 303 | popContext(); |
| 304 | } |
| 305 | |
| 306 | |
| 307 | void ParserEngine::parseExternalByteInputStream(XML_Parser extParser, XMLByteInputStream& istr) |
| 308 | { |
| 309 | char *pBuffer = new char[PARSE_BUFFER_SIZE]; |
| 310 | try |
| 311 | { |
| 312 | std::streamsize n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE); |
| 313 | while (n > 0) |
| 314 | { |
| 315 | if (!XML_Parse(extParser, pBuffer, static_cast<int>(n), 0)) |
| 316 | handleError(XML_GetErrorCode(extParser)); |
| 317 | if (istr.good()) |
| 318 | n = readBytes(istr, pBuffer, PARSE_BUFFER_SIZE); |
| 319 | else |
| 320 | n = 0; |
| 321 | } |
| 322 | if (!XML_Parse(extParser, pBuffer, 0, 1)) |
| 323 | handleError(XML_GetErrorCode(extParser)); |
| 324 | } |
| 325 | catch (...) |
| 326 | { |
| 327 | delete [] pBuffer; |
| 328 | throw; |
| 329 | } |
| 330 | delete [] pBuffer; |
| 331 | } |
| 332 | |
| 333 | |
| 334 | void ParserEngine::parseExternalCharInputStream(XML_Parser extParser, XMLCharInputStream& istr) |
| 335 | { |
| 336 | XMLChar *pBuffer = new XMLChar[PARSE_BUFFER_SIZE/sizeof(XMLChar)]; |
| 337 | try |
| 338 | { |
| 339 | std::streamsize n = readChars(istr, pBuffer, PARSE_BUFFER_SIZE/sizeof(XMLChar)); |
| 340 | while (n > 0) |
| 341 | { |
| 342 | if (!XML_Parse(extParser, reinterpret_cast<char*>(pBuffer), static_cast<int>(n*sizeof(XMLChar)), 0)) |
| 343 | handleError(XML_GetErrorCode(extParser)); |
| 344 | if (istr.good()) |
| 345 | n = readChars(istr, pBuffer, static_cast<int>(PARSE_BUFFER_SIZE/sizeof(XMLChar))); |
| 346 | else |
| 347 | n = 0; |
| 348 | } |
| 349 | if (!XML_Parse(extParser, reinterpret_cast<char*>(pBuffer), 0, 1)) |
| 350 | handleError(XML_GetErrorCode(extParser)); |
| 351 | } |
| 352 | catch (...) |
| 353 | { |
| 354 | delete [] pBuffer; |
| 355 | throw; |
| 356 | } |
| 357 | delete [] pBuffer; |
| 358 | } |
| 359 | |
| 360 | |
| 361 | std::streamsize ParserEngine::readBytes(XMLByteInputStream& istr, char* pBuffer, std::streamsize bufferSize) |
| 362 | { |
| 363 | if (_enablePartialReads) |
| 364 | { |
| 365 | istr.read(pBuffer, 1); |
| 366 | if (istr.gcount() == 1) |
| 367 | { |
| 368 | std::streamsize n = istr.readsome(pBuffer + 1, bufferSize - 1); |
| 369 | return n + 1; |
| 370 | } |
| 371 | else return 0; |
| 372 | } |
| 373 | else |
| 374 | { |
| 375 | istr.read(pBuffer, bufferSize); |
| 376 | return istr.gcount(); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | |
| 381 | std::streamsize ParserEngine::readChars(XMLCharInputStream& istr, XMLChar* pBuffer, std::streamsize bufferSize) |
| 382 | { |
| 383 | if (_enablePartialReads) |
| 384 | { |
| 385 | istr.read(pBuffer, 1); |
| 386 | if (istr.gcount() == 1) |
| 387 | { |
| 388 | std::streamsize n = istr.readsome(pBuffer + 1, bufferSize - 1); |
| 389 | return n + 1; |
| 390 | } |
| 391 | else return 0; |
| 392 | } |
| 393 | else |
| 394 | { |
| 395 | istr.read(pBuffer, bufferSize); |
| 396 | return istr.gcount(); |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | |
| 401 | XMLString ParserEngine::getPublicId() const |
| 402 | { |
| 403 | return locator().getPublicId(); |
| 404 | } |
| 405 | |
| 406 | |
| 407 | XMLString ParserEngine::getSystemId() const |
| 408 | { |
| 409 | return locator().getSystemId(); |
| 410 | } |
| 411 | |
| 412 | |
| 413 | int ParserEngine::getLineNumber() const |
| 414 | { |
| 415 | return locator().getLineNumber(); |
| 416 | } |
| 417 | |
| 418 | |
| 419 | int ParserEngine::getColumnNumber() const |
| 420 | { |
| 421 | return locator().getColumnNumber(); |
| 422 | } |
| 423 | |
| 424 | |
| 425 | namespace |
| 426 | { |
| 427 | static LocatorImpl nullLocator; |
| 428 | } |
| 429 | |
| 430 | |
| 431 | const Locator& ParserEngine::locator() const |
| 432 | { |
| 433 | if (_context.empty()) |
| 434 | return nullLocator; |
| 435 | else |
| 436 | return *_context.back(); |
| 437 | } |
| 438 | |
| 439 | |
| 440 | void ParserEngine::init() |
| 441 | { |
| 442 | if (_parser) |
| 443 | XML_ParserFree(_parser); |
| 444 | |
| 445 | if (!_pBuffer) |
| 446 | _pBuffer = new char[PARSE_BUFFER_SIZE]; |
| 447 | |
| 448 | if (dynamic_cast<NoNamespacePrefixesStrategy*>(_pNamespaceStrategy)) |
| 449 | { |
| 450 | _parser = XML_ParserCreateNS(_encodingSpecified ? _encoding.c_str() : 0, '\t'); |
| 451 | if (_parser) |
| 452 | { |
| 453 | XML_SetNamespaceDeclHandler(_parser, handleStartNamespaceDecl, handleEndNamespaceDecl); |
| 454 | } |
| 455 | } |
| 456 | else if (dynamic_cast<NamespacePrefixesStrategy*>(_pNamespaceStrategy)) |
| 457 | { |
| 458 | _parser = XML_ParserCreateNS(_encodingSpecified ? _encoding.c_str() : 0, '\t'); |
| 459 | if (_parser) |
| 460 | { |
| 461 | XML_SetReturnNSTriplet(_parser, 1); |
| 462 | XML_SetNamespaceDeclHandler(_parser, handleStartNamespaceDecl, handleEndNamespaceDecl); |
| 463 | } |
| 464 | } |
| 465 | else |
| 466 | { |
| 467 | _parser = XML_ParserCreate(_encodingSpecified ? _encoding.c_str() : 0); |
| 468 | } |
| 469 | |
| 470 | if (!_parser) throw XMLException("Cannot create Expat parser" ); |
| 471 | |
| 472 | XML_SetUserData(_parser, this); |
| 473 | XML_SetElementHandler(_parser, handleStartElement, handleEndElement); |
| 474 | XML_SetCharacterDataHandler(_parser, handleCharacterData); |
| 475 | XML_SetProcessingInstructionHandler(_parser, handleProcessingInstruction); |
| 476 | if (_expandInternalEntities) |
| 477 | XML_SetDefaultHandlerExpand(_parser, handleDefault); |
| 478 | else |
| 479 | XML_SetDefaultHandler(_parser, handleDefault); |
| 480 | XML_SetUnparsedEntityDeclHandler(_parser, handleUnparsedEntityDecl); |
| 481 | XML_SetNotationDeclHandler(_parser, handleNotationDecl); |
| 482 | XML_SetExternalEntityRefHandler(_parser, handleExternalEntityRef); |
| 483 | XML_SetCommentHandler(_parser, handleComment); |
| 484 | XML_SetCdataSectionHandler(_parser, handleStartCdataSection, handleEndCdataSection); |
| 485 | XML_SetDoctypeDeclHandler(_parser, handleStartDoctypeDecl, handleEndDoctypeDecl); |
| 486 | XML_SetEntityDeclHandler(_parser, handleEntityDecl); |
| 487 | XML_SetSkippedEntityHandler(_parser, handleSkippedEntity); |
| 488 | XML_SetParamEntityParsing(_parser, _externalParameterEntities ? XML_PARAM_ENTITY_PARSING_ALWAYS : XML_PARAM_ENTITY_PARSING_NEVER); |
| 489 | XML_SetUnknownEncodingHandler(_parser, handleUnknownEncoding, this); |
| 490 | } |
| 491 | |
| 492 | |
| 493 | void ParserEngine::handleError(int errorNo) |
| 494 | { |
| 495 | try |
| 496 | { |
| 497 | switch (errorNo) |
| 498 | { |
| 499 | case XML_ERROR_NO_MEMORY: |
| 500 | throw XMLException("No memory" ); |
| 501 | case XML_ERROR_SYNTAX: |
| 502 | throw SAXParseException("Syntax error" , locator()); |
| 503 | case XML_ERROR_NO_ELEMENTS: |
| 504 | throw SAXParseException("No element found" , locator()); |
| 505 | case XML_ERROR_INVALID_TOKEN: |
| 506 | throw SAXParseException("Invalid token" , locator()); |
| 507 | case XML_ERROR_UNCLOSED_TOKEN: |
| 508 | throw SAXParseException("Unclosed token" , locator()); |
| 509 | case XML_ERROR_PARTIAL_CHAR: |
| 510 | throw SAXParseException("Partial character" , locator()); |
| 511 | case XML_ERROR_TAG_MISMATCH: |
| 512 | throw SAXParseException("Tag mismatch" , locator()); |
| 513 | case XML_ERROR_DUPLICATE_ATTRIBUTE: |
| 514 | throw SAXParseException("Duplicate attribute" , locator()); |
| 515 | case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: |
| 516 | throw SAXParseException("Junk after document element" , locator()); |
| 517 | case XML_ERROR_PARAM_ENTITY_REF: |
| 518 | throw SAXParseException("Illegal parameter entity reference" , locator()); |
| 519 | case XML_ERROR_UNDEFINED_ENTITY: |
| 520 | throw SAXParseException("Undefined entity" , locator()); |
| 521 | case XML_ERROR_RECURSIVE_ENTITY_REF: |
| 522 | throw SAXParseException("Recursive entity reference" , locator()); |
| 523 | case XML_ERROR_ASYNC_ENTITY: |
| 524 | throw SAXParseException("Asynchronous entity" , locator()); |
| 525 | case XML_ERROR_BAD_CHAR_REF: |
| 526 | throw SAXParseException("Reference to invalid character number" , locator()); |
| 527 | case XML_ERROR_BINARY_ENTITY_REF: |
| 528 | throw SAXParseException("Reference to binary entity" , locator()); |
| 529 | case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: |
| 530 | throw SAXParseException("Reference to external entity in attribute" , locator()); |
| 531 | case XML_ERROR_MISPLACED_XML_PI: |
| 532 | throw SAXParseException("XML processing instruction not at start of external entity" , locator()); |
| 533 | case XML_ERROR_UNKNOWN_ENCODING: |
| 534 | throw SAXParseException("Unknown encoding" , locator()); |
| 535 | case XML_ERROR_INCORRECT_ENCODING: |
| 536 | throw SAXParseException("Encoding specified in XML declaration is incorrect" , locator()); |
| 537 | case XML_ERROR_UNCLOSED_CDATA_SECTION: |
| 538 | throw SAXParseException("Unclosed CDATA section" , locator()); |
| 539 | case XML_ERROR_EXTERNAL_ENTITY_HANDLING: |
| 540 | throw SAXParseException("Error in processing external entity reference" , locator()); |
| 541 | case XML_ERROR_NOT_STANDALONE: |
| 542 | throw SAXParseException("Document is not standalone" , locator()); |
| 543 | case XML_ERROR_UNEXPECTED_STATE: |
| 544 | throw SAXParseException("Unexpected parser state - please send a bug report" , locator()); |
| 545 | case XML_ERROR_ENTITY_DECLARED_IN_PE: |
| 546 | throw SAXParseException("Entity declared in parameter entity" , locator()); |
| 547 | case XML_ERROR_FEATURE_REQUIRES_XML_DTD: |
| 548 | throw SAXParseException("Requested feature requires XML_DTD support in Expat" , locator()); |
| 549 | case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: |
| 550 | throw SAXParseException("Cannot change setting once parsing has begun" , locator()); |
| 551 | case XML_ERROR_UNBOUND_PREFIX: |
| 552 | throw SAXParseException("Unbound prefix" , locator()); |
| 553 | case XML_ERROR_UNDECLARING_PREFIX: |
| 554 | throw SAXParseException("Must not undeclare prefix" , locator()); |
| 555 | case XML_ERROR_INCOMPLETE_PE: |
| 556 | throw SAXParseException("Incomplete markup in parameter entity" , locator()); |
| 557 | case XML_ERROR_XML_DECL: |
| 558 | throw SAXParseException("XML declaration not well-formed" , locator()); |
| 559 | case XML_ERROR_TEXT_DECL: |
| 560 | throw SAXParseException("Text declaration not well-formed" , locator()); |
| 561 | case XML_ERROR_PUBLICID: |
| 562 | throw SAXParseException("Illegal character(s) in public identifier" , locator()); |
| 563 | case XML_ERROR_SUSPENDED: |
| 564 | throw SAXParseException("Parser suspended" , locator()); |
| 565 | case XML_ERROR_NOT_SUSPENDED: |
| 566 | throw SAXParseException("Parser not suspended" , locator()); |
| 567 | case XML_ERROR_ABORTED: |
| 568 | throw SAXParseException("Parsing aborted" , locator()); |
| 569 | case XML_ERROR_FINISHED: |
| 570 | throw SAXParseException("Parsing finished" , locator()); |
| 571 | case XML_ERROR_SUSPEND_PE: |
| 572 | throw SAXParseException("Cannot suspend in external parameter entity" , locator()); |
| 573 | } |
| 574 | throw XMLException("Unknown Expat error code" ); |
| 575 | } |
| 576 | catch (SAXException& exc) |
| 577 | { |
| 578 | if (_pErrorHandler) _pErrorHandler->error(exc); |
| 579 | throw; |
| 580 | } |
| 581 | catch (Poco::Exception& exc) |
| 582 | { |
| 583 | if (_pErrorHandler) _pErrorHandler->fatalError(SAXParseException("Fatal error" , locator(), exc)); |
| 584 | throw; |
| 585 | } |
| 586 | } |
| 587 | |
| 588 | |
| 589 | void ParserEngine::pushContext(XML_Parser parser, InputSource* pInputSource) |
| 590 | { |
| 591 | ContextLocator* pLocator = new ContextLocator(parser, pInputSource->getPublicId(), pInputSource->getSystemId()); |
| 592 | _context.push_back(pLocator); |
| 593 | } |
| 594 | |
| 595 | |
| 596 | void ParserEngine::popContext() |
| 597 | { |
| 598 | poco_assert (!_context.empty()); |
| 599 | delete _context.back(); |
| 600 | _context.pop_back(); |
| 601 | } |
| 602 | |
| 603 | |
| 604 | void ParserEngine::resetContext() |
| 605 | { |
| 606 | for (ContextStack::iterator it = _context.begin(); it != _context.end(); ++it) |
| 607 | { |
| 608 | delete *it; |
| 609 | } |
| 610 | _context.clear(); |
| 611 | } |
| 612 | |
| 613 | |
| 614 | void ParserEngine::handleStartElement(void* userData, const XML_Char* name, const XML_Char** atts) |
| 615 | { |
| 616 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 617 | |
| 618 | if (pThis->_pContentHandler) |
| 619 | { |
| 620 | try |
| 621 | { |
| 622 | pThis->_pNamespaceStrategy->startElement(name, atts, XML_GetSpecifiedAttributeCount(pThis->_parser)/2, pThis->_pContentHandler); |
| 623 | } |
| 624 | catch (XMLException& exc) |
| 625 | { |
| 626 | throw SAXParseException(exc.message(), pThis->locator()); |
| 627 | } |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | |
| 632 | void ParserEngine::handleEndElement(void* userData, const XML_Char* name) |
| 633 | { |
| 634 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 635 | |
| 636 | if (pThis->_pContentHandler) |
| 637 | { |
| 638 | try |
| 639 | { |
| 640 | pThis->_pNamespaceStrategy->endElement(name, pThis->_pContentHandler); |
| 641 | } |
| 642 | catch (XMLException& exc) |
| 643 | { |
| 644 | throw SAXParseException(exc.message(), pThis->locator()); |
| 645 | } |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | |
| 650 | void ParserEngine::handleCharacterData(void* userData, const XML_Char* s, int len) |
| 651 | { |
| 652 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 653 | |
| 654 | if (pThis->_pContentHandler) |
| 655 | pThis->_pContentHandler->characters(s, 0, len); |
| 656 | } |
| 657 | |
| 658 | |
| 659 | void ParserEngine::handleProcessingInstruction(void* userData, const XML_Char* target, const XML_Char* data) |
| 660 | { |
| 661 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 662 | |
| 663 | if (pThis->_pContentHandler) |
| 664 | pThis->_pContentHandler->processingInstruction(target, data); |
| 665 | } |
| 666 | |
| 667 | |
| 668 | void ParserEngine::handleDefault(void* userData, const XML_Char* s, int len) |
| 669 | { |
| 670 | } |
| 671 | |
| 672 | |
| 673 | void ParserEngine::handleUnparsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId, const XML_Char* notationName) |
| 674 | { |
| 675 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 676 | |
| 677 | XMLString pubId; |
| 678 | if (publicId) pubId.assign(publicId); |
| 679 | if (pThis->_pDTDHandler) |
| 680 | pThis->_pDTDHandler->unparsedEntityDecl(entityName, publicId ? &pubId : 0, systemId, notationName); |
| 681 | } |
| 682 | |
| 683 | |
| 684 | void ParserEngine::handleNotationDecl(void* userData, const XML_Char* notationName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
| 685 | { |
| 686 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 687 | |
| 688 | XMLString pubId; |
| 689 | if (publicId) pubId.assign(publicId); |
| 690 | XMLString sysId; |
| 691 | if (systemId) sysId.assign(systemId); |
| 692 | if (pThis->_pDTDHandler) |
| 693 | pThis->_pDTDHandler->notationDecl(notationName, publicId ? &pubId : 0, systemId ? &sysId : 0); |
| 694 | } |
| 695 | |
| 696 | |
| 697 | int ParserEngine::handleExternalEntityRef(XML_Parser parser, const XML_Char* context, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
| 698 | { |
| 699 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(XML_GetUserData(parser)); |
| 700 | |
| 701 | if (!context && !pThis->_externalParameterEntities) return XML_STATUS_ERROR; |
| 702 | if (context && !pThis->_externalGeneralEntities) return XML_STATUS_ERROR; |
| 703 | |
| 704 | InputSource* pInputSource = 0; |
| 705 | EntityResolver* pEntityResolver = 0; |
| 706 | EntityResolverImpl defaultResolver; |
| 707 | |
| 708 | XMLString sysId(systemId); |
| 709 | XMLString pubId; |
| 710 | if (publicId) pubId.assign(publicId); |
| 711 | |
| 712 | URI uri(fromXMLString(pThis->_context.back()->getSystemId())); |
| 713 | uri.resolve(fromXMLString(sysId)); |
| 714 | |
| 715 | if (pThis->_pEntityResolver) |
| 716 | { |
| 717 | pEntityResolver = pThis->_pEntityResolver; |
| 718 | pInputSource = pEntityResolver->resolveEntity(publicId ? &pubId : 0, toXMLString(uri.toString())); |
| 719 | } |
| 720 | if (!pInputSource && pThis->_externalGeneralEntities) |
| 721 | { |
| 722 | pEntityResolver = &defaultResolver; |
| 723 | pInputSource = pEntityResolver->resolveEntity(publicId ? &pubId : 0, toXMLString(uri.toString())); |
| 724 | } |
| 725 | |
| 726 | if (pInputSource) |
| 727 | { |
| 728 | XML_Parser extParser = XML_ExternalEntityParserCreate(pThis->_parser, context, 0); |
| 729 | if (!extParser) throw XMLException("Cannot create external entity parser" ); |
| 730 | |
| 731 | try |
| 732 | { |
| 733 | pThis->parseExternal(extParser, pInputSource); |
| 734 | } |
| 735 | catch (XMLException&) |
| 736 | { |
| 737 | pEntityResolver->releaseInputSource(pInputSource); |
| 738 | XML_ParserFree(extParser); |
| 739 | throw; |
| 740 | } |
| 741 | pEntityResolver->releaseInputSource(pInputSource); |
| 742 | XML_ParserFree(extParser); |
| 743 | return XML_STATUS_OK; |
| 744 | } |
| 745 | else return XML_STATUS_ERROR; |
| 746 | } |
| 747 | |
| 748 | |
| 749 | int ParserEngine::handleUnknownEncoding(void* encodingHandlerData, const XML_Char* name, XML_Encoding* info) |
| 750 | { |
| 751 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(encodingHandlerData); |
| 752 | |
| 753 | XMLString encoding(name); |
| 754 | TextEncoding* knownEncoding = 0; |
| 755 | |
| 756 | EncodingMap::const_iterator it = pThis->_encodings.find(encoding); |
| 757 | if (it != pThis->_encodings.end()) |
| 758 | knownEncoding = it->second; |
| 759 | else |
| 760 | knownEncoding = Poco::TextEncoding::find(fromXMLString(encoding)); |
| 761 | |
| 762 | if (knownEncoding) |
| 763 | { |
| 764 | const TextEncoding::CharacterMap& map = knownEncoding->characterMap(); |
| 765 | for (int i = 0; i < 256; ++i) |
| 766 | info->map[i] = map[i]; |
| 767 | |
| 768 | info->data = knownEncoding; |
| 769 | info->convert = &ParserEngine::convert; |
| 770 | info->release = 0; |
| 771 | return XML_STATUS_OK; |
| 772 | } |
| 773 | else return XML_STATUS_ERROR; |
| 774 | } |
| 775 | |
| 776 | |
| 777 | void ParserEngine::handleComment(void* userData, const XML_Char* data) |
| 778 | { |
| 779 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 780 | |
| 781 | #if defined(XML_UNICODE_WCHAR_T) |
| 782 | if (pThis->_pLexicalHandler) |
| 783 | pThis->_pLexicalHandler->comment(data, 0, (int) std::wcslen(data)); |
| 784 | #else |
| 785 | if (pThis->_pLexicalHandler) |
| 786 | pThis->_pLexicalHandler->comment(data, 0, (int) std::strlen(data)); |
| 787 | #endif |
| 788 | } |
| 789 | |
| 790 | |
| 791 | void ParserEngine::handleStartCdataSection(void* userData) |
| 792 | { |
| 793 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 794 | |
| 795 | if (pThis->_pLexicalHandler) |
| 796 | pThis->_pLexicalHandler->startCDATA(); |
| 797 | } |
| 798 | |
| 799 | |
| 800 | void ParserEngine::handleEndCdataSection(void* userData) |
| 801 | { |
| 802 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 803 | |
| 804 | if (pThis->_pLexicalHandler) |
| 805 | pThis->_pLexicalHandler->endCDATA(); |
| 806 | } |
| 807 | |
| 808 | |
| 809 | void ParserEngine::handleStartNamespaceDecl(void* userData, const XML_Char* prefix, const XML_Char* uri) |
| 810 | { |
| 811 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 812 | |
| 813 | if (pThis->_pContentHandler) |
| 814 | pThis->_pContentHandler->startPrefixMapping((prefix ? XMLString(prefix) : EMPTY_STRING), (uri ? XMLString(uri) : EMPTY_STRING)); |
| 815 | } |
| 816 | |
| 817 | |
| 818 | void ParserEngine::handleEndNamespaceDecl(void* userData, const XML_Char* prefix) |
| 819 | { |
| 820 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 821 | |
| 822 | if (pThis->_pContentHandler) |
| 823 | pThis->_pContentHandler->endPrefixMapping(prefix ? XMLString(prefix) : EMPTY_STRING); |
| 824 | } |
| 825 | |
| 826 | |
| 827 | void ParserEngine::handleStartDoctypeDecl(void* userData, const XML_Char* doctypeName, const XML_Char *systemId, const XML_Char* publicId, int hasInternalSubset) |
| 828 | { |
| 829 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 830 | |
| 831 | if (pThis->_pLexicalHandler) |
| 832 | { |
| 833 | XMLString sysId = systemId ? XMLString(systemId) : EMPTY_STRING; |
| 834 | XMLString pubId = publicId ? XMLString(publicId) : EMPTY_STRING; |
| 835 | pThis->_pLexicalHandler->startDTD(doctypeName, pubId, sysId); |
| 836 | } |
| 837 | } |
| 838 | |
| 839 | |
| 840 | void ParserEngine::handleEndDoctypeDecl(void* userData) |
| 841 | { |
| 842 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 843 | |
| 844 | if (pThis->_pLexicalHandler) |
| 845 | pThis->_pLexicalHandler->endDTD(); |
| 846 | } |
| 847 | |
| 848 | |
| 849 | void ParserEngine::handleEntityDecl(void *userData, const XML_Char *entityName, int isParamEntity, const XML_Char *value, int valueLength, |
| 850 | const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) |
| 851 | { |
| 852 | if (value) |
| 853 | handleInternalParsedEntityDecl(userData, entityName, value, valueLength); |
| 854 | else |
| 855 | handleExternalParsedEntityDecl(userData, entityName, base, systemId, publicId); |
| 856 | } |
| 857 | |
| 858 | |
| 859 | void ParserEngine::handleExternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* base, const XML_Char* systemId, const XML_Char* publicId) |
| 860 | { |
| 861 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 862 | |
| 863 | XMLString pubId; |
| 864 | if (publicId) pubId.assign(publicId); |
| 865 | if (pThis->_pDeclHandler) |
| 866 | pThis->_pDeclHandler->externalEntityDecl(entityName, publicId ? &pubId : 0, systemId); |
| 867 | } |
| 868 | |
| 869 | |
| 870 | void ParserEngine::handleInternalParsedEntityDecl(void* userData, const XML_Char* entityName, const XML_Char* replacementText, int replacementTextLength) |
| 871 | { |
| 872 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 873 | |
| 874 | XMLString replText(replacementText, replacementTextLength); |
| 875 | if (pThis->_pDeclHandler) |
| 876 | pThis->_pDeclHandler->internalEntityDecl(entityName, replText); |
| 877 | } |
| 878 | |
| 879 | |
| 880 | void ParserEngine::handleSkippedEntity(void* userData, const XML_Char* entityName, int isParameterEntity) |
| 881 | { |
| 882 | ParserEngine* pThis = reinterpret_cast<ParserEngine*>(userData); |
| 883 | |
| 884 | if (pThis->_pContentHandler) |
| 885 | pThis->_pContentHandler->skippedEntity(entityName); |
| 886 | } |
| 887 | |
| 888 | |
| 889 | int ParserEngine::convert(void* data, const char* s) |
| 890 | { |
| 891 | TextEncoding* pEncoding = reinterpret_cast<TextEncoding*>(data); |
| 892 | return pEncoding->convert((const unsigned char*) s); |
| 893 | } |
| 894 | |
| 895 | |
| 896 | } } // namespace Poco::XML |
| 897 | |