1 | /* |
2 | * HTMLtree.c : implementation of access function for an HTML tree. |
3 | * |
4 | * See Copyright for the status of this software. |
5 | * |
6 | * daniel@veillard.com |
7 | */ |
8 | |
9 | |
10 | #define IN_LIBXML |
11 | #include "libxml.h" |
12 | #ifdef LIBXML_HTML_ENABLED |
13 | |
14 | #include <string.h> /* for memset() only ! */ |
15 | |
16 | #ifdef HAVE_CTYPE_H |
17 | #include <ctype.h> |
18 | #endif |
19 | #ifdef HAVE_STDLIB_H |
20 | #include <stdlib.h> |
21 | #endif |
22 | |
23 | #include <libxml/xmlmemory.h> |
24 | #include <libxml/HTMLparser.h> |
25 | #include <libxml/HTMLtree.h> |
26 | #include <libxml/entities.h> |
27 | #include <libxml/valid.h> |
28 | #include <libxml/xmlerror.h> |
29 | #include <libxml/parserInternals.h> |
30 | #include <libxml/globals.h> |
31 | #include <libxml/uri.h> |
32 | |
33 | #include "buf.h" |
34 | |
35 | /************************************************************************ |
36 | * * |
37 | * Getting/Setting encoding meta tags * |
38 | * * |
39 | ************************************************************************/ |
40 | |
41 | /** |
42 | * htmlGetMetaEncoding: |
43 | * @doc: the document |
44 | * |
45 | * Encoding definition lookup in the Meta tags |
46 | * |
47 | * Returns the current encoding as flagged in the HTML source |
48 | */ |
49 | const xmlChar * |
50 | htmlGetMetaEncoding(htmlDocPtr doc) { |
51 | htmlNodePtr cur; |
52 | const xmlChar *content; |
53 | const xmlChar *encoding; |
54 | |
55 | if (doc == NULL) |
56 | return(NULL); |
57 | cur = doc->children; |
58 | |
59 | /* |
60 | * Search the html |
61 | */ |
62 | while (cur != NULL) { |
63 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
64 | if (xmlStrEqual(cur->name, BAD_CAST"html" )) |
65 | break; |
66 | if (xmlStrEqual(cur->name, BAD_CAST"head" )) |
67 | goto found_head; |
68 | if (xmlStrEqual(cur->name, BAD_CAST"meta" )) |
69 | goto found_meta; |
70 | } |
71 | cur = cur->next; |
72 | } |
73 | if (cur == NULL) |
74 | return(NULL); |
75 | cur = cur->children; |
76 | |
77 | /* |
78 | * Search the head |
79 | */ |
80 | while (cur != NULL) { |
81 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
82 | if (xmlStrEqual(cur->name, BAD_CAST"head" )) |
83 | break; |
84 | if (xmlStrEqual(cur->name, BAD_CAST"meta" )) |
85 | goto found_meta; |
86 | } |
87 | cur = cur->next; |
88 | } |
89 | if (cur == NULL) |
90 | return(NULL); |
91 | found_head: |
92 | cur = cur->children; |
93 | |
94 | /* |
95 | * Search the meta elements |
96 | */ |
97 | found_meta: |
98 | while (cur != NULL) { |
99 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
100 | if (xmlStrEqual(cur->name, BAD_CAST"meta" )) { |
101 | xmlAttrPtr attr = cur->properties; |
102 | int http; |
103 | const xmlChar *value; |
104 | |
105 | content = NULL; |
106 | http = 0; |
107 | while (attr != NULL) { |
108 | if ((attr->children != NULL) && |
109 | (attr->children->type == XML_TEXT_NODE) && |
110 | (attr->children->next == NULL)) { |
111 | value = attr->children->content; |
112 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv" )) |
113 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type" ))) |
114 | http = 1; |
115 | else if ((value != NULL) |
116 | && (!xmlStrcasecmp(attr->name, BAD_CAST"content" ))) |
117 | content = value; |
118 | if ((http != 0) && (content != NULL)) |
119 | goto found_content; |
120 | } |
121 | attr = attr->next; |
122 | } |
123 | } |
124 | } |
125 | cur = cur->next; |
126 | } |
127 | return(NULL); |
128 | |
129 | found_content: |
130 | encoding = xmlStrstr(content, BAD_CAST"charset=" ); |
131 | if (encoding == NULL) |
132 | encoding = xmlStrstr(content, BAD_CAST"Charset=" ); |
133 | if (encoding == NULL) |
134 | encoding = xmlStrstr(content, BAD_CAST"CHARSET=" ); |
135 | if (encoding != NULL) { |
136 | encoding += 8; |
137 | } else { |
138 | encoding = xmlStrstr(content, BAD_CAST"charset =" ); |
139 | if (encoding == NULL) |
140 | encoding = xmlStrstr(content, BAD_CAST"Charset =" ); |
141 | if (encoding == NULL) |
142 | encoding = xmlStrstr(content, BAD_CAST"CHARSET =" ); |
143 | if (encoding != NULL) |
144 | encoding += 9; |
145 | } |
146 | if (encoding != NULL) { |
147 | while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
148 | } |
149 | return(encoding); |
150 | } |
151 | |
152 | /** |
153 | * htmlSetMetaEncoding: |
154 | * @doc: the document |
155 | * @encoding: the encoding string |
156 | * |
157 | * Sets the current encoding in the Meta tags |
158 | * NOTE: this will not change the document content encoding, just |
159 | * the META flag associated. |
160 | * |
161 | * Returns 0 in case of success and -1 in case of error |
162 | */ |
163 | int |
164 | htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { |
165 | htmlNodePtr cur, meta = NULL, head = NULL; |
166 | const xmlChar *content = NULL; |
167 | char newcontent[100]; |
168 | |
169 | newcontent[0] = 0; |
170 | |
171 | if (doc == NULL) |
172 | return(-1); |
173 | |
174 | /* html isn't a real encoding it's just libxml2 way to get entities */ |
175 | if (!xmlStrcasecmp(encoding, BAD_CAST "html" )) |
176 | return(-1); |
177 | |
178 | if (encoding != NULL) { |
179 | snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s" , |
180 | (char *)encoding); |
181 | newcontent[sizeof(newcontent) - 1] = 0; |
182 | } |
183 | |
184 | cur = doc->children; |
185 | |
186 | /* |
187 | * Search the html |
188 | */ |
189 | while (cur != NULL) { |
190 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
191 | if (xmlStrcasecmp(cur->name, BAD_CAST"html" ) == 0) |
192 | break; |
193 | if (xmlStrcasecmp(cur->name, BAD_CAST"head" ) == 0) |
194 | goto found_head; |
195 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta" ) == 0) |
196 | goto found_meta; |
197 | } |
198 | cur = cur->next; |
199 | } |
200 | if (cur == NULL) |
201 | return(-1); |
202 | cur = cur->children; |
203 | |
204 | /* |
205 | * Search the head |
206 | */ |
207 | while (cur != NULL) { |
208 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
209 | if (xmlStrcasecmp(cur->name, BAD_CAST"head" ) == 0) |
210 | break; |
211 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta" ) == 0) { |
212 | head = cur->parent; |
213 | goto found_meta; |
214 | } |
215 | } |
216 | cur = cur->next; |
217 | } |
218 | if (cur == NULL) |
219 | return(-1); |
220 | found_head: |
221 | head = cur; |
222 | if (cur->children == NULL) |
223 | goto create; |
224 | cur = cur->children; |
225 | |
226 | found_meta: |
227 | /* |
228 | * Search and update all the remaining the meta elements carrying |
229 | * encoding informations |
230 | */ |
231 | while (cur != NULL) { |
232 | if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) { |
233 | if (xmlStrcasecmp(cur->name, BAD_CAST"meta" ) == 0) { |
234 | xmlAttrPtr attr = cur->properties; |
235 | int http; |
236 | const xmlChar *value; |
237 | |
238 | content = NULL; |
239 | http = 0; |
240 | while (attr != NULL) { |
241 | if ((attr->children != NULL) && |
242 | (attr->children->type == XML_TEXT_NODE) && |
243 | (attr->children->next == NULL)) { |
244 | value = attr->children->content; |
245 | if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv" )) |
246 | && (!xmlStrcasecmp(value, BAD_CAST"Content-Type" ))) |
247 | http = 1; |
248 | else |
249 | { |
250 | if ((value != NULL) && |
251 | (!xmlStrcasecmp(attr->name, BAD_CAST"content" ))) |
252 | content = value; |
253 | } |
254 | if ((http != 0) && (content != NULL)) |
255 | break; |
256 | } |
257 | attr = attr->next; |
258 | } |
259 | if ((http != 0) && (content != NULL)) { |
260 | meta = cur; |
261 | break; |
262 | } |
263 | |
264 | } |
265 | } |
266 | cur = cur->next; |
267 | } |
268 | create: |
269 | if (meta == NULL) { |
270 | if ((encoding != NULL) && (head != NULL)) { |
271 | /* |
272 | * Create a new Meta element with the right attributes |
273 | */ |
274 | |
275 | meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta" , NULL); |
276 | if (head->children == NULL) |
277 | xmlAddChild(head, meta); |
278 | else |
279 | xmlAddPrevSibling(head->children, meta); |
280 | xmlNewProp(meta, BAD_CAST"http-equiv" , BAD_CAST"Content-Type" ); |
281 | xmlNewProp(meta, BAD_CAST"content" , BAD_CAST newcontent); |
282 | } |
283 | } else { |
284 | /* remove the meta tag if NULL is passed */ |
285 | if (encoding == NULL) { |
286 | xmlUnlinkNode(meta); |
287 | xmlFreeNode(meta); |
288 | } |
289 | /* change the document only if there is a real encoding change */ |
290 | else if (xmlStrcasestr(content, encoding) == NULL) { |
291 | xmlSetProp(meta, BAD_CAST"content" , BAD_CAST newcontent); |
292 | } |
293 | } |
294 | |
295 | |
296 | return(0); |
297 | } |
298 | |
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which are output in
 * minimized form, i.e. <option selected="selected"> is written as
 * <option selected>, as per XSLT 1.0 16.2 "HTML Output Method".
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312 | |
313 | |
314 | /** |
315 | * htmlIsBooleanAttr: |
316 | * @name: the name of the attribute to check |
317 | * |
318 | * Determine if a given attribute is a boolean attribute. |
319 | * |
320 | * returns: false if the attribute is not boolean, true otherwise. |
321 | */ |
322 | int |
323 | htmlIsBooleanAttr(const xmlChar *name) |
324 | { |
325 | int i = 0; |
326 | |
327 | while (htmlBooleanAttrs[i] != NULL) { |
328 | if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0) |
329 | return 1; |
330 | i++; |
331 | } |
332 | return 0; |
333 | } |
334 | |
335 | #ifdef LIBXML_OUTPUT_ENABLED |
336 | /* |
337 | * private routine exported from xmlIO.c |
338 | */ |
339 | xmlOutputBufferPtr |
340 | xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); |
341 | /************************************************************************ |
342 | * * |
343 | * Output error handlers * |
344 | * * |
345 | ************************************************************************/ |
346 | /** |
347 | * htmlSaveErrMemory: |
348 | * @extra: extra informations |
349 | * |
350 | * Handle an out of memory condition |
351 | */ |
352 | static void |
353 | htmlSaveErrMemory(const char *) |
354 | { |
355 | __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra); |
356 | } |
357 | |
358 | /** |
359 | * htmlSaveErr: |
360 | * @code: the error number |
361 | * @node: the location of the error. |
362 | * @extra: extra informations |
363 | * |
364 | * Handle an out of memory condition |
365 | */ |
366 | static void |
367 | htmlSaveErr(int code, xmlNodePtr node, const char *) |
368 | { |
369 | const char *msg = NULL; |
370 | |
371 | switch(code) { |
372 | case XML_SAVE_NOT_UTF8: |
373 | msg = "string is not in UTF-8\n" ; |
374 | break; |
375 | case XML_SAVE_CHAR_INVALID: |
376 | msg = "invalid character value\n" ; |
377 | break; |
378 | case XML_SAVE_UNKNOWN_ENCODING: |
379 | msg = "unknown encoding %s\n" ; |
380 | break; |
381 | case XML_SAVE_NO_DOCTYPE: |
382 | msg = "HTML has no DOCTYPE\n" ; |
383 | break; |
384 | default: |
385 | msg = "unexpected error number\n" ; |
386 | } |
387 | __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra); |
388 | } |
389 | |
390 | /************************************************************************ |
391 | * * |
392 | * Dumping HTML tree content to a simple buffer * |
393 | * * |
394 | ************************************************************************/ |
395 | |
396 | /** |
397 | * htmlBufNodeDumpFormat: |
398 | * @buf: the xmlBufPtr output |
399 | * @doc: the document |
400 | * @cur: the current node |
401 | * @format: should formatting spaces been added |
402 | * |
403 | * Dump an HTML node, recursive behaviour,children are printed too. |
404 | * |
405 | * Returns the number of byte written or -1 in case of error |
406 | */ |
407 | static size_t |
408 | htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, |
409 | int format) { |
410 | size_t use; |
411 | int ret; |
412 | xmlOutputBufferPtr outbuf; |
413 | |
414 | if (cur == NULL) { |
415 | return (-1); |
416 | } |
417 | if (buf == NULL) { |
418 | return (-1); |
419 | } |
420 | outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); |
421 | if (outbuf == NULL) { |
422 | htmlSaveErrMemory("allocating HTML output buffer" ); |
423 | return (-1); |
424 | } |
425 | memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); |
426 | outbuf->buffer = buf; |
427 | outbuf->encoder = NULL; |
428 | outbuf->writecallback = NULL; |
429 | outbuf->closecallback = NULL; |
430 | outbuf->context = NULL; |
431 | outbuf->written = 0; |
432 | |
433 | use = xmlBufUse(buf); |
434 | htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); |
435 | xmlFree(outbuf); |
436 | ret = xmlBufUse(buf) - use; |
437 | return (ret); |
438 | } |
439 | |
440 | /** |
441 | * htmlNodeDump: |
442 | * @buf: the HTML buffer output |
443 | * @doc: the document |
444 | * @cur: the current node |
445 | * |
446 | * Dump an HTML node, recursive behaviour,children are printed too, |
447 | * and formatting returns are added. |
448 | * |
449 | * Returns the number of byte written or -1 in case of error |
450 | */ |
451 | int |
452 | htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { |
453 | xmlBufPtr buffer; |
454 | size_t ret; |
455 | |
456 | if ((buf == NULL) || (cur == NULL)) |
457 | return(-1); |
458 | |
459 | xmlInitParser(); |
460 | buffer = xmlBufFromBuffer(buf); |
461 | if (buffer == NULL) |
462 | return(-1); |
463 | |
464 | ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1); |
465 | |
466 | xmlBufBackToBuffer(buffer); |
467 | |
468 | if (ret > INT_MAX) |
469 | return(-1); |
470 | return((int) ret); |
471 | } |
472 | |
473 | /** |
474 | * htmlNodeDumpFileFormat: |
475 | * @out: the FILE pointer |
476 | * @doc: the document |
477 | * @cur: the current node |
478 | * @encoding: the document encoding |
479 | * @format: should formatting spaces been added |
480 | * |
481 | * Dump an HTML node, recursive behaviour,children are printed too. |
482 | * |
483 | * TODO: if encoding == NULL try to save in the doc encoding |
484 | * |
485 | * returns: the number of byte written or -1 in case of failure. |
486 | */ |
487 | int |
488 | htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, |
489 | xmlNodePtr cur, const char *encoding, int format) { |
490 | xmlOutputBufferPtr buf; |
491 | xmlCharEncodingHandlerPtr handler = NULL; |
492 | int ret; |
493 | |
494 | xmlInitParser(); |
495 | |
496 | if (encoding != NULL) { |
497 | xmlCharEncoding enc; |
498 | |
499 | enc = xmlParseCharEncoding(encoding); |
500 | if (enc != XML_CHAR_ENCODING_UTF8) { |
501 | handler = xmlFindCharEncodingHandler(encoding); |
502 | if (handler == NULL) |
503 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
504 | } |
505 | } |
506 | |
507 | /* |
508 | * Fallback to HTML or ASCII when the encoding is unspecified |
509 | */ |
510 | if (handler == NULL) |
511 | handler = xmlFindCharEncodingHandler("HTML" ); |
512 | if (handler == NULL) |
513 | handler = xmlFindCharEncodingHandler("ascii" ); |
514 | |
515 | /* |
516 | * save the content to a temp buffer. |
517 | */ |
518 | buf = xmlOutputBufferCreateFile(out, handler); |
519 | if (buf == NULL) return(0); |
520 | |
521 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
522 | |
523 | ret = xmlOutputBufferClose(buf); |
524 | return(ret); |
525 | } |
526 | |
527 | /** |
528 | * htmlNodeDumpFile: |
529 | * @out: the FILE pointer |
530 | * @doc: the document |
531 | * @cur: the current node |
532 | * |
533 | * Dump an HTML node, recursive behaviour,children are printed too, |
534 | * and formatting returns are added. |
535 | */ |
536 | void |
537 | htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { |
538 | htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); |
539 | } |
540 | |
541 | /** |
542 | * htmlDocDumpMemoryFormat: |
543 | * @cur: the document |
544 | * @mem: OUT: the memory pointer |
545 | * @size: OUT: the memory length |
546 | * @format: should formatting spaces been added |
547 | * |
548 | * Dump an HTML document in memory and return the xmlChar * and it's size. |
549 | * It's up to the caller to free the memory. |
550 | */ |
551 | void |
552 | htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { |
553 | xmlOutputBufferPtr buf; |
554 | xmlCharEncodingHandlerPtr handler = NULL; |
555 | const char *encoding; |
556 | |
557 | xmlInitParser(); |
558 | |
559 | if ((mem == NULL) || (size == NULL)) |
560 | return; |
561 | if (cur == NULL) { |
562 | *mem = NULL; |
563 | *size = 0; |
564 | return; |
565 | } |
566 | |
567 | encoding = (const char *) htmlGetMetaEncoding(cur); |
568 | |
569 | if (encoding != NULL) { |
570 | xmlCharEncoding enc; |
571 | |
572 | enc = xmlParseCharEncoding(encoding); |
573 | if (enc != cur->charset) { |
574 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
575 | /* |
576 | * Not supported yet |
577 | */ |
578 | *mem = NULL; |
579 | *size = 0; |
580 | return; |
581 | } |
582 | |
583 | handler = xmlFindCharEncodingHandler(encoding); |
584 | if (handler == NULL) |
585 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
586 | |
587 | } else { |
588 | handler = xmlFindCharEncodingHandler(encoding); |
589 | } |
590 | } |
591 | |
592 | /* |
593 | * Fallback to HTML or ASCII when the encoding is unspecified |
594 | */ |
595 | if (handler == NULL) |
596 | handler = xmlFindCharEncodingHandler("HTML" ); |
597 | if (handler == NULL) |
598 | handler = xmlFindCharEncodingHandler("ascii" ); |
599 | |
600 | buf = xmlAllocOutputBufferInternal(handler); |
601 | if (buf == NULL) { |
602 | *mem = NULL; |
603 | *size = 0; |
604 | return; |
605 | } |
606 | |
607 | htmlDocContentDumpFormatOutput(buf, cur, NULL, format); |
608 | |
609 | xmlOutputBufferFlush(buf); |
610 | if (buf->conv != NULL) { |
611 | *size = xmlBufUse(buf->conv); |
612 | *mem = xmlStrndup(xmlBufContent(buf->conv), *size); |
613 | } else { |
614 | *size = xmlBufUse(buf->buffer); |
615 | *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); |
616 | } |
617 | (void)xmlOutputBufferClose(buf); |
618 | } |
619 | |
620 | /** |
621 | * htmlDocDumpMemory: |
622 | * @cur: the document |
623 | * @mem: OUT: the memory pointer |
624 | * @size: OUT: the memory length |
625 | * |
626 | * Dump an HTML document in memory and return the xmlChar * and it's size. |
627 | * It's up to the caller to free the memory. |
628 | */ |
629 | void |
630 | htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { |
631 | htmlDocDumpMemoryFormat(cur, mem, size, 1); |
632 | } |
633 | |
634 | |
635 | /************************************************************************ |
636 | * * |
637 | * Dumping HTML tree content to an I/O output buffer * |
638 | * * |
639 | ************************************************************************/ |
640 | |
641 | void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); |
642 | |
643 | /** |
644 | * htmlDtdDumpOutput: |
645 | * @buf: the HTML buffer output |
646 | * @doc: the document |
647 | * @encoding: the encoding string |
648 | * |
649 | * TODO: check whether encoding is needed |
650 | * |
651 | * Dump the HTML document DTD, if any. |
652 | */ |
653 | static void |
654 | htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
655 | const char *encoding ATTRIBUTE_UNUSED) { |
656 | xmlDtdPtr cur = doc->intSubset; |
657 | |
658 | if (cur == NULL) { |
659 | htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL); |
660 | return; |
661 | } |
662 | xmlOutputBufferWriteString(buf, "<!DOCTYPE " ); |
663 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
664 | if (cur->ExternalID != NULL) { |
665 | xmlOutputBufferWriteString(buf, " PUBLIC " ); |
666 | xmlBufWriteQuotedString(buf->buffer, cur->ExternalID); |
667 | if (cur->SystemID != NULL) { |
668 | xmlOutputBufferWriteString(buf, " " ); |
669 | xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
670 | } |
671 | } else if (cur->SystemID != NULL && |
672 | xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat" )) { |
673 | xmlOutputBufferWriteString(buf, " SYSTEM " ); |
674 | xmlBufWriteQuotedString(buf->buffer, cur->SystemID); |
675 | } |
676 | xmlOutputBufferWriteString(buf, ">\n" ); |
677 | } |
678 | |
679 | /** |
680 | * htmlAttrDumpOutput: |
681 | * @buf: the HTML buffer output |
682 | * @doc: the document |
683 | * @cur: the attribute pointer |
684 | * @encoding: the encoding string |
685 | * |
686 | * Dump an HTML attribute |
687 | */ |
688 | static void |
689 | htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, |
690 | const char *encoding ATTRIBUTE_UNUSED) { |
691 | xmlChar *value; |
692 | |
693 | /* |
694 | * The html output method should not escape a & character |
695 | * occurring in an attribute value immediately followed by |
696 | * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). |
697 | * This is implemented in xmlEncodeEntitiesReentrant |
698 | */ |
699 | |
700 | if (cur == NULL) { |
701 | return; |
702 | } |
703 | xmlOutputBufferWriteString(buf, " " ); |
704 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
705 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
706 | xmlOutputBufferWriteString(buf, ":" ); |
707 | } |
708 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
709 | if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { |
710 | value = xmlNodeListGetString(doc, cur->children, 0); |
711 | if (value) { |
712 | xmlOutputBufferWriteString(buf, "=" ); |
713 | if ((cur->ns == NULL) && (cur->parent != NULL) && |
714 | (cur->parent->ns == NULL) && |
715 | ((!xmlStrcasecmp(cur->name, BAD_CAST "href" )) || |
716 | (!xmlStrcasecmp(cur->name, BAD_CAST "action" )) || |
717 | (!xmlStrcasecmp(cur->name, BAD_CAST "src" )) || |
718 | ((!xmlStrcasecmp(cur->name, BAD_CAST "name" )) && |
719 | (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a" ))))) { |
720 | xmlChar *tmp = value; |
721 | /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ |
722 | xmlBufCCat(buf->buffer, "\"" ); |
723 | |
724 | while (IS_BLANK_CH(*tmp)) tmp++; |
725 | |
726 | /* URI Escape everything, except server side includes. */ |
727 | for ( ; ; ) { |
728 | xmlChar *escaped; |
729 | xmlChar endChar; |
730 | xmlChar *end = NULL; |
731 | xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--" ); |
732 | if (start != NULL) { |
733 | end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->" ); |
734 | if (end != NULL) { |
735 | *start = '\0'; |
736 | } |
737 | } |
738 | |
739 | /* Escape the whole string, or until start (set to '\0'). */ |
740 | escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+" ); |
741 | if (escaped != NULL) { |
742 | xmlBufCat(buf->buffer, escaped); |
743 | xmlFree(escaped); |
744 | } else { |
745 | xmlBufCat(buf->buffer, tmp); |
746 | } |
747 | |
748 | if (end == NULL) { /* Everything has been written. */ |
749 | break; |
750 | } |
751 | |
752 | /* Do not escape anything within server side includes. */ |
753 | *start = '<'; /* Restore the first character of "<!--". */ |
754 | end += 3; /* strlen("-->") */ |
755 | endChar = *end; |
756 | *end = '\0'; |
757 | xmlBufCat(buf->buffer, start); |
758 | *end = endChar; |
759 | tmp = end; |
760 | } |
761 | |
762 | xmlBufCCat(buf->buffer, "\"" ); |
763 | } else { |
764 | xmlBufWriteQuotedString(buf->buffer, value); |
765 | } |
766 | xmlFree(value); |
767 | } else { |
768 | xmlOutputBufferWriteString(buf, "=\"\"" ); |
769 | } |
770 | } |
771 | } |
772 | |
773 | /** |
774 | * htmlAttrListDumpOutput: |
775 | * @buf: the HTML buffer output |
776 | * @doc: the document |
777 | * @cur: the first attribute pointer |
778 | * @encoding: the encoding string |
779 | * |
780 | * Dump a list of HTML attributes |
781 | */ |
782 | static void |
783 | htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { |
784 | if (cur == NULL) { |
785 | return; |
786 | } |
787 | while (cur != NULL) { |
788 | htmlAttrDumpOutput(buf, doc, cur, encoding); |
789 | cur = cur->next; |
790 | } |
791 | } |
792 | |
793 | |
794 | |
795 | /** |
796 | * htmlNodeListDumpOutput: |
797 | * @buf: the HTML buffer output |
798 | * @doc: the document |
799 | * @cur: the first node |
800 | * @encoding: the encoding string |
801 | * @format: should formatting spaces been added |
802 | * |
803 | * Dump an HTML node list, recursive behaviour,children are printed too. |
804 | */ |
805 | static void |
806 | htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
807 | xmlNodePtr cur, const char *encoding, int format) { |
808 | if (cur == NULL) { |
809 | return; |
810 | } |
811 | while (cur != NULL) { |
812 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); |
813 | cur = cur->next; |
814 | } |
815 | } |
816 | |
817 | /** |
818 | * htmlNodeDumpFormatOutput: |
819 | * @buf: the HTML buffer output |
820 | * @doc: the document |
821 | * @cur: the current node |
822 | * @encoding: the encoding string |
823 | * @format: should formatting spaces been added |
824 | * |
825 | * Dump an HTML node, recursive behaviour,children are printed too. |
826 | */ |
827 | void |
828 | htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
829 | xmlNodePtr cur, const char *encoding, int format) { |
830 | const htmlElemDesc * info; |
831 | |
832 | xmlInitParser(); |
833 | |
834 | if ((cur == NULL) || (buf == NULL)) { |
835 | return; |
836 | } |
837 | /* |
838 | * Special cases. |
839 | */ |
840 | if (cur->type == XML_DTD_NODE) |
841 | return; |
842 | if ((cur->type == XML_HTML_DOCUMENT_NODE) || |
843 | (cur->type == XML_DOCUMENT_NODE)){ |
844 | htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); |
845 | return; |
846 | } |
847 | if (cur->type == XML_ATTRIBUTE_NODE) { |
848 | htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); |
849 | return; |
850 | } |
851 | if (cur->type == HTML_TEXT_NODE) { |
852 | if (cur->content != NULL) { |
853 | if (((cur->name == (const xmlChar *)xmlStringText) || |
854 | (cur->name != (const xmlChar *)xmlStringTextNoenc)) && |
855 | ((cur->parent == NULL) || |
856 | ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script" )) && |
857 | (xmlStrcasecmp(cur->parent->name, BAD_CAST "style" ))))) { |
858 | xmlChar *buffer; |
859 | |
860 | buffer = xmlEncodeEntitiesReentrant(doc, cur->content); |
861 | if (buffer != NULL) { |
862 | xmlOutputBufferWriteString(buf, (const char *)buffer); |
863 | xmlFree(buffer); |
864 | } |
865 | } else { |
866 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
867 | } |
868 | } |
869 | return; |
870 | } |
871 | if (cur->type == HTML_COMMENT_NODE) { |
872 | if (cur->content != NULL) { |
873 | xmlOutputBufferWriteString(buf, "<!--" ); |
874 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
875 | xmlOutputBufferWriteString(buf, "-->" ); |
876 | } |
877 | return; |
878 | } |
879 | if (cur->type == HTML_PI_NODE) { |
880 | if (cur->name == NULL) |
881 | return; |
882 | xmlOutputBufferWriteString(buf, "<?" ); |
883 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
884 | if (cur->content != NULL) { |
885 | xmlOutputBufferWriteString(buf, " " ); |
886 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
887 | } |
888 | xmlOutputBufferWriteString(buf, ">" ); |
889 | return; |
890 | } |
891 | if (cur->type == HTML_ENTITY_REF_NODE) { |
892 | xmlOutputBufferWriteString(buf, "&" ); |
893 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
894 | xmlOutputBufferWriteString(buf, ";" ); |
895 | return; |
896 | } |
897 | if (cur->type == HTML_PRESERVE_NODE) { |
898 | if (cur->content != NULL) { |
899 | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
900 | } |
901 | return; |
902 | } |
903 | |
904 | /* |
905 | * Get specific HTML info for that node. |
906 | */ |
907 | if (cur->ns == NULL) |
908 | info = htmlTagLookup(cur->name); |
909 | else |
910 | info = NULL; |
911 | |
912 | xmlOutputBufferWriteString(buf, "<" ); |
913 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
914 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
915 | xmlOutputBufferWriteString(buf, ":" ); |
916 | } |
917 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
918 | if (cur->nsDef) |
919 | xmlNsListDumpOutput(buf, cur->nsDef); |
920 | if (cur->properties != NULL) |
921 | htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); |
922 | |
923 | if ((info != NULL) && (info->empty)) { |
924 | xmlOutputBufferWriteString(buf, ">" ); |
925 | if ((format) && (!info->isinline) && (cur->next != NULL)) { |
926 | if ((cur->next->type != HTML_TEXT_NODE) && |
927 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
928 | (cur->parent != NULL) && |
929 | (cur->parent->name != NULL) && |
930 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
931 | xmlOutputBufferWriteString(buf, "\n" ); |
932 | } |
933 | return; |
934 | } |
935 | if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && |
936 | (cur->children == NULL)) { |
937 | if ((info != NULL) && (info->saveEndTag != 0) && |
938 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html" )) && |
939 | (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body" ))) { |
940 | xmlOutputBufferWriteString(buf, ">" ); |
941 | } else { |
942 | xmlOutputBufferWriteString(buf, "></" ); |
943 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
944 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
945 | xmlOutputBufferWriteString(buf, ":" ); |
946 | } |
947 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
948 | xmlOutputBufferWriteString(buf, ">" ); |
949 | } |
950 | if ((format) && (cur->next != NULL) && |
951 | (info != NULL) && (!info->isinline)) { |
952 | if ((cur->next->type != HTML_TEXT_NODE) && |
953 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
954 | (cur->parent != NULL) && |
955 | (cur->parent->name != NULL) && |
956 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
957 | xmlOutputBufferWriteString(buf, "\n" ); |
958 | } |
959 | return; |
960 | } |
961 | xmlOutputBufferWriteString(buf, ">" ); |
962 | if ((cur->type != XML_ELEMENT_NODE) && |
963 | (cur->content != NULL)) { |
964 | /* |
965 | * Uses the OutputBuffer property to automatically convert |
966 | * invalids to charrefs |
967 | */ |
968 | |
969 | xmlOutputBufferWriteString(buf, (const char *) cur->content); |
970 | } |
971 | if (cur->children != NULL) { |
972 | if ((format) && (info != NULL) && (!info->isinline) && |
973 | (cur->children->type != HTML_TEXT_NODE) && |
974 | (cur->children->type != HTML_ENTITY_REF_NODE) && |
975 | (cur->children != cur->last) && |
976 | (cur->name != NULL) && |
977 | (cur->name[0] != 'p')) /* p, pre, param */ |
978 | xmlOutputBufferWriteString(buf, "\n" ); |
979 | htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); |
980 | if ((format) && (info != NULL) && (!info->isinline) && |
981 | (cur->last->type != HTML_TEXT_NODE) && |
982 | (cur->last->type != HTML_ENTITY_REF_NODE) && |
983 | (cur->children != cur->last) && |
984 | (cur->name != NULL) && |
985 | (cur->name[0] != 'p')) /* p, pre, param */ |
986 | xmlOutputBufferWriteString(buf, "\n" ); |
987 | } |
988 | xmlOutputBufferWriteString(buf, "</" ); |
989 | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
990 | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
991 | xmlOutputBufferWriteString(buf, ":" ); |
992 | } |
993 | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
994 | xmlOutputBufferWriteString(buf, ">" ); |
995 | if ((format) && (info != NULL) && (!info->isinline) && |
996 | (cur->next != NULL)) { |
997 | if ((cur->next->type != HTML_TEXT_NODE) && |
998 | (cur->next->type != HTML_ENTITY_REF_NODE) && |
999 | (cur->parent != NULL) && |
1000 | (cur->parent->name != NULL) && |
1001 | (cur->parent->name[0] != 'p')) /* p, pre, param */ |
1002 | xmlOutputBufferWriteString(buf, "\n" ); |
1003 | } |
1004 | } |
1005 | |
1006 | /** |
1007 | * htmlNodeDumpOutput: |
1008 | * @buf: the HTML buffer output |
1009 | * @doc: the document |
1010 | * @cur: the current node |
1011 | * @encoding: the encoding string |
1012 | * |
1013 | * Dump an HTML node, recursive behaviour,children are printed too, |
1014 | * and formatting returns/spaces are added. |
1015 | */ |
1016 | void |
1017 | htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
1018 | xmlNodePtr cur, const char *encoding) { |
1019 | htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); |
1020 | } |
1021 | |
1022 | /** |
1023 | * htmlDocContentDumpFormatOutput: |
1024 | * @buf: the HTML buffer output |
1025 | * @cur: the document |
1026 | * @encoding: the encoding string |
1027 | * @format: should formatting spaces been added |
1028 | * |
1029 | * Dump an HTML document. |
1030 | */ |
1031 | void |
1032 | htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
1033 | const char *encoding, int format) { |
1034 | int type; |
1035 | |
1036 | xmlInitParser(); |
1037 | |
1038 | if ((buf == NULL) || (cur == NULL)) |
1039 | return; |
1040 | |
1041 | /* |
1042 | * force to output the stuff as HTML, especially for entities |
1043 | */ |
1044 | type = cur->type; |
1045 | cur->type = XML_HTML_DOCUMENT_NODE; |
1046 | if (cur->intSubset != NULL) { |
1047 | htmlDtdDumpOutput(buf, cur, NULL); |
1048 | } |
1049 | if (cur->children != NULL) { |
1050 | htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); |
1051 | } |
1052 | xmlOutputBufferWriteString(buf, "\n" ); |
1053 | cur->type = (xmlElementType) type; |
1054 | } |
1055 | |
1056 | /** |
1057 | * htmlDocContentDumpOutput: |
1058 | * @buf: the HTML buffer output |
1059 | * @cur: the document |
1060 | * @encoding: the encoding string |
1061 | * |
1062 | * Dump an HTML document. Formating return/spaces are added. |
1063 | */ |
1064 | void |
1065 | htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, |
1066 | const char *encoding) { |
1067 | htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); |
1068 | } |
1069 | |
1070 | /************************************************************************ |
1071 | * * |
1072 | * Saving functions front-ends * |
1073 | * * |
1074 | ************************************************************************/ |
1075 | |
1076 | /** |
1077 | * htmlDocDump: |
1078 | * @f: the FILE* |
1079 | * @cur: the document |
1080 | * |
1081 | * Dump an HTML document to an open FILE. |
1082 | * |
1083 | * returns: the number of byte written or -1 in case of failure. |
1084 | */ |
1085 | int |
1086 | htmlDocDump(FILE *f, xmlDocPtr cur) { |
1087 | xmlOutputBufferPtr buf; |
1088 | xmlCharEncodingHandlerPtr handler = NULL; |
1089 | const char *encoding; |
1090 | int ret; |
1091 | |
1092 | xmlInitParser(); |
1093 | |
1094 | if ((cur == NULL) || (f == NULL)) { |
1095 | return(-1); |
1096 | } |
1097 | |
1098 | encoding = (const char *) htmlGetMetaEncoding(cur); |
1099 | |
1100 | if (encoding != NULL) { |
1101 | xmlCharEncoding enc; |
1102 | |
1103 | enc = xmlParseCharEncoding(encoding); |
1104 | if (enc != cur->charset) { |
1105 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1106 | /* |
1107 | * Not supported yet |
1108 | */ |
1109 | return(-1); |
1110 | } |
1111 | |
1112 | handler = xmlFindCharEncodingHandler(encoding); |
1113 | if (handler == NULL) |
1114 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1115 | } else { |
1116 | handler = xmlFindCharEncodingHandler(encoding); |
1117 | } |
1118 | } |
1119 | |
1120 | /* |
1121 | * Fallback to HTML or ASCII when the encoding is unspecified |
1122 | */ |
1123 | if (handler == NULL) |
1124 | handler = xmlFindCharEncodingHandler("HTML" ); |
1125 | if (handler == NULL) |
1126 | handler = xmlFindCharEncodingHandler("ascii" ); |
1127 | |
1128 | buf = xmlOutputBufferCreateFile(f, handler); |
1129 | if (buf == NULL) return(-1); |
1130 | htmlDocContentDumpOutput(buf, cur, NULL); |
1131 | |
1132 | ret = xmlOutputBufferClose(buf); |
1133 | return(ret); |
1134 | } |
1135 | |
1136 | /** |
1137 | * htmlSaveFile: |
1138 | * @filename: the filename (or URL) |
1139 | * @cur: the document |
1140 | * |
1141 | * Dump an HTML document to a file. If @filename is "-" the stdout file is |
1142 | * used. |
1143 | * returns: the number of byte written or -1 in case of failure. |
1144 | */ |
1145 | int |
1146 | htmlSaveFile(const char *filename, xmlDocPtr cur) { |
1147 | xmlOutputBufferPtr buf; |
1148 | xmlCharEncodingHandlerPtr handler = NULL; |
1149 | const char *encoding; |
1150 | int ret; |
1151 | |
1152 | if ((cur == NULL) || (filename == NULL)) |
1153 | return(-1); |
1154 | |
1155 | xmlInitParser(); |
1156 | |
1157 | encoding = (const char *) htmlGetMetaEncoding(cur); |
1158 | |
1159 | if (encoding != NULL) { |
1160 | xmlCharEncoding enc; |
1161 | |
1162 | enc = xmlParseCharEncoding(encoding); |
1163 | if (enc != cur->charset) { |
1164 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1165 | /* |
1166 | * Not supported yet |
1167 | */ |
1168 | return(-1); |
1169 | } |
1170 | |
1171 | handler = xmlFindCharEncodingHandler(encoding); |
1172 | if (handler == NULL) |
1173 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1174 | } |
1175 | } |
1176 | |
1177 | /* |
1178 | * Fallback to HTML or ASCII when the encoding is unspecified |
1179 | */ |
1180 | if (handler == NULL) |
1181 | handler = xmlFindCharEncodingHandler("HTML" ); |
1182 | if (handler == NULL) |
1183 | handler = xmlFindCharEncodingHandler("ascii" ); |
1184 | |
1185 | /* |
1186 | * save the content to a temp buffer. |
1187 | */ |
1188 | buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); |
1189 | if (buf == NULL) return(0); |
1190 | |
1191 | htmlDocContentDumpOutput(buf, cur, NULL); |
1192 | |
1193 | ret = xmlOutputBufferClose(buf); |
1194 | return(ret); |
1195 | } |
1196 | |
1197 | /** |
1198 | * htmlSaveFileFormat: |
1199 | * @filename: the filename |
1200 | * @cur: the document |
1201 | * @format: should formatting spaces been added |
1202 | * @encoding: the document encoding |
1203 | * |
1204 | * Dump an HTML document to a file using a given encoding. |
1205 | * |
1206 | * returns: the number of byte written or -1 in case of failure. |
1207 | */ |
1208 | int |
1209 | htmlSaveFileFormat(const char *filename, xmlDocPtr cur, |
1210 | const char *encoding, int format) { |
1211 | xmlOutputBufferPtr buf; |
1212 | xmlCharEncodingHandlerPtr handler = NULL; |
1213 | int ret; |
1214 | |
1215 | if ((cur == NULL) || (filename == NULL)) |
1216 | return(-1); |
1217 | |
1218 | xmlInitParser(); |
1219 | |
1220 | if (encoding != NULL) { |
1221 | xmlCharEncoding enc; |
1222 | |
1223 | enc = xmlParseCharEncoding(encoding); |
1224 | if (enc != cur->charset) { |
1225 | if (cur->charset != XML_CHAR_ENCODING_UTF8) { |
1226 | /* |
1227 | * Not supported yet |
1228 | */ |
1229 | return(-1); |
1230 | } |
1231 | |
1232 | handler = xmlFindCharEncodingHandler(encoding); |
1233 | if (handler == NULL) |
1234 | htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding); |
1235 | } |
1236 | htmlSetMetaEncoding(cur, (const xmlChar *) encoding); |
1237 | } else { |
1238 | htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8" ); |
1239 | } |
1240 | |
1241 | /* |
1242 | * Fallback to HTML or ASCII when the encoding is unspecified |
1243 | */ |
1244 | if (handler == NULL) |
1245 | handler = xmlFindCharEncodingHandler("HTML" ); |
1246 | if (handler == NULL) |
1247 | handler = xmlFindCharEncodingHandler("ascii" ); |
1248 | |
1249 | /* |
1250 | * save the content to a temp buffer. |
1251 | */ |
1252 | buf = xmlOutputBufferCreateFilename(filename, handler, 0); |
1253 | if (buf == NULL) return(0); |
1254 | |
1255 | htmlDocContentDumpFormatOutput(buf, cur, encoding, format); |
1256 | |
1257 | ret = xmlOutputBufferClose(buf); |
1258 | return(ret); |
1259 | } |
1260 | |
1261 | /** |
1262 | * htmlSaveFileEnc: |
1263 | * @filename: the filename |
1264 | * @cur: the document |
1265 | * @encoding: the document encoding |
1266 | * |
1267 | * Dump an HTML document to a file using a given encoding |
1268 | * and formatting returns/spaces are added. |
1269 | * |
1270 | * returns: the number of byte written or -1 in case of failure. |
1271 | */ |
1272 | int |
1273 | htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { |
1274 | return(htmlSaveFileFormat(filename, cur, encoding, 1)); |
1275 | } |
1276 | |
1277 | #endif /* LIBXML_OUTPUT_ENABLED */ |
1278 | |
1279 | #define bottom_HTMLtree |
1280 | #include "elfgcchack.h" |
1281 | #endif /* LIBXML_HTML_ENABLED */ |
1282 | |