| 1 | /* $Id$ $Revision$ */ |
| 2 | /* vim:set shiftwidth=4 ts=8: */ |
| 3 | |
| 4 | /************************************************************************* |
| 5 | * Copyright (c) 2011 AT&T Intellectual Property |
| 6 | * All rights reserved. This program and the accompanying materials |
| 7 | * are made available under the terms of the Eclipse Public License v1.0 |
| 8 | * which accompanies this distribution, and is available at |
| 9 | * http://www.eclipse.org/legal/epl-v10.html |
| 10 | * |
| 11 | * Contributors: See CVS logs. Details at http://www.graphviz.org/ |
| 12 | *************************************************************************/ |
| 13 | |
| 14 | |
| 15 | #include "render.h" |
| 16 | #include "htmltable.h" |
| 17 | #include "htmlparse.h" |
| 18 | #include "htmllex.h" |
| 19 | #include "cdt.h" |
| 20 | #include <ctype.h> |
| 21 | |
| 22 | #ifdef HAVE_EXPAT |
| 23 | #include <expat.h> |
| 24 | #endif |
| 25 | |
| 26 | #ifndef XML_STATUS_ERROR |
| 27 | #define XML_STATUS_ERROR 0 |
| 28 | #endif |
| 29 | |
| 30 | typedef struct { |
| 31 | #ifdef HAVE_EXPAT |
| 32 | XML_Parser parser; |
| 33 | #endif |
| 34 | char* ptr; /* input source */ |
| 35 | int tok; /* token type */ |
| 36 | agxbuf* xb; /* buffer to gather T_string data */ |
| 37 | agxbuf lb; /* buffer for translating lexical data */ |
| 38 | char warn; /* set if warning given */ |
| 39 | char error; /* set if error given */ |
| 40 | char inCell; /* set if in TD to allow T_string */ |
| 41 | char mode; /* for handling artificial <HTML>..</HTML> */ |
| 42 | char *currtok; /* for error reporting */ |
| 43 | char *prevtok; /* for error reporting */ |
| 44 | int currtoklen; |
| 45 | int prevtoklen; |
| 46 | } lexstate_t; |
| 47 | static lexstate_t state; |
| 48 | |
| 49 | /* error_context: |
| 50 | * Print the last 2 "token"s seen. |
| 51 | */ |
| 52 | static void error_context(void) |
| 53 | { |
| 54 | agxbclear(state.xb); |
| 55 | if (state.prevtoklen > 0) |
| 56 | agxbput_n(state.xb, state.prevtok, state.prevtoklen); |
| 57 | agxbput_n(state.xb, state.currtok, state.currtoklen); |
| 58 | agerr(AGPREV, "... %s ...\n" , agxbuse(state.xb)); |
| 59 | } |
| 60 | |
| 61 | /* htmlerror: |
| 62 | * yyerror - called by yacc output |
| 63 | */ |
| 64 | void htmlerror(const char *msg) |
| 65 | { |
| 66 | if (state.error) |
| 67 | return; |
| 68 | state.error = 1; |
| 69 | agerr(AGERR, "%s in line %d \n" , msg, htmllineno()); |
| 70 | error_context(); |
| 71 | } |
| 72 | |
| 73 | #ifdef HAVE_EXPAT |
| 74 | /* lexerror: |
| 75 | * called by lexer when unknown <..> is found. |
| 76 | */ |
| 77 | static void lexerror(const char *name) |
| 78 | { |
| 79 | state.tok = T_error; |
| 80 | state.error = 1; |
| 81 | agerr(AGERR, "Unknown HTML element <%s> on line %d \n" , |
| 82 | name, htmllineno()); |
| 83 | } |
| 84 | |
/* attrFn: an attribute action; receives the object being filled in and
 *         the attribute value, returns non-zero if a warning was issued.
 * bcmpfn: comparison signature expected by bsearch.
 */
typedef int (*attrFn) (void *, char *);
typedef int (*bcmpfn) (const void *, const void *);

/* Limits of the narrow integer types used to range-check attribute values:
 * MAX_CHAR is an all-ones unsigned char shifted right one bit,
 * MIN_CHAR is its complement narrowed to signed char,
 * MAX_UCHAR and MAX_USHORT are the all-ones values of their types.
 */
#define MAX_CHAR (((unsigned char)(~0)) >> 1)
#define MIN_CHAR ((signed char)(~MAX_CHAR))
#define MAX_UCHAR ((unsigned char)(~0))
#define MAX_USHORT ((unsigned short)(~0))

/* Mechanism for automatically processing attributes:
 * one table entry per recognized attribute name.
 */
typedef struct {
    char *name;			/* attribute name */
    attrFn action;		/* action to perform if name matches */
} attr_item;

/* Size of one table entry; used as element size for bsearch and
 * to compute table lengths.
 */
#define ISIZE (sizeof(attr_item))
| 100 | |
| 101 | /* icmp: |
| 102 | * Compare two attr_item. Used in bsearch |
| 103 | */ |
| 104 | static int icmp(attr_item * i, attr_item * j) |
| 105 | { |
| 106 | return strcasecmp(i->name, j->name); |
| 107 | } |
| 108 | |
| 109 | static int bgcolorfn(htmldata_t * p, char *v) |
| 110 | { |
| 111 | p->bgcolor = strdup(v); |
| 112 | return 0; |
| 113 | } |
| 114 | |
| 115 | static int pencolorfn(htmldata_t * p, char *v) |
| 116 | { |
| 117 | p->pencolor = strdup(v); |
| 118 | return 0; |
| 119 | } |
| 120 | |
| 121 | static int hreffn(htmldata_t * p, char *v) |
| 122 | { |
| 123 | p->href = strdup(v); |
| 124 | return 0; |
| 125 | } |
| 126 | |
| 127 | static int sidesfn(htmldata_t * p, char *v) |
| 128 | { |
| 129 | unsigned short flags = 0; |
| 130 | char c; |
| 131 | |
| 132 | while ((c = *v++)) { |
| 133 | switch (tolower(c)) { |
| 134 | case 'l' : |
| 135 | flags |= BORDER_LEFT; |
| 136 | break; |
| 137 | case 't' : |
| 138 | flags |= BORDER_TOP; |
| 139 | break; |
| 140 | case 'r' : |
| 141 | flags |= BORDER_RIGHT; |
| 142 | break; |
| 143 | case 'b' : |
| 144 | flags |= BORDER_BOTTOM; |
| 145 | break; |
| 146 | default : |
| 147 | agerr(AGWARN, "Unrecognized character '%c' (%d) in sides attribute\n" , c, c); |
| 148 | break; |
| 149 | } |
| 150 | } |
| 151 | if (flags != BORDER_MASK) |
| 152 | p->flags |= flags; |
| 153 | return 0; |
| 154 | } |
| 155 | |
| 156 | static int titlefn(htmldata_t * p, char *v) |
| 157 | { |
| 158 | p->title = strdup(v); |
| 159 | return 0; |
| 160 | } |
| 161 | |
| 162 | static int portfn(htmldata_t * p, char *v) |
| 163 | { |
| 164 | p->port = strdup(v); |
| 165 | return 0; |
| 166 | } |
| 167 | |
| 168 | #define DELIM " ," |
| 169 | |
| 170 | static int stylefn(htmldata_t * p, char *v) |
| 171 | { |
| 172 | int rv = 0; |
| 173 | char c; |
| 174 | char* tk; |
| 175 | char* buf = strdup (v); |
| 176 | for (tk = strtok (buf, DELIM); tk; tk = strtok (NULL, DELIM)) { |
| 177 | c = (char) toupper(*tk); |
| 178 | if (c == 'R') { |
| 179 | if (!strcasecmp(tk + 1, "OUNDED" )) p->style |= ROUNDED; |
| 180 | else if (!strcasecmp(tk + 1, "ADIAL" )) p->style |= RADIAL; |
| 181 | else { |
| 182 | agerr(AGWARN, "Illegal value %s for STYLE - ignored\n" , tk); |
| 183 | rv = 1; |
| 184 | } |
| 185 | } |
| 186 | else if(!strcasecmp(tk,"SOLID" )) p->style &= ~(DOTTED|DASHED); |
| 187 | else if(!strcasecmp(tk,"INVISIBLE" ) || !strcasecmp(tk,"INVIS" )) p->style |= INVISIBLE; |
| 188 | else if(!strcasecmp(tk,"DOTTED" )) p->style |= DOTTED; |
| 189 | else if(!strcasecmp(tk,"DASHED" )) p->style |= DASHED; |
| 190 | else { |
| 191 | agerr(AGWARN, "Illegal value %s for STYLE - ignored\n" , tk); |
| 192 | rv = 1; |
| 193 | } |
| 194 | } |
| 195 | free (buf); |
| 196 | return rv; |
| 197 | } |
| 198 | |
| 199 | static int targetfn(htmldata_t * p, char *v) |
| 200 | { |
| 201 | p->target = strdup(v); |
| 202 | return 0; |
| 203 | } |
| 204 | |
| 205 | static int idfn(htmldata_t * p, char *v) |
| 206 | { |
| 207 | p->id = strdup(v); |
| 208 | return 0; |
| 209 | } |
| 210 | |
| 211 | |
| 212 | /* doInt: |
| 213 | * Scan v for integral value. Check that |
| 214 | * the value is >= min and <= max. Return value in ul. |
| 215 | * String s is name of value. |
| 216 | * Return 0 if okay; 1 otherwise. |
| 217 | */ |
| 218 | static int doInt(char *v, char *s, int min, int max, long *ul) |
| 219 | { |
| 220 | int rv = 0; |
| 221 | char *ep; |
| 222 | long b = strtol(v, &ep, 10); |
| 223 | |
| 224 | if (ep == v) { |
| 225 | agerr(AGWARN, "Improper %s value %s - ignored" , s, v); |
| 226 | rv = 1; |
| 227 | } else if (b > max) { |
| 228 | agerr(AGWARN, "%s value %s > %d - too large - ignored" , s, v, max); |
| 229 | rv = 1; |
| 230 | } else if (b < min) { |
| 231 | agerr(AGWARN, "%s value %s < %d - too small - ignored" , s, v, min); |
| 232 | rv = 1; |
| 233 | } else |
| 234 | *ul = b; |
| 235 | return rv; |
| 236 | } |
| 237 | |
| 238 | |
| 239 | static int gradientanglefn(htmldata_t * p, char *v) |
| 240 | { |
| 241 | long u; |
| 242 | |
| 243 | if (doInt(v, "GRADIENTANGLE" , 0, 360, &u)) |
| 244 | return 1; |
| 245 | p->gradientangle = (unsigned short) u; |
| 246 | return 0; |
| 247 | } |
| 248 | |
| 249 | |
| 250 | static int borderfn(htmldata_t * p, char *v) |
| 251 | { |
| 252 | long u; |
| 253 | |
| 254 | if (doInt(v, "BORDER" , 0, MAX_UCHAR, &u)) |
| 255 | return 1; |
| 256 | p->border = (unsigned char) u; |
| 257 | p->flags |= BORDER_SET; |
| 258 | return 0; |
| 259 | } |
| 260 | |
| 261 | static int cellpaddingfn(htmldata_t * p, char *v) |
| 262 | { |
| 263 | long u; |
| 264 | |
| 265 | if (doInt(v, "CELLPADDING" , 0, MAX_UCHAR, &u)) |
| 266 | return 1; |
| 267 | p->pad = (unsigned char) u; |
| 268 | p->flags |= PAD_SET; |
| 269 | return 0; |
| 270 | } |
| 271 | |
| 272 | static int cellspacingfn(htmldata_t * p, char *v) |
| 273 | { |
| 274 | long u; |
| 275 | |
| 276 | if (doInt(v, "CELLSPACING" , MIN_CHAR, MAX_CHAR, &u)) |
| 277 | return 1; |
| 278 | p->space = (signed char) u; |
| 279 | p->flags |= SPACE_SET; |
| 280 | return 0; |
| 281 | } |
| 282 | |
| 283 | static int cellborderfn(htmltbl_t * p, char *v) |
| 284 | { |
| 285 | long u; |
| 286 | |
| 287 | if (doInt(v, "CELLSBORDER" , 0, MAX_CHAR, &u)) |
| 288 | return 1; |
| 289 | p->cb = (unsigned char) u; |
| 290 | return 0; |
| 291 | } |
| 292 | |
| 293 | static int columnsfn(htmltbl_t * p, char *v) |
| 294 | { |
| 295 | if (*v != '*') { |
| 296 | agerr(AGWARN, "Unknown value %s for COLUMNS - ignored\n" , v); |
| 297 | return 1; |
| 298 | } |
| 299 | p->flags |= HTML_VRULE; |
| 300 | return 0; |
| 301 | } |
| 302 | |
| 303 | static int rowsfn(htmltbl_t * p, char *v) |
| 304 | { |
| 305 | if (*v != '*') { |
| 306 | agerr(AGWARN, "Unknown value %s for ROWS - ignored\n" , v); |
| 307 | return 1; |
| 308 | } |
| 309 | p->flags |= HTML_HRULE; |
| 310 | return 0; |
| 311 | } |
| 312 | |
| 313 | static int fixedsizefn(htmldata_t * p, char *v) |
| 314 | { |
| 315 | int rv = 0; |
| 316 | char c = (char) toupper(*(unsigned char *) v); |
| 317 | if ((c == 'T') && !strcasecmp(v + 1, "RUE" )) |
| 318 | p->flags |= FIXED_FLAG; |
| 319 | else if ((c != 'F') || strcasecmp(v + 1, "ALSE" )) { |
| 320 | agerr(AGWARN, "Illegal value %s for FIXEDSIZE - ignored\n" , v); |
| 321 | rv = 1; |
| 322 | } |
| 323 | return rv; |
| 324 | } |
| 325 | |
| 326 | static int valignfn(htmldata_t * p, char *v) |
| 327 | { |
| 328 | int rv = 0; |
| 329 | char c = (char) toupper(*v); |
| 330 | if ((c == 'B') && !strcasecmp(v + 1, "OTTOM" )) |
| 331 | p->flags |= VALIGN_BOTTOM; |
| 332 | else if ((c == 'T') && !strcasecmp(v + 1, "OP" )) |
| 333 | p->flags |= VALIGN_TOP; |
| 334 | else if ((c != 'M') || strcasecmp(v + 1, "IDDLE" )) { |
| 335 | agerr(AGWARN, "Illegal value %s for VALIGN - ignored\n" , v); |
| 336 | rv = 1; |
| 337 | } |
| 338 | return rv; |
| 339 | } |
| 340 | |
| 341 | static int halignfn(htmldata_t * p, char *v) |
| 342 | { |
| 343 | int rv = 0; |
| 344 | char c = (char) toupper(*v); |
| 345 | if ((c == 'L') && !strcasecmp(v + 1, "EFT" )) |
| 346 | p->flags |= HALIGN_LEFT; |
| 347 | else if ((c == 'R') && !strcasecmp(v + 1, "IGHT" )) |
| 348 | p->flags |= HALIGN_RIGHT; |
| 349 | else if ((c != 'C') || strcasecmp(v + 1, "ENTER" )) { |
| 350 | agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n" , v); |
| 351 | rv = 1; |
| 352 | } |
| 353 | return rv; |
| 354 | } |
| 355 | |
| 356 | static int cell_halignfn(htmldata_t * p, char *v) |
| 357 | { |
| 358 | int rv = 0; |
| 359 | char c = (char) toupper(*v); |
| 360 | if ((c == 'L') && !strcasecmp(v + 1, "EFT" )) |
| 361 | p->flags |= HALIGN_LEFT; |
| 362 | else if ((c == 'R') && !strcasecmp(v + 1, "IGHT" )) |
| 363 | p->flags |= HALIGN_RIGHT; |
| 364 | else if ((c == 'T') && !strcasecmp(v + 1, "EXT" )) |
| 365 | p->flags |= HALIGN_TEXT; |
| 366 | else if ((c != 'C') || strcasecmp(v + 1, "ENTER" )) |
| 367 | rv = 1; |
| 368 | if (rv) |
| 369 | agerr(AGWARN, "Illegal value %s for ALIGN in TD - ignored\n" , v); |
| 370 | return rv; |
| 371 | } |
| 372 | |
| 373 | static int balignfn(htmldata_t * p, char *v) |
| 374 | { |
| 375 | int rv = 0; |
| 376 | char c = (char) toupper(*v); |
| 377 | if ((c == 'L') && !strcasecmp(v + 1, "EFT" )) |
| 378 | p->flags |= BALIGN_LEFT; |
| 379 | else if ((c == 'R') && !strcasecmp(v + 1, "IGHT" )) |
| 380 | p->flags |= BALIGN_RIGHT; |
| 381 | else if ((c != 'C') || strcasecmp(v + 1, "ENTER" )) |
| 382 | rv = 1; |
| 383 | if (rv) |
| 384 | agerr(AGWARN, "Illegal value %s for BALIGN in TD - ignored\n" , v); |
| 385 | return rv; |
| 386 | } |
| 387 | |
| 388 | static int heightfn(htmldata_t * p, char *v) |
| 389 | { |
| 390 | long u; |
| 391 | |
| 392 | if (doInt(v, "HEIGHT" , 0, MAX_USHORT, &u)) |
| 393 | return 1; |
| 394 | p->height = (unsigned short) u; |
| 395 | return 0; |
| 396 | } |
| 397 | |
| 398 | static int widthfn(htmldata_t * p, char *v) |
| 399 | { |
| 400 | long u; |
| 401 | |
| 402 | if (doInt(v, "WIDTH" , 0, MAX_USHORT, &u)) |
| 403 | return 1; |
| 404 | p->width = (unsigned short) u; |
| 405 | return 0; |
| 406 | } |
| 407 | |
| 408 | static int rowspanfn(htmlcell_t * p, char *v) |
| 409 | { |
| 410 | long u; |
| 411 | |
| 412 | if (doInt(v, "ROWSPAN" , 0, MAX_USHORT, &u)) |
| 413 | return 1; |
| 414 | if (u == 0) { |
| 415 | agerr(AGWARN, "ROWSPAN value cannot be 0 - ignored\n" ); |
| 416 | return 1; |
| 417 | } |
| 418 | p->rspan = (unsigned short) u; |
| 419 | return 0; |
| 420 | } |
| 421 | |
| 422 | static int colspanfn(htmlcell_t * p, char *v) |
| 423 | { |
| 424 | long u; |
| 425 | |
| 426 | if (doInt(v, "COLSPAN" , 0, MAX_USHORT, &u)) |
| 427 | return 1; |
| 428 | if (u == 0) { |
| 429 | agerr(AGWARN, "COLSPAN value cannot be 0 - ignored\n" ); |
| 430 | return 1; |
| 431 | } |
| 432 | p->cspan = (unsigned short) u; |
| 433 | return 0; |
| 434 | } |
| 435 | |
| 436 | static int fontcolorfn(textfont_t * p, char *v) |
| 437 | { |
| 438 | p->color = v; |
| 439 | return 0; |
| 440 | } |
| 441 | |
| 442 | static int facefn(textfont_t * p, char *v) |
| 443 | { |
| 444 | p->name = v; |
| 445 | return 0; |
| 446 | } |
| 447 | |
| 448 | static int ptsizefn(textfont_t * p, char *v) |
| 449 | { |
| 450 | long u; |
| 451 | |
| 452 | if (doInt(v, "POINT-SIZE" , 0, MAX_UCHAR, &u)) |
| 453 | return 1; |
| 454 | p->size = (double) u; |
| 455 | return 0; |
| 456 | } |
| 457 | |
| 458 | static int srcfn(htmlimg_t * p, char *v) |
| 459 | { |
| 460 | p->src = strdup(v); |
| 461 | return 0; |
| 462 | } |
| 463 | |
| 464 | static int scalefn(htmlimg_t * p, char *v) |
| 465 | { |
| 466 | p->scale = strdup(v); |
| 467 | return 0; |
| 468 | } |
| 469 | |
| 470 | static int alignfn(int *p, char *v) |
| 471 | { |
| 472 | int rv = 0; |
| 473 | char c = (char) toupper(*v); |
| 474 | if ((c == 'R') && !strcasecmp(v + 1, "IGHT" )) |
| 475 | *p = 'r'; |
| 476 | else if ((c == 'L') || !strcasecmp(v + 1, "EFT" )) |
| 477 | *p = 'l'; |
| 478 | else if ((c == 'C') || strcasecmp(v + 1, "ENTER" )) |
| 479 | *p = 'n'; |
| 480 | else { |
| 481 | agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n" , v); |
| 482 | rv = 1; |
| 483 | } |
| 484 | return rv; |
| 485 | } |
| 486 | |
| 487 | /* Tables used in binary search; MUST be alphabetized */ |
| 488 | static attr_item tbl_items[] = { |
| 489 | {"align" , (attrFn) halignfn}, |
| 490 | {"bgcolor" , (attrFn) bgcolorfn}, |
| 491 | {"border" , (attrFn) borderfn}, |
| 492 | {"cellborder" , (attrFn) cellborderfn}, |
| 493 | {"cellpadding" , (attrFn) cellpaddingfn}, |
| 494 | {"cellspacing" , (attrFn) cellspacingfn}, |
| 495 | {"color" , (attrFn) pencolorfn}, |
| 496 | {"columns" , (attrFn) columnsfn}, |
| 497 | {"fixedsize" , (attrFn) fixedsizefn}, |
| 498 | {"gradientangle" , (attrFn) gradientanglefn}, |
| 499 | {"height" , (attrFn) heightfn}, |
| 500 | {"href" , (attrFn) hreffn}, |
| 501 | {"id" , (attrFn) idfn}, |
| 502 | {"port" , (attrFn) portfn}, |
| 503 | {"rows" , (attrFn) rowsfn}, |
| 504 | {"sides" , (attrFn) sidesfn}, |
| 505 | {"style" , (attrFn) stylefn}, |
| 506 | {"target" , (attrFn) targetfn}, |
| 507 | {"title" , (attrFn) titlefn}, |
| 508 | {"tooltip" , (attrFn) titlefn}, |
| 509 | {"valign" , (attrFn) valignfn}, |
| 510 | {"width" , (attrFn) widthfn}, |
| 511 | }; |
| 512 | |
| 513 | static attr_item cell_items[] = { |
| 514 | {"align" , (attrFn) cell_halignfn}, |
| 515 | {"balign" , (attrFn) balignfn}, |
| 516 | {"bgcolor" , (attrFn) bgcolorfn}, |
| 517 | {"border" , (attrFn) borderfn}, |
| 518 | {"cellpadding" , (attrFn) cellpaddingfn}, |
| 519 | {"cellspacing" , (attrFn) cellspacingfn}, |
| 520 | {"color" , (attrFn) pencolorfn}, |
| 521 | {"colspan" , (attrFn) colspanfn}, |
| 522 | {"fixedsize" , (attrFn) fixedsizefn}, |
| 523 | {"gradientangle" , (attrFn) gradientanglefn}, |
| 524 | {"height" , (attrFn) heightfn}, |
| 525 | {"href" , (attrFn) hreffn}, |
| 526 | {"id" , (attrFn) idfn}, |
| 527 | {"port" , (attrFn) portfn}, |
| 528 | {"rowspan" , (attrFn) rowspanfn}, |
| 529 | {"sides" , (attrFn) sidesfn}, |
| 530 | {"style" , (attrFn) stylefn}, |
| 531 | {"target" , (attrFn) targetfn}, |
| 532 | {"title" , (attrFn) titlefn}, |
| 533 | {"tooltip" , (attrFn) titlefn}, |
| 534 | {"valign" , (attrFn) valignfn}, |
| 535 | {"width" , (attrFn) widthfn}, |
| 536 | }; |
| 537 | |
| 538 | static attr_item font_items[] = { |
| 539 | {"color" , (attrFn) fontcolorfn}, |
| 540 | {"face" , (attrFn) facefn}, |
| 541 | {"point-size" , (attrFn) ptsizefn}, |
| 542 | }; |
| 543 | |
| 544 | static attr_item img_items[] = { |
| 545 | {"scale" , (attrFn) scalefn}, |
| 546 | {"src" , (attrFn) srcfn}, |
| 547 | }; |
| 548 | |
| 549 | static attr_item br_items[] = { |
| 550 | {"align" , (attrFn) alignfn}, |
| 551 | }; |
| 552 | |
| 553 | /* doAttrs: |
| 554 | * General function for processing list of name/value attributes. |
| 555 | * Do binary search on items table. If match found, invoke action |
| 556 | * passing it tp and attribute value. |
| 557 | * Table size is given by nel |
| 558 | * Name/value pairs are in array atts, which is null terminated. |
| 559 | * s is the name of the HTML element being processed. |
| 560 | */ |
| 561 | static void |
| 562 | doAttrs(void *tp, attr_item * items, int nel, char **atts, char *s) |
| 563 | { |
| 564 | char *name; |
| 565 | char *val; |
| 566 | attr_item *ip; |
| 567 | attr_item key; |
| 568 | |
| 569 | while ((name = *atts++) != NULL) { |
| 570 | val = *atts++; |
| 571 | key.name = name; |
| 572 | ip = (attr_item *) bsearch(&key, items, nel, ISIZE, (bcmpfn) icmp); |
| 573 | if (ip) |
| 574 | state.warn |= ip->action(tp, val); |
| 575 | else { |
| 576 | agerr(AGWARN, "Illegal attribute %s in %s - ignored\n" , name, |
| 577 | s); |
| 578 | state.warn = 1; |
| 579 | } |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | static void mkBR(char **atts) |
| 584 | { |
| 585 | htmllval.i = UNSET_ALIGN; |
| 586 | doAttrs(&htmllval.i, br_items, sizeof(br_items) / ISIZE, atts, "<BR>" ); |
| 587 | } |
| 588 | |
| 589 | static htmlimg_t *mkImg(char **atts) |
| 590 | { |
| 591 | htmlimg_t *img = NEW(htmlimg_t); |
| 592 | |
| 593 | doAttrs(img, img_items, sizeof(img_items) / ISIZE, atts, "<IMG>" ); |
| 594 | |
| 595 | return img; |
| 596 | } |
| 597 | |
| 598 | static textfont_t *mkFont(GVC_t *gvc, char **atts, int flags, int ul) |
| 599 | { |
| 600 | textfont_t tf = {NULL,NULL,NULL,0.0,0,0}; |
| 601 | |
| 602 | tf.size = -1.0; /* unassigned */ |
| 603 | tf.flags = flags; |
| 604 | if (atts) |
| 605 | doAttrs(&tf, font_items, sizeof(font_items) / ISIZE, atts, "<FONT>" ); |
| 606 | |
| 607 | return dtinsert(gvc->textfont_dt, &tf); |
| 608 | } |
| 609 | |
| 610 | static htmlcell_t *mkCell(char **atts) |
| 611 | { |
| 612 | htmlcell_t *cell = NEW(htmlcell_t); |
| 613 | |
| 614 | cell->cspan = 1; |
| 615 | cell->rspan = 1; |
| 616 | doAttrs(cell, cell_items, sizeof(cell_items) / ISIZE, atts, "<TD>" ); |
| 617 | |
| 618 | return cell; |
| 619 | } |
| 620 | |
| 621 | static htmltbl_t *mkTbl(char **atts) |
| 622 | { |
| 623 | htmltbl_t *tbl = NEW(htmltbl_t); |
| 624 | |
| 625 | tbl->rc = -1; /* flag that table is a raw, parsed table */ |
| 626 | tbl->cb = -1; /* unset cell border attribute */ |
| 627 | doAttrs(tbl, tbl_items, sizeof(tbl_items) / ISIZE, atts, "<TABLE>" ); |
| 628 | |
| 629 | return tbl; |
| 630 | } |
| 631 | |
| 632 | static void startElement(void *user, const char *name, char **atts) |
| 633 | { |
| 634 | GVC_t *gvc = (GVC_t*)user; |
| 635 | |
| 636 | if (strcasecmp(name, "TABLE" ) == 0) { |
| 637 | htmllval.tbl = mkTbl(atts); |
| 638 | state.inCell = 0; |
| 639 | state.tok = T_table; |
| 640 | } else if ((strcasecmp(name, "TR" ) == 0) |
| 641 | || (strcasecmp(name, "TH" ) == 0)) { |
| 642 | state.inCell = 0; |
| 643 | state.tok = T_row; |
| 644 | } else if (strcasecmp(name, "TD" ) == 0) { |
| 645 | state.inCell = 1; |
| 646 | htmllval.cell = mkCell(atts); |
| 647 | state.tok = T_cell; |
| 648 | } else if (strcasecmp(name, "FONT" ) == 0) { |
| 649 | htmllval.font = mkFont(gvc, atts, 0, 0); |
| 650 | state.tok = T_font; |
| 651 | } else if (strcasecmp(name, "B" ) == 0) { |
| 652 | htmllval.font = mkFont(gvc, 0, HTML_BF, 0); |
| 653 | state.tok = T_bold; |
| 654 | } else if (strcasecmp(name, "S" ) == 0) { |
| 655 | htmllval.font = mkFont(gvc, 0, HTML_S, 0); |
| 656 | state.tok = T_s; |
| 657 | } else if (strcasecmp(name, "U" ) == 0) { |
| 658 | htmllval.font = mkFont(gvc, 0, HTML_UL, 1); |
| 659 | state.tok = T_underline; |
| 660 | } else if (strcasecmp(name, "O" ) == 0) { |
| 661 | htmllval.font = mkFont(gvc, 0, HTML_OL, 1); |
| 662 | state.tok = T_overline; |
| 663 | } else if (strcasecmp(name, "I" ) == 0) { |
| 664 | htmllval.font = mkFont(gvc, 0, HTML_IF, 0); |
| 665 | state.tok = T_italic; |
| 666 | } else if (strcasecmp(name, "SUP" ) == 0) { |
| 667 | htmllval.font = mkFont(gvc, 0, HTML_SUP, 0); |
| 668 | state.tok = T_sup; |
| 669 | } else if (strcasecmp(name, "SUB" ) == 0) { |
| 670 | htmllval.font = mkFont(gvc, 0, HTML_SUB, 0); |
| 671 | state.tok = T_sub; |
| 672 | } else if (strcasecmp(name, "BR" ) == 0) { |
| 673 | mkBR(atts); |
| 674 | state.tok = T_br; |
| 675 | } else if (strcasecmp(name, "HR" ) == 0) { |
| 676 | state.tok = T_hr; |
| 677 | } else if (strcasecmp(name, "VR" ) == 0) { |
| 678 | state.tok = T_vr; |
| 679 | } else if (strcasecmp(name, "IMG" ) == 0) { |
| 680 | htmllval.img = mkImg(atts); |
| 681 | state.tok = T_img; |
| 682 | } else if (strcasecmp(name, "HTML" ) == 0) { |
| 683 | state.tok = T_html; |
| 684 | } else { |
| 685 | lexerror(name); |
| 686 | } |
| 687 | } |
| 688 | |
| 689 | static void endElement(void *user, const char *name) |
| 690 | { |
| 691 | if (strcasecmp(name, "TABLE" ) == 0) { |
| 692 | state.tok = T_end_table; |
| 693 | state.inCell = 1; |
| 694 | } else if ((strcasecmp(name, "TR" ) == 0) |
| 695 | || (strcasecmp(name, "TH" ) == 0)) { |
| 696 | state.tok = T_end_row; |
| 697 | } else if (strcasecmp(name, "TD" ) == 0) { |
| 698 | state.tok = T_end_cell; |
| 699 | state.inCell = 0; |
| 700 | } else if (strcasecmp(name, "HTML" ) == 0) { |
| 701 | state.tok = T_end_html; |
| 702 | } else if (strcasecmp(name, "FONT" ) == 0) { |
| 703 | state.tok = T_end_font; |
| 704 | } else if (strcasecmp(name, "B" ) == 0) { |
| 705 | state.tok = T_n_bold; |
| 706 | } else if (strcasecmp(name, "U" ) == 0) { |
| 707 | state.tok = T_n_underline; |
| 708 | } else if (strcasecmp(name, "O" ) == 0) { |
| 709 | state.tok = T_n_overline; |
| 710 | } else if (strcasecmp(name, "I" ) == 0) { |
| 711 | state.tok = T_n_italic; |
| 712 | } else if (strcasecmp(name, "SUP" ) == 0) { |
| 713 | state.tok = T_n_sup; |
| 714 | } else if (strcasecmp(name, "SUB" ) == 0) { |
| 715 | state.tok = T_n_sub; |
| 716 | } else if (strcasecmp(name, "S" ) == 0) { |
| 717 | state.tok = T_n_s; |
| 718 | } else if (strcasecmp(name, "BR" ) == 0) { |
| 719 | if (state.tok == T_br) |
| 720 | state.tok = T_BR; |
| 721 | else |
| 722 | state.tok = T_end_br; |
| 723 | } else if (strcasecmp(name, "HR" ) == 0) { |
| 724 | if (state.tok == T_hr) |
| 725 | state.tok = T_HR; |
| 726 | else |
| 727 | state.tok = T_end_hr; |
| 728 | } else if (strcasecmp(name, "VR" ) == 0) { |
| 729 | if (state.tok == T_vr) |
| 730 | state.tok = T_VR; |
| 731 | else |
| 732 | state.tok = T_end_vr; |
| 733 | } else if (strcasecmp(name, "IMG" ) == 0) { |
| 734 | if (state.tok == T_img) |
| 735 | state.tok = T_IMG; |
| 736 | else |
| 737 | state.tok = T_end_img; |
| 738 | } else { |
| 739 | lexerror(name); |
| 740 | } |
| 741 | } |
| 742 | |
| 743 | /* characterData: |
| 744 | * Generate T_string token. Do this only when immediately in |
| 745 | * <TD>..</TD> or <HTML>..</HTML>, i.e., when inCell is true. |
| 746 | * Strip out formatting characters but keep spaces. |
| 747 | * Distinguish between all whitespace vs. strings with non-whitespace |
| 748 | * characters. |
| 749 | */ |
| 750 | static void characterData(void *user, const char *s, int length) |
| 751 | { |
| 752 | int i, cnt = 0; |
| 753 | unsigned char c; |
| 754 | |
| 755 | if (state.inCell) { |
| 756 | for (i = length; i; i--) { |
| 757 | c = *s++; |
| 758 | if (c >= ' ') { |
| 759 | cnt++; |
| 760 | agxbputc(state.xb, c); |
| 761 | } |
| 762 | } |
| 763 | if (cnt) state.tok = T_string; |
| 764 | } |
| 765 | } |
| 766 | #endif |
| 767 | |
| 768 | int initHTMLlexer(char *src, agxbuf * xb, htmlenv_t *env) |
| 769 | { |
| 770 | #ifdef HAVE_EXPAT |
| 771 | state.xb = xb; |
| 772 | agxbinit (&state.lb, SMALLBUF, NULL); |
| 773 | state.ptr = src; |
| 774 | state.mode = 0; |
| 775 | state.warn = 0; |
| 776 | state.error = 0; |
| 777 | state.currtoklen = 0; |
| 778 | state.prevtoklen = 0; |
| 779 | state.inCell = 1; |
| 780 | state.parser = XML_ParserCreate(charsetToStr(GD_charset(env->g))); |
| 781 | XML_SetUserData(state.parser, GD_gvc(env->g)); |
| 782 | XML_SetElementHandler(state.parser, |
| 783 | (XML_StartElementHandler) startElement, |
| 784 | endElement); |
| 785 | XML_SetCharacterDataHandler(state.parser, characterData); |
| 786 | return 0; |
| 787 | #else |
| 788 | static int first; |
| 789 | if (!first) { |
| 790 | agerr(AGWARN, |
| 791 | "Not built with libexpat. Table formatting is not available.\n" ); |
| 792 | first++; |
| 793 | } |
| 794 | return 1; |
| 795 | #endif |
| 796 | } |
| 797 | |
/* clearHTMLlexer:
 * Release the XML parser and the translation buffer.
 * Returns non-zero if any warning or error was recorded during the
 * scan; always returns 1 in builds without expat.
 */
int clearHTMLlexer()
{
#ifdef HAVE_EXPAT
    int status;

    status = state.warn | state.error;
    XML_ParserFree(state.parser);
    agxbfree (&state.lb);
    return status;
#else
    return 1;
#endif
}
| 809 | |
| 810 | #ifdef HAVE_EXPAT |
| 811 | /* eatComment: |
| 812 | * Given first character after open comment, eat characters |
| 813 | * up to comment close, returning pointer to closing > if it exists, |
| 814 | * or null character otherwise. |
| 815 | * We rely on HTML strings having matched nested <>. |
| 816 | */ |
| 817 | static char *(char *p) |
| 818 | { |
| 819 | int depth = 1; |
| 820 | char *s = p; |
| 821 | char c; |
| 822 | |
| 823 | while (depth && (c = *s++)) { |
| 824 | if (c == '<') |
| 825 | depth++; |
| 826 | else if (c == '>') |
| 827 | depth--; |
| 828 | } |
| 829 | s--; /* move back to '\0' or '>' */ |
| 830 | if (*s) { |
| 831 | char *t = s - 2; |
| 832 | if ((t < p) || strncmp(t, "--" , 2)) { |
| 833 | agerr(AGWARN, "Unclosed comment\n" ); |
| 834 | state.warn = 1; |
| 835 | } |
| 836 | } |
| 837 | return s; |
| 838 | } |
| 839 | |
| 840 | /* findNext: |
| 841 | * Return next XML unit. This is either <..>, an HTML |
| 842 | * comment <!-- ... -->, or characters up to next <. |
| 843 | */ |
| 844 | static char *findNext(char *s, agxbuf* xb) |
| 845 | { |
| 846 | char* t = s + 1; |
| 847 | char c; |
| 848 | |
| 849 | if (*s == '<') { |
| 850 | if ((*t == '!') && !strncmp(t + 1, "--" , 2)) |
| 851 | t = eatComment(t + 3); |
| 852 | else |
| 853 | while (*t && (*t != '>')) |
| 854 | t++; |
| 855 | if (*t != '>') { |
| 856 | agerr(AGWARN, "Label closed before end of HTML element\n" ); |
| 857 | state.warn = 1; |
| 858 | } else |
| 859 | t++; |
| 860 | } else { |
| 861 | t = s; |
| 862 | while ((c = *t) && (c != '<')) { |
| 863 | if ((c == '&') && (*(t+1) != '#')) { |
| 864 | t = scanEntity(t + 1, xb); |
| 865 | } |
| 866 | else { |
| 867 | agxbputc(xb, c); |
| 868 | t++; |
| 869 | } |
| 870 | } |
| 871 | } |
| 872 | return t; |
| 873 | } |
| 874 | #endif |
| 875 | |
/* htmllineno:
 * Current line number as reported by the XML parser;
 * always 0 in builds without expat.
 */
int htmllineno()
{
    int line = 0;

#ifdef HAVE_EXPAT
    line = XML_GetCurrentLineNumber(state.parser);
#endif
    return line;
}
| 884 | |
| 885 | #ifdef DEBUG |
| 886 | static void printTok(int tok) |
| 887 | { |
| 888 | char *s; |
| 889 | |
| 890 | switch (tok) { |
| 891 | case T_end_br: |
| 892 | s = "T_end_br" ; |
| 893 | break; |
| 894 | case T_end_img: |
| 895 | s = "T_end_img" ; |
| 896 | break; |
| 897 | case T_row: |
| 898 | s = "T_row" ; |
| 899 | break; |
| 900 | case T_end_row: |
| 901 | s = "T_end_row" ; |
| 902 | break; |
| 903 | case T_html: |
| 904 | s = "T_html" ; |
| 905 | break; |
| 906 | case T_end_html: |
| 907 | s = "T_end_html" ; |
| 908 | break; |
| 909 | case T_end_table: |
| 910 | s = "T_end_table" ; |
| 911 | break; |
| 912 | case T_end_cell: |
| 913 | s = "T_end_cell" ; |
| 914 | break; |
| 915 | case T_end_font: |
| 916 | s = "T_end_font" ; |
| 917 | break; |
| 918 | case T_string: |
| 919 | s = "T_string" ; |
| 920 | break; |
| 921 | case T_error: |
| 922 | s = "T_error" ; |
| 923 | break; |
| 924 | case T_n_italic: |
| 925 | s = "T_n_italic" ; |
| 926 | break; |
| 927 | case T_n_bold: |
| 928 | s = "T_n_bold" ; |
| 929 | break; |
| 930 | case T_n_underline: |
| 931 | s = "T_n_underline" ; |
| 932 | break; |
| 933 | case T_n_overline: |
| 934 | s = "T_n_overline" ; |
| 935 | break; |
| 936 | case T_n_sup: |
| 937 | s = "T_n_sup" ; |
| 938 | break; |
| 939 | case T_n_sub: |
| 940 | s = "T_n_sub" ; |
| 941 | break; |
| 942 | case T_n_s: |
| 943 | s = "T_n_s" ; |
| 944 | break; |
| 945 | case T_HR: |
| 946 | s = "T_HR" ; |
| 947 | break; |
| 948 | case T_hr: |
| 949 | s = "T_hr" ; |
| 950 | break; |
| 951 | case T_end_hr: |
| 952 | s = "T_end_hr" ; |
| 953 | break; |
| 954 | case T_VR: |
| 955 | s = "T_VR" ; |
| 956 | break; |
| 957 | case T_vr: |
| 958 | s = "T_vr" ; |
| 959 | break; |
| 960 | case T_end_vr: |
| 961 | s = "T_end_vr" ; |
| 962 | break; |
| 963 | case T_BR: |
| 964 | s = "T_BR" ; |
| 965 | break; |
| 966 | case T_br: |
| 967 | s = "T_br" ; |
| 968 | break; |
| 969 | case T_IMG: |
| 970 | s = "T_IMG" ; |
| 971 | break; |
| 972 | case T_img: |
| 973 | s = "T_img" ; |
| 974 | break; |
| 975 | case T_table: |
| 976 | s = "T_table" ; |
| 977 | break; |
| 978 | case T_cell: |
| 979 | s = "T_cell" ; |
| 980 | break; |
| 981 | case T_font: |
| 982 | s = "T_font" ; |
| 983 | break; |
| 984 | case T_italic: |
| 985 | s = "T_italic" ; |
| 986 | break; |
| 987 | case T_bold: |
| 988 | s = "T_bold" ; |
| 989 | break; |
| 990 | case T_underline: |
| 991 | s = "T_underline" ; |
| 992 | break; |
| 993 | case T_overline: |
| 994 | s = "T_overline" ; |
| 995 | break; |
| 996 | case T_sup: |
| 997 | s = "T_sup" ; |
| 998 | break; |
| 999 | case T_sub: |
| 1000 | s = "T_sub" ; |
| 1001 | break; |
| 1002 | case T_s: |
| 1003 | s = "T_s" ; |
| 1004 | break; |
| 1005 | default: |
| 1006 | s = "<unknown>" ; |
| 1007 | } |
| 1008 | if (tok == T_string) { |
| 1009 | fprintf(stderr, "%s \"" , s); |
| 1010 | fwrite(agxbstart(state.xb), 1, agxblen(state.xb), stderr); |
| 1011 | fprintf(stderr, "\"\n" ); |
| 1012 | } else |
| 1013 | fprintf(stderr, "%s\n" , s); |
| 1014 | } |
| 1015 | |
| 1016 | #endif |
| 1017 | |
/* htmllex:
 * Lexer entry point for the yacc-generated parser: returns the next
 * token, or EOF once the closing </HTML> has been delivered (and always
 * EOF in builds without expat).
 *
 * The label is wrapped in an artificial <HTML>..</HTML> pair, tracked by
 * state.mode: 0 = <HTML> not yet sent, 1 = scanning the source,
 * 2 = </HTML> has been sent. Each pass of the loop feeds one lexical
 * unit to expat, whose callbacks set state.tok; the loop repeats until
 * a unit actually produces a token.
 * If findNext left translated text in state.lb, that text is parsed in
 * place of the raw unit. The raw unit is remembered in currtok/prevtok
 * for error reporting. Only the first parse error is reported.
 */
int htmllex()
{
#ifdef HAVE_EXPAT
    static char *begin_html = "<HTML>";
    static char *end_html = "</HTML>";

    char *unit;
    char *next = 0;
    int unitlen, xlen;
    int status;

    state.tok = 0;
    do {
        switch (state.mode) {
        case 2:
            return EOF;
        case 0:
            state.mode = 1;
            unit = begin_html;
            unitlen = strlen(unit);
            next = 0;
            break;
        default:
            unit = state.ptr;
            if (*unit != '\0') {
                next = findNext(unit, &state.lb);
                unitlen = next - unit;
            } else {
                /* source exhausted: send the artificial closing tag */
                state.mode = 2;
                unit = end_html;
                unitlen = strlen(unit);
            }
            break;
        }

        state.prevtok = state.currtok;
        state.prevtoklen = state.currtoklen;
        state.currtok = unit;
        state.currtoklen = unitlen;

        xlen = agxblen(&state.lb);
        if (xlen)
            status = XML_Parse(state.parser, agxbuse(&state.lb), xlen, 0);
        else
            status = XML_Parse(state.parser, unit, unitlen, (unitlen ? 0 : 1));

        if ((status == XML_STATUS_ERROR) && !state.error) {
            agerr(AGERR, "%s in line %d \n",
                  XML_ErrorString(XML_GetErrorCode(state.parser)),
                  htmllineno());
            error_context();
            state.error = 1;
            state.tok = T_error;
        }
        if (next)
            state.ptr = next;
    } while (state.tok == 0);
#if DEBUG
    printTok (state.tok);
#endif
    return state.tok;
#else
    return EOF;
#endif
}
| 1078 | |
| 1079 | |