1/* $Id$ $Revision$ */
2/* vim:set shiftwidth=4 ts=8: */
3
4/*************************************************************************
5 * Copyright (c) 2011 AT&T Intellectual Property
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the Eclipse Public License v1.0
8 * which accompanies this distribution, and is available at
9 * http://www.eclipse.org/legal/epl-v10.html
10 *
11 * Contributors: See CVS logs. Details at http://www.graphviz.org/
12 *************************************************************************/
13
14
15#include "convert.h"
16#include "agxbuf.h"
17#include <getopt.h>
18#ifdef HAVE_EXPAT
19#include <expat.h>
20#include <ctype.h>
21
22#ifndef XML_STATUS_ERROR
23#define XML_STATUS_ERROR 0
24#endif
25
26#define STACK_DEPTH 32
27#define BUFSIZE 20000
28#define SMALLBUF 1000
29#define NAMEBUF 100
30
31#define GRAPHML_ATTR "_graphml_"
32#define GRAPHML_ID "_graphml_id"
33#define GRAPHML_ROLE "_graphml_role"
34#define GRAPHML_HYPER "_graphml_hypergraph"
35#define GRAPHML_FROM "_graphml_fromorder"
36#define GRAPHML_TO "_graphml_toorder"
37#define GRAPHML_TYPE "_graphml_type"
38#define GRAPHML_COMP "_graphml_composite_"
39#define GRAPHML_LOC "_graphml_locator_"
40
41#define TAG_NONE -1
42#define TAG_GRAPH 0
43#define TAG_NODE 1
44#define TAG_EDGE 2
45
46static FILE *outFile;
47static char *CmdName;
48static char **Files;
49static int Verbose;
50static char* gname = "";
51
52typedef struct slist slist;
53struct slist {
54 slist *next;
55 char buf[1];
56};
57
58#define NEW(t) (t*)malloc(sizeof(t))
59#define N_NEW(n,t) (t*)malloc((n)*sizeof(t))
60/* Round x up to next multiple of y, which is a power of 2 */
61#define ROUND2(x,y) (((x) + ((y)-1)) & ~((y)-1))
62
63static void pushString(slist ** stk, const char *s)
64{
65 int sz = ROUND2(sizeof(slist) + strlen(s), sizeof(void *));
66 slist *sp = (slist *) N_NEW(sz, char);
67 strcpy(sp->buf, s);
68 sp->next = *stk;
69 *stk = sp;
70}
71
72static void popString(slist ** stk)
73{
74 slist *sp = *stk;
75 if (!sp) {
76 fprintf(stderr, "PANIC: graphml2gv: empty element stack\n");
77 exit(1);
78 }
79 *stk = sp->next;
80 free(sp);
81}
82
83static char *topString(slist * stk)
84{
85 if (!stk) {
86 fprintf(stderr, "PANIC: graphml2gv: empty element stack\n");
87 exit(1);
88 }
89 return stk->buf;
90}
91
92static void freeString(slist * stk)
93{
94 slist *sp;
95
96 while (stk) {
97 sp = stk->next;
98 free(stk);
99 stk = sp;
100 }
101}
102
103typedef struct userdata {
104 agxbuf xml_attr_name;
105 agxbuf xml_attr_value;
106 agxbuf composite_buffer;
107 char* gname;
108 slist *elements;
109 int listen;
110 int closedElementType;
111 int globalAttrType;
112 int compositeReadState;
113 int edgeinverted;
114 Dt_t *nameMap;
115} userdata_t;
116
117static Agraph_t *root; /* root graph */
118static int Current_class; /* Current element type */
119static Agraph_t *G; /* Current graph */
120static Agnode_t *N; /* Set if Current_class == TAG_NODE */
121static Agedge_t *E; /* Set if Current_class == TAG_EDGE */
122
123static int GSP;
124static Agraph_t *Gstack[STACK_DEPTH];
125
126typedef struct {
127 Dtlink_t link;
128 char *name;
129 char *unique_name;
130} namev_t;
131
132static namev_t *make_nitem(Dt_t * d, namev_t * objp, Dtdisc_t * disc)
133{
134 namev_t *np = NEW(namev_t);
135 np->name = objp->name;
136 np->unique_name = 0;
137 return np;
138}
139
140static void free_nitem(Dt_t * d, namev_t * np, Dtdisc_t * disc)
141{
142 free(np->unique_name);
143 free(np);
144}
145
146static Dtdisc_t nameDisc = {
147 offsetof(namev_t, name),
148 -1,
149 offsetof(namev_t, link),
150 (Dtmake_f) make_nitem,
151 (Dtfree_f) free_nitem,
152 NIL(Dtcompar_f),
153 NIL(Dthash_f),
154 NIL(Dtmemory_f),
155 NIL(Dtevent_f)
156};
157
158static userdata_t *genUserdata(char* dfltname)
159{
160 userdata_t *user = NEW(userdata_t);
161 agxbinit(&(user->xml_attr_name), NAMEBUF, 0);
162 agxbinit(&(user->xml_attr_value), SMALLBUF, 0);
163 agxbinit(&(user->composite_buffer), SMALLBUF, 0);
164 user->listen = FALSE;
165 user->elements = 0;
166 user->closedElementType = TAG_NONE;
167 user->globalAttrType = TAG_NONE;
168 user->compositeReadState = FALSE;
169 user->edgeinverted = FALSE;
170 user->gname = dfltname;
171 user->nameMap = dtopen(&nameDisc, Dtoset);
172 return user;
173}
174
175static void freeUserdata(userdata_t * ud)
176{
177 dtclose(ud->nameMap);
178 agxbfree(&(ud->xml_attr_name));
179 agxbfree(&(ud->xml_attr_value));
180 agxbfree(&(ud->composite_buffer));
181 freeString(ud->elements);
182 free(ud);
183}
184
185static void addToMap(Dt_t * map, char *name, char *uniqueName)
186{
187 namev_t obj;
188 namev_t *objp;
189
190 obj.name = name;
191 objp = dtinsert(map, &obj);
192 assert(objp->unique_name == 0);
193 objp->unique_name = strdup(uniqueName);
194}
195
196static char *mapLookup(Dt_t * nm, char *name)
197{
198 namev_t *objp = dtmatch(nm, name);
199 if (objp)
200 return objp->unique_name;
201 else
202 return 0;
203}
204
205static int isAnonGraph(char *name)
206{
207 if (*name++ != '%')
208 return 0;
209 while (isdigit(*name))
210 name++; /* skip over digits */
211 return (*name == '\0');
212}
213
214static void push_subg(Agraph_t * g)
215{
216 if (GSP == STACK_DEPTH) {
217 fprintf(stderr, "graphml2gv: Too many (> %d) nestings of subgraphs\n",
218 STACK_DEPTH);
219 exit(1);
220 } else if (GSP == 0)
221 root = g;
222 G = Gstack[GSP++] = g;
223}
224
225static Agraph_t *pop_subg(void)
226{
227 Agraph_t *g;
228 if (GSP == 0) {
229 fprintf(stderr, "graphml2gv: Gstack underflow in graph parser\n");
230 exit(1);
231 }
232 g = Gstack[--GSP];
233 if (GSP > 0)
234 G = Gstack[GSP - 1];
235 return g;
236}
237
238static Agnode_t *bind_node(const char *name)
239{
240 N = agnode(G, (char *) name, 1);
241 return N;
242}
243
244static Agedge_t *bind_edge(const char *tail, const char *head)
245{
246 Agnode_t *tailNode, *headNode;
247 char *key = 0;
248
249 tailNode = agnode(G, (char *) tail, 1);
250 headNode = agnode(G, (char *) head, 1);
251 E = agedge(G, tailNode, headNode, key, 1);
252 return E;
253}
254
255static int get_xml_attr(char *attrname, const char **atts)
256{
257 int count = 0;
258 while (atts[count] != NULL) {
259 if (strcmp(attrname, atts[count]) == 0) {
260 return count + 1;
261 }
262 count += 2;
263 }
264 return -1;
265}
266
267static void setName(Dt_t * names, Agobj_t * n, char *value)
268{
269 Agsym_t *ap;
270 char *oldName;
271
272 ap = agattr(root, AGTYPE(n), GRAPHML_ID, "");
273 agxset(n, ap, agnameof(n));
274 oldName = agxget(n, ap); /* set/get gives us new copy */
275 addToMap(names, oldName, value);
276 agrename(n, value);
277}
278
279static char *defval = "";
280
281static void
282setNodeAttr(Agnode_t * np, char *name, char *value, userdata_t * ud)
283{
284 Agsym_t *ap;
285
286 if (strcmp(name, "name") == 0) {
287 setName(ud->nameMap, (Agobj_t *) np, value);
288 } else {
289 ap = agattr(root, AGNODE, name, 0);
290 if (!ap)
291 ap = agattr(root, AGNODE, name, defval);
292 agxset(np, ap, value);
293 }
294}
295
296#define NODELBL "node:"
297#define NLBLLEN (sizeof(NODELBL)-1)
298#define EDGELBL "edge:"
299#define ELBLLEN (sizeof(EDGELBL)-1)
300
301/* setGlobalNodeAttr:
302 * Set global node attribute.
303 * The names must always begin with "node:".
304 */
305static void
306setGlobalNodeAttr(Agraph_t * g, char *name, char *value, userdata_t * ud)
307{
308 if (strncmp(name, NODELBL, NLBLLEN))
309 fprintf(stderr,
310 "Warning: global node attribute %s in graph %s does not begin with the prefix %s\n",
311 name, agnameof(g), NODELBL);
312 else
313 name += NLBLLEN;
314 if ((g != root) && !agattr(root, AGNODE, name, 0))
315 agattr(root, AGNODE, name, defval);
316 agattr(G, AGNODE, name, value);
317}
318
319static void
320setEdgeAttr(Agedge_t * ep, char *name, char *value, userdata_t * ud)
321{
322 Agsym_t *ap;
323 char *attrname;
324
325 if (strcmp(name, "headport") == 0) {
326 if (ud->edgeinverted)
327 attrname = "tailport";
328 else
329 attrname = "headport";
330 ap = agattr(root, AGEDGE, attrname, 0);
331 if (!ap)
332 ap = agattr(root, AGEDGE, attrname, defval);
333 agxset(ep, ap, value);
334 } else if (strcmp(name, "tailport") == 0) {
335 if (ud->edgeinverted)
336 attrname = "headport";
337 else
338 attrname = "tailport";
339 ap = agattr(root, AGEDGE, attrname, 0);
340 if (!ap)
341 ap = agattr(root, AGEDGE, attrname, defval);
342 agxset(ep, ap, value);
343 } else {
344 ap = agattr(root, AGEDGE, name, 0);
345 if (!ap)
346 ap = agattr(root, AGEDGE, name, defval);
347 agxset(ep, ap, value);
348 }
349}
350
351/* setGlobalEdgeAttr:
352 * Set global edge attribute.
353 * The names always begin with "edge:".
354 */
355static void
356setGlobalEdgeAttr(Agraph_t * g, char *name, char *value, userdata_t * ud)
357{
358 if (strncmp(name, EDGELBL, ELBLLEN))
359 fprintf(stderr,
360 "Warning: global edge attribute %s in graph %s does not begin with the prefix %s\n",
361 name, agnameof(g), EDGELBL);
362 else
363 name += ELBLLEN;
364 if ((g != root) && !agattr(root, AGEDGE, name, 0))
365 agattr(root, AGEDGE, name, defval);
366 agattr(g, AGEDGE, name, value);
367}
368
369static void
370setGraphAttr(Agraph_t * g, char *name, char *value, userdata_t * ud)
371{
372 Agsym_t *ap;
373
374 if ((g == root) && !strcmp(name, "strict") && !strcmp(value, "true")) {
375 g->desc.strict = 1;
376 } else if (strcmp(name, "name") == 0)
377 setName(ud->nameMap, (Agobj_t *) g, value);
378 else {
379 ap = agattr(root, AGRAPH, name, 0);
380 if (ap)
381 agxset(g, ap, value);
382 else if (g == root)
383 agattr(root, AGRAPH, name, value);
384 else {
385 ap = agattr(root, AGRAPH, name, defval);
386 agxset(g, ap, value);
387 }
388 }
389}
390
391static void setAttr(char *name, char *value, userdata_t * ud)
392{
393 switch (Current_class) {
394 case TAG_GRAPH:
395 setGraphAttr(G, name, value, ud);
396 break;
397 case TAG_NODE:
398 setNodeAttr(N, name, value, ud);
399 break;
400 case TAG_EDGE:
401 setEdgeAttr(E, name, value, ud);
402 break;
403 }
404}
405
406/*------------- expat handlers ----------------------------------*/
407
408static void
409startElementHandler(void *userData, const char *name, const char **atts)
410{
411 int pos;
412 userdata_t *ud = (userdata_t *) userData;
413 Agraph_t *g = NULL;
414
415 if (strcmp(name, "graphml") == 0) {
416 /* do nothing */
417 } else if (strcmp(name, "graph") == 0) {
418 const char *edgeMode = "";
419 const char *id;
420 Agdesc_t dir;
421 char buf[NAMEBUF]; /* holds % + number */
422
423 Current_class = TAG_GRAPH;
424 if (ud->closedElementType == TAG_GRAPH) {
425 fprintf(stderr,
426 "Warning: Node contains more than one graph.\n");
427 }
428 pos = get_xml_attr("id", atts);
429 if (pos > 0) {
430 id = atts[pos];
431 }
432 else
433 id = ud->gname;
434 pos = get_xml_attr("edgedefault", atts);
435 if (pos > 0) {
436 edgeMode = atts[pos];
437 }
438
439 if (GSP == 0) {
440 if (strcmp(edgeMode, "directed") == 0) {
441 dir = Agdirected;
442 } else if (strcmp(edgeMode, "undirected") == 0) {
443 dir = Agundirected;
444 } else {
445 if (Verbose) {
446 fprintf(stderr,
447 "Warning: graph has no edgedefault attribute - assume directed\n");
448 }
449 dir = Agdirected;
450 }
451 g = agopen((char *) id, dir, &AgDefaultDisc);
452 push_subg(g);
453 } else {
454 Agraph_t *subg;
455 if (isAnonGraph((char *) id)) {
456 static int anon_id = 1;
457 sprintf(buf, "%%%d", anon_id++);
458 id = buf;
459 }
460 subg = agsubg(G, (char *) id, 1);
461 push_subg(subg);
462 }
463
464 pushString(&ud->elements, id);
465 } else if (strcmp(name, "node") == 0) {
466 Current_class = TAG_NODE;
467 pos = get_xml_attr("id", atts);
468 if (pos > 0) {
469 const char *attrname;
470 attrname = atts[pos];
471 if (G == 0)
472 fprintf(stderr,"node %s outside graph, ignored\n",attrname);
473 else
474 bind_node(attrname);
475
476 pushString(&ud->elements, attrname);
477 }
478
479 } else if (strcmp(name, "edge") == 0) {
480 const char *head = "", *tail = "";
481 char *tname;
482 Agnode_t *t;
483
484 Current_class = TAG_EDGE;
485 pos = get_xml_attr("source", atts);
486 if (pos > 0)
487 tail = atts[pos];
488 pos = get_xml_attr("target", atts);
489 if (pos > 0)
490 head = atts[pos];
491
492 tname = mapLookup(ud->nameMap, (char *) tail);
493 if (tname)
494 tail = tname;
495
496 tname = mapLookup(ud->nameMap, (char *) head);
497 if (tname)
498 head = tname;
499
500 if (G == 0)
501 fprintf(stderr,"edge source %s target %s outside graph, ignored\n",(char*)tail,(char*)head);
502 else {
503 bind_edge(tail, head);
504
505 t = AGTAIL(E);
506 tname = agnameof(t);
507
508 if (strcmp(tname, tail) == 0) {
509 ud->edgeinverted = FALSE;
510 } else if (strcmp(tname, head) == 0) {
511 ud->edgeinverted = TRUE;
512 }
513
514 pos = get_xml_attr("id", atts);
515 if (pos > 0) {
516 setEdgeAttr(E, GRAPHML_ID, (char *) atts[pos], ud);
517 }
518 }
519 } else {
520 /* must be some extension */
521 fprintf(stderr,
522 "Unknown node %s - ignoring.\n",
523 name);
524 }
525}
526
527static void endElementHandler(void *userData, const char *name)
528{
529 userdata_t *ud = (userdata_t *) userData;
530
531 if (strcmp(name, "graph") == 0) {
532 pop_subg();
533 popString(&ud->elements);
534 ud->closedElementType = TAG_GRAPH;
535 } else if (strcmp(name, "node") == 0) {
536 char *ele_name = topString(ud->elements);
537 if (ud->closedElementType == TAG_GRAPH) {
538 Agnode_t *node = agnode(root, ele_name, 0);
539 if (node) agdelete(root, node);
540 }
541 popString(&ud->elements);
542 Current_class = TAG_GRAPH;
543 N = 0;
544 ud->closedElementType = TAG_NODE;
545 } else if (strcmp(name, "edge") == 0) {
546 Current_class = TAG_GRAPH;
547 E = 0;
548 ud->closedElementType = TAG_EDGE;
549 ud->edgeinverted = FALSE;
550 } else if (strcmp(name, "attr") == 0) {
551 char *name;
552 char *value;
553 char buf[SMALLBUF] = GRAPHML_COMP;
554 char *dynbuf = 0;
555
556 ud->closedElementType = TAG_NONE;
557 if (ud->compositeReadState) {
558 int len = sizeof(GRAPHML_COMP) + agxblen(&ud->xml_attr_name);
559 if (len <= SMALLBUF) {
560 name = buf;
561 } else {
562 name = dynbuf = N_NEW(len, char);
563 strcpy(name, GRAPHML_COMP);
564 }
565 strcpy(name + sizeof(GRAPHML_COMP) - 1,
566 agxbuse(&ud->xml_attr_name));
567 value = agxbuse(&ud->composite_buffer);
568 agxbclear(&ud->xml_attr_value);
569 ud->compositeReadState = FALSE;
570 } else {
571 name = agxbuse(&ud->xml_attr_name);
572 value = agxbuse(&ud->xml_attr_value);
573 }
574
575 switch (ud->globalAttrType) {
576 case TAG_NONE:
577 setAttr(name, value, ud);
578 break;
579 case TAG_NODE:
580 setGlobalNodeAttr(G, name, value, ud);
581 break;
582 case TAG_EDGE:
583 setGlobalEdgeAttr(G, name, value, ud);
584 break;
585 case TAG_GRAPH:
586 setGraphAttr(G, name, value, ud);
587 break;
588 }
589 if (dynbuf)
590 free(dynbuf);
591 ud->globalAttrType = TAG_NONE;
592 }
593}
594
595static void characterDataHandler(void *userData, const char *s, int length)
596{
597 userdata_t *ud = (userdata_t *) userData;
598
599 if (!ud->listen)
600 return;
601
602 if (ud->compositeReadState) {
603 agxbput_n(&ud->composite_buffer, (char *) s, length);
604 return;
605 }
606
607 agxbput_n(&ud->xml_attr_value, (char *) s, length);
608}
609
610Agraph_t *graphml_to_gv(char* gname, FILE * graphmlFile, int* rv)
611{
612 char buf[BUFSIZE];
613 int done;
614 userdata_t *udata = genUserdata(gname);
615 XML_Parser parser = XML_ParserCreate(NULL);
616
617 *rv = 0;
618 XML_SetUserData(parser, udata);
619 XML_SetElementHandler(parser, startElementHandler, endElementHandler);
620 XML_SetCharacterDataHandler(parser, characterDataHandler);
621
622 Current_class = TAG_GRAPH;
623 root = 0;
624 do {
625 size_t len = fread(buf, 1, sizeof(buf), graphmlFile);
626 if (len == 0)
627 break;
628 done = len < sizeof(buf);
629 if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
630 fprintf(stderr,
631 "%s at line %lu\n",
632 XML_ErrorString(XML_GetErrorCode(parser)),
633 XML_GetCurrentLineNumber(parser));
634 *rv = 1;
635 break;
636 }
637 } while (!done);
638 XML_ParserFree(parser);
639 freeUserdata(udata);
640
641 return root;
642}
643
644static FILE *getFile(void)
645{
646 FILE *rv = NULL;
647 static FILE *savef = NULL;
648 static int cnt = 0;
649
650 if (Files == NULL) {
651 if (cnt++ == 0) {
652 rv = stdin;
653 }
654 } else {
655 if (savef)
656 fclose(savef);
657 while (Files[cnt]) {
658 if ((rv = fopen(Files[cnt++], "r")) != 0)
659 break;
660 else
661 fprintf(stderr, "Can't open %s\n", Files[cnt - 1]);
662 }
663 }
664 savef = rv;
665 return rv;
666}
667
668static FILE *openFile(char *name, char *mode)
669{
670 FILE *fp;
671 char *modestr;
672
673 fp = fopen(name, mode);
674 if (!fp) {
675 if (*mode == 'r')
676 modestr = "reading";
677 else
678 modestr = "writing";
679 fprintf(stderr, "%s: could not open file %s for %s\n",
680 CmdName, name, modestr);
681 perror(name);
682 exit(1);
683 }
684 return fp;
685}
686
687static const char *use = "Usage: %s [-gd?] [-o<file>] [<graphs>]\n\
688 -g<name> : use <name> as template for graph names\n\
689 -o<file> : output to <file> (stdout)\n\
690 -v : verbose mode\n\
691 -? : usage\n";
692
693static void usage(int v)
694{
695 fprintf(stderr, use, CmdName);
696 exit(v);
697}
698
699static char *cmdName(char *path)
700{
701 char *sp;
702
703 sp = strrchr(path, '/');
704 if (sp)
705 sp++;
706 else
707 sp = path;
708 return sp;
709}
710
711static void initargs(int argc, char **argv)
712{
713 int c;
714
715 CmdName = cmdName(argv[0]);
716 opterr = 0;
717 while ((c = getopt(argc, argv, ":vg:o:")) != -1) {
718 switch (c) {
719 case 'g':
720 gname = optarg;
721 break;
722 case 'v':
723 Verbose = 1;
724 break;
725 case 'o':
726 outFile = openFile(optarg, "w");
727 break;
728 case ':':
729 fprintf(stderr, "%s: option -%c missing argument\n", CmdName, optopt);
730 usage(1);
731 break;
732 case '?':
733 if (optopt == '?')
734 usage(0);
735 else {
736 fprintf(stderr, "%s: option -%c unrecognized\n", CmdName,
737 optopt);
738 usage(1);
739 }
740 }
741 }
742
743 argv += optind;
744 argc -= optind;
745
746 if (argc)
747 Files = argv;
748 if (!outFile)
749 outFile = stdout;
750}
751
752static char*
753nameOf (char* name, int cnt)
754{
755 static char* buf = 0;
756
757 if (*name == '\0')
758 return name;
759 if (cnt) {
760 if (!buf)
761 buf = N_NEW (strlen(name)+32,char); /* 32 to handle any integer plus null byte */
762 sprintf (buf, "%s%d", name, cnt);
763 return buf;
764 }
765 else
766 return name;
767}
768
769#endif
770
771int main(int argc, char **argv)
772{
773 Agraph_t *G;
774 Agraph_t *prev = 0;
775 FILE *inFile;
776 int rv = 0, gcnt = 0;
777
778#ifdef HAVE_EXPAT
779 initargs(argc, argv);
780 while ((inFile = getFile())) {
781 while ((G = graphml_to_gv(nameOf(gname, gcnt), inFile, &rv))) {
782 gcnt++;
783 if (prev)
784 agclose(prev);
785 prev = G;
786 if (Verbose)
787 fprintf (stderr, "%s: %d nodes %d edges\n",
788 agnameof (G), agnnodes(G), agnedges(G));
789 agwrite(G, outFile);
790 fflush(outFile);
791 }
792 }
793 exit(rv);
794#else
795 fputs("cvtgxl: not configured for conversion from GXL to GV\n", stderr);
796 exit(1);
797#endif
798}
799