1/* vim:set shiftwidth=4 ts=4: */
2
3#include <spinehdr.h>
4#include <subset.h>
5#include <quad.h>
6#include "union_find.h"
7#include "assert.h"
8#include <stdio.h>
9#include <stdlib.h>
10#include <math.h>
11#ifdef MAIN
12#include <getopt.h>
13#include "ingraphs.h"
14
15typedef struct {
16 FILE *outfp;
17 char **Files;
18 float sparse_ratio;
19 int verbose;
20} opts_t;
21#endif
22
23typedef int (*qsort_cmpf) (const void *, const void *);
24
25#define MIN(a,b) ((a)<(b)?(a):(b))
26#define MAX(a,b) ((a)>(b)?(a):(b))
27
28void* mcalloc (size_t cnt, size_t sz)
29{
30 void* p = calloc(cnt, sz);
31 /* fprintf(stderr, "alloc %lu bytes %p\n", cnt*sz, p); */
32 return p;
33}
34
35#if 0
36static float ewt(Agedge_t * e)
37{
38 return ED_wt(e);
39}
40#endif
41
42static int cmpe(void *v0, void *v1)
43{
44 Agedge_t *e0 = *(Agedge_t **) v0;
45 Agedge_t *e1 = *(Agedge_t **) v1;
46
47 if (ED_wt(e0) > ED_wt(e1))
48 return -1;
49 else if (ED_wt(e0) < ED_wt(e1))
50 return 1;
51 else
52 return 0;
53}
54
55
56static void doEdge(Agraph_t * g, Agnode_t * v, Agnode_t * w, int *quadcnt)
57{
58 Agedge_t *e = agedge(g, v, w, 0, 0);
59 if (!e)
60 e = agedge(g, w, v, 0, 0);
61 if (!e) {
62 fprintf(stderr, "Could not find edge %s--%s\n", agnameof(v),
63 agnameof(w));
64 exit(1);
65 }
66 quadcnt[ED_id(e)] += 1;
67}
68
69static void
70recordQuads(Agnode_t * v, Agnode_t * w, Dt_t * subset, void *state)
71{
72 Dtlink_t *link0;
73 Dtlink_t *link1;
74 Agnode_t *u0;
75 Agnode_t *u1;
76 int *quadcnt = (int *) state;
77 Agraph_t *g = agroot(v);
78
79 for (link0 = dtflatten(subset); link0; link0 = dtlink(subset, link0)) {
80 u0 = (Agnode_t *) (((ptritem *) dtobj(subset, link0))->v);
81 for (link1 = dtlink(subset, link0); link1;
82 link1 = dtlink(subset, link1)) {
83 u1 = (Agnode_t *) (((ptritem *) dtobj(subset, link1))->v);
84 doEdge(g, v, u0, quadcnt);
85 doEdge(g, w, u0, quadcnt);
86 doEdge(g, v, u1, quadcnt);
87 doEdge(g, w, u1, quadcnt);
88 }
89 }
90}
91
92static void
93reweightEdge (Agedge_t* e, Dt_t* nbr0, Dt_t* nbr1, Agedge_t*** nbrs, int verbose)
94{
95 Agnode_t* tail = agtail(e);
96 Agnode_t* head = aghead(e);
97 Agedge_t** tail_edgelist = nbrs[ND_id(tail)];
98 long int len0 = nbrs[ND_id(tail)+1] - tail_edgelist;
99 Agedge_t** head_edgelist = nbrs[ND_id(head)];
100 long int len1 = nbrs[ND_id(head)+1] - head_edgelist;
101 long int minlen = MIN(len0, len1);
102 long int i;
103 long int maxi = 0;
104 double wt, maxwt = 0;
105 double common_cnt = 0;
106 double union_cnt = 0;
107 Agnode_t* n0;
108 Agnode_t* n1;
109 double oldwt;
110
111 clearSubset(nbr0);
112 clearSubset(nbr1);
113
114 if (verbose > 1)
115 oldwt = ED_wt(e);
116
117#ifdef TEST_JACCARD
118 Dt_t* nbr00 = mkSubset();
119 Dt_t* nbr11 = mkSubset();
120 double wt0;
121#endif
122 for (i = 0; i < minlen; i++) {
123 n0 = (*tail_edgelist++)->node;
124 n1 = (*head_edgelist++)->node;
125 assert(n0 != tail);
126 assert(n1 != head);
127#ifdef TEST_JACCARD
128 addSubset(nbr00, n0);
129 addSubset(nbr11, n1);
130#endif
131/* fprintf (stderr, "add %s %s\n", agnameof(n0), agnameof(n1)); */
132 if (n0 != n1) {
133 if (inSubset(nbr1, n0)) common_cnt++;
134 else {
135 addSubset (nbr0, n0);
136 union_cnt++;
137 }
138 if (inSubset(nbr0, n1)) common_cnt++;
139 else {
140 addSubset (nbr1, n1);
141 union_cnt++;
142 }
143 }
144 else {
145 common_cnt++;
146 union_cnt++;
147 }
148 wt = common_cnt/union_cnt;
149#ifdef TEST_JACCARD
150 wt0 = ((double)intersect_size(nbr00,nbr11))/union_size(nbr00,nbr11);
151 assert(wt == wt0);
152#endif
153 if (wt > maxwt) {
154 maxi = i;
155 maxwt = wt;
156 }
157 }
158 if (len0 > minlen) {
159 for (; i < len0; i++) {
160 n0 = (*tail_edgelist++)->node;
161/* fprintf (stderr, "add %s -\n", agnameof(n0)); */
162#ifdef TEST_JACCARD
163 addSubset(nbr00, n0);
164#endif
165 if (inSubset(nbr1, n0)) common_cnt++;
166 else union_cnt++;
167 wt = common_cnt/union_cnt;
168#ifdef TEST_JACCARD
169 wt0 = ((double)intersect_size(nbr00,nbr11))/union_size(nbr00,nbr11);
170 assert(wt == wt0);
171#endif
172 if (wt > maxwt) {
173 maxi = i;
174 maxwt = wt;
175 }
176 }
177 }
178 else if (len1 > minlen) {
179 for (; i < len1; i++) {
180 n1 = (*head_edgelist++)->node;
181/* fprintf (stderr, "add - %s\n", agnameof(n1)); */
182#ifdef TEST_JACCARD
183 addSubset(nbr11, n1);
184#endif
185 if (inSubset(nbr0, n1)) common_cnt++;
186 else union_cnt++;
187 wt = common_cnt/union_cnt;
188#ifdef TEST_JACCARD
189 wt0 = ((double)intersect_size(nbr00,nbr11))/union_size(nbr00,nbr11);
190 assert(wt == wt0);
191#endif
192 if (wt > maxwt) {
193 maxi = i;
194 maxwt = wt;
195 }
196 }
197 }
198
199 ED_wt(e) = maxwt;
200 if (verbose > 1)
201 fprintf(stderr, "%s : %s %f %ld/%ld %f\n",
202 agnameof(tail), agnameof(head), maxwt, maxi, MAX(len0, len1), oldwt);
203#ifdef TEST_JACCARD
204 closeSubset(nbr00);
205 closeSubset(nbr11);
206#endif
207}
208
209static void setEdgeWeights(Agraph_t * g, int verbose)
210{
211 int *quadcnt = N_NEW((size_t)agnedges(g), int);
212 int ncnt = agnnodes(g);
213 int *que = N_NEW((size_t)ncnt, int);
214 Agnode_t *n;
215 Agedge_t *e;
216 int iwt;
217 int edgecnt = 0;
218 long int i = 0;
219
220 /* Count the number q(u,v) of quadrilaterals each edge is in.
221 * This is stored in quadcnt, indexed by the edges index.
222 */
223 genQuads(g, recordQuads, quadcnt);
224
225 /* Weight node v by q(v), the sum of the q(u,v) for all neighbors v of u.
226 * This is stored in que, indexed by the node's index.
227 */
228 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
229 ND_id(n) = i++;
230 iwt = 0;
231 for (e = agfstout(g, n); e; e = agnxtout(g, e)) {
232 iwt += quadcnt[ED_id(e)];
233 edgecnt++;
234 }
235 for (e = agfstin(g, n); e; e = agnxtin(g, e)) {
236 iwt += quadcnt[ED_id(e)];
237 }
238 que[ND_id(n)] = iwt;
239#if 0
240 fprintf(stderr, " %s quad %d\n", agnameof(n), que[ND_id(n)]);
241#endif
242 }
243
244 /* Assign each edge a normalized initial weight Q(u,v) which is q(u,v)/sqrt(q(u)q(v)).
245 * This is stored in ED_wt.
246 */
247 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
248 for (e = agfstout(g, n); e; e = agnxtout(g, e)) {
249 int quadv = quadcnt[ED_id(e)];
250 if (quadv)
251 ED_wt(e) =
252 quadv /
253 sqrt((double) (que[ND_id(n)] * que[ND_id(aghead(e))]));
254 else
255 ED_wt(e) = 0;
256#if 0
257 fprintf(stderr, " %s--%s quadcnt %d wt %f\n",
258 agnameof(n), agnameof(aghead(e)), quadcnt[ED_id(e)],
259 jD_wt(e));
260#endif
261 }
262 }
263
264 free(quadcnt);
265 free(que);
266
267 Agedge_t** edges = N_NEW((size_t)(2*edgecnt+1),Agedge_t*);
268 Agedge_t*** nbrs = N_NEW((size_t)ncnt+1, Agedge_t**);
269 Agedge_t** edgelist = edges;
270 size_t degree;
271 /* For each node, sort its edges by Q(u,v). */
272 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
273 degree = 0;
274 nbrs[ND_id(n)] = edgelist;
275 for (e = agfstout(g, n); e; e = agnxtout(g, e)) {
276 degree++;
277 *edgelist++ = e;
278 }
279 for (e = agfstin(g, n); e; e = agnxtin(g, e)) {
280 degree++;
281 *edgelist++ = e;
282 }
283 qsort(nbrs[ND_id(n)], degree, sizeof(Agedge_t **), (qsort_cmpf)cmpe);
284/*
285 fprintf (stderr, "sort %s(%d) degree %lu %d %p\n",
286 agnameof(n), ND_id(n), degree, agdegree(g,n,1,1), nbrs[ND_id(n)]);
287*/
288 }
289 nbrs[ncnt] = edgelist;
290
291#if 0
292 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
293 edgelist = nbrs[ND_id(n)];
294 long int len0 = nbrs[ND_id(n)+1] - edgelist;
295 fprintf (stderr, "%s len %lu degree %d\n", agnameof(n), len0, agdegree(g,n,1,1));
296 for (i = 0; i < len0; i++) {
297 e = *edgelist++;
298 fprintf (stderr, "%s %s %lf\n",
299 agnameof(agtail(e)), agnameof(aghead(e)), ED_wt(e));
300 }
301 }
302#endif
303
304 /* Finally, for each edge (u,v), compute the Jaccard coefficient of the union of
305 * k neighbors of u and v, using the sorted edges. The final edge weight is the
306 * maximum Jaccard coefficient.
307 */
308 Dt_t* nbr0 = mkSubset();
309 Dt_t* nbr1 = mkSubset();
310 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
311 edgelist = nbrs[ND_id(n)];
312 long int len0 = nbrs[ND_id(n)+1] - edgelist;
313 for (i = 0; i < len0; i++) {
314 e = *edgelist++;
315 if (AGTYPE(e)==AGINEDGE) continue;
316 reweightEdge (e, nbr0, nbr1, nbrs, verbose);
317 }
318 }
319 closeSubset(nbr0);
320 closeSubset(nbr1);
321 free(edges);
322 free(nbrs);
323}
324
325/* Return number in range [0,nedges] */
326static size_t computeIndex(size_t nedges, float s)
327{
328 size_t r = ceilf(nedges * (1 - s));
329 return r;
330}
331
332static size_t doBucket(Agedge_t ** edgelist, size_t idx, Dt_t * M)
333{
334 Agedge_t *e;
335 float weight = ED_wt(edgelist[idx]);
336
337 while ((e = edgelist[idx]) && (ED_wt(e) == weight)) {
338 idx++;
339 if (UF_find(agtail(e)) != UF_find(aghead(e)))
340 addSubset(M, e);
341 }
342
343 return idx;
344}
345
346static int walker(Agedge_t * e, Agraph_t * sg)
347{
348 UF_union(agtail(e), aghead(e));
349 agsubedge(sg, e, 1);
350 return 0;
351}
352
353static Agraph_t *findUMST(Agraph_t * g, Agedge_t ** edgelist, size_t nedges)
354{
355 Agraph_t *sg = agsubg(g, "UMST", 1);
356 Dt_t *M = mkSubset();
357
358 size_t i = 0;
359 while (i < nedges) {
360 i = doBucket(edgelist, i, M);
361 /* for each edge in M, add to sg, and union nodes */
362 walkSubset(M, (walkfn) walker, sg);
363 if (i < nedges)
364 clearSubset(M);
365 }
366 closeSubset(M);
367 return sg;
368}
369
370/* Remove loops and multiedges. */
371static void cleanGraph (Agraph_t * g, int verbose)
372{
373 Agnode_t *n;
374 Agnode_t *head;
375 Agedge_t *e;
376 Agedge_t *nexte;
377 Agedge_t *inexte;
378 Agedge_t *backe;
379 Agedge_t *preve = NULL;
380 int loopcnt = 0;
381 int multicnt = 0;
382 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
383 preve = NULL;
384 for (e = agfstout(g, n); e; e = nexte) {
385 head = aghead(e);
386 nexte = agnxtout(g, e);
387 if (head == n) {
388 agdelete(g, e);
389 preve = NULL;
390 loopcnt++;
391 }
392 else if (preve && (head == aghead(preve))) {
393 agdelete(g, e);
394 multicnt++;
395 }
396 else {
397 preve = e;
398 if ((backe = agedge(g, head, n, 0, 0))) {
399 while (backe && (agtail(backe) == head)) {
400 multicnt++;
401 inexte = agnxtin(g, backe);
402 agdelete(g, backe);
403 backe = inexte;
404 }
405 }
406
407 }
408 }
409 }
410 if (verbose)
411 fprintf (stderr, "cleanGraph: %d loops %d multiedges removed\n", loopcnt, multicnt);
412}
413
414void genSpine(Agraph_t * g, float sparse_ratio, int verbose)
415{
416 Agraph_t *sg_union;
417 Agnode_t *n;
418 Agedge_t *e;
419 Agedge_t **edgelist;
420 size_t i, index;
421 size_t nedges;
422 float threshold;
423
424 cleanGraph (g, verbose);
425 nedges = agnedges(g);
426 if (verbose)
427 fprintf(stderr, "Graph %s %d nodes %lu edges:\n", agnameof(g),
428 agnnodes(g), nedges);
429 aginit(g, AGNODE, "nodeinfo", sizeof(nodeinfo_t), 1);
430 aginit(g, AGEDGE, "edgeinfo", sizeof(edgeinfo_t), 1);
431
432 int eidx = 0;
433 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
434 for (e = agfstout(g, n); e; e = agnxtout(g, e)) {
435 ED_id(e) = eidx++;
436 }
437 }
438 if (verbose)
439 fprintf(stderr, "setEdgeWeights\n");
440 setEdgeWeights(g, verbose);
441
442 /* sort edges by non-increasing weight */
443 if (verbose)
444 fprintf(stderr, "sorting\n");
445 edgelist = N_NEW((size_t)(nedges + 1), Agedge_t *); /* NULL at end */
446 i = 0;
447 for (n = agfstnode(g); n; n = agnxtnode(g, n)) {
448 for (e = agfstout(g, n); e; e = agnxtout(g, e)) {
449 edgelist[i++] = e;
450 }
451 }
452 qsort(edgelist, nedges, sizeof(Agedge_t *), (qsort_cmpf)cmpe);
453#if 0
454 float curwt = -1;
455 int cnt = 0;
456 for (i = 0; i <= nedges; i++) {
457 e = edgelist[i];
458 if (e && (ED_wt(e) == curwt))
459 cnt++;
460 else {
461 if (cnt)
462 fprintf(stderr, "%f %d\n", curwt, cnt);
463 if (e) {
464 cnt = 1;
465 curwt = ED_wt(e);
466 }
467 }
468 }
469#endif
470 if (verbose)
471 fprintf(stderr, "heaviest wt %f least wt %f\n", ED_wt(edgelist[0]),
472 ED_wt(edgelist[nedges - 1]));
473
474 /* compute UMST */
475 sg_union = findUMST(g, edgelist, nedges);
476 int umst_size = agnedges(sg_union);
477 if (verbose)
478 fprintf(stderr, " union of maximum spanning trees: %d edges\n",
479 umst_size);
480
481 /* Find index of the |E|(1-sparse_ratio)th edge */
482 index = computeIndex(nedges, sparse_ratio);
483 if (verbose)
484 fprintf(stderr, " index %lu out of %lu\n", index, nedges);
485
486 /* set of edges with weights above threshold */
487 /* Add all edges with wt >= wt(edgelist[index]) */
488 /* As edgelist is sorted, first index edges */
489 int extra_edges = 0;
490 for (i = 1; i <= index; i++) {
491 e = edgelist[i - 1];
492 if (verbose) {
493 if (agsubedge(sg_union, e, 0) == NULL)
494 extra_edges++;
495 }
496 agsubedge(sg_union, e, 1);
497 }
498
499 /* Add any additional edges with same weight as e */
500 if (index) {
501 threshold = ED_wt(e);
502 for (; i <= nedges; i++) {
503 e = edgelist[i - 1];
504 if (ED_wt(e) >= threshold) {
505 if (verbose) {
506 if (agsubedge(sg_union, e, 0) == NULL)
507 extra_edges++;
508 }
509 agsubedge(sg_union, e, 1);
510 } else
511 break;
512 }
513 }
514 if (verbose)
515 fprintf(stderr,
516 "number of extra edges not in UMST %d total edges %d\n",
517 extra_edges, agnedges(sg_union));
518
519 /* layout graph sg_union */
520 Agsym_t *sym = agattr(g, AGEDGE, "spine", "0");
521 int ncnt = 0;
522 int ecnt = 0;
523 for (n = agfstnode(sg_union); n; n = agnxtnode(sg_union, n)) {
524 ncnt++;
525 for (e = agfstout(sg_union, n); e; e = agnxtout(sg_union, e)) {
526 ecnt++;
527 agxset(e, sym, "1");
528 }
529 }
530 if (verbose)
531 fprintf(stderr, "final ncnt %d ecnt %d\n", ncnt, ecnt);
532}
533
534#ifdef MAIN
535
536static FILE *openFile(char *cmd, char *name, char *mode)
537{
538 FILE *fp;
539 char *modestr;
540
541 fp = fopen(name, mode);
542 if (!fp) {
543 if (*mode == 'r')
544 modestr = "reading";
545 else
546 modestr = "writing";
547 fprintf(stderr, "%s: could not open file %s for %s\n",
548 cmd, name, modestr);
549 exit(1);
550 }
551 return fp;
552}
553
554static int setInt(char *s, int *v)
555{
556 char *endp;
557 long int l = strtol(s, char &endp, 10);
558
559 if (s == endp) {
560 fprintf(stderr, "Option value \"%s\" must be an integer\n", s);
561 return 1;
562 }
563 if (l < 0) {
564 fprintf(stderr, "Option value \"%s\" must be non-negative.\n", s);
565 return 1;
566 }
567 *v = (int)l;
568 return 0;
569
570}
571
572static int setFloat(char *s, float *v)
573{
574 char *endp;
575 float f = strtof(s, &endp);
576 if (s == endp) {
577 fprintf(stderr, "Option value \"%s\" must be a float\n", s);
578 return 1;
579 }
580 if ((f < 0) || (f > 1)) {
581 fprintf(stderr, "Option value \"%s\" must be in the range [0,1]\n",
582 s);
583 return 1;
584 }
585 *v = f;
586 return 0;
587
588}
589
590static char *Usage = "Usage: %s [-?] [options]\n\
591 -r<n> : sparsification ratio [0,1] (0.5) \n\
592 -o<outfile> : put output in <outfile> (stderr)\n\
593 -v[<val>] : verbose mode \n\
594 -? : print usage\n";
595
596static void usage(char *cmd, int v)
597{
598 fprintf(v ? stderr : stdout, Usage, cmd);
599 exit(v);
600}
601
602static void doOpts(int argc, char *argv[], opts_t * op)
603{
604 int c;
605 char *cmd = argv[0];
606
607 op->outfp = NULL;
608 op->Files = NULL;
609 op->verbose = 0;
610 op->sparse_ratio = 0.5;
611
612 opterr = 0;
613 while ((c = getopt(argc, argv, "o:r:v::")) != -1) {
614 switch (c) {
615 case 'o':
616 op->outfp = openFile(cmd, optarg, "w");
617 break;
618 case 'v':
619 if (optarg)
620 if (setInt(optarg, &(op->verbose))) {
621 fprintf(stderr, "%s: bad value for flag -%c - ignored\n",
622 cmd, c);
623 }
624 }
625 else
626 op->verbose = 1;
627 break;
628 case 'r':
629 if (setFloat(optarg, &(op->sparse_ratio))) {
630 fprintf(stderr, "%s: bad value for flag -%c - ignored\n",
631 cmd, c);
632 }
633 break;
634 case '?':
635 if (optopt == '?')
636 usage(cmd, 0);
637 else
638 fprintf(stderr, "%s: option -%c unrecognized - ignored\n",
639 cmd, optopt);
640 break;
641 default:
642 break;
643 }
644 }
645 argv += optind;
646 argc -= optind;
647
648 if (argc)
649 op->Files = argv;
650 if (op->outfp == NULL)
651 op->outfp = stderr;
652 if (op->verbose)
653 fprintf(stderr, "sparse ratio = %f\n", op->sparse_ratio);
654}
655
656static Agraph_t *gread(FILE * fp)
657{
658 return agread(fp, (Agdisc_t *) 0);
659}
660
661int main(int argc, char *argv[])
662{
663 opts_t opts;
664 ingraph_state ig;
665 Agraph_t *g;
666
667 doOpts(argc, argv, &opts);
668 newIngraph(&ig, opts.Files, gread);
669 while ((g = nextGraph(&ig)) != 0) {
670 genSpine(g, opts.sparse_ratio, opts.verbose);
671 agwrite(g, opts.outfp);
672 agclose(g);
673 }
674
675 return 0;
676}
677#endif
678