1 | /* $Id$ $Revision$ */ |
2 | /* vim:set shiftwidth=4 ts=8: */ |
3 | |
4 | /************************************************************************* |
5 | * Copyright (c) 2011 AT&T Intellectual Property |
6 | * All rights reserved. This program and the accompanying materials |
7 | * are made available under the terms of the Eclipse Public License v1.0 |
8 | * which accompanies this distribution, and is available at |
9 | * http://www.eclipse.org/legal/epl-v10.html |
10 | * |
11 | * Contributors: See CVS logs. Details at http://www.graphviz.org/ |
12 | *************************************************************************/ |
13 | |
14 | #include "digcola.h" |
15 | #ifdef DIGCOLA |
16 | #include <math.h> |
17 | #include <stdlib.h> |
18 | #include <time.h> |
19 | #include <stdio.h> |
20 | #include <float.h> |
21 | #include "stress.h" |
22 | #include "dijkstra.h" |
23 | #include "bfs.h" |
24 | #include "matrix_ops.h" |
25 | #include "kkutils.h" |
26 | #include "conjgrad.h" |
27 | #include "quad_prog_solver.h" |
28 | #include "matrix_ops.h" |
29 | |
30 | #define localConstrMajorIterations 15 |
31 | #define levels_sep_tol 1e-1 |
32 | |
33 | int stress_majorization_with_hierarchy(vtx_data * graph, /* Input graph in sparse representation */ |
34 | int n, /* Number of nodes */ |
35 | int nedges_graph, /* Number of edges */ |
36 | double **d_coords, /* Coordinates of nodes (output layout) */ |
37 | node_t ** nodes, /* Original nodes */ |
38 | int dim, /* Dimemsionality of layout */ |
39 | int opts, /* options */ |
40 | int model, /* difference model */ |
41 | int maxi, /* max iterations */ |
42 | double levels_gap) |
43 | { |
44 | int iterations = 0; /* Output: number of iteration of the process */ |
45 | |
46 | /************************************************* |
47 | ** Computation of full, dense, unrestricted k-D ** |
48 | ** stress minimization by majorization ** |
49 | ** This function imposes HIERARCHY CONSTRAINTS ** |
50 | *************************************************/ |
51 | |
52 | int i, j, k; |
53 | boolean directionalityExist = FALSE; |
54 | float *lap1 = NULL; |
55 | float *dist_accumulator = NULL; |
56 | float *tmp_coords = NULL; |
57 | float **b = NULL; |
58 | #ifdef NONCORE |
59 | FILE *fp = NULL; |
60 | #endif |
61 | double *degrees = NULL; |
62 | float *lap2 = NULL; |
63 | int lap_length; |
64 | float *f_storage = NULL; |
65 | float **coords = NULL; |
66 | |
67 | double conj_tol = tolerance_cg; /* tolerance of Conjugate Gradient */ |
68 | float **unpackedLap = NULL; |
69 | CMajEnv *cMajEnv = NULL; |
70 | double y_0; |
71 | int length; |
72 | int smart_ini = opts & opt_smart_init; |
73 | DistType diameter; |
74 | float *Dij = NULL; |
75 | /* to compensate noises, we never consider gaps smaller than 'abs_tol' */ |
76 | double abs_tol = 1e-2; |
77 | /* Additionally, we never consider gaps smaller than 'abs_tol'*<avg_gap> */ |
78 | double relative_tol = levels_sep_tol; |
79 | int *ordering = NULL, *levels = NULL; |
80 | float constant_term; |
81 | int count; |
82 | double degree; |
83 | int step; |
84 | float val; |
85 | double old_stress, new_stress; |
86 | boolean converged; |
87 | int len; |
88 | int num_levels; |
89 | float *hierarchy_boundaries; |
90 | |
91 | if (graph[0].edists != NULL) { |
92 | for (i = 0; i < n; i++) { |
93 | for (j = 1; j < graph[i].nedges; j++) { |
94 | directionalityExist = directionalityExist |
95 | || (graph[i].edists[j] != 0); |
96 | } |
97 | } |
98 | } |
99 | if (!directionalityExist) { |
100 | return stress_majorization_kD_mkernel(graph, n, nedges_graph, |
101 | d_coords, nodes, dim, opts, |
102 | model, maxi); |
103 | } |
104 | |
105 | /****************************************************************** |
106 | ** First, partition nodes into layers: These are our constraints ** |
107 | ******************************************************************/ |
108 | |
109 | if (smart_ini) { |
110 | double *x; |
111 | double *y; |
112 | if (dim > 2) { |
113 | /* the dim==2 case is handled below */ |
114 | if (stress_majorization_kD_mkernel(graph, n, nedges_graph, |
115 | d_coords + 1, nodes, dim - 1, |
116 | opts, model, 15) < 0) |
117 | return -1; |
118 | /* now copy the y-axis into the (dim-1)-axis */ |
119 | for (i = 0; i < n; i++) { |
120 | d_coords[dim - 1][i] = d_coords[1][i]; |
121 | } |
122 | } |
123 | |
124 | x = d_coords[0]; |
125 | y = d_coords[1]; |
126 | if (compute_y_coords(graph, n, y, n)) { |
127 | iterations = -1; |
128 | goto finish; |
129 | } |
130 | if (compute_hierarchy(graph, n, abs_tol, relative_tol, y, &ordering, |
131 | &levels, &num_levels)) { |
132 | iterations = -1; |
133 | goto finish; |
134 | } |
135 | if (num_levels < 1) { |
136 | /* no hierarchy found, use faster algorithm */ |
137 | return stress_majorization_kD_mkernel(graph, n, nedges_graph, |
138 | d_coords, nodes, dim, |
139 | opts, model, maxi); |
140 | } |
141 | |
142 | if (levels_gap > 0) { |
143 | /* ensure that levels are separated in the initial layout */ |
144 | double displacement = 0; |
145 | int stop; |
146 | for (i = 0; i < num_levels; i++) { |
147 | displacement += |
148 | MAX((double) 0, |
149 | levels_gap - (y[ordering[levels[i]]] + |
150 | displacement - |
151 | y[ordering[levels[i] - 1]])); |
152 | stop = i < num_levels - 1 ? levels[i + 1] : n; |
153 | for (j = levels[i]; j < stop; j++) { |
154 | y[ordering[j]] += displacement; |
155 | } |
156 | } |
157 | } |
158 | if (dim == 2) { |
159 | if (IMDS_given_dim(graph, n, y, x, Epsilon)) { |
160 | iterations = -1; |
161 | goto finish; |
162 | } |
163 | } |
164 | } else { |
165 | initLayout(graph, n, dim, d_coords, nodes); |
166 | if (compute_hierarchy(graph, n, abs_tol, relative_tol, NULL, &ordering, |
167 | &levels, &num_levels)) { |
168 | iterations = -1; |
169 | goto finish; |
170 | } |
171 | } |
172 | if (n == 1) |
173 | return 0; |
174 | |
175 | hierarchy_boundaries = N_GNEW(num_levels, float); |
176 | |
177 | /**************************************************** |
178 | ** Compute the all-pairs-shortest-distances matrix ** |
179 | ****************************************************/ |
180 | |
181 | if (maxi == 0) |
182 | return iterations; |
183 | |
184 | if (Verbose) |
185 | start_timer(); |
186 | |
187 | if (model == MODEL_SUBSET) { |
188 | /* weight graph to separate high-degree nodes */ |
189 | /* and perform slower Dijkstra-based computation */ |
190 | if (Verbose) |
191 | fprintf(stderr, "Calculating subset model" ); |
192 | Dij = compute_apsp_artifical_weights_packed(graph, n); |
193 | } else if (model == MODEL_CIRCUIT) { |
194 | Dij = circuitModel(graph, n); |
195 | if (!Dij) { |
196 | agerr(AGWARN, |
197 | "graph is disconnected. Hence, the circuit model\n" ); |
198 | agerr(AGPREV, |
199 | "is undefined. Reverting to the shortest path model.\n" ); |
200 | } |
201 | } else if (model == MODEL_MDS) { |
202 | if (Verbose) |
203 | fprintf(stderr, "Calculating MDS model" ); |
204 | Dij = mdsModel(graph, n); |
205 | } |
206 | if (!Dij) { |
207 | if (Verbose) |
208 | fprintf(stderr, "Calculating shortest paths" ); |
209 | Dij = compute_apsp_packed(graph, n); |
210 | } |
211 | if (Verbose) { |
212 | fprintf(stderr, ": %.2f sec\n" , elapsed_sec()); |
213 | fprintf(stderr, "Setting initial positions" ); |
214 | start_timer(); |
215 | } |
216 | |
217 | diameter = -1; |
218 | length = n + n * (n - 1) / 2; |
219 | for (i = 0; i < length; i++) { |
220 | if (Dij[i] > diameter) { |
221 | diameter = (int) Dij[i]; |
222 | } |
223 | } |
224 | |
225 | if (!smart_ini) { |
226 | /* for numerical stability, scale down layout */ |
227 | /* No Jiggling, might conflict with constraints */ |
228 | double max = 1; |
229 | for (i = 0; i < dim; i++) { |
230 | for (j = 0; j < n; j++) { |
231 | if (fabs(d_coords[i][j]) > max) { |
232 | max = fabs(d_coords[i][j]); |
233 | } |
234 | } |
235 | } |
236 | for (i = 0; i < dim; i++) { |
237 | for (j = 0; j < n; j++) { |
238 | d_coords[i][j] *= 10 / max; |
239 | } |
240 | } |
241 | } |
242 | |
243 | if (levels_gap > 0) { |
244 | int length = n + n * (n - 1) / 2; |
245 | double sum1, sum2, scale_ratio; |
246 | int count; |
247 | sum1 = (float) (n * (n - 1) / 2); |
248 | sum2 = 0; |
249 | for (count = 0, i = 0; i < n - 1; i++) { |
250 | count++; // skip self distance |
251 | for (j = i + 1; j < n; j++, count++) { |
252 | sum2 += distance_kD(d_coords, dim, i, j) / Dij[count]; |
253 | } |
254 | } |
255 | scale_ratio = sum2 / sum1; |
256 | /* double scale_ratio=10; */ |
257 | for (i = 0; i < length; i++) { |
258 | Dij[i] *= (float) scale_ratio; |
259 | } |
260 | } |
261 | |
262 | /************************** |
263 | ** Layout initialization ** |
264 | **************************/ |
265 | |
266 | for (i = 0; i < dim; i++) { |
267 | orthog1(n, d_coords[i]); |
268 | } |
269 | |
270 | /* for the y-coords, don't center them, but translate them so y[0]=0 */ |
271 | y_0 = d_coords[1][0]; |
272 | for (i = 0; i < n; i++) { |
273 | d_coords[1][i] -= y_0; |
274 | } |
275 | |
276 | coords = N_GNEW(dim, float *); |
277 | f_storage = N_GNEW(dim * n, float); |
278 | for (i = 0; i < dim; i++) { |
279 | coords[i] = f_storage + i * n; |
280 | for (j = 0; j < n; j++) { |
281 | coords[i][j] = (float) (d_coords[i][j]); |
282 | } |
283 | } |
284 | |
285 | /* compute constant term in stress sum |
286 | * which is \sum_{i<j} w_{ij}d_{ij}^2 |
287 | */ |
288 | constant_term = (float) (n * (n - 1) / 2); |
289 | |
290 | if (Verbose) |
291 | fprintf(stderr, ": %.2f sec" , elapsed_sec()); |
292 | |
293 | /************************** |
294 | ** Laplacian computation ** |
295 | **************************/ |
296 | |
297 | lap2 = Dij; |
298 | lap_length = n + n * (n - 1) / 2; |
299 | square_vec(lap_length, lap2); |
300 | /* compute off-diagonal entries */ |
301 | invert_vec(lap_length, lap2); |
302 | |
303 | /* compute diagonal entries */ |
304 | count = 0; |
305 | degrees = N_GNEW(n, double); |
306 | set_vector_val(n, 0, degrees); |
307 | for (i = 0; i < n - 1; i++) { |
308 | degree = 0; |
309 | count++; // skip main diag entry |
310 | for (j = 1; j < n - i; j++, count++) { |
311 | val = lap2[count]; |
312 | degree += val; |
313 | degrees[i + j] -= val; |
314 | } |
315 | degrees[i] -= degree; |
316 | } |
317 | for (step = n, count = 0, i = 0; i < n; i++, count += step, step--) { |
318 | lap2[count] = (float) degrees[i]; |
319 | } |
320 | |
321 | #ifdef NONCORE |
322 | fpos_t pos; |
323 | if (n > max_nodes_in_mem) { |
324 | #define FILENAME "tmp_Dij$$$.bin" |
325 | fp = fopen(FILENAME, "wb" ); |
326 | fwrite(lap2, sizeof(float), lap_length, fp); |
327 | fclose(fp); |
328 | fp = NULL; |
329 | } |
330 | #endif |
331 | |
332 | /************************* |
333 | ** Layout optimization ** |
334 | *************************/ |
335 | |
336 | b = N_GNEW(dim, float *); |
337 | b[0] = N_GNEW(dim * n, float); |
338 | for (k = 1; k < dim; k++) { |
339 | b[k] = b[0] + k * n; |
340 | } |
341 | |
342 | tmp_coords = N_GNEW(n, float); |
343 | dist_accumulator = N_GNEW(n, float); |
344 | #ifdef NONCORE |
345 | if (n <= max_nodes_in_mem) { |
346 | #endif |
347 | lap1 = N_GNEW(lap_length, float); |
348 | #ifdef NONCORE |
349 | } else { |
350 | lap1 = lap2; |
351 | fp = fopen(FILENAME, "rb" ); |
352 | fgetpos(fp, &pos); |
353 | } |
354 | #endif |
355 | |
356 | old_stress = DBL_MAX; /* at least one iteration */ |
357 | |
358 | unpackedLap = unpackMatrix(lap2, n); |
359 | cMajEnv = |
360 | initConstrainedMajorization(lap2, n, ordering, levels, num_levels); |
361 | |
362 | for (converged = FALSE, iterations = 0; |
363 | iterations < maxi && !converged; iterations++) { |
364 | |
365 | /* First, construct Laplacian of 1/(d_ij*|p_i-p_j|) */ |
366 | set_vector_val(n, 0, degrees); |
367 | #ifdef NONCORE |
368 | if (n <= max_nodes_in_mem) { |
369 | #endif |
370 | sqrt_vecf(lap_length, lap2, lap1); |
371 | #ifdef NONCORE |
372 | } else { |
373 | sqrt_vec(lap_length, lap1); |
374 | } |
375 | #endif |
376 | for (count = 0, i = 0; i < n - 1; i++) { |
377 | len = n - i - 1; |
378 | /* init 'dist_accumulator' with zeros */ |
379 | set_vector_valf(n, 0, dist_accumulator); |
380 | |
381 | /* put into 'dist_accumulator' all squared distances |
382 | * between 'i' and 'i'+1,...,'n'-1 |
383 | */ |
384 | for (k = 0; k < dim; k++) { |
385 | set_vector_valf(len, coords[k][i], tmp_coords); |
386 | vectors_mult_additionf(len, tmp_coords, -1, |
387 | coords[k] + i + 1); |
388 | square_vec(len, tmp_coords); |
389 | vectors_additionf(len, tmp_coords, dist_accumulator, |
390 | dist_accumulator); |
391 | } |
392 | |
393 | /* convert to 1/d_{ij} */ |
394 | invert_sqrt_vec(len, dist_accumulator); |
395 | /* detect overflows */ |
396 | for (j = 0; j < len; j++) { |
397 | if (dist_accumulator[j] >= FLT_MAX |
398 | || dist_accumulator[j] < 0) { |
399 | dist_accumulator[j] = 0; |
400 | } |
401 | } |
402 | |
403 | count++; /* save place for the main diagonal entry */ |
404 | degree = 0; |
405 | for (j = 0; j < len; j++, count++) { |
406 | val = lap1[count] *= dist_accumulator[j]; |
407 | degree += val; |
408 | degrees[i + j + 1] -= val; |
409 | } |
410 | degrees[i] -= degree; |
411 | } |
412 | for (step = n, count = 0, i = 0; i < n; i++, count += step, step--) { |
413 | lap1[count] = (float) degrees[i]; |
414 | } |
415 | |
416 | /* Now compute b[] (L^(X(t))*X(t)) */ |
417 | for (k = 0; k < dim; k++) { |
418 | /* b[k] := lap1*coords[k] */ |
419 | right_mult_with_vector_ff(lap1, n, coords[k], b[k]); |
420 | } |
421 | |
422 | /* compute new stress |
423 | * remember that the Laplacians are negated, so we subtract |
424 | * instead of add and vice versa |
425 | */ |
426 | new_stress = 0; |
427 | for (k = 0; k < dim; k++) { |
428 | new_stress += vectors_inner_productf(n, coords[k], b[k]); |
429 | } |
430 | new_stress *= 2; |
431 | new_stress += constant_term; // only after mult by 2 |
432 | #ifdef NONCORE |
433 | if (n > max_nodes_in_mem) { |
434 | /* restore lap2 from disk */ |
435 | fsetpos(fp, &pos); |
436 | fread(lap2, sizeof(float), lap_length, fp); |
437 | } |
438 | #endif |
439 | for (k = 0; k < dim; k++) { |
440 | right_mult_with_vector_ff(lap2, n, coords[k], tmp_coords); |
441 | new_stress -= vectors_inner_productf(n, coords[k], tmp_coords); |
442 | } |
443 | |
444 | #ifdef ALTERNATIVE_STRESS_CALC |
445 | { |
446 | double mat_stress = new_stress; |
447 | double compute_stress(float **coords, float *lap, int dim, |
448 | int n); |
449 | new_stress = compute_stress(coords, lap2, dim, n); |
450 | if (fabs(mat_stress - new_stress) / |
451 | min(mat_stress, new_stress) > 0.001) { |
452 | fprintf(stderr, |
453 | "Diff stress vals: %lf %lf (iteration #%d)\n" , |
454 | mat_stress, new_stress, iterations); |
455 | } |
456 | } |
457 | #endif |
458 | /* check for convergence */ |
459 | converged = |
460 | fabs(new_stress - old_stress) / fabs(old_stress + 1e-10) < |
461 | Epsilon; |
462 | converged = converged || (iterations > 1 |
463 | && new_stress > old_stress); |
464 | /* in first iteration we allowed stress increase, which |
465 | * might result ny imposing constraints |
466 | */ |
467 | old_stress = new_stress; |
468 | |
469 | for (k = 0; k < dim; k++) { |
470 | /* now we find the optimizer of trace(X'LX)+X'B by solving 'dim' |
471 | * system of equations, thereby obtaining the new coordinates. |
472 | * If we use the constraints (given by the var's: 'ordering', |
473 | * 'levels' and 'num_levels'), we cannot optimize |
474 | * trace(X'LX)+X'B by simply solving equations, but we have |
475 | * to use a quadratic programming solver |
476 | * note: 'lap2' is a packed symmetric matrix, that is its |
477 | * upper-triangular part is arranged in a vector row-wise |
478 | * also note: 'lap2' is really the negated laplacian (the |
479 | * laplacian is -'lap2') |
480 | */ |
481 | |
482 | if (k == 1) { |
483 | /* use quad solver in the y-dimension */ |
484 | constrained_majorization_new_with_gaps(cMajEnv, b[k], |
485 | coords, dim, k, |
486 | localConstrMajorIterations, |
487 | hierarchy_boundaries, |
488 | levels_gap); |
489 | |
490 | } else { |
491 | /* use conjugate gradient for all dimensions except y */ |
492 | if (conjugate_gradient_mkernel(lap2, coords[k], b[k], n, |
493 | conj_tol, n)) { |
494 | iterations = -1; |
495 | goto finish; |
496 | } |
497 | } |
498 | } |
499 | } |
500 | free(hierarchy_boundaries); |
501 | deleteCMajEnv(cMajEnv); |
502 | |
503 | if (coords != NULL) { |
504 | for (i = 0; i < dim; i++) { |
505 | for (j = 0; j < n; j++) { |
506 | d_coords[i][j] = coords[i][j]; |
507 | } |
508 | } |
509 | free(coords[0]); |
510 | free(coords); |
511 | } |
512 | |
513 | if (b) { |
514 | free(b[0]); |
515 | free(b); |
516 | } |
517 | free(tmp_coords); |
518 | free(dist_accumulator); |
519 | free(degrees); |
520 | free(lap2); |
521 | |
522 | |
523 | #ifdef NONCORE |
524 | if (n <= max_nodes_in_mem) { |
525 | #endif |
526 | free(lap1); |
527 | #ifdef NONCORE |
528 | } |
529 | if (fp) |
530 | fclose(fp); |
531 | #endif |
532 | |
533 | finish: |
534 | free(ordering); |
535 | |
536 | free(levels); |
537 | |
538 | if (unpackedLap) { |
539 | free(unpackedLap[0]); |
540 | free(unpackedLap); |
541 | } |
542 | return iterations; |
543 | } |
544 | #endif /* DIGCOLA */ |
545 | |