| 1 | /* $Id$Revision: */ |
| 2 | /* vim:set shiftwidth=4 ts=8: */ |
| 3 | |
| 4 | /************************************************************************* |
| 5 | * Copyright (c) 2011 AT&T Intellectual Property |
| 6 | * All rights reserved. This program and the accompanying materials |
| 7 | * are made available under the terms of the Eclipse Public License v1.0 |
| 8 | * which accompanies this distribution, and is available at |
| 9 | * http://www.eclipse.org/legal/epl-v10.html |
| 10 | * |
| 11 | * Contributors: See CVS logs. Details at http://www.graphviz.org/ |
| 12 | *************************************************************************/ |
| 13 | |
| 14 | /* Modularity Quality definitation: |
| 15 | |
| 16 | We assume undirected graph. Directed graph should be converted by summing edge weights. |
| 17 | |
| 18 | Given a partition P of V into k clusters. |
| 19 | |
| 20 | Let E(i,j) be the set of edges between cluster i and j. |
| 21 | Let |E(i,j)| be the sum of edge weights of edges in E(i,j). |
| 22 | |
| 23 | Let E(i,i) be the set of edges within cluster i, but excluding self-edges. |
| 24 | Let |E(i,i)| be the sum of edge weights of edges in E(i,i). |
| 25 | |
| 26 | Let V(i) be the sets of vertices in i |
| 27 | |
| 28 | The intra-cluster edges concentration for a cluster i is |
| 29 | (the denominator could be |V(i)|*(|V(i)-1)/2 strictly speaking as we exclude self-edges): |
| 30 | |
| 31 | |E(i,i)| |
| 32 | ----------- |
| 33 | (|V(i)|^2/2) |
| 34 | |
| 35 | The inter-cluster edges concentration between cluster i and j is |
| 36 | |
| 37 | |E(i,j)| |
| 38 | ------------ |
| 39 | |V(i)|*|V(j)| |
| 40 | |
| 41 | So the cluster index is defined as the average intra cluster edge concentration, minus |
| 42 | the inter-cluster edge concentration: |
| 43 | |
| 44 | . |E(i,i)| |E(i,j)| |
| 45 | MQ(P) = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1)/2)) * \sum_{i<j} ------------------- = mq_in/k - mq_out/(k*(k-1)/2) |
| 46 | . (|V(i)|^2/2) |V(i)|*|V(j)| |
| 47 | |
| 48 | or |
| 49 | |
| 50 | . |E(i,i)| |E(i,j)| |
| 51 | MQ(P)/2 = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1))) * \sum_{i<j} ------------------ = mq_in/k - mq_out/(k*(k-1)) |
| 52 | . |V(i)|^2 |V(i)|*|V(j)| |
| 53 | |
| 54 | Notice that if we assume the graph is unweights (edge weights = 1), then 0<= MQ <= 1. |
| 55 | For weighted graph, MQ may not be within 0 to 1. We could normalized it, but |
| 56 | for comparing clustering quality of the same graph but different partitioning, this |
| 57 | unnormalized quantity is not a problem. |
| 58 | |
| 59 | */ |
| 60 | |
| 61 | #define STANDALONE |
| 62 | #include "general.h" |
| 63 | #include "SparseMatrix.h" |
| 64 | #include "mq.h" |
| 65 | #include "LinkedList.h" |
| 66 | |
| 67 | static real get_mq(SparseMatrix A, int *assignment, int *ncluster0, real *mq_in0, real *mq_out0, real **dout0){ |
| 68 | /* given a symmetric matrix representation of a graph and an assignment of nodes into clusters, calculate the modularity quality. |
| 69 | assignment: assignmenet[i] gives the cluster assignment of node i. 0 <= assignment[i] < ncluster. |
| 70 | ncluster: number of clusters |
| 71 | mq_in: the part of MQ to do with intra-cluster edges, before divide by 1/k |
| 72 | mq_out: the part of MQ to do with inter-cluster edges, before divide by 1/(k*(k-1)) |
| 73 | mq = 2*(mq_in/k - mq_out/(k*(k-1))); |
| 74 | */ |
| 75 | int ncluster = 0; |
| 76 | int n = A->m; |
| 77 | int test_pattern_symmetry_only = FALSE; |
| 78 | int *counts, *ia = A->ia, *ja = A->ja, k, i, j, jj; |
| 79 | real mq_in = 0, mq_out = 0, *a = NULL, Vi, Vj; |
| 80 | int c; |
| 81 | real *dout; |
| 82 | |
| 83 | |
| 84 | assert(SparseMatrix_is_symmetric(A, test_pattern_symmetry_only)); |
| 85 | assert(A->n == n); |
| 86 | if (A->type == MATRIX_TYPE_REAL) a = (real*) A->a; |
| 87 | |
| 88 | counts = MALLOC(sizeof(int)*n); |
| 89 | |
| 90 | for (i = 0; i < n; i++) counts[i] = 0; |
| 91 | |
| 92 | for (i = 0; i < n; i++){ |
| 93 | assert(assignment[i] >= 0 && assignment[i] < n); |
| 94 | if (counts[assignment[i]] == 0) ncluster++; |
| 95 | counts[assignment[i]]++; |
| 96 | } |
| 97 | k = ncluster; |
| 98 | assert(ncluster <= n); |
| 99 | |
| 100 | for (i = 0; i < n; i++){ |
| 101 | assert(assignment[i] < ncluster); |
| 102 | c = assignment[i]; |
| 103 | Vi = counts[c]; |
| 104 | for (j = ia[i] ; j < ia[i+1]; j++){ |
| 105 | /* ASSUME UNDIRECTED */ |
| 106 | jj = ja[j]; |
| 107 | if (jj >= i) continue; |
| 108 | assert(assignment[jj] < ncluster); |
| 109 | Vj = counts[assignment[jj]]; |
| 110 | if (assignment[jj] == c){ |
| 111 | if (a) { |
| 112 | mq_in += a[j]/(Vi*Vi); |
| 113 | } else { |
| 114 | mq_in += 1./(Vi*Vi); |
| 115 | } |
| 116 | } else { |
| 117 | if (a) { |
| 118 | mq_out += a[j]/(Vi*Vj); |
| 119 | } else { |
| 120 | mq_out += 1./(Vi*Vj); |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | /* calculate scaled out degree */ |
| 128 | dout = MALLOC(sizeof(real)*n); |
| 129 | for (i = 0; i < n; i++){ |
| 130 | dout[i] = 0; |
| 131 | for (j = ia[i]; j < ia[i+1]; j++){ |
| 132 | jj = ja[j]; |
| 133 | if (jj == i) continue; |
| 134 | if (a){ |
| 135 | dout[i] += a[j]/(real) counts[assignment[jj]]; |
| 136 | } else { |
| 137 | dout[i] += 1./(real) counts[assignment[jj]]; |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | *ncluster0 = k; |
| 143 | *mq_in0 = mq_in; |
| 144 | *mq_out0 = mq_out; |
| 145 | *dout0 = dout; |
| 146 | FREE(counts); |
| 147 | |
| 148 | if (k > 1){ |
| 149 | return 2*(mq_in/k - mq_out/(k*(k-1))); |
| 150 | } else { |
| 151 | return 2*mq_in; |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_init(SparseMatrix A, int level){ |
| 156 | Multilevel_MQ_Clustering grid; |
| 157 | int n = A->n, i; |
| 158 | int *matching; |
| 159 | |
| 160 | assert(A->type == MATRIX_TYPE_REAL); |
| 161 | assert(SparseMatrix_is_symmetric(A, FALSE)); |
| 162 | |
| 163 | if (!A) return NULL; |
| 164 | assert(A->m == n); |
| 165 | grid = MALLOC(sizeof(struct Multilevel_MQ_Clustering_struct)); |
| 166 | grid->level = level; |
| 167 | grid->n = n; |
| 168 | grid->A = A; |
| 169 | grid->P = NULL; |
| 170 | grid->R = NULL; |
| 171 | grid->next = NULL; |
| 172 | grid->prev = NULL; |
| 173 | grid->delete_top_level_A = FALSE; |
| 174 | matching = grid->matching = MALLOC(sizeof(real)*(n)); |
| 175 | grid->deg_intra = NULL; |
| 176 | grid->dout = NULL; |
| 177 | grid->wgt = NULL; |
| 178 | |
| 179 | if (level == 0){ |
| 180 | real mq = 0, mq_in, mq_out; |
| 181 | int n = A->n, ncluster; |
| 182 | real *deg_intra, *wgt, *dout; |
| 183 | |
| 184 | grid->deg_intra = MALLOC(sizeof(real)*(n)); |
| 185 | deg_intra = grid->deg_intra; |
| 186 | |
| 187 | grid->wgt = MALLOC(sizeof(real)*n); |
| 188 | wgt = grid->wgt; |
| 189 | |
| 190 | for (i = 0; i < n; i++){ |
| 191 | deg_intra[i] = 0; |
| 192 | wgt[i] = 1.; |
| 193 | } |
| 194 | for (i = 0; i < n; i++) matching[i] = i; |
| 195 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); |
| 196 | fprintf(stderr,"ncluster = %d, mq = %f\n" , ncluster, mq); |
| 197 | grid->mq = mq; |
| 198 | grid->mq_in = mq_in; |
| 199 | grid->mq_out = mq_out; |
| 200 | grid->dout = dout; |
| 201 | grid->ncluster = ncluster; |
| 202 | |
| 203 | } |
| 204 | |
| 205 | |
| 206 | return grid; |
| 207 | } |
| 208 | |
| 209 | void Multilevel_MQ_Clustering_delete(Multilevel_MQ_Clustering grid){ |
| 210 | if (!grid) return; |
| 211 | if (grid->A){ |
| 212 | if (grid->level == 0) { |
| 213 | if (grid->delete_top_level_A) SparseMatrix_delete(grid->A); |
| 214 | } else { |
| 215 | SparseMatrix_delete(grid->A); |
| 216 | } |
| 217 | } |
| 218 | SparseMatrix_delete(grid->P); |
| 219 | SparseMatrix_delete(grid->R); |
| 220 | FREE(grid->matching); |
| 221 | FREE(grid->deg_intra); |
| 222 | FREE(grid->dout); |
| 223 | FREE(grid->wgt); |
| 224 | Multilevel_MQ_Clustering_delete(grid->next); |
| 225 | FREE(grid); |
| 226 | } |
| 227 | |
| 228 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_establish(Multilevel_MQ_Clustering grid, int maxcluster){ |
| 229 | int *matching = grid->matching; |
| 230 | SparseMatrix A = grid->A; |
| 231 | int n = grid->n, level = grid->level, nc = 0, nclusters = n; |
| 232 | real mq = 0, mq_in = 0, mq_out = 0, mq_new, mq_in_new, mq_out_new, mq_max = 0, mq_in_max = 0, mq_out_max = 0; |
| 233 | int *ia = A->ia, *ja = A->ja; |
| 234 | real *a, amax = 0; |
| 235 | real *deg_intra = grid->deg_intra, *wgt = grid->wgt; |
| 236 | real *deg_intra_new, *wgt_new = NULL; |
| 237 | int i, j, k, jj, jc, jmax; |
| 238 | real *deg_inter, gain = 0, *dout = grid->dout, *dout_new, deg_in_i, deg_in_j, wgt_i, wgt_j, a_ij, dout_i, dout_j, dout_max = 0, wgt_jmax = 0; |
| 239 | int *mask; |
| 240 | real maxgain = 0; |
| 241 | real total_gain = 0; |
| 242 | SingleLinkedList *neighbors = NULL, lst; |
| 243 | |
| 244 | |
| 245 | neighbors = MALLOC(sizeof(SingleLinkedList)*n); |
| 246 | for (i = 0; i < n; i++) neighbors[i] = NULL; |
| 247 | |
| 248 | mq = grid->mq; |
| 249 | mq_in = grid->mq_in; |
| 250 | mq_out = grid->mq_out; |
| 251 | |
| 252 | deg_intra_new = MALLOC(sizeof(real)*n); |
| 253 | wgt_new = MALLOC(sizeof(real)*n); |
| 254 | deg_inter = MALLOC(sizeof(real)*n); |
| 255 | mask = MALLOC(sizeof(int)*n); |
| 256 | dout_new = MALLOC(sizeof(real)*n); |
| 257 | for (i = 0; i < n; i++) mask[i] = -1; |
| 258 | |
| 259 | assert(n == A->n); |
| 260 | for (i = 0; i < n; i++) matching[i] = UNMATCHED; |
| 261 | |
| 262 | /* gain in merging node A into cluster B is |
| 263 | mq_in_new = mq_in - |E(A,A)|/(V(A))^2 - |E(B,B)|/(V(B))^2 + (|E(A,A)|+|E(B,B)|+|E(A,B)|)/(|V(A)|+|V(B)|)^2 |
| 264 | . = mq_in - deg_intra(A)/|A|^2 - deg_intra(B)/|B|^2 + (deg_intra(A)+deg_intra(B)+a(A,B))/(|A|+|B|)^2 |
| 265 | |
| 266 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) |
| 267 | . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) |
| 268 | . = mq_out + a(A,B)/(|A|*|B|)-\sum_{C and A connected} a(A,C)/(|A|*|C|)-\sum_{C and B connected} a(B,C)/(|B|*|C|) |
| 269 | . + \sum_{C connected to A or B, C!=A, C!=B} (a(A,C)+a(B,C))/(|C|*(|A|+|B|)) |
| 270 | Denote: |
| 271 | dout(i) = \sum_{j -- i} a(i,j)/|j| |
| 272 | then |
| 273 | |
| 274 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) |
| 275 | . + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) |
| 276 | . = mq_out + a(A,B)/(|A|*|B|)-dout(A)/|A| - dout(B)/|B| |
| 277 | . + (dout(A)+dout(B))/(|A|+|B|) - (a(A,B)/|A|+a(A,B)/|B|)/(|A|+|B|) |
| 278 | . = mq_out -dout(A)/|A| - dout(B)/|B| + (dout(A)+dout(B))/(|A|+|B|) |
| 279 | after merging A and B into cluster AB, |
| 280 | dout(AB) = dout(A) + dout(B); |
| 281 | dout(C) := dout(C) - a(A,C)/|A| - a(B,C)/|B| + a(A,C)/(|A|+|B|) + a(B, C)/(|A|+|B|) |
| 282 | |
| 283 | mq_new = mq_in_new/(k-1) - mq_out_new/((k-1)*(k-2)) |
| 284 | gain = mq_new - mq |
| 285 | */ |
| 286 | a = (real*) A->a; |
| 287 | for (i = 0; i < n; i++){ |
| 288 | if (matching[i] != UNMATCHED) continue; |
| 289 | /* accumulate connections between i and clusters */ |
| 290 | for (j = ia[i]; j < ia[i+1]; j++){ |
| 291 | jj = ja[j]; |
| 292 | if (jj == i) continue; |
| 293 | if ((jc=matching[jj]) != UNMATCHED){ |
| 294 | if (mask[jc] != i) { |
| 295 | mask[jc] = i; |
| 296 | deg_inter[jc] = a[j]; |
| 297 | } else { |
| 298 | deg_inter[jc] += a[j]; |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | deg_in_i = deg_intra[i]; |
| 303 | wgt_i = wgt[i]; |
| 304 | dout_i = dout[i]; |
| 305 | |
| 306 | maxgain = 0; |
| 307 | jmax = -1; |
| 308 | for (j = ia[i]; j < ia[i+1]; j++){ |
| 309 | jj = ja[j]; |
| 310 | if (jj == i) continue; |
| 311 | jc = matching[jj]; |
| 312 | if (jc == UNMATCHED){ |
| 313 | a_ij = a[j]; |
| 314 | wgt_j = wgt[jj]; |
| 315 | deg_in_j = deg_intra[jj]; |
| 316 | dout_j = dout[jj]; |
| 317 | } else if (deg_inter[jc] < 0){ |
| 318 | continue; |
| 319 | } else { |
| 320 | a_ij = deg_inter[jc]; |
| 321 | wgt_j = wgt_new[jc]; |
| 322 | deg_inter[jc] = -1; /* so that we do not redo the calulation when we hit another neighbor in cluster jc */ |
| 323 | deg_in_j = deg_intra_new[jc]; |
| 324 | dout_j = dout_new[jc]; |
| 325 | } |
| 326 | |
| 327 | mq_in_new = mq_in - deg_in_i/pow(wgt_i, 2) - deg_in_j/pow(wgt_j,2) |
| 328 | + (deg_in_i + deg_in_j + a_ij)/pow(wgt_i + wgt_j,2); |
| 329 | |
| 330 | mq_out_new = mq_out - dout_i/wgt_i - dout_j/wgt_j + (dout_i + dout_j)/(wgt_i + wgt_j); |
| 331 | |
| 332 | if (nclusters > 2){ |
| 333 | mq_new = 2*(mq_in_new/(nclusters - 1) - mq_out_new/((nclusters - 1)*(nclusters - 2))); |
| 334 | } else { |
| 335 | mq_new = 2*mq_in_new/(nclusters - 1); |
| 336 | } |
| 337 | |
| 338 | #ifdef DEBUG |
| 339 | {int ncluster; |
| 340 | double mq2, mq_in2, mq_out2, *dout2; |
| 341 | int *matching2, nc2 = nc; |
| 342 | matching2 = MALLOC(sizeof(int)*A->m); |
| 343 | matching2 = MEMCPY(matching2, matching, sizeof(real)*A->m); |
| 344 | if (jc != UNMATCHED) { |
| 345 | matching2[i] = jc; |
| 346 | } else { |
| 347 | matching2[i] = nc2; |
| 348 | matching2[jj] = nc2; |
| 349 | nc2++; |
| 350 | } |
| 351 | for (k = 0; k < n; k++) if (matching2[k] == UNMATCHED) matching2[k] =nc2++; |
| 352 | mq2 = get_mq(A, matching2, &ncluster, &mq_in2, &mq_out2, &dout2); |
| 353 | fprintf(stderr," {dout_i, dout_j}={%f,%f}, {predicted, calculated}: mq = {%f, %f}, mq_in ={%f,%f}, mq_out = {%f,%f}\n" ,dout_i, dout_j, mq_new, mq2, mq_in_new, mq_in2, mq_out_new, mq_out2); |
| 354 | |
| 355 | mq_new = mq2; |
| 356 | |
| 357 | } |
| 358 | #endif |
| 359 | |
| 360 | gain = mq_new - mq; |
| 361 | if (Verbose) fprintf(stderr,"gain in merging node %d with node %d = %f-%f = %f\n" , i, jj, mq, mq_new, gain); |
| 362 | if (j == ia[i] || gain > maxgain){ |
| 363 | maxgain = gain; |
| 364 | jmax = jj; |
| 365 | amax = a_ij; |
| 366 | dout_max = dout_j; |
| 367 | wgt_jmax = wgt_j; |
| 368 | mq_max = mq_new; |
| 369 | mq_in_max = mq_in_new; |
| 370 | mq_out_max = mq_out_new; |
| 371 | } |
| 372 | |
| 373 | } |
| 374 | |
| 375 | /* now merge i and jmax */ |
| 376 | if (maxgain > 0 || (nc >= 1 && nc > maxcluster)){ |
| 377 | total_gain += maxgain; |
| 378 | jc = matching[jmax]; |
| 379 | if (jc == UNMATCHED){ |
| 380 | fprintf(stderr, "maxgain=%f, merge %d, %d\n" ,maxgain, i, jmax); |
| 381 | neighbors[nc] = SingleLinkedList_new_int(jmax); |
| 382 | neighbors[nc] = SingleLinkedList_prepend_int(neighbors[nc], i); |
| 383 | dout_new[nc] = dout_i + dout_max; |
| 384 | matching[i] = matching[jmax] = nc; |
| 385 | wgt_new[nc] = wgt[i] + wgt[jmax]; |
| 386 | deg_intra_new[nc] = deg_intra[i] + deg_intra[jmax] + amax; |
| 387 | nc++; |
| 388 | } else { |
| 389 | fprintf(stderr,"maxgain=%f, merge with existing cluster %d, %d\n" ,maxgain, i, jc); |
| 390 | neighbors[jc] = SingleLinkedList_prepend_int(neighbors[jc], i); |
| 391 | dout_new[jc] = dout_i + dout_max; |
| 392 | wgt_new[jc] += wgt[i]; |
| 393 | matching[i] = jc; |
| 394 | deg_intra_new[jc] += deg_intra[i] + amax; |
| 395 | } |
| 396 | mq = mq_max; |
| 397 | mq_in = mq_in_max; |
| 398 | mq_out = mq_out_max; |
| 399 | nclusters--; |
| 400 | } else { |
| 401 | fprintf(stderr,"gain: %f -- no gain, skip merging node %d\n" , maxgain, i); |
| 402 | assert(maxgain <= 0); |
| 403 | neighbors[nc] = SingleLinkedList_new_int(i); |
| 404 | matching[i] = nc; |
| 405 | deg_intra_new[nc] = deg_intra[i]; |
| 406 | wgt_new[nc] = wgt[i]; |
| 407 | nc++; |
| 408 | } |
| 409 | |
| 410 | |
| 411 | /* update scaled outdegree of neighbors of i and its merged node/cluster jmax */ |
| 412 | jc = matching[i]; |
| 413 | lst = neighbors[jc]; |
| 414 | do { |
| 415 | mask[*((int*) SingleLinkedList_get_data(lst))] = n+i; |
| 416 | lst = SingleLinkedList_get_next(lst); |
| 417 | } while (lst); |
| 418 | |
| 419 | lst = neighbors[jc]; |
| 420 | |
| 421 | do { |
| 422 | k = *((int*) SingleLinkedList_get_data(lst)); |
| 423 | for (j = ia[k]; j < ia[k+1]; j++){ |
| 424 | jj = ja[j]; |
| 425 | if (mask[jj] == n+i) continue;/* link to within cluster */ |
| 426 | if ((jc = matching[jj]) == UNMATCHED){ |
| 427 | if (k == i){ |
| 428 | dout[jj] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); |
| 429 | } else { |
| 430 | dout[jj] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); |
| 431 | } |
| 432 | } else { |
| 433 | if (k == i){ |
| 434 | dout_new[jc] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); |
| 435 | } else { |
| 436 | dout_new[jc] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); |
| 437 | } |
| 438 | } |
| 439 | } |
| 440 | lst = SingleLinkedList_get_next(lst); |
| 441 | } while (lst); |
| 442 | |
| 443 | } |
| 444 | |
| 445 | fprintf(stderr,"verbose=%d\n" ,Verbose); |
| 446 | if (Verbose) fprintf(stderr,"mq = %f new mq = %f level = %d, n = %d, nc = %d, gain = %g, mq_in = %f, mq_out = %f\n" , mq, mq + total_gain, |
| 447 | level, n, nc, total_gain, mq_in, mq_out); |
| 448 | |
| 449 | #ifdef DEBUG |
| 450 | {int ncluster; |
| 451 | |
| 452 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); |
| 453 | fprintf(stderr," mq = %f\n" ,mq); |
| 454 | |
| 455 | } |
| 456 | #endif |
| 457 | |
| 458 | if (nc >= 1 && (total_gain > 0 || nc < n)){ |
| 459 | /* now set up restriction and prolongation operator */ |
| 460 | SparseMatrix P, R, R0, B, cA; |
| 461 | real one = 1.; |
| 462 | Multilevel_MQ_Clustering cgrid; |
| 463 | |
| 464 | R0 = SparseMatrix_new(nc, n, 1, MATRIX_TYPE_REAL, FORMAT_COORD); |
| 465 | for (i = 0; i < n; i++){ |
| 466 | jj = matching[i]; |
| 467 | SparseMatrix_coordinate_form_add_entries(R0, 1, &jj, &i, &one); |
| 468 | } |
| 469 | R = SparseMatrix_from_coordinate_format(R0); |
| 470 | SparseMatrix_delete(R0); |
| 471 | P = SparseMatrix_transpose(R); |
| 472 | B = SparseMatrix_multiply(R, A); |
| 473 | if (!B) goto RETURN; |
| 474 | cA = SparseMatrix_multiply(B, P); |
| 475 | if (!cA) goto RETURN; |
| 476 | SparseMatrix_delete(B); |
| 477 | grid->P = P; |
| 478 | grid->R = R; |
| 479 | level++; |
| 480 | cgrid = Multilevel_MQ_Clustering_init(cA, level); |
| 481 | deg_intra_new = REALLOC(deg_intra_new, nc*sizeof(real)); |
| 482 | wgt_new = REALLOC(wgt_new, nc*sizeof(real)); |
| 483 | cgrid->deg_intra = deg_intra_new; |
| 484 | cgrid->mq = grid->mq + total_gain; |
| 485 | cgrid->wgt = wgt_new; |
| 486 | dout_new = REALLOC(dout_new, nc*sizeof(real)); |
| 487 | cgrid->dout = dout_new; |
| 488 | |
| 489 | cgrid = Multilevel_MQ_Clustering_establish(cgrid, maxcluster); |
| 490 | |
| 491 | grid->next = cgrid; |
| 492 | cgrid->prev = grid; |
| 493 | } else { |
| 494 | /* no more improvement, stop and final clustering found */ |
| 495 | for (i = 0; i < n; i++) matching[i] = i; |
| 496 | |
| 497 | FREE(deg_intra_new); |
| 498 | FREE(wgt_new); |
| 499 | FREE(dout_new); |
| 500 | } |
| 501 | |
| 502 | RETURN: |
| 503 | for (i = 0; i < n; i++) SingleLinkedList_delete(neighbors[i], free); |
| 504 | FREE(neighbors); |
| 505 | |
| 506 | FREE(deg_inter); |
| 507 | FREE(mask); |
| 508 | return grid; |
| 509 | } |
| 510 | |
| 511 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_new(SparseMatrix A0, int maxcluster){ |
| 512 | /* maxcluster is used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
| 513 | is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 */ |
| 514 | Multilevel_MQ_Clustering grid; |
| 515 | SparseMatrix A = A0; |
| 516 | |
| 517 | if (maxcluster <= 0) maxcluster = A->m; |
| 518 | if (!SparseMatrix_is_symmetric(A, FALSE) || A->type != MATRIX_TYPE_REAL){ |
| 519 | A = SparseMatrix_get_real_adjacency_matrix_symmetrized(A); |
| 520 | } |
| 521 | grid = Multilevel_MQ_Clustering_init(A, 0); |
| 522 | |
| 523 | grid = Multilevel_MQ_Clustering_establish(grid, maxcluster); |
| 524 | |
| 525 | if (A != A0) grid->delete_top_level_A = TRUE;/* be sure to clean up later */ |
| 526 | return grid; |
| 527 | } |
| 528 | |
| 529 | |
| 530 | static void hierachical_mq_clustering(SparseMatrix A, int maxcluster, |
| 531 | int *nclusters, int **assignment, real *mq, int *flag){ |
| 532 | /* find a clustering of vertices by maximize mq |
| 533 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. |
| 534 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
| 535 | . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 |
| 536 | nclusters: on output the number of clusters |
| 537 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters |
| 538 | */ |
| 539 | |
| 540 | Multilevel_MQ_Clustering grid, cgrid; |
| 541 | int *matching, i; |
| 542 | SparseMatrix P; |
| 543 | real *u; |
| 544 | assert(A->m == A->n); |
| 545 | |
| 546 | *mq = 0.; |
| 547 | |
| 548 | *flag = 0; |
| 549 | |
| 550 | grid = Multilevel_MQ_Clustering_new(A, maxcluster); |
| 551 | |
| 552 | /* find coarsest */ |
| 553 | cgrid = grid; |
| 554 | while (cgrid->next){ |
| 555 | cgrid = cgrid->next; |
| 556 | } |
| 557 | |
| 558 | /* project clustering up */ |
| 559 | u = MALLOC(sizeof(real)*cgrid->n); |
| 560 | for (i = 0; i < cgrid->n; i++) u[i] = (real) (cgrid->matching)[i]; |
| 561 | *nclusters = cgrid->n; |
| 562 | *mq = cgrid->mq; |
| 563 | |
| 564 | while (cgrid->prev){ |
| 565 | real *v = NULL; |
| 566 | P = cgrid->prev->P; |
| 567 | SparseMatrix_multiply_vector(P, u, &v, FALSE); |
| 568 | FREE(u); |
| 569 | u = v; |
| 570 | cgrid = cgrid->prev; |
| 571 | } |
| 572 | |
| 573 | if (*assignment){ |
| 574 | matching = *assignment; |
| 575 | } else { |
| 576 | matching = MALLOC(sizeof(int)*(grid->n)); |
| 577 | *assignment = matching; |
| 578 | } |
| 579 | for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i]; |
| 580 | FREE(u); |
| 581 | |
| 582 | Multilevel_MQ_Clustering_delete(grid); |
| 583 | |
| 584 | } |
| 585 | |
| 586 | |
| 587 | |
| 588 | void mq_clustering(SparseMatrix A, int inplace, int maxcluster, int use_value, |
| 589 | int *nclusters, int **assignment, real *mq, int *flag){ |
| 590 | /* find a clustering of vertices by maximize mq |
| 591 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. |
| 592 | inplace: whether A can e modified. If true, A will be modified by removing diagonal. |
| 593 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters |
| 594 | . is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 |
| 595 | nclusters: on output the number of clusters |
| 596 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters |
| 597 | */ |
| 598 | SparseMatrix B; |
| 599 | |
| 600 | *flag = 0; |
| 601 | |
| 602 | assert(A->m == A->n); |
| 603 | |
| 604 | B = SparseMatrix_symmetrize(A, FALSE); |
| 605 | |
| 606 | if (!inplace && B == A) { |
| 607 | B = SparseMatrix_copy(A); |
| 608 | } |
| 609 | |
| 610 | B = SparseMatrix_remove_diagonal(B); |
| 611 | |
| 612 | if (B->type != MATRIX_TYPE_REAL || !use_value) B = SparseMatrix_set_entries_to_real_one(B); |
| 613 | |
| 614 | hierachical_mq_clustering(B, maxcluster, nclusters, assignment, mq, flag); |
| 615 | |
| 616 | if (B != A) SparseMatrix_delete(B); |
| 617 | |
| 618 | } |
| 619 | |