| 1 | /* $Id$Revision: */ | 
|---|
| 2 | /* vim:set shiftwidth=4 ts=8: */ | 
|---|
| 3 |  | 
|---|
| 4 | /************************************************************************* | 
|---|
| 5 | * Copyright (c) 2011 AT&T Intellectual Property | 
|---|
| 6 | * All rights reserved. This program and the accompanying materials | 
|---|
| 7 | * are made available under the terms of the Eclipse Public License v1.0 | 
|---|
| 8 | * which accompanies this distribution, and is available at | 
|---|
| 9 | * http://www.eclipse.org/legal/epl-v10.html | 
|---|
| 10 | * | 
|---|
| 11 | * Contributors: See CVS logs. Details at http://www.graphviz.org/ | 
|---|
| 12 | *************************************************************************/ | 
|---|
| 13 |  | 
|---|
| 14 | /* Modularity Quality definitation: | 
|---|
| 15 |  | 
|---|
| 16 | We assume undirected graph. Directed graph should be converted by summing edge weights. | 
|---|
| 17 |  | 
|---|
| 18 | Given a partition P of V into k clusters. | 
|---|
| 19 |  | 
|---|
| 20 | Let E(i,j) be the set of edges between cluster i and j. | 
|---|
| 21 | Let |E(i,j)| be the sum of edge weights of edges in E(i,j). | 
|---|
| 22 |  | 
|---|
| 23 | Let E(i,i) be the set of edges within cluster i, but excluding self-edges. | 
|---|
| 24 | Let |E(i,i)| be the sum of edge weights of edges in E(i,i). | 
|---|
| 25 |  | 
|---|
| 26 | Let V(i) be the sets of vertices in i | 
|---|
| 27 |  | 
|---|
| 28 | The intra-cluster edges concentration for a cluster i is | 
|---|
| 29 | (the denominator could be |V(i)|*(|V(i)-1)/2 strictly speaking as we exclude self-edges): | 
|---|
| 30 |  | 
|---|
| 31 | |E(i,i)| | 
|---|
| 32 | ----------- | 
|---|
| 33 | (|V(i)|^2/2) | 
|---|
| 34 |  | 
|---|
| 35 | The inter-cluster edges concentration between cluster i and j is | 
|---|
| 36 |  | 
|---|
| 37 | |E(i,j)| | 
|---|
| 38 | ------------ | 
|---|
| 39 | |V(i)|*|V(j)| | 
|---|
| 40 |  | 
|---|
| 41 | So the cluster index is defined as the average intra cluster edge concentration, minus | 
|---|
| 42 | the inter-cluster edge concentration: | 
|---|
| 43 |  | 
|---|
| 44 | .                               |E(i,i)|                                        |E(i,j)| | 
|---|
| 45 | MQ(P) = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1)/2)) * \sum_{i<j} ------------------- = mq_in/k - mq_out/(k*(k-1)/2) | 
|---|
| 46 | .                              (|V(i)|^2/2)                                   |V(i)|*|V(j)| | 
|---|
| 47 |  | 
|---|
| 48 | or | 
|---|
| 49 |  | 
|---|
| 50 | .                                 |E(i,i)|                                     |E(i,j)| | 
|---|
| 51 | MQ(P)/2 = (1/k) * \sum_{i=1...k} ------------ - (1/(k*(k-1))) * \sum_{i<j} ------------------ = mq_in/k - mq_out/(k*(k-1)) | 
|---|
| 52 | .                                |V(i)|^2                                   |V(i)|*|V(j)| | 
|---|
| 53 |  | 
|---|
| 54 | Notice that if we assume the graph is unweights (edge weights = 1), then 0<= MQ <= 1. | 
|---|
| 55 | For weighted graph, MQ may not be within 0 to 1. We could normalized it, but | 
|---|
| 56 | for comparing clustering quality of the same graph but different partitioning, this | 
|---|
| 57 | unnormalized quantity is not a problem. | 
|---|
| 58 |  | 
|---|
| 59 | */ | 
|---|
| 60 |  | 
|---|
| 61 | #define STANDALONE | 
|---|
| 62 | #include "general.h" | 
|---|
| 63 | #include "SparseMatrix.h" | 
|---|
| 64 | #include "mq.h" | 
|---|
| 65 | #include "LinkedList.h" | 
|---|
| 66 |  | 
|---|
| 67 | static real get_mq(SparseMatrix A, int *assignment, int *ncluster0, real *mq_in0, real *mq_out0, real **dout0){ | 
|---|
| 68 | /* given a symmetric matrix representation of a graph and an assignment of nodes into clusters, calculate the modularity quality. | 
|---|
| 69 | assignment: assignmenet[i] gives the cluster assignment of node i. 0 <= assignment[i] < ncluster. | 
|---|
| 70 | ncluster: number of clusters | 
|---|
| 71 | mq_in: the part of MQ to do with intra-cluster edges, before divide by 1/k | 
|---|
| 72 | mq_out: the part of MQ to do with inter-cluster edges, before divide by 1/(k*(k-1)) | 
|---|
| 73 | mq = 2*(mq_in/k - mq_out/(k*(k-1))); | 
|---|
| 74 | */ | 
|---|
| 75 | int ncluster = 0; | 
|---|
| 76 | int n = A->m; | 
|---|
| 77 | int test_pattern_symmetry_only = FALSE; | 
|---|
| 78 | int *counts, *ia = A->ia, *ja = A->ja, k, i, j, jj; | 
|---|
| 79 | real mq_in = 0, mq_out = 0, *a = NULL, Vi, Vj; | 
|---|
| 80 | int c; | 
|---|
| 81 | real *dout; | 
|---|
| 82 |  | 
|---|
| 83 |  | 
|---|
| 84 | assert(SparseMatrix_is_symmetric(A, test_pattern_symmetry_only)); | 
|---|
| 85 | assert(A->n == n); | 
|---|
| 86 | if (A->type == MATRIX_TYPE_REAL) a = (real*) A->a; | 
|---|
| 87 |  | 
|---|
| 88 | counts = MALLOC(sizeof(int)*n); | 
|---|
| 89 |  | 
|---|
| 90 | for (i = 0; i < n; i++) counts[i] = 0; | 
|---|
| 91 |  | 
|---|
| 92 | for (i = 0; i < n; i++){ | 
|---|
| 93 | assert(assignment[i] >= 0 && assignment[i] < n); | 
|---|
| 94 | if (counts[assignment[i]] == 0) ncluster++; | 
|---|
| 95 | counts[assignment[i]]++; | 
|---|
| 96 | } | 
|---|
| 97 | k = ncluster; | 
|---|
| 98 | assert(ncluster <= n); | 
|---|
| 99 |  | 
|---|
| 100 | for (i = 0; i < n; i++){ | 
|---|
| 101 | assert(assignment[i] < ncluster); | 
|---|
| 102 | c = assignment[i]; | 
|---|
| 103 | Vi = counts[c]; | 
|---|
| 104 | for (j = ia[i] ; j < ia[i+1]; j++){ | 
|---|
| 105 | /* ASSUME UNDIRECTED */ | 
|---|
| 106 | jj = ja[j]; | 
|---|
| 107 | if (jj >= i) continue; | 
|---|
| 108 | assert(assignment[jj] < ncluster); | 
|---|
| 109 | Vj = counts[assignment[jj]]; | 
|---|
| 110 | if (assignment[jj] == c){ | 
|---|
| 111 | if (a) { | 
|---|
| 112 | mq_in += a[j]/(Vi*Vi); | 
|---|
| 113 | } else { | 
|---|
| 114 | mq_in += 1./(Vi*Vi); | 
|---|
| 115 | } | 
|---|
| 116 | } else { | 
|---|
| 117 | if (a) { | 
|---|
| 118 | mq_out += a[j]/(Vi*Vj); | 
|---|
| 119 | } else { | 
|---|
| 120 | mq_out += 1./(Vi*Vj); | 
|---|
| 121 | } | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | /* calculate scaled out degree */ | 
|---|
| 128 | dout = MALLOC(sizeof(real)*n); | 
|---|
| 129 | for (i = 0; i < n; i++){ | 
|---|
| 130 | dout[i] = 0; | 
|---|
| 131 | for (j = ia[i]; j < ia[i+1]; j++){ | 
|---|
| 132 | jj = ja[j]; | 
|---|
| 133 | if (jj == i) continue; | 
|---|
| 134 | if (a){ | 
|---|
| 135 | dout[i] += a[j]/(real) counts[assignment[jj]]; | 
|---|
| 136 | } else { | 
|---|
| 137 | dout[i] += 1./(real) counts[assignment[jj]]; | 
|---|
| 138 | } | 
|---|
| 139 | } | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | *ncluster0 = k; | 
|---|
| 143 | *mq_in0 = mq_in; | 
|---|
| 144 | *mq_out0 = mq_out; | 
|---|
| 145 | *dout0 = dout; | 
|---|
| 146 | FREE(counts); | 
|---|
| 147 |  | 
|---|
| 148 | if (k > 1){ | 
|---|
| 149 | return 2*(mq_in/k - mq_out/(k*(k-1))); | 
|---|
| 150 | } else { | 
|---|
| 151 | return 2*mq_in; | 
|---|
| 152 | } | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_init(SparseMatrix A, int level){ | 
|---|
| 156 | Multilevel_MQ_Clustering grid; | 
|---|
| 157 | int n = A->n, i; | 
|---|
| 158 | int *matching; | 
|---|
| 159 |  | 
|---|
| 160 | assert(A->type == MATRIX_TYPE_REAL); | 
|---|
| 161 | assert(SparseMatrix_is_symmetric(A, FALSE)); | 
|---|
| 162 |  | 
|---|
| 163 | if (!A) return NULL; | 
|---|
| 164 | assert(A->m == n); | 
|---|
| 165 | grid = MALLOC(sizeof(struct Multilevel_MQ_Clustering_struct)); | 
|---|
| 166 | grid->level = level; | 
|---|
| 167 | grid->n = n; | 
|---|
| 168 | grid->A = A; | 
|---|
| 169 | grid->P = NULL; | 
|---|
| 170 | grid->R = NULL; | 
|---|
| 171 | grid->next = NULL; | 
|---|
| 172 | grid->prev = NULL; | 
|---|
| 173 | grid->delete_top_level_A = FALSE; | 
|---|
| 174 | matching = grid->matching = MALLOC(sizeof(real)*(n)); | 
|---|
| 175 | grid->deg_intra = NULL; | 
|---|
| 176 | grid->dout = NULL; | 
|---|
| 177 | grid->wgt = NULL; | 
|---|
| 178 |  | 
|---|
| 179 | if (level == 0){ | 
|---|
| 180 | real mq = 0, mq_in, mq_out; | 
|---|
| 181 | int n = A->n, ncluster; | 
|---|
| 182 | real *deg_intra, *wgt, *dout; | 
|---|
| 183 |  | 
|---|
| 184 | grid->deg_intra = MALLOC(sizeof(real)*(n)); | 
|---|
| 185 | deg_intra = grid->deg_intra; | 
|---|
| 186 |  | 
|---|
| 187 | grid->wgt = MALLOC(sizeof(real)*n); | 
|---|
| 188 | wgt = grid->wgt; | 
|---|
| 189 |  | 
|---|
| 190 | for (i = 0; i < n; i++){ | 
|---|
| 191 | deg_intra[i] = 0; | 
|---|
| 192 | wgt[i] = 1.; | 
|---|
| 193 | } | 
|---|
| 194 | for (i = 0; i < n; i++) matching[i] = i; | 
|---|
| 195 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); | 
|---|
| 196 | fprintf(stderr, "ncluster = %d, mq = %f\n", ncluster, mq); | 
|---|
| 197 | grid->mq = mq; | 
|---|
| 198 | grid->mq_in = mq_in; | 
|---|
| 199 | grid->mq_out = mq_out; | 
|---|
| 200 | grid->dout = dout; | 
|---|
| 201 | grid->ncluster = ncluster; | 
|---|
| 202 |  | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 |  | 
|---|
| 206 | return grid; | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 | void Multilevel_MQ_Clustering_delete(Multilevel_MQ_Clustering grid){ | 
|---|
| 210 | if (!grid) return; | 
|---|
| 211 | if (grid->A){ | 
|---|
| 212 | if (grid->level == 0) { | 
|---|
| 213 | if (grid->delete_top_level_A) SparseMatrix_delete(grid->A); | 
|---|
| 214 | } else { | 
|---|
| 215 | SparseMatrix_delete(grid->A); | 
|---|
| 216 | } | 
|---|
| 217 | } | 
|---|
| 218 | SparseMatrix_delete(grid->P); | 
|---|
| 219 | SparseMatrix_delete(grid->R); | 
|---|
| 220 | FREE(grid->matching); | 
|---|
| 221 | FREE(grid->deg_intra); | 
|---|
| 222 | FREE(grid->dout); | 
|---|
| 223 | FREE(grid->wgt); | 
|---|
| 224 | Multilevel_MQ_Clustering_delete(grid->next); | 
|---|
| 225 | FREE(grid); | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
| 228 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_establish(Multilevel_MQ_Clustering grid, int maxcluster){ | 
|---|
| 229 | int *matching = grid->matching; | 
|---|
| 230 | SparseMatrix A = grid->A; | 
|---|
| 231 | int n = grid->n, level = grid->level, nc = 0, nclusters = n; | 
|---|
| 232 | real mq = 0, mq_in = 0, mq_out = 0, mq_new, mq_in_new, mq_out_new, mq_max = 0, mq_in_max = 0, mq_out_max = 0; | 
|---|
| 233 | int *ia = A->ia, *ja = A->ja; | 
|---|
| 234 | real *a, amax = 0; | 
|---|
| 235 | real *deg_intra = grid->deg_intra, *wgt = grid->wgt; | 
|---|
| 236 | real *deg_intra_new, *wgt_new = NULL; | 
|---|
| 237 | int i, j, k, jj, jc, jmax; | 
|---|
| 238 | real *deg_inter, gain = 0, *dout = grid->dout, *dout_new, deg_in_i, deg_in_j, wgt_i, wgt_j, a_ij, dout_i, dout_j, dout_max = 0, wgt_jmax = 0; | 
|---|
| 239 | int *mask; | 
|---|
| 240 | real maxgain = 0; | 
|---|
| 241 | real total_gain = 0; | 
|---|
| 242 | SingleLinkedList *neighbors = NULL, lst; | 
|---|
| 243 |  | 
|---|
| 244 |  | 
|---|
| 245 | neighbors = MALLOC(sizeof(SingleLinkedList)*n); | 
|---|
| 246 | for (i = 0; i < n; i++) neighbors[i] = NULL; | 
|---|
| 247 |  | 
|---|
| 248 | mq = grid->mq; | 
|---|
| 249 | mq_in = grid->mq_in; | 
|---|
| 250 | mq_out = grid->mq_out; | 
|---|
| 251 |  | 
|---|
| 252 | deg_intra_new = MALLOC(sizeof(real)*n); | 
|---|
| 253 | wgt_new = MALLOC(sizeof(real)*n); | 
|---|
| 254 | deg_inter = MALLOC(sizeof(real)*n); | 
|---|
| 255 | mask = MALLOC(sizeof(int)*n); | 
|---|
| 256 | dout_new = MALLOC(sizeof(real)*n); | 
|---|
| 257 | for (i = 0; i < n; i++) mask[i] = -1; | 
|---|
| 258 |  | 
|---|
| 259 | assert(n == A->n); | 
|---|
| 260 | for (i = 0; i < n; i++) matching[i] = UNMATCHED; | 
|---|
| 261 |  | 
|---|
| 262 | /* gain in merging node A into cluster B is | 
|---|
| 263 | mq_in_new = mq_in - |E(A,A)|/(V(A))^2 - |E(B,B)|/(V(B))^2 + (|E(A,A)|+|E(B,B)|+|E(A,B)|)/(|V(A)|+|V(B)|)^2 | 
|---|
| 264 | .         = mq_in - deg_intra(A)/|A|^2 - deg_intra(B)/|B|^2 + (deg_intra(A)+deg_intra(B)+a(A,B))/(|A|+|B|)^2 | 
|---|
| 265 |  | 
|---|
| 266 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) | 
|---|
| 267 | .                  + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) | 
|---|
| 268 | .          = mq_out + a(A,B)/(|A|*|B|)-\sum_{C and A connected} a(A,C)/(|A|*|C|)-\sum_{C and B connected} a(B,C)/(|B|*|C|) | 
|---|
| 269 | .                  + \sum_{C connected to A or B, C!=A, C!=B} (a(A,C)+a(B,C))/(|C|*(|A|+|B|)) | 
|---|
| 270 | Denote: | 
|---|
| 271 | dout(i) = \sum_{j -- i} a(i,j)/|j| | 
|---|
| 272 | then | 
|---|
| 273 |  | 
|---|
| 274 | mq_out_new = mq_out - |E(A,B)|/(|V(A)|*V(B)|)-\sum_{C and A connected, C!=B} |E(A,C)|/(|V(A)|*|V(C)|)-\sum_{C and B connected,C!=B} |E(B,C)|/(|V(B)|*|V(C)|) | 
|---|
| 275 | .                  + \sum_{C connected to A or B, C!=A, C!=B} (|E(A,C)|+|E(B,C)|)/(|V(C)|*(|V(A)|+|V(B)|) | 
|---|
| 276 | .          = mq_out + a(A,B)/(|A|*|B|)-dout(A)/|A| - dout(B)/|B| | 
|---|
| 277 | .                  + (dout(A)+dout(B))/(|A|+|B|) - (a(A,B)/|A|+a(A,B)/|B|)/(|A|+|B|) | 
|---|
| 278 | .          = mq_out -dout(A)/|A| - dout(B)/|B| + (dout(A)+dout(B))/(|A|+|B|) | 
|---|
| 279 | after merging A and B into cluster AB, | 
|---|
| 280 | dout(AB) = dout(A) + dout(B); | 
|---|
| 281 | dout(C) := dout(C) - a(A,C)/|A| - a(B,C)/|B| + a(A,C)/(|A|+|B|) + a(B, C)/(|A|+|B|) | 
|---|
| 282 |  | 
|---|
| 283 | mq_new = mq_in_new/(k-1) - mq_out_new/((k-1)*(k-2)) | 
|---|
| 284 | gain = mq_new - mq | 
|---|
| 285 | */ | 
|---|
| 286 | a = (real*) A->a; | 
|---|
| 287 | for (i = 0; i < n; i++){ | 
|---|
| 288 | if (matching[i] != UNMATCHED) continue; | 
|---|
| 289 | /* accumulate connections between i and clusters */ | 
|---|
| 290 | for (j = ia[i]; j < ia[i+1]; j++){ | 
|---|
| 291 | jj = ja[j]; | 
|---|
| 292 | if (jj == i) continue; | 
|---|
| 293 | if ((jc=matching[jj]) != UNMATCHED){ | 
|---|
| 294 | if (mask[jc] != i) { | 
|---|
| 295 | mask[jc] = i; | 
|---|
| 296 | deg_inter[jc] = a[j]; | 
|---|
| 297 | } else { | 
|---|
| 298 | deg_inter[jc] += a[j]; | 
|---|
| 299 | } | 
|---|
| 300 | } | 
|---|
| 301 | } | 
|---|
| 302 | deg_in_i = deg_intra[i]; | 
|---|
| 303 | wgt_i = wgt[i]; | 
|---|
| 304 | dout_i = dout[i]; | 
|---|
| 305 |  | 
|---|
| 306 | maxgain = 0; | 
|---|
| 307 | jmax = -1; | 
|---|
| 308 | for (j = ia[i]; j < ia[i+1]; j++){ | 
|---|
| 309 | jj = ja[j]; | 
|---|
| 310 | if (jj == i) continue; | 
|---|
| 311 | jc = matching[jj]; | 
|---|
| 312 | if (jc == UNMATCHED){ | 
|---|
| 313 | a_ij = a[j]; | 
|---|
| 314 | wgt_j = wgt[jj]; | 
|---|
| 315 | deg_in_j = deg_intra[jj]; | 
|---|
| 316 | dout_j = dout[jj]; | 
|---|
| 317 | } else if (deg_inter[jc] < 0){ | 
|---|
| 318 | continue; | 
|---|
| 319 | } else { | 
|---|
| 320 | a_ij = deg_inter[jc]; | 
|---|
| 321 | wgt_j = wgt_new[jc]; | 
|---|
| 322 | deg_inter[jc] = -1; /* so that we do not redo the calulation when we hit another neighbor in cluster jc */ | 
|---|
| 323 | deg_in_j = deg_intra_new[jc]; | 
|---|
| 324 | dout_j = dout_new[jc]; | 
|---|
| 325 | } | 
|---|
| 326 |  | 
|---|
| 327 | mq_in_new = mq_in - deg_in_i/pow(wgt_i, 2) - deg_in_j/pow(wgt_j,2) | 
|---|
| 328 | + (deg_in_i + deg_in_j + a_ij)/pow(wgt_i + wgt_j,2); | 
|---|
| 329 |  | 
|---|
| 330 | mq_out_new = mq_out - dout_i/wgt_i - dout_j/wgt_j + (dout_i + dout_j)/(wgt_i + wgt_j); | 
|---|
| 331 |  | 
|---|
| 332 | if (nclusters > 2){ | 
|---|
| 333 | mq_new =  2*(mq_in_new/(nclusters - 1) - mq_out_new/((nclusters - 1)*(nclusters - 2))); | 
|---|
| 334 | } else { | 
|---|
| 335 | mq_new =  2*mq_in_new/(nclusters - 1); | 
|---|
| 336 | } | 
|---|
| 337 |  | 
|---|
| 338 | #ifdef DEBUG | 
|---|
| 339 | {int ncluster; | 
|---|
| 340 | double mq2, mq_in2, mq_out2, *dout2; | 
|---|
| 341 | int *matching2, nc2 = nc; | 
|---|
| 342 | matching2 = MALLOC(sizeof(int)*A->m); | 
|---|
| 343 | matching2 = MEMCPY(matching2, matching, sizeof(real)*A->m); | 
|---|
| 344 | if (jc != UNMATCHED) { | 
|---|
| 345 | matching2[i] = jc; | 
|---|
| 346 | } else { | 
|---|
| 347 | matching2[i] = nc2; | 
|---|
| 348 | matching2[jj] = nc2; | 
|---|
| 349 | nc2++; | 
|---|
| 350 | } | 
|---|
| 351 | for (k = 0; k < n; k++) if (matching2[k] == UNMATCHED) matching2[k] =nc2++; | 
|---|
| 352 | mq2 = get_mq(A, matching2, &ncluster, &mq_in2, &mq_out2, &dout2); | 
|---|
| 353 | fprintf(stderr, " {dout_i, dout_j}={%f,%f}, {predicted, calculated}: mq = {%f, %f}, mq_in ={%f,%f}, mq_out = {%f,%f}\n",dout_i, dout_j, mq_new, mq2, mq_in_new, mq_in2, mq_out_new, mq_out2); | 
|---|
| 354 |  | 
|---|
| 355 | mq_new = mq2; | 
|---|
| 356 |  | 
|---|
| 357 | } | 
|---|
| 358 | #endif | 
|---|
| 359 |  | 
|---|
| 360 | gain = mq_new - mq; | 
|---|
| 361 | if (Verbose) fprintf(stderr, "gain in merging node %d with node %d = %f-%f = %f\n", i, jj, mq, mq_new, gain); | 
|---|
| 362 | if (j == ia[i] || gain > maxgain){ | 
|---|
| 363 | maxgain = gain; | 
|---|
| 364 | jmax = jj; | 
|---|
| 365 | amax = a_ij; | 
|---|
| 366 | dout_max = dout_j; | 
|---|
| 367 | wgt_jmax = wgt_j; | 
|---|
| 368 | mq_max = mq_new; | 
|---|
| 369 | mq_in_max = mq_in_new; | 
|---|
| 370 | mq_out_max = mq_out_new; | 
|---|
| 371 | } | 
|---|
| 372 |  | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 | /* now merge i and jmax */ | 
|---|
| 376 | if (maxgain > 0 || (nc >= 1 && nc > maxcluster)){ | 
|---|
| 377 | total_gain += maxgain; | 
|---|
| 378 | jc = matching[jmax]; | 
|---|
| 379 | if (jc == UNMATCHED){ | 
|---|
| 380 | fprintf(stderr, "maxgain=%f, merge %d, %d\n",maxgain, i, jmax); | 
|---|
| 381 | neighbors[nc] = SingleLinkedList_new_int(jmax); | 
|---|
| 382 | neighbors[nc] = SingleLinkedList_prepend_int(neighbors[nc], i); | 
|---|
| 383 | dout_new[nc] = dout_i + dout_max; | 
|---|
| 384 | matching[i] = matching[jmax] = nc; | 
|---|
| 385 | wgt_new[nc] = wgt[i] + wgt[jmax]; | 
|---|
| 386 | deg_intra_new[nc] = deg_intra[i] + deg_intra[jmax] + amax; | 
|---|
| 387 | nc++; | 
|---|
| 388 | } else { | 
|---|
| 389 | fprintf(stderr, "maxgain=%f, merge with existing cluster %d, %d\n",maxgain, i, jc); | 
|---|
| 390 | neighbors[jc] = SingleLinkedList_prepend_int(neighbors[jc], i); | 
|---|
| 391 | dout_new[jc] = dout_i + dout_max; | 
|---|
| 392 | wgt_new[jc] += wgt[i]; | 
|---|
| 393 | matching[i] = jc; | 
|---|
| 394 | deg_intra_new[jc] += deg_intra[i] + amax; | 
|---|
| 395 | } | 
|---|
| 396 | mq = mq_max; | 
|---|
| 397 | mq_in = mq_in_max; | 
|---|
| 398 | mq_out = mq_out_max; | 
|---|
| 399 | nclusters--; | 
|---|
| 400 | } else { | 
|---|
| 401 | fprintf(stderr, "gain: %f -- no gain, skip merging node %d\n", maxgain, i); | 
|---|
| 402 | assert(maxgain <= 0); | 
|---|
| 403 | neighbors[nc] = SingleLinkedList_new_int(i); | 
|---|
| 404 | matching[i] = nc; | 
|---|
| 405 | deg_intra_new[nc] = deg_intra[i]; | 
|---|
| 406 | wgt_new[nc] = wgt[i]; | 
|---|
| 407 | nc++; | 
|---|
| 408 | } | 
|---|
| 409 |  | 
|---|
| 410 |  | 
|---|
| 411 | /* update scaled outdegree of neighbors of i and its merged node/cluster jmax */ | 
|---|
| 412 | jc = matching[i]; | 
|---|
| 413 | lst = neighbors[jc]; | 
|---|
| 414 | do { | 
|---|
| 415 | mask[*((int*) SingleLinkedList_get_data(lst))] = n+i; | 
|---|
| 416 | lst = SingleLinkedList_get_next(lst); | 
|---|
| 417 | } while (lst); | 
|---|
| 418 |  | 
|---|
| 419 | lst = neighbors[jc]; | 
|---|
| 420 |  | 
|---|
| 421 | do { | 
|---|
| 422 | k = *((int*) SingleLinkedList_get_data(lst)); | 
|---|
| 423 | for (j = ia[k]; j < ia[k+1]; j++){ | 
|---|
| 424 | jj = ja[j]; | 
|---|
| 425 | if (mask[jj] == n+i) continue;/* link to within cluster */ | 
|---|
| 426 | if ((jc = matching[jj]) == UNMATCHED){ | 
|---|
| 427 | if (k == i){ | 
|---|
| 428 | dout[jj] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); | 
|---|
| 429 | } else { | 
|---|
| 430 | dout[jj] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); | 
|---|
| 431 | } | 
|---|
| 432 | } else { | 
|---|
| 433 | if (k == i){ | 
|---|
| 434 | dout_new[jc] += -a[j]/wgt_i + a[j]/(wgt_i + wgt_jmax); | 
|---|
| 435 | } else { | 
|---|
| 436 | dout_new[jc] += -a[j]/wgt_jmax + a[j]/(wgt_i + wgt_jmax); | 
|---|
| 437 | } | 
|---|
| 438 | } | 
|---|
| 439 | } | 
|---|
| 440 | lst = SingleLinkedList_get_next(lst); | 
|---|
| 441 | } while (lst); | 
|---|
| 442 |  | 
|---|
| 443 | } | 
|---|
| 444 |  | 
|---|
| 445 | fprintf(stderr, "verbose=%d\n",Verbose); | 
|---|
| 446 | if (Verbose) fprintf(stderr, "mq = %f new mq = %f level = %d, n = %d, nc = %d, gain = %g, mq_in = %f, mq_out = %f\n", mq, mq + total_gain, | 
|---|
| 447 | level, n, nc, total_gain, mq_in, mq_out); | 
|---|
| 448 |  | 
|---|
| 449 | #ifdef DEBUG | 
|---|
| 450 | {int ncluster; | 
|---|
| 451 |  | 
|---|
| 452 | mq = get_mq(A, matching, &ncluster, &mq_in, &mq_out, &dout); | 
|---|
| 453 | fprintf(stderr, " mq = %f\n",mq); | 
|---|
| 454 |  | 
|---|
| 455 | } | 
|---|
| 456 | #endif | 
|---|
| 457 |  | 
|---|
| 458 | if (nc >= 1 && (total_gain > 0 || nc < n)){ | 
|---|
| 459 | /* now set up restriction and prolongation operator */ | 
|---|
| 460 | SparseMatrix P, R, R0, B, cA; | 
|---|
| 461 | real one = 1.; | 
|---|
| 462 | Multilevel_MQ_Clustering cgrid; | 
|---|
| 463 |  | 
|---|
| 464 | R0 = SparseMatrix_new(nc, n, 1, MATRIX_TYPE_REAL, FORMAT_COORD); | 
|---|
| 465 | for (i = 0; i < n; i++){ | 
|---|
| 466 | jj = matching[i]; | 
|---|
| 467 | SparseMatrix_coordinate_form_add_entries(R0, 1, &jj, &i, &one); | 
|---|
| 468 | } | 
|---|
| 469 | R = SparseMatrix_from_coordinate_format(R0); | 
|---|
| 470 | SparseMatrix_delete(R0); | 
|---|
| 471 | P = SparseMatrix_transpose(R); | 
|---|
| 472 | B = SparseMatrix_multiply(R, A); | 
|---|
| 473 | if (!B) goto RETURN; | 
|---|
| 474 | cA = SparseMatrix_multiply(B, P); | 
|---|
| 475 | if (!cA) goto RETURN; | 
|---|
| 476 | SparseMatrix_delete(B); | 
|---|
| 477 | grid->P = P; | 
|---|
| 478 | grid->R = R; | 
|---|
| 479 | level++; | 
|---|
| 480 | cgrid = Multilevel_MQ_Clustering_init(cA, level); | 
|---|
| 481 | deg_intra_new = REALLOC(deg_intra_new, nc*sizeof(real)); | 
|---|
| 482 | wgt_new = REALLOC(wgt_new, nc*sizeof(real)); | 
|---|
| 483 | cgrid->deg_intra = deg_intra_new; | 
|---|
| 484 | cgrid->mq = grid->mq + total_gain; | 
|---|
| 485 | cgrid->wgt = wgt_new; | 
|---|
| 486 | dout_new =  REALLOC(dout_new, nc*sizeof(real)); | 
|---|
| 487 | cgrid->dout = dout_new; | 
|---|
| 488 |  | 
|---|
| 489 | cgrid = Multilevel_MQ_Clustering_establish(cgrid, maxcluster); | 
|---|
| 490 |  | 
|---|
| 491 | grid->next = cgrid; | 
|---|
| 492 | cgrid->prev = grid; | 
|---|
| 493 | } else { | 
|---|
| 494 | /* no more improvement, stop and final clustering found */ | 
|---|
| 495 | for (i = 0; i < n; i++) matching[i] = i; | 
|---|
| 496 |  | 
|---|
| 497 | FREE(deg_intra_new); | 
|---|
| 498 | FREE(wgt_new); | 
|---|
| 499 | FREE(dout_new); | 
|---|
| 500 | } | 
|---|
| 501 |  | 
|---|
| 502 | RETURN: | 
|---|
| 503 | for (i = 0; i < n; i++) SingleLinkedList_delete(neighbors[i], free); | 
|---|
| 504 | FREE(neighbors); | 
|---|
| 505 |  | 
|---|
| 506 | FREE(deg_inter); | 
|---|
| 507 | FREE(mask); | 
|---|
| 508 | return grid; | 
|---|
| 509 | } | 
|---|
| 510 |  | 
|---|
| 511 | Multilevel_MQ_Clustering Multilevel_MQ_Clustering_new(SparseMatrix A0, int maxcluster){ | 
|---|
| 512 | /* maxcluster is used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters | 
|---|
| 513 | is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 */ | 
|---|
| 514 | Multilevel_MQ_Clustering grid; | 
|---|
| 515 | SparseMatrix A = A0; | 
|---|
| 516 |  | 
|---|
| 517 | if (maxcluster <= 0) maxcluster = A->m; | 
|---|
| 518 | if (!SparseMatrix_is_symmetric(A, FALSE) || A->type != MATRIX_TYPE_REAL){ | 
|---|
| 519 | A = SparseMatrix_get_real_adjacency_matrix_symmetrized(A); | 
|---|
| 520 | } | 
|---|
| 521 | grid = Multilevel_MQ_Clustering_init(A, 0); | 
|---|
| 522 |  | 
|---|
| 523 | grid = Multilevel_MQ_Clustering_establish(grid, maxcluster); | 
|---|
| 524 |  | 
|---|
| 525 | if (A != A0) grid->delete_top_level_A = TRUE;/* be sure to clean up later */ | 
|---|
| 526 | return grid; | 
|---|
| 527 | } | 
|---|
| 528 |  | 
|---|
| 529 |  | 
|---|
| 530 | static void hierachical_mq_clustering(SparseMatrix A, int maxcluster, | 
|---|
| 531 | int *nclusters, int **assignment, real *mq, int *flag){ | 
|---|
| 532 | /* find a clustering of vertices by maximize mq | 
|---|
| 533 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. | 
|---|
| 534 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters | 
|---|
| 535 | .   is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 | 
|---|
| 536 | nclusters: on output the number of clusters | 
|---|
| 537 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters | 
|---|
| 538 | */ | 
|---|
| 539 |  | 
|---|
| 540 | Multilevel_MQ_Clustering grid, cgrid; | 
|---|
| 541 | int *matching, i; | 
|---|
| 542 | SparseMatrix P; | 
|---|
| 543 | real *u; | 
|---|
| 544 | assert(A->m == A->n); | 
|---|
| 545 |  | 
|---|
| 546 | *mq = 0.; | 
|---|
| 547 |  | 
|---|
| 548 | *flag = 0; | 
|---|
| 549 |  | 
|---|
| 550 | grid = Multilevel_MQ_Clustering_new(A, maxcluster); | 
|---|
| 551 |  | 
|---|
| 552 | /* find coarsest */ | 
|---|
| 553 | cgrid = grid; | 
|---|
| 554 | while (cgrid->next){ | 
|---|
| 555 | cgrid = cgrid->next; | 
|---|
| 556 | } | 
|---|
| 557 |  | 
|---|
| 558 | /* project clustering up */ | 
|---|
| 559 | u =  MALLOC(sizeof(real)*cgrid->n); | 
|---|
| 560 | for (i = 0; i < cgrid->n; i++) u[i] = (real) (cgrid->matching)[i]; | 
|---|
| 561 | *nclusters = cgrid->n; | 
|---|
| 562 | *mq = cgrid->mq; | 
|---|
| 563 |  | 
|---|
| 564 | while (cgrid->prev){ | 
|---|
| 565 | real *v = NULL; | 
|---|
| 566 | P = cgrid->prev->P; | 
|---|
| 567 | SparseMatrix_multiply_vector(P, u, &v, FALSE); | 
|---|
| 568 | FREE(u); | 
|---|
| 569 | u = v; | 
|---|
| 570 | cgrid = cgrid->prev; | 
|---|
| 571 | } | 
|---|
| 572 |  | 
|---|
| 573 | if (*assignment){ | 
|---|
| 574 | matching = *assignment; | 
|---|
| 575 | } else { | 
|---|
| 576 | matching = MALLOC(sizeof(int)*(grid->n)); | 
|---|
| 577 | *assignment = matching; | 
|---|
| 578 | } | 
|---|
| 579 | for (i = 0; i < grid->n; i++) (matching)[i] = (int) u[i]; | 
|---|
| 580 | FREE(u); | 
|---|
| 581 |  | 
|---|
| 582 | Multilevel_MQ_Clustering_delete(grid); | 
|---|
| 583 |  | 
|---|
| 584 | } | 
|---|
| 585 |  | 
|---|
| 586 |  | 
|---|
| 587 |  | 
|---|
| 588 | void mq_clustering(SparseMatrix A, int inplace, int maxcluster, int use_value, | 
|---|
| 589 | int *nclusters, int **assignment, real *mq, int *flag){ | 
|---|
| 590 | /* find a clustering of vertices by maximize mq | 
|---|
| 591 | A: symmetric square matrix n x n. If real value, value will be used as edges weights, otherwise edge weights are considered as 1. | 
|---|
| 592 | inplace: whether A can e modified. If true, A will be modified by removing diagonal. | 
|---|
| 593 | maxcluster: used to specify the maximum number of cluster desired, e.g., maxcluster=10 means that a maximum of 10 clusters | 
|---|
| 594 | .   is desired. this may not always be realized, and mq may be low when this is specified. Default: maxcluster = 0 | 
|---|
| 595 | nclusters: on output the number of clusters | 
|---|
| 596 | assignment: dimension n. Node i is assigned to cluster "assignment[i]". 0 <= assignment < nclusters | 
|---|
| 597 | */ | 
|---|
| 598 | SparseMatrix B; | 
|---|
| 599 |  | 
|---|
| 600 | *flag = 0; | 
|---|
| 601 |  | 
|---|
| 602 | assert(A->m == A->n); | 
|---|
| 603 |  | 
|---|
| 604 | B = SparseMatrix_symmetrize(A, FALSE); | 
|---|
| 605 |  | 
|---|
| 606 | if (!inplace && B == A) { | 
|---|
| 607 | B = SparseMatrix_copy(A); | 
|---|
| 608 | } | 
|---|
| 609 |  | 
|---|
| 610 | B = SparseMatrix_remove_diagonal(B); | 
|---|
| 611 |  | 
|---|
| 612 | if (B->type != MATRIX_TYPE_REAL || !use_value) B = SparseMatrix_set_entries_to_real_one(B); | 
|---|
| 613 |  | 
|---|
| 614 | hierachical_mq_clustering(B, maxcluster, nclusters, assignment, mq, flag); | 
|---|
| 615 |  | 
|---|
| 616 | if (B != A) SparseMatrix_delete(B); | 
|---|
| 617 |  | 
|---|
| 618 | } | 
|---|
| 619 |  | 
|---|