gonum.org/v1/gonum@v0.14.0/graph/community/louvain_undirected.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package community 6 7 import ( 8 "math" 9 "sort" 10 11 "golang.org/x/exp/rand" 12 13 "gonum.org/v1/gonum/graph" 14 "gonum.org/v1/gonum/graph/internal/ordered" 15 "gonum.org/v1/gonum/graph/internal/set" 16 "gonum.org/v1/gonum/graph/iterator" 17 ) 18 19 // qUndirected returns the modularity Q score of the graph g subdivided into the 20 // given communities at the given resolution. If communities is nil, the 21 // unclustered modularity score is returned. The resolution parameter 22 // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110. 23 // qUndirected will panic if g has any edge with negative edge weight. 24 // 25 // Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j) 26 // 27 // graph.Undirect may be used as a shim to allow calculation of Q for 28 // directed graphs. 29 func qUndirected(g graph.Undirected, communities [][]graph.Node, resolution float64) float64 { 30 nodes := graph.NodesOf(g.Nodes()) 31 weight := positiveWeightFuncFor(g) 32 33 // Calculate the total edge weight of the graph 34 // and the table of penetrating edge weight sums. 35 var m2 float64 36 k := make(map[int64]float64, len(nodes)) 37 for _, u := range nodes { 38 uid := u.ID() 39 w := weight(uid, uid) 40 to := g.From(uid) 41 for to.Next() { 42 w += weight(uid, to.Node().ID()) 43 } 44 m2 += w 45 k[uid] = w 46 } 47 48 if communities == nil { 49 var q float64 50 for _, u := range nodes { 51 uid := u.ID() 52 kU := k[uid] 53 q += weight(uid, uid) - resolution*kU*kU/m2 54 } 55 return q / m2 56 } 57 58 // Iterate over the communities, calculating 59 // the non-self edge weights for the upper 60 // triangle and adjust the diagonal. 61 var q float64 62 for _, c := range communities { 63 for i, u := range c { 64 uid := u.ID() 65 kU := k[uid] 66 q += weight(uid, uid) - resolution*kU*kU/m2 67 for _, v := range c[i+1:] { 68 vid := v.ID() 69 q += 2 * (weight(uid, vid) - resolution*kU*k[vid]/m2) 70 } 71 } 72 } 73 return q / m2 74 } 75 76 // louvainUndirected returns the hierarchical modularization of g at the given 77 // resolution using the Louvain algorithm. If src is nil, rand.Intn is used as 78 // the random generator. louvainUndirected will panic if g has any edge with negative edge 79 // weight. 80 // 81 // graph.Undirect may be used as a shim to allow modularization of directed graphs. 82 func louvainUndirected(g graph.Undirected, resolution float64, src rand.Source) *ReducedUndirected { 83 // See louvain.tex for a detailed description 84 // of the algorithm used here. 85 86 c := reduceUndirected(g, nil) 87 rnd := rand.Intn 88 if src != nil { 89 rnd = rand.New(src).Intn 90 } 91 for { 92 l := newUndirectedLocalMover(c, c.communities, resolution) 93 if l == nil { 94 return c 95 } 96 if done := l.localMovingHeuristic(rnd); done { 97 return c 98 } 99 c = reduceUndirected(c, l.communities) 100 } 101 } 102 103 // ReducedUndirected is an undirected graph of communities derived from a 104 // parent graph by reduction. 105 type ReducedUndirected struct { 106 // nodes is the set of nodes held 107 // by the graph. In a ReducedUndirected 108 // the node ID is the index into 109 // nodes. 110 nodes []community 111 undirectedEdges 112 113 // communities is the community 114 // structure of the graph. 115 communities [][]graph.Node 116 117 parent *ReducedUndirected 118 } 119 120 var ( 121 reducedUndirected = (*ReducedUndirected)(nil) 122 123 _ graph.WeightedUndirected = reducedUndirected 124 _ ReducedGraph = reducedUndirected 125 ) 126 127 // Communities returns the community memberships of the nodes in the 128 // graph used to generate the reduced graph. 129 func (g *ReducedUndirected) Communities() [][]graph.Node { 130 communities := make([][]graph.Node, len(g.communities)) 131 if g.parent == nil { 132 for i, members := range g.communities { 133 comm := make([]graph.Node, len(members)) 134 for j, n := range members { 135 nodes := g.nodes[n.ID()].nodes 136 if len(nodes) != 1 { 137 panic("community: unexpected number of nodes in base graph community") 138 } 139 comm[j] = nodes[0] 140 } 141 communities[i] = comm 142 } 143 return communities 144 } 145 sub := g.parent.Communities() 146 for i, members := range g.communities { 147 var comm []graph.Node 148 for _, n := range members { 149 comm = append(comm, sub[n.ID()]...) 150 } 151 communities[i] = comm 152 } 153 return communities 154 } 155 156 // Structure returns the community structure of the current level of 157 // the module clustering. The first index of the returned value 158 // corresponds to the index of the nodes in the next higher level if 159 // it exists. The returned value should not be mutated. 160 func (g *ReducedUndirected) Structure() [][]graph.Node { 161 return g.communities 162 } 163 164 // Expanded returns the next lower level of the module clustering or nil 165 // if at the lowest level. 166 func (g *ReducedUndirected) Expanded() ReducedGraph { 167 return g.parent 168 } 169 170 // reduceUndirected returns a reduced graph constructed from g divided 171 // into the given communities. The communities value is mutated 172 // by the call to reduceUndirected. If communities is nil and g is a 173 // ReducedUndirected, it is returned unaltered. 174 func reduceUndirected(g graph.Undirected, communities [][]graph.Node) *ReducedUndirected { 175 if communities == nil { 176 if r, ok := g.(*ReducedUndirected); ok { 177 return r 178 } 179 180 nodes := graph.NodesOf(g.Nodes()) 181 // TODO(kortschak) This sort is necessary really only 182 // for testing. In practice we would not be using the 183 // community provided by the user for a Q calculation. 184 // Probably we should use a function to map the 185 // communities in the test sets to the remapped order. 186 ordered.ByID(nodes) 187 communities = make([][]graph.Node, len(nodes)) 188 for i := range nodes { 189 communities[i] = []graph.Node{node(i)} 190 } 191 192 weight := positiveWeightFuncFor(g) 193 r := ReducedUndirected{ 194 nodes: make([]community, len(nodes)), 195 undirectedEdges: undirectedEdges{ 196 edges: make([][]int, len(nodes)), 197 weights: make(map[[2]int]float64), 198 }, 199 communities: communities, 200 } 201 communityOf := make(map[int64]int, len(nodes)) 202 for i, n := range nodes { 203 r.nodes[i] = community{id: i, nodes: []graph.Node{n}} 204 communityOf[n.ID()] = i 205 } 206 for _, u := range nodes { 207 uid := u.ID() 208 ucid := communityOf[uid] 209 var out []int 210 to := g.From(uid) 211 for to.Next() { 212 vid := to.Node().ID() 213 vcid := communityOf[vid] 214 if vcid != ucid { 215 out = append(out, vcid) 216 } 217 if ucid < vcid { 218 // Only store the weight once. 219 r.weights[[2]int{ucid, vcid}] = weight(uid, vid) 220 } 221 } 222 r.edges[ucid] = out 223 } 224 return &r 225 } 226 227 // Remove zero length communities destructively. 228 var commNodes int 229 for i := 0; i < len(communities); { 230 comm := communities[i] 231 if len(comm) == 0 { 232 communities[i] = communities[len(communities)-1] 233 communities[len(communities)-1] = nil 234 communities = communities[:len(communities)-1] 235 } else { 236 commNodes += len(comm) 237 i++ 238 } 239 } 240 241 r := ReducedUndirected{ 242 nodes: make([]community, len(communities)), 243 undirectedEdges: undirectedEdges{ 244 edges: make([][]int, len(communities)), 245 weights: make(map[[2]int]float64), 246 }, 247 } 248 r.communities = make([][]graph.Node, len(communities)) 249 for i := range r.communities { 250 r.communities[i] = []graph.Node{node(i)} 251 } 252 if g, ok := g.(*ReducedUndirected); ok { 253 // Make sure we retain the truncated 254 // community structure. 255 g.communities = communities 256 r.parent = g 257 } 258 weight := positiveWeightFuncFor(g) 259 communityOf := make(map[int64]int, commNodes) 260 for i, comm := range communities { 261 r.nodes[i] = community{id: i, nodes: comm} 262 for _, n := range comm { 263 communityOf[n.ID()] = i 264 } 265 } 266 for ucid, comm := range communities { 267 var out []int 268 for i, u := range comm { 269 uid := u.ID() 270 r.nodes[ucid].weight += weight(uid, uid) 271 for _, v := range comm[i+1:] { 272 r.nodes[ucid].weight += 2 * weight(uid, v.ID()) 273 } 274 to := g.From(uid) 275 for to.Next() { 276 vid := to.Node().ID() 277 vcid := communityOf[vid] 278 found := false 279 for _, e := range out { 280 if e == vcid { 281 found = true 282 break 283 } 284 } 285 if !found && vcid != ucid { 286 out = append(out, vcid) 287 } 288 if ucid < vcid { 289 // Only store the weight once. 290 r.weights[[2]int{ucid, vcid}] += weight(uid, vid) 291 } 292 } 293 } 294 r.edges[ucid] = out 295 } 296 return &r 297 } 298 299 // Node returns the node with the given ID if it exists in the graph, 300 // and nil otherwise. 301 func (g *ReducedUndirected) Node(id int64) graph.Node { 302 if g.has(id) { 303 return g.nodes[id] 304 } 305 return nil 306 } 307 308 // has returns whether the node exists within the graph. 309 func (g *ReducedUndirected) has(id int64) bool { 310 return 0 <= id && id < int64(len(g.nodes)) 311 } 312 313 // Nodes returns all the nodes in the graph. 314 func (g *ReducedUndirected) Nodes() graph.Nodes { 315 nodes := make([]graph.Node, len(g.nodes)) 316 for i := range g.nodes { 317 nodes[i] = node(i) 318 } 319 return iterator.NewOrderedNodes(nodes) 320 } 321 322 // From returns all nodes in g that can be reached directly from u. 323 func (g *ReducedUndirected) From(uid int64) graph.Nodes { 324 out := g.edges[uid] 325 nodes := make([]graph.Node, len(out)) 326 for i, vid := range out { 327 nodes[i] = g.nodes[vid] 328 } 329 return iterator.NewOrderedNodes(nodes) 330 } 331 332 // HasEdgeBetween returns whether an edge exists between nodes x and y. 333 func (g *ReducedUndirected) HasEdgeBetween(xid, yid int64) bool { 334 if xid == yid || !isValidID(xid) || !isValidID(yid) { 335 return false 336 } 337 if xid > yid { 338 xid, yid = yid, xid 339 } 340 _, ok := g.weights[[2]int{int(xid), int(yid)}] 341 return ok 342 } 343 344 // Edge returns the edge from u to v if such an edge exists and nil otherwise. 345 // The node v must be directly reachable from u as defined by the From method. 346 func (g *ReducedUndirected) Edge(uid, vid int64) graph.Edge { 347 return g.WeightedEdgeBetween(uid, vid) 348 } 349 350 // WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise. 351 // The node v must be directly reachable from u as defined by the From method. 352 func (g *ReducedUndirected) WeightedEdge(uid, vid int64) graph.WeightedEdge { 353 return g.WeightedEdgeBetween(uid, vid) 354 } 355 356 // EdgeBetween returns the edge between nodes x and y. 357 func (g *ReducedUndirected) EdgeBetween(xid, yid int64) graph.Edge { 358 return g.WeightedEdgeBetween(xid, yid) 359 } 360 361 // WeightedEdgeBetween returns the weighted edge between nodes x and y. 362 func (g *ReducedUndirected) WeightedEdgeBetween(xid, yid int64) graph.WeightedEdge { 363 if xid == yid || !isValidID(xid) || !isValidID(yid) { 364 return nil 365 } 366 if yid < xid { 367 xid, yid = yid, xid 368 } 369 w, ok := g.weights[[2]int{int(xid), int(yid)}] 370 if !ok { 371 return nil 372 } 373 return edge{from: g.nodes[xid], to: g.nodes[yid], weight: w} 374 } 375 376 // Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge. 377 // If x and y are the same node the internal node weight is returned. If there is no joining 378 // edge between the two nodes the weight value returned is zero. Weight returns true if an edge 379 // exists between x and y or if x and y have the same ID, false otherwise. 380 func (g *ReducedUndirected) Weight(xid, yid int64) (w float64, ok bool) { 381 if !isValidID(xid) || !isValidID(yid) { 382 return 0, false 383 } 384 if xid == yid { 385 return g.nodes[xid].weight, true 386 } 387 if xid > yid { 388 xid, yid = yid, xid 389 } 390 w, ok = g.weights[[2]int{int(xid), int(yid)}] 391 return w, ok 392 } 393 394 // undirectedLocalMover is a step in graph modularity optimization. 395 type undirectedLocalMover struct { 396 g *ReducedUndirected 397 398 // nodes is the set of working nodes. 399 nodes []graph.Node 400 // edgeWeightOf is the weighted degree 401 // of each node indexed by ID. 402 edgeWeightOf []float64 403 404 // m2 is the total sum of 405 // edge weights in g. 406 m2 float64 407 408 // weight is the weight function 409 // provided by g or a function 410 // that returns the Weight value 411 // of the non-nil edge between x 412 // and y. 413 weight func(xid, yid int64) float64 414 415 // communities is the current 416 // division of g. 417 communities [][]graph.Node 418 // memberships is a mapping between 419 // node ID and community membership. 420 memberships []int 421 422 // resolution is the Reichardt and 423 // Bornholdt γ parameter as defined 424 // in doi:10.1103/PhysRevE.74.016110. 425 resolution float64 426 427 // moved indicates that a call to 428 // move has been made since the last 429 // call to shuffle. 430 moved bool 431 432 // changed indicates that a move 433 // has been made since the creation 434 // of the local mover. 435 changed bool 436 } 437 438 // newUndirectedLocalMover returns a new undirectedLocalMover initialized with 439 // the graph g, a set of communities and a modularity resolution parameter. The 440 // node IDs of g must be contiguous in [0,n) where n is the number of nodes. 441 // If g has a zero edge weight sum, nil is returned. 442 func newUndirectedLocalMover(g *ReducedUndirected, communities [][]graph.Node, resolution float64) *undirectedLocalMover { 443 nodes := graph.NodesOf(g.Nodes()) 444 l := undirectedLocalMover{ 445 g: g, 446 nodes: nodes, 447 edgeWeightOf: make([]float64, len(nodes)), 448 communities: communities, 449 memberships: make([]int, len(nodes)), 450 resolution: resolution, 451 weight: positiveWeightFuncFor(g), 452 } 453 454 // Calculate the total edge weight of the graph 455 // and degree weights for each node. 456 for _, u := range l.nodes { 457 uid := u.ID() 458 w := l.weight(uid, uid) 459 to := g.From(uid) 460 for to.Next() { 461 w += l.weight(uid, to.Node().ID()) 462 } 463 l.edgeWeightOf[uid] = w 464 l.m2 += w 465 } 466 if l.m2 == 0 { 467 return nil 468 } 469 470 // Assign membership mappings. 471 for i, c := range communities { 472 for _, u := range c { 473 l.memberships[u.ID()] = i 474 } 475 } 476 477 return &l 478 } 479 480 // localMovingHeuristic performs the Louvain local moving heuristic until 481 // no further moves can be made. It returns a boolean indicating that the 482 // undirectedLocalMover has not made any improvement to the community 483 // structure and so the Louvain algorithm is done. 484 func (l *undirectedLocalMover) localMovingHeuristic(rnd func(int) int) (done bool) { 485 for { 486 l.shuffle(rnd) 487 for _, n := range l.nodes { 488 dQ, dst, src := l.deltaQ(n) 489 if dQ <= deltaQtol { 490 continue 491 } 492 l.move(dst, src) 493 } 494 if !l.moved { 495 return !l.changed 496 } 497 } 498 } 499 500 // shuffle performs a Fisher-Yates shuffle on the nodes held by the 501 // undirectedLocalMover using the random source rnd which should return 502 // an integer in the range [0,n). 503 func (l *undirectedLocalMover) shuffle(rnd func(n int) int) { 504 l.moved = false 505 for i := range l.nodes[:len(l.nodes)-1] { 506 j := i + rnd(len(l.nodes)-i) 507 l.nodes[i], l.nodes[j] = l.nodes[j], l.nodes[i] 508 } 509 } 510 511 // move moves the node at src to the community at dst. 512 func (l *undirectedLocalMover) move(dst int, src commIdx) { 513 l.moved = true 514 l.changed = true 515 516 srcComm := l.communities[src.community] 517 n := srcComm[src.node] 518 519 l.memberships[n.ID()] = dst 520 521 l.communities[dst] = append(l.communities[dst], n) 522 srcComm[src.node], srcComm[len(srcComm)-1] = srcComm[len(srcComm)-1], nil 523 l.communities[src.community] = srcComm[:len(srcComm)-1] 524 } 525 526 // deltaQ returns the highest gain in modularity attainable by moving 527 // n from its current community to another connected community and 528 // the index of the chosen destination. The index into the 529 // undirectedLocalMover's communities field is returned in src if n 530 // is in communities. 531 func (l *undirectedLocalMover) deltaQ(n graph.Node) (deltaQ float64, dst int, src commIdx) { 532 id := n.ID() 533 a_aa := l.weight(id, id) 534 k_a := l.edgeWeightOf[id] 535 m2 := l.m2 536 gamma := l.resolution 537 538 // Find communities connected to n. 539 connected := make(set.Ints) 540 // The following for loop is equivalent to: 541 // 542 // for _, v := range l.g.From(n) { 543 // connected.Add(l.memberships[v.ID()]) 544 // } 545 // 546 // This is done to avoid an allocation. 547 for _, vid := range l.g.edges[id] { 548 connected.Add(l.memberships[vid]) 549 } 550 // Insert the node's own community. 551 connected.Add(l.memberships[id]) 552 553 candidates := make([]int, 0, len(connected)) 554 for i := range connected { 555 candidates = append(candidates, i) 556 } 557 sort.Ints(candidates) 558 559 // Calculate the highest modularity gain 560 // from moving into another community and 561 // keep the index of that community. 562 var dQremove float64 563 dQadd, dst, src := math.Inf(-1), -1, commIdx{-1, -1} 564 for _, i := range candidates { 565 c := l.communities[i] 566 var k_aC, sigma_totC float64 // C is a substitution for ^𝛼 or ^𝛽. 567 var removal bool 568 for j, u := range c { 569 uid := u.ID() 570 if uid == id { 571 if src.community != -1 { 572 panic("community: multiple sources") 573 } 574 src = commIdx{i, j} 575 removal = true 576 } 577 578 k_aC += l.weight(id, uid) 579 // sigma_totC could be kept for each community 580 // and updated for moves, changing the calculation 581 // of sigma_totC here from O(n_c) to O(1), but 582 // in practice the time savings do not appear 583 // to be compelling and do not make up for the 584 // increase in code complexity and space required. 585 sigma_totC += l.edgeWeightOf[uid] 586 } 587 588 // See louvain.tex for a derivation of these equations. 589 switch { 590 case removal: 591 // The community c was the current community, 592 // so calculate the change due to removal. 593 dQremove = k_aC /*^𝛼*/ - a_aa - gamma*k_a*(sigma_totC /*^𝛼*/ -k_a)/m2 594 595 default: 596 // Otherwise calculate the change due to an addition 597 // to c and retain if it is the current best. 598 dQ := k_aC /*^𝛽*/ - gamma*k_a*sigma_totC /*^𝛽*/ /m2 599 if dQ > dQadd { 600 dQadd = dQ 601 dst = i 602 } 603 } 604 } 605 606 return 2 * (dQadd - dQremove) / m2, dst, src 607 }