gonum.org/v1/gonum@v0.14.0/graph/community/louvain_directed.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package community 6 7 import ( 8 "math" 9 "sort" 10 11 "golang.org/x/exp/rand" 12 13 "gonum.org/v1/gonum/graph" 14 "gonum.org/v1/gonum/graph/internal/ordered" 15 "gonum.org/v1/gonum/graph/internal/set" 16 "gonum.org/v1/gonum/graph/iterator" 17 ) 18 19 // qDirected returns the modularity Q score of the graph g subdivided into the 20 // given communities at the given resolution. If communities is nil, the 21 // unclustered modularity score is returned. The resolution parameter 22 // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110. 23 // qDirected will panic if g has any edge with negative edge weight. 24 // 25 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j) 26 func qDirected(g graph.Directed, communities [][]graph.Node, resolution float64) float64 { 27 nodes := graph.NodesOf(g.Nodes()) 28 weight := positiveWeightFuncFor(g) 29 30 // Calculate the total edge weight of the graph 31 // and the table of penetrating edge weight sums. 32 var m float64 33 k := make(map[int64]directedWeights, len(nodes)) 34 for _, n := range nodes { 35 var wOut float64 36 u := n 37 uid := u.ID() 38 to := g.From(uid) 39 for to.Next() { 40 wOut += weight(uid, to.Node().ID()) 41 } 42 var wIn float64 43 v := n 44 vid := v.ID() 45 from := g.To(vid) 46 for from.Next() { 47 wIn += weight(from.Node().ID(), vid) 48 } 49 id := n.ID() 50 w := weight(id, id) 51 m += w + wOut // We only need to count edges once. 52 k[id] = directedWeights{out: w + wOut, in: w + wIn} 53 } 54 55 if communities == nil { 56 var q float64 57 for _, u := range nodes { 58 uid := u.ID() 59 kU := k[uid] 60 q += weight(uid, uid) - resolution*kU.out*kU.in/m 61 } 62 return q / m 63 } 64 65 var q float64 66 for _, c := range communities { 67 for _, u := range c { 68 uid := u.ID() 69 kU := k[uid] 70 for _, v := range c { 71 vid := v.ID() 72 kV := k[vid] 73 q += weight(uid, vid) - resolution*kU.out*kV.in/m 74 } 75 } 76 } 77 return q / m 78 } 79 80 // louvainDirected returns the hierarchical modularization of g at the given 81 // resolution using the Louvain algorithm. If src is nil, rand.Intn is used 82 // as the random generator. louvainDirected will panic if g has any edge with negative 83 // edge weight. 84 func louvainDirected(g graph.Directed, resolution float64, src rand.Source) ReducedGraph { 85 // See louvain.tex for a detailed description 86 // of the algorithm used here. 87 88 c := reduceDirected(g, nil) 89 rnd := rand.Intn 90 if src != nil { 91 rnd = rand.New(src).Intn 92 } 93 for { 94 l := newDirectedLocalMover(c, c.communities, resolution) 95 if l == nil { 96 return c 97 } 98 if done := l.localMovingHeuristic(rnd); done { 99 return c 100 } 101 c = reduceDirected(c, l.communities) 102 } 103 } 104 105 // ReducedDirected is a directed graph of communities derived from a 106 // parent graph by reduction. 107 type ReducedDirected struct { 108 // nodes is the set of nodes held 109 // by the graph. In a ReducedDirected 110 // the node ID is the index into 111 // nodes. 112 nodes []community 113 directedEdges 114 115 // communities is the community 116 // structure of the graph. 117 communities [][]graph.Node 118 119 parent *ReducedDirected 120 } 121 122 var ( 123 reducedDirected = (*ReducedDirected)(nil) 124 125 _ graph.WeightedDirected = reducedDirected 126 _ ReducedGraph = reducedDirected 127 ) 128 129 // Communities returns the community memberships of the nodes in the 130 // graph used to generate the reduced graph. 131 func (g *ReducedDirected) Communities() [][]graph.Node { 132 communities := make([][]graph.Node, len(g.communities)) 133 if g.parent == nil { 134 for i, members := range g.communities { 135 comm := make([]graph.Node, len(members)) 136 for j, n := range members { 137 nodes := g.nodes[n.ID()].nodes 138 if len(nodes) != 1 { 139 panic("community: unexpected number of nodes in base graph community") 140 } 141 comm[j] = nodes[0] 142 } 143 communities[i] = comm 144 } 145 return communities 146 } 147 sub := g.parent.Communities() 148 for i, members := range g.communities { 149 var comm []graph.Node 150 for _, n := range members { 151 comm = append(comm, sub[n.ID()]...) 152 } 153 communities[i] = comm 154 } 155 return communities 156 } 157 158 // Structure returns the community structure of the current level of 159 // the module clustering. The first index of the returned value 160 // corresponds to the index of the nodes in the next higher level if 161 // it exists. The returned value should not be mutated. 162 func (g *ReducedDirected) Structure() [][]graph.Node { 163 return g.communities 164 } 165 166 // Expanded returns the next lower level of the module clustering or nil 167 // if at the lowest level. 168 func (g *ReducedDirected) Expanded() ReducedGraph { 169 return g.parent 170 } 171 172 // reduceDirected returns a reduced graph constructed from g divided 173 // into the given communities. The communities value is mutated 174 // by the call to reduceDirected. If communities is nil and g is a 175 // ReducedDirected, it is returned unaltered. 176 func reduceDirected(g graph.Directed, communities [][]graph.Node) *ReducedDirected { 177 if communities == nil { 178 if r, ok := g.(*ReducedDirected); ok { 179 return r 180 } 181 182 nodes := graph.NodesOf(g.Nodes()) 183 // TODO(kortschak) This sort is necessary really only 184 // for testing. In practice we would not be using the 185 // community provided by the user for a Q calculation. 186 // Probably we should use a function to map the 187 // communities in the test sets to the remapped order. 188 ordered.ByID(nodes) 189 communities = make([][]graph.Node, len(nodes)) 190 for i := range nodes { 191 communities[i] = []graph.Node{node(i)} 192 } 193 194 weight := positiveWeightFuncFor(g) 195 r := ReducedDirected{ 196 nodes: make([]community, len(nodes)), 197 directedEdges: directedEdges{ 198 edgesFrom: make([][]int, len(nodes)), 199 edgesTo: make([][]int, len(nodes)), 200 weights: make(map[[2]int]float64), 201 }, 202 communities: communities, 203 } 204 communityOf := make(map[int64]int, len(nodes)) 205 for i, n := range nodes { 206 r.nodes[i] = community{id: i, nodes: []graph.Node{n}} 207 communityOf[n.ID()] = i 208 } 209 for _, n := range nodes { 210 id := communityOf[n.ID()] 211 212 var out []int 213 u := n 214 uid := u.ID() 215 to := g.From(uid) 216 for to.Next() { 217 vid := to.Node().ID() 218 vcid := communityOf[vid] 219 if vcid != id { 220 out = append(out, vcid) 221 } 222 r.weights[[2]int{id, vcid}] = weight(uid, vid) 223 } 224 r.edgesFrom[id] = out 225 226 var in []int 227 v := n 228 vid := v.ID() 229 from := g.To(vid) 230 for from.Next() { 231 uid := from.Node().ID() 232 ucid := communityOf[uid] 233 if ucid != id { 234 in = append(in, ucid) 235 } 236 r.weights[[2]int{ucid, id}] = weight(uid, vid) 237 } 238 r.edgesTo[id] = in 239 } 240 return &r 241 } 242 243 // Remove zero length communities destructively. 244 var commNodes int 245 for i := 0; i < len(communities); { 246 comm := communities[i] 247 if len(comm) == 0 { 248 communities[i] = communities[len(communities)-1] 249 communities[len(communities)-1] = nil 250 communities = communities[:len(communities)-1] 251 } else { 252 commNodes += len(comm) 253 i++ 254 } 255 } 256 257 r := ReducedDirected{ 258 nodes: make([]community, len(communities)), 259 directedEdges: directedEdges{ 260 edgesFrom: make([][]int, len(communities)), 261 edgesTo: make([][]int, len(communities)), 262 weights: make(map[[2]int]float64), 263 }, 264 } 265 r.communities = make([][]graph.Node, len(communities)) 266 for i := range r.communities { 267 r.communities[i] = []graph.Node{node(i)} 268 } 269 if g, ok := g.(*ReducedDirected); ok { 270 // Make sure we retain the truncated 271 // community structure. 272 g.communities = communities 273 r.parent = g 274 } 275 weight := positiveWeightFuncFor(g) 276 communityOf := make(map[int64]int, commNodes) 277 for i, comm := range communities { 278 r.nodes[i] = community{id: i, nodes: comm} 279 for _, n := range comm { 280 communityOf[n.ID()] = i 281 } 282 } 283 for id, comm := range communities { 284 var out, in []int 285 for _, n := range comm { 286 u := n 287 uid := u.ID() 288 for _, v := range comm { 289 r.nodes[id].weight += weight(uid, v.ID()) 290 } 291 292 to := g.From(uid) 293 for to.Next() { 294 vid := to.Node().ID() 295 vcid := communityOf[vid] 296 found := false 297 for _, e := range out { 298 if e == vcid { 299 found = true 300 break 301 } 302 } 303 if !found && vcid != id { 304 out = append(out, vcid) 305 } 306 // Add half weights because the other 307 // ends of edges are also counted. 308 r.weights[[2]int{id, vcid}] += weight(uid, vid) / 2 309 } 310 311 v := n 312 vid := v.ID() 313 from := g.To(vid) 314 for from.Next() { 315 uid := from.Node().ID() 316 ucid := communityOf[uid] 317 found := false 318 for _, e := range in { 319 if e == ucid { 320 found = true 321 break 322 } 323 } 324 if !found && ucid != id { 325 in = append(in, ucid) 326 } 327 // Add half weights because the other 328 // ends of edges are also counted. 329 r.weights[[2]int{ucid, id}] += weight(uid, vid) / 2 330 } 331 } 332 r.edgesFrom[id] = out 333 r.edgesTo[id] = in 334 } 335 return &r 336 } 337 338 // Node returns the node with the given ID if it exists in the graph, 339 // and nil otherwise. 340 func (g *ReducedDirected) Node(id int64) graph.Node { 341 if g.has(id) { 342 return g.nodes[id] 343 } 344 return nil 345 } 346 347 // has returns whether the node exists within the graph. 348 func (g *ReducedDirected) has(id int64) bool { 349 return 0 <= id && id < int64(len(g.nodes)) 350 } 351 352 // Nodes returns all the nodes in the graph. 353 func (g *ReducedDirected) Nodes() graph.Nodes { 354 nodes := make([]graph.Node, len(g.nodes)) 355 for i := range g.nodes { 356 nodes[i] = node(i) 357 } 358 return iterator.NewOrderedNodes(nodes) 359 } 360 361 // From returns all nodes in g that can be reached directly from u. 362 func (g *ReducedDirected) From(uid int64) graph.Nodes { 363 out := g.edgesFrom[uid] 364 nodes := make([]graph.Node, len(out)) 365 for i, vid := range out { 366 nodes[i] = g.nodes[vid] 367 } 368 return iterator.NewOrderedNodes(nodes) 369 } 370 371 // To returns all nodes in g that can reach directly to v. 372 func (g *ReducedDirected) To(vid int64) graph.Nodes { 373 in := g.edgesTo[vid] 374 nodes := make([]graph.Node, len(in)) 375 for i, uid := range in { 376 nodes[i] = g.nodes[uid] 377 } 378 return iterator.NewOrderedNodes(nodes) 379 } 380 381 // HasEdgeBetween returns whether an edge exists between nodes x and y. 382 func (g *ReducedDirected) HasEdgeBetween(xid, yid int64) bool { 383 if xid == yid || !isValidID(xid) || !isValidID(yid) { 384 return false 385 } 386 _, ok := g.weights[[2]int{int(xid), int(yid)}] 387 if ok { 388 return true 389 } 390 _, ok = g.weights[[2]int{int(yid), int(xid)}] 391 return ok 392 } 393 394 // HasEdgeFromTo returns whether an edge exists from node u to v. 395 func (g *ReducedDirected) HasEdgeFromTo(uid, vid int64) bool { 396 if uid == vid || !isValidID(uid) || !isValidID(vid) { 397 return false 398 } 399 _, ok := g.weights[[2]int{int(uid), int(vid)}] 400 return ok 401 } 402 403 // Edge returns the edge from u to v if such an edge exists and nil otherwise. 404 // The node v must be directly reachable from u as defined by the From method. 405 func (g *ReducedDirected) Edge(uid, vid int64) graph.Edge { 406 return g.WeightedEdge(uid, vid) 407 } 408 409 // WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise. 410 // The node v must be directly reachable from u as defined by the From method. 411 func (g *ReducedDirected) WeightedEdge(uid, vid int64) graph.WeightedEdge { 412 if uid == vid || !isValidID(uid) || !isValidID(vid) { 413 return nil 414 } 415 w, ok := g.weights[[2]int{int(uid), int(vid)}] 416 if !ok { 417 return nil 418 } 419 return edge{from: g.nodes[uid], to: g.nodes[vid], weight: w} 420 } 421 422 // Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge. 423 // If x and y are the same node the internal node weight is returned. If there is no joining 424 // edge between the two nodes the weight value returned is zero. Weight returns true if an edge 425 // exists between x and y or if x and y have the same ID, false otherwise. 426 func (g *ReducedDirected) Weight(xid, yid int64) (w float64, ok bool) { 427 if !isValidID(xid) || !isValidID(yid) { 428 return 0, false 429 } 430 if xid == yid { 431 return g.nodes[xid].weight, true 432 } 433 w, ok = g.weights[[2]int{int(xid), int(yid)}] 434 return w, ok 435 } 436 437 // directedLocalMover is a step in graph modularity optimization. 438 type directedLocalMover struct { 439 g *ReducedDirected 440 441 // nodes is the set of working nodes. 442 nodes []graph.Node 443 // edgeWeightsOf is the weighted degree 444 // of each node indexed by ID. 445 edgeWeightsOf []directedWeights 446 447 // m is the total sum of edge 448 // weights in g. 449 m float64 450 451 // weight is the weight function 452 // provided by g or a function 453 // that returns the Weight value 454 // of the non-nil edge between x 455 // and y. 456 weight func(xid, yid int64) float64 457 458 // communities is the current 459 // division of g. 460 communities [][]graph.Node 461 // memberships is a mapping between 462 // node ID and community membership. 463 memberships []int 464 465 // resolution is the Reichardt and 466 // Bornholdt γ parameter as defined 467 // in doi:10.1103/PhysRevE.74.016110. 468 resolution float64 469 470 // moved indicates that a call to 471 // move has been made since the last 472 // call to shuffle. 473 moved bool 474 475 // changed indicates that a move 476 // has been made since the creation 477 // of the local mover. 478 changed bool 479 } 480 481 type directedWeights struct { 482 out, in float64 483 } 484 485 // newDirectedLocalMover returns a new directedLocalMover initialized with 486 // the graph g, a set of communities and a modularity resolution parameter. 487 // The node IDs of g must be contiguous in [0,n) where n is the number of 488 // nodes. 489 // If g has a zero edge weight sum, nil is returned. 490 func newDirectedLocalMover(g *ReducedDirected, communities [][]graph.Node, resolution float64) *directedLocalMover { 491 nodes := graph.NodesOf(g.Nodes()) 492 l := directedLocalMover{ 493 g: g, 494 nodes: nodes, 495 edgeWeightsOf: make([]directedWeights, len(nodes)), 496 communities: communities, 497 memberships: make([]int, len(nodes)), 498 resolution: resolution, 499 weight: positiveWeightFuncFor(g), 500 } 501 502 // Calculate the total edge weight of the graph 503 // and degree weights for each node. 504 for _, n := range l.nodes { 505 u := n 506 var wOut float64 507 uid := u.ID() 508 to := g.From(uid) 509 for to.Next() { 510 wOut += l.weight(uid, to.Node().ID()) 511 } 512 513 v := n 514 var wIn float64 515 vid := v.ID() 516 from := g.To(vid) 517 for from.Next() { 518 wIn += l.weight(from.Node().ID(), vid) 519 } 520 521 id := n.ID() 522 w := l.weight(id, id) 523 l.edgeWeightsOf[id] = directedWeights{out: w + wOut, in: w + wIn} 524 l.m += w + wOut 525 } 526 527 // Assign membership mappings. 528 for i, c := range communities { 529 for _, n := range c { 530 l.memberships[n.ID()] = i 531 } 532 } 533 534 return &l 535 } 536 537 // localMovingHeuristic performs the Louvain local moving heuristic until 538 // no further moves can be made. It returns a boolean indicating that the 539 // directedLocalMover has not made any improvement to the community structure and 540 // so the Louvain algorithm is done. 541 func (l *directedLocalMover) localMovingHeuristic(rnd func(int) int) (done bool) { 542 for { 543 l.shuffle(rnd) 544 for _, n := range l.nodes { 545 dQ, dst, src := l.deltaQ(n) 546 if dQ <= deltaQtol { 547 continue 548 } 549 l.move(dst, src) 550 } 551 if !l.moved { 552 return !l.changed 553 } 554 } 555 } 556 557 // shuffle performs a Fisher-Yates shuffle on the nodes held by the 558 // directedLocalMover using the random source rnd which should return an 559 // integer in the range [0,n). 560 func (l *directedLocalMover) shuffle(rnd func(n int) int) { 561 l.moved = false 562 for i := range l.nodes[:len(l.nodes)-1] { 563 j := i + rnd(len(l.nodes)-i) 564 l.nodes[i], l.nodes[j] = l.nodes[j], l.nodes[i] 565 } 566 } 567 568 // move moves the node at src to the community at dst. 569 func (l *directedLocalMover) move(dst int, src commIdx) { 570 l.moved = true 571 l.changed = true 572 573 srcComm := l.communities[src.community] 574 n := srcComm[src.node] 575 576 l.memberships[n.ID()] = dst 577 578 l.communities[dst] = append(l.communities[dst], n) 579 srcComm[src.node], srcComm[len(srcComm)-1] = srcComm[len(srcComm)-1], nil 580 l.communities[src.community] = srcComm[:len(srcComm)-1] 581 } 582 583 // deltaQ returns the highest gain in modularity attainable by moving 584 // n from its current community to another connected community and 585 // the index of the chosen destination. The index into the directedLocalMover's 586 // communities field is returned in src if n is in communities. 587 func (l *directedLocalMover) deltaQ(n graph.Node) (deltaQ float64, dst int, src commIdx) { 588 id := n.ID() 589 590 a_aa := l.weight(id, id) 591 k_a := l.edgeWeightsOf[id] 592 m := l.m 593 gamma := l.resolution 594 595 // Find communities connected to n. 596 connected := make(set.Ints) 597 // The following for loop is equivalent to: 598 // 599 // for _, v := range l.g.From(n) { 600 // connected.Add(l.memberships[v.ID()]) 601 // } 602 // for _, v := range l.g.To(n) { 603 // connected.Add(l.memberships[v.ID()]) 604 // } 605 // 606 // This is done to avoid two allocations. 607 for _, vid := range l.g.edgesFrom[id] { 608 connected.Add(l.memberships[vid]) 609 } 610 for _, vid := range l.g.edgesTo[id] { 611 connected.Add(l.memberships[vid]) 612 } 613 // Insert the node's own community. 614 connected.Add(l.memberships[id]) 615 616 candidates := make([]int, 0, len(connected)) 617 for i := range connected { 618 candidates = append(candidates, i) 619 } 620 sort.Ints(candidates) 621 622 // Calculate the highest modularity gain 623 // from moving into another community and 624 // keep the index of that community. 625 var dQremove float64 626 dQadd, dst, src := math.Inf(-1), -1, commIdx{-1, -1} 627 for _, i := range candidates { 628 c := l.communities[i] 629 var k_aC, sigma_totC directedWeights // C is a substitution for ^𝛼 or ^𝛽. 630 var removal bool 631 for j, u := range c { 632 uid := u.ID() 633 if uid == id { 634 if src.community != -1 { 635 panic("community: multiple sources") 636 } 637 src = commIdx{i, j} 638 removal = true 639 } 640 641 k_aC.in += l.weight(uid, id) 642 k_aC.out += l.weight(id, uid) 643 // sigma_totC could be kept for each community 644 // and updated for moves, changing the calculation 645 // of sigma_totC here from O(n_c) to O(1), but 646 // in practice the time savings do not appear 647 // to be compelling and do not make up for the 648 // increase in code complexity and space required. 649 w := l.edgeWeightsOf[uid] 650 sigma_totC.in += w.in 651 sigma_totC.out += w.out 652 } 653 654 // See louvain.tex for a derivation of these equations. 655 switch { 656 case removal: 657 // The community c was the current community, 658 // so calculate the change due to removal. 659 dQremove = (k_aC.in /*^𝛼*/ - a_aa) + (k_aC.out /*^𝛼*/ - a_aa) - 660 gamma*(k_a.in*(sigma_totC.out /*^𝛼*/ -k_a.out)+k_a.out*(sigma_totC.in /*^𝛼*/ -k_a.in))/m 661 662 default: 663 // Otherwise calculate the change due to an addition 664 // to c and retain if it is the current best. 665 dQ := k_aC.in /*^𝛽*/ + k_aC.out /*^𝛽*/ - 666 gamma*(k_a.in*sigma_totC.out /*^𝛽*/ +k_a.out*sigma_totC.in /*^𝛽*/)/m 667 668 if dQ > dQadd { 669 dQadd = dQ 670 dst = i 671 } 672 } 673 } 674 675 return (dQadd - dQremove) / m, dst, src 676 }