github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/graph/community/louvain_directed.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package community 6 7 import ( 8 "math" 9 "sort" 10 11 "golang.org/x/exp/rand" 12 13 "github.com/jingcheng-WU/gonum/graph" 14 "github.com/jingcheng-WU/gonum/graph/internal/ordered" 15 "github.com/jingcheng-WU/gonum/graph/internal/set" 16 "github.com/jingcheng-WU/gonum/graph/iterator" 17 ) 18 19 // qDirected returns the modularity Q score of the graph g subdivided into the 20 // given communities at the given resolution. If communities is nil, the 21 // unclustered modularity score is returned. The resolution parameter 22 // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110. 23 // qDirected will panic if g has any edge with negative edge weight. 24 // 25 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j) 26 // 27 func qDirected(g graph.Directed, communities [][]graph.Node, resolution float64) float64 { 28 nodes := graph.NodesOf(g.Nodes()) 29 weight := positiveWeightFuncFor(g) 30 31 // Calculate the total edge weight of the graph 32 // and the table of penetrating edge weight sums. 33 var m float64 34 k := make(map[int64]directedWeights, len(nodes)) 35 for _, n := range nodes { 36 var wOut float64 37 u := n 38 uid := u.ID() 39 to := g.From(uid) 40 for to.Next() { 41 wOut += weight(uid, to.Node().ID()) 42 } 43 var wIn float64 44 v := n 45 vid := v.ID() 46 from := g.To(vid) 47 for from.Next() { 48 wIn += weight(from.Node().ID(), vid) 49 } 50 id := n.ID() 51 w := weight(id, id) 52 m += w + wOut // We only need to count edges once. 53 k[id] = directedWeights{out: w + wOut, in: w + wIn} 54 } 55 56 if communities == nil { 57 var q float64 58 for _, u := range nodes { 59 uid := u.ID() 60 kU := k[uid] 61 q += weight(uid, uid) - resolution*kU.out*kU.in/m 62 } 63 return q / m 64 } 65 66 var q float64 67 for _, c := range communities { 68 for _, u := range c { 69 uid := u.ID() 70 kU := k[uid] 71 for _, v := range c { 72 vid := v.ID() 73 kV := k[vid] 74 q += weight(uid, vid) - resolution*kU.out*kV.in/m 75 } 76 } 77 } 78 return q / m 79 } 80 81 // louvainDirected returns the hierarchical modularization of g at the given 82 // resolution using the Louvain algorithm. If src is nil, rand.Intn is used 83 // as the random generator. louvainDirected will panic if g has any edge with negative 84 // edge weight. 85 func louvainDirected(g graph.Directed, resolution float64, src rand.Source) ReducedGraph { 86 // See louvain.tex for a detailed description 87 // of the algorithm used here. 88 89 c := reduceDirected(g, nil) 90 rnd := rand.Intn 91 if src != nil { 92 rnd = rand.New(src).Intn 93 } 94 for { 95 l := newDirectedLocalMover(c, c.communities, resolution) 96 if l == nil { 97 return c 98 } 99 if done := l.localMovingHeuristic(rnd); done { 100 return c 101 } 102 c = reduceDirected(c, l.communities) 103 } 104 } 105 106 // ReducedDirected is a directed graph of communities derived from a 107 // parent graph by reduction. 108 type ReducedDirected struct { 109 // nodes is the set of nodes held 110 // by the graph. In a ReducedDirected 111 // the node ID is the index into 112 // nodes. 113 nodes []community 114 directedEdges 115 116 // communities is the community 117 // structure of the graph. 118 communities [][]graph.Node 119 120 parent *ReducedDirected 121 } 122 123 var ( 124 reducedDirected = (*ReducedDirected)(nil) 125 126 _ graph.WeightedDirected = reducedDirected 127 _ ReducedGraph = reducedDirected 128 ) 129 130 // Communities returns the community memberships of the nodes in the 131 // graph used to generate the reduced graph. 132 func (g *ReducedDirected) Communities() [][]graph.Node { 133 communities := make([][]graph.Node, len(g.communities)) 134 if g.parent == nil { 135 for i, members := range g.communities { 136 comm := make([]graph.Node, len(members)) 137 for j, n := range members { 138 nodes := g.nodes[n.ID()].nodes 139 if len(nodes) != 1 { 140 panic("community: unexpected number of nodes in base graph community") 141 } 142 comm[j] = nodes[0] 143 } 144 communities[i] = comm 145 } 146 return communities 147 } 148 sub := g.parent.Communities() 149 for i, members := range g.communities { 150 var comm []graph.Node 151 for _, n := range members { 152 comm = append(comm, sub[n.ID()]...) 153 } 154 communities[i] = comm 155 } 156 return communities 157 } 158 159 // Structure returns the community structure of the current level of 160 // the module clustering. The first index of the returned value 161 // corresponds to the index of the nodes in the next higher level if 162 // it exists. The returned value should not be mutated. 163 func (g *ReducedDirected) Structure() [][]graph.Node { 164 return g.communities 165 } 166 167 // Expanded returns the next lower level of the module clustering or nil 168 // if at the lowest level. 169 func (g *ReducedDirected) Expanded() ReducedGraph { 170 return g.parent 171 } 172 173 // reduceDirected returns a reduced graph constructed from g divided 174 // into the given communities. The communities value is mutated 175 // by the call to reduceDirected. If communities is nil and g is a 176 // ReducedDirected, it is returned unaltered. 177 func reduceDirected(g graph.Directed, communities [][]graph.Node) *ReducedDirected { 178 if communities == nil { 179 if r, ok := g.(*ReducedDirected); ok { 180 return r 181 } 182 183 nodes := graph.NodesOf(g.Nodes()) 184 // TODO(kortschak) This sort is necessary really only 185 // for testing. In practice we would not be using the 186 // community provided by the user for a Q calculation. 187 // Probably we should use a function to map the 188 // communities in the test sets to the remapped order. 189 sort.Sort(ordered.ByID(nodes)) 190 communities = make([][]graph.Node, len(nodes)) 191 for i := range nodes { 192 communities[i] = []graph.Node{node(i)} 193 } 194 195 weight := positiveWeightFuncFor(g) 196 r := ReducedDirected{ 197 nodes: make([]community, len(nodes)), 198 directedEdges: directedEdges{ 199 edgesFrom: make([][]int, len(nodes)), 200 edgesTo: make([][]int, len(nodes)), 201 weights: make(map[[2]int]float64), 202 }, 203 communities: communities, 204 } 205 communityOf := make(map[int64]int, len(nodes)) 206 for i, n := range nodes { 207 r.nodes[i] = community{id: i, nodes: []graph.Node{n}} 208 communityOf[n.ID()] = i 209 } 210 for _, n := range nodes { 211 id := communityOf[n.ID()] 212 213 var out []int 214 u := n 215 uid := u.ID() 216 to := g.From(uid) 217 for to.Next() { 218 vid := to.Node().ID() 219 vcid := communityOf[vid] 220 if vcid != id { 221 out = append(out, vcid) 222 } 223 r.weights[[2]int{id, vcid}] = weight(uid, vid) 224 } 225 r.edgesFrom[id] = out 226 227 var in []int 228 v := n 229 vid := v.ID() 230 from := g.To(vid) 231 for from.Next() { 232 uid := from.Node().ID() 233 ucid := communityOf[uid] 234 if ucid != id { 235 in = append(in, ucid) 236 } 237 r.weights[[2]int{ucid, id}] = weight(uid, vid) 238 } 239 r.edgesTo[id] = in 240 } 241 return &r 242 } 243 244 // Remove zero length communities destructively. 245 var commNodes int 246 for i := 0; i < len(communities); { 247 comm := communities[i] 248 if len(comm) == 0 { 249 communities[i] = communities[len(communities)-1] 250 communities[len(communities)-1] = nil 251 communities = communities[:len(communities)-1] 252 } else { 253 commNodes += len(comm) 254 i++ 255 } 256 } 257 258 r := ReducedDirected{ 259 nodes: make([]community, len(communities)), 260 directedEdges: directedEdges{ 261 edgesFrom: make([][]int, len(communities)), 262 edgesTo: make([][]int, len(communities)), 263 weights: make(map[[2]int]float64), 264 }, 265 } 266 r.communities = make([][]graph.Node, len(communities)) 267 for i := range r.communities { 268 r.communities[i] = []graph.Node{node(i)} 269 } 270 if g, ok := g.(*ReducedDirected); ok { 271 // Make sure we retain the truncated 272 // community structure. 273 g.communities = communities 274 r.parent = g 275 } 276 weight := positiveWeightFuncFor(g) 277 communityOf := make(map[int64]int, commNodes) 278 for i, comm := range communities { 279 r.nodes[i] = community{id: i, nodes: comm} 280 for _, n := range comm { 281 communityOf[n.ID()] = i 282 } 283 } 284 for id, comm := range communities { 285 var out, in []int 286 for _, n := range comm { 287 u := n 288 uid := u.ID() 289 for _, v := range comm { 290 r.nodes[id].weight += weight(uid, v.ID()) 291 } 292 293 to := g.From(uid) 294 for to.Next() { 295 vid := to.Node().ID() 296 vcid := communityOf[vid] 297 found := false 298 for _, e := range out { 299 if e == vcid { 300 found = true 301 break 302 } 303 } 304 if !found && vcid != id { 305 out = append(out, vcid) 306 } 307 // Add half weights because the other 308 // ends of edges are also counted. 309 r.weights[[2]int{id, vcid}] += weight(uid, vid) / 2 310 } 311 312 v := n 313 vid := v.ID() 314 from := g.To(vid) 315 for from.Next() { 316 uid := from.Node().ID() 317 ucid := communityOf[uid] 318 found := false 319 for _, e := range in { 320 if e == ucid { 321 found = true 322 break 323 } 324 } 325 if !found && ucid != id { 326 in = append(in, ucid) 327 } 328 // Add half weights because the other 329 // ends of edges are also counted. 330 r.weights[[2]int{ucid, id}] += weight(uid, vid) / 2 331 } 332 } 333 r.edgesFrom[id] = out 334 r.edgesTo[id] = in 335 } 336 return &r 337 } 338 339 // Node returns the node with the given ID if it exists in the graph, 340 // and nil otherwise. 341 func (g *ReducedDirected) Node(id int64) graph.Node { 342 if g.has(id) { 343 return g.nodes[id] 344 } 345 return nil 346 } 347 348 // has returns whether the node exists within the graph. 349 func (g *ReducedDirected) has(id int64) bool { 350 return 0 <= id && id < int64(len(g.nodes)) 351 } 352 353 // Nodes returns all the nodes in the graph. 354 func (g *ReducedDirected) Nodes() graph.Nodes { 355 nodes := make([]graph.Node, len(g.nodes)) 356 for i := range g.nodes { 357 nodes[i] = node(i) 358 } 359 return iterator.NewOrderedNodes(nodes) 360 } 361 362 // From returns all nodes in g that can be reached directly from u. 363 func (g *ReducedDirected) From(uid int64) graph.Nodes { 364 out := g.edgesFrom[uid] 365 nodes := make([]graph.Node, len(out)) 366 for i, vid := range out { 367 nodes[i] = g.nodes[vid] 368 } 369 return iterator.NewOrderedNodes(nodes) 370 } 371 372 // To returns all nodes in g that can reach directly to v. 373 func (g *ReducedDirected) To(vid int64) graph.Nodes { 374 in := g.edgesTo[vid] 375 nodes := make([]graph.Node, len(in)) 376 for i, uid := range in { 377 nodes[i] = g.nodes[uid] 378 } 379 return iterator.NewOrderedNodes(nodes) 380 } 381 382 // HasEdgeBetween returns whether an edge exists between nodes x and y. 383 func (g *ReducedDirected) HasEdgeBetween(xid, yid int64) bool { 384 if xid == yid || !isValidID(xid) || !isValidID(yid) { 385 return false 386 } 387 _, ok := g.weights[[2]int{int(xid), int(yid)}] 388 if ok { 389 return true 390 } 391 _, ok = g.weights[[2]int{int(yid), int(xid)}] 392 return ok 393 } 394 395 // HasEdgeFromTo returns whether an edge exists from node u to v. 396 func (g *ReducedDirected) HasEdgeFromTo(uid, vid int64) bool { 397 if uid == vid || !isValidID(uid) || !isValidID(vid) { 398 return false 399 } 400 _, ok := g.weights[[2]int{int(uid), int(vid)}] 401 return ok 402 } 403 404 // Edge returns the edge from u to v if such an edge exists and nil otherwise. 405 // The node v must be directly reachable from u as defined by the From method. 406 func (g *ReducedDirected) Edge(uid, vid int64) graph.Edge { 407 return g.WeightedEdge(uid, vid) 408 } 409 410 // WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise. 411 // The node v must be directly reachable from u as defined by the From method. 412 func (g *ReducedDirected) WeightedEdge(uid, vid int64) graph.WeightedEdge { 413 if uid == vid || !isValidID(uid) || !isValidID(vid) { 414 return nil 415 } 416 w, ok := g.weights[[2]int{int(uid), int(vid)}] 417 if !ok { 418 return nil 419 } 420 return edge{from: g.nodes[uid], to: g.nodes[vid], weight: w} 421 } 422 423 // Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge. 424 // If x and y are the same node the internal node weight is returned. If there is no joining 425 // edge between the two nodes the weight value returned is zero. Weight returns true if an edge 426 // exists between x and y or if x and y have the same ID, false otherwise. 427 func (g *ReducedDirected) Weight(xid, yid int64) (w float64, ok bool) { 428 if !isValidID(xid) || !isValidID(yid) { 429 return 0, false 430 } 431 if xid == yid { 432 return g.nodes[xid].weight, true 433 } 434 w, ok = g.weights[[2]int{int(xid), int(yid)}] 435 return w, ok 436 } 437 438 // directedLocalMover is a step in graph modularity optimization. 439 type directedLocalMover struct { 440 g *ReducedDirected 441 442 // nodes is the set of working nodes. 443 nodes []graph.Node 444 // edgeWeightsOf is the weighted degree 445 // of each node indexed by ID. 446 edgeWeightsOf []directedWeights 447 448 // m is the total sum of edge 449 // weights in g. 450 m float64 451 452 // weight is the weight function 453 // provided by g or a function 454 // that returns the Weight value 455 // of the non-nil edge between x 456 // and y. 457 weight func(xid, yid int64) float64 458 459 // communities is the current 460 // division of g. 461 communities [][]graph.Node 462 // memberships is a mapping between 463 // node ID and community membership. 464 memberships []int 465 466 // resolution is the Reichardt and 467 // Bornholdt γ parameter as defined 468 // in doi:10.1103/PhysRevE.74.016110. 469 resolution float64 470 471 // moved indicates that a call to 472 // move has been made since the last 473 // call to shuffle. 474 moved bool 475 476 // changed indicates that a move 477 // has been made since the creation 478 // of the local mover. 479 changed bool 480 } 481 482 type directedWeights struct { 483 out, in float64 484 } 485 486 // newDirectedLocalMover returns a new directedLocalMover initialized with 487 // the graph g, a set of communities and a modularity resolution parameter. 488 // The node IDs of g must be contiguous in [0,n) where n is the number of 489 // nodes. 490 // If g has a zero edge weight sum, nil is returned. 491 func newDirectedLocalMover(g *ReducedDirected, communities [][]graph.Node, resolution float64) *directedLocalMover { 492 nodes := graph.NodesOf(g.Nodes()) 493 l := directedLocalMover{ 494 g: g, 495 nodes: nodes, 496 edgeWeightsOf: make([]directedWeights, len(nodes)), 497 communities: communities, 498 memberships: make([]int, len(nodes)), 499 resolution: resolution, 500 weight: positiveWeightFuncFor(g), 501 } 502 503 // Calculate the total edge weight of the graph 504 // and degree weights for each node. 505 for _, n := range l.nodes { 506 u := n 507 var wOut float64 508 uid := u.ID() 509 to := g.From(uid) 510 for to.Next() { 511 wOut += l.weight(uid, to.Node().ID()) 512 } 513 514 v := n 515 var wIn float64 516 vid := v.ID() 517 from := g.To(vid) 518 for from.Next() { 519 wIn += l.weight(from.Node().ID(), vid) 520 } 521 522 id := n.ID() 523 w := l.weight(id, id) 524 l.edgeWeightsOf[id] = directedWeights{out: w + wOut, in: w + wIn} 525 l.m += w + wOut 526 } 527 528 // Assign membership mappings. 529 for i, c := range communities { 530 for _, n := range c { 531 l.memberships[n.ID()] = i 532 } 533 } 534 535 return &l 536 } 537 538 // localMovingHeuristic performs the Louvain local moving heuristic until 539 // no further moves can be made. It returns a boolean indicating that the 540 // directedLocalMover has not made any improvement to the community structure and 541 // so the Louvain algorithm is done. 542 func (l *directedLocalMover) localMovingHeuristic(rnd func(int) int) (done bool) { 543 for { 544 l.shuffle(rnd) 545 for _, n := range l.nodes { 546 dQ, dst, src := l.deltaQ(n) 547 if dQ <= deltaQtol { 548 continue 549 } 550 l.move(dst, src) 551 } 552 if !l.moved { 553 return !l.changed 554 } 555 } 556 } 557 558 // shuffle performs a Fisher-Yates shuffle on the nodes held by the 559 // directedLocalMover using the random source rnd which should return an 560 // integer in the range [0,n). 561 func (l *directedLocalMover) shuffle(rnd func(n int) int) { 562 l.moved = false 563 for i := range l.nodes[:len(l.nodes)-1] { 564 j := i + rnd(len(l.nodes)-i) 565 l.nodes[i], l.nodes[j] = l.nodes[j], l.nodes[i] 566 } 567 } 568 569 // move moves the node at src to the community at dst. 570 func (l *directedLocalMover) move(dst int, src commIdx) { 571 l.moved = true 572 l.changed = true 573 574 srcComm := l.communities[src.community] 575 n := srcComm[src.node] 576 577 l.memberships[n.ID()] = dst 578 579 l.communities[dst] = append(l.communities[dst], n) 580 srcComm[src.node], srcComm[len(srcComm)-1] = srcComm[len(srcComm)-1], nil 581 l.communities[src.community] = srcComm[:len(srcComm)-1] 582 } 583 584 // deltaQ returns the highest gain in modularity attainable by moving 585 // n from its current community to another connected community and 586 // the index of the chosen destination. The index into the directedLocalMover's 587 // communities field is returned in src if n is in communities. 588 func (l *directedLocalMover) deltaQ(n graph.Node) (deltaQ float64, dst int, src commIdx) { 589 id := n.ID() 590 591 a_aa := l.weight(id, id) 592 k_a := l.edgeWeightsOf[id] 593 m := l.m 594 gamma := l.resolution 595 596 // Find communities connected to n. 597 connected := make(set.Ints) 598 // The following for loop is equivalent to: 599 // 600 // for _, v := range l.g.From(n) { 601 // connected.Add(l.memberships[v.ID()]) 602 // } 603 // for _, v := range l.g.To(n) { 604 // connected.Add(l.memberships[v.ID()]) 605 // } 606 // 607 // This is done to avoid two allocations. 608 for _, vid := range l.g.edgesFrom[id] { 609 connected.Add(l.memberships[vid]) 610 } 611 for _, vid := range l.g.edgesTo[id] { 612 connected.Add(l.memberships[vid]) 613 } 614 // Insert the node's own community. 615 connected.Add(l.memberships[id]) 616 617 candidates := make([]int, 0, len(connected)) 618 for i := range connected { 619 candidates = append(candidates, i) 620 } 621 sort.Ints(candidates) 622 623 // Calculate the highest modularity gain 624 // from moving into another community and 625 // keep the index of that community. 626 var dQremove float64 627 dQadd, dst, src := math.Inf(-1), -1, commIdx{-1, -1} 628 for _, i := range candidates { 629 c := l.communities[i] 630 var k_aC, sigma_totC directedWeights // C is a substitution for ^𝛼 or ^𝛽. 631 var removal bool 632 for j, u := range c { 633 uid := u.ID() 634 if uid == id { 635 if src.community != -1 { 636 panic("community: multiple sources") 637 } 638 src = commIdx{i, j} 639 removal = true 640 } 641 642 k_aC.in += l.weight(uid, id) 643 k_aC.out += l.weight(id, uid) 644 // sigma_totC could be kept for each community 645 // and updated for moves, changing the calculation 646 // of sigma_totC here from O(n_c) to O(1), but 647 // in practice the time savings do not appear 648 // to be compelling and do not make up for the 649 // increase in code complexity and space required. 650 w := l.edgeWeightsOf[uid] 651 sigma_totC.in += w.in 652 sigma_totC.out += w.out 653 } 654 655 // See louvain.tex for a derivation of these equations. 656 switch { 657 case removal: 658 // The community c was the current community, 659 // so calculate the change due to removal. 660 dQremove = (k_aC.in /*^𝛼*/ - a_aa) + (k_aC.out /*^𝛼*/ - a_aa) - 661 gamma*(k_a.in*(sigma_totC.out /*^𝛼*/ -k_a.out)+k_a.out*(sigma_totC.in /*^𝛼*/ -k_a.in))/m 662 663 default: 664 // Otherwise calculate the change due to an addition 665 // to c and retain if it is the current best. 666 dQ := k_aC.in /*^𝛽*/ + k_aC.out /*^𝛽*/ - 667 gamma*(k_a.in*sigma_totC.out /*^𝛽*/ +k_a.out*sigma_totC.in /*^𝛽*/)/m 668 669 if dQ > dQadd { 670 dQadd = dQ 671 dst = i 672 } 673 } 674 } 675 676 return (dQadd - dQremove) / m, dst, src 677 }