github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/graph/community/louvain_undirected.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package community
     6  
     7  import (
     8  	"math"
     9  	"sort"
    10  
    11  	"golang.org/x/exp/rand"
    12  
    13  	"github.com/jingcheng-WU/gonum/graph"
    14  	"github.com/jingcheng-WU/gonum/graph/internal/ordered"
    15  	"github.com/jingcheng-WU/gonum/graph/internal/set"
    16  	"github.com/jingcheng-WU/gonum/graph/iterator"
    17  )
    18  
    19  // qUndirected returns the modularity Q score of the graph g subdivided into the
    20  // given communities at the given resolution. If communities is nil, the
    21  // unclustered modularity score is returned. The resolution parameter
    22  // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110.
    23  // qUndirected will panic if g has any edge with negative edge weight.
    24  //
    25  //  Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j)
    26  //
    27  // graph.Undirect may be used as a shim to allow calculation of Q for
    28  // directed graphs.
    29  func qUndirected(g graph.Undirected, communities [][]graph.Node, resolution float64) float64 {
    30  	nodes := graph.NodesOf(g.Nodes())
    31  	weight := positiveWeightFuncFor(g)
    32  
    33  	// Calculate the total edge weight of the graph
    34  	// and the table of penetrating edge weight sums.
    35  	var m2 float64
    36  	k := make(map[int64]float64, len(nodes))
    37  	for _, u := range nodes {
    38  		uid := u.ID()
    39  		w := weight(uid, uid)
    40  		to := g.From(uid)
    41  		for to.Next() {
    42  			w += weight(uid, to.Node().ID())
    43  		}
    44  		m2 += w
    45  		k[uid] = w
    46  	}
    47  
    48  	if communities == nil {
    49  		var q float64
    50  		for _, u := range nodes {
    51  			uid := u.ID()
    52  			kU := k[uid]
    53  			q += weight(uid, uid) - resolution*kU*kU/m2
    54  		}
    55  		return q / m2
    56  	}
    57  
    58  	// Iterate over the communities, calculating
    59  	// the non-self edge weights for the upper
    60  	// triangle and adjust the diagonal.
    61  	var q float64
    62  	for _, c := range communities {
    63  		for i, u := range c {
    64  			uid := u.ID()
    65  			kU := k[uid]
    66  			q += weight(uid, uid) - resolution*kU*kU/m2
    67  			for _, v := range c[i+1:] {
    68  				vid := v.ID()
    69  				q += 2 * (weight(uid, vid) - resolution*kU*k[vid]/m2)
    70  			}
    71  		}
    72  	}
    73  	return q / m2
    74  }
    75  
    76  // louvainUndirected returns the hierarchical modularization of g at the given
    77  // resolution using the Louvain algorithm. If src is nil, rand.Intn is used as
    78  // the random generator. louvainUndirected will panic if g has any edge with negative edge
    79  // weight.
    80  //
    81  // graph.Undirect may be used as a shim to allow modularization of directed graphs.
    82  func louvainUndirected(g graph.Undirected, resolution float64, src rand.Source) *ReducedUndirected {
    83  	// See louvain.tex for a detailed description
    84  	// of the algorithm used here.
    85  
    86  	c := reduceUndirected(g, nil)
    87  	rnd := rand.Intn
    88  	if src != nil {
    89  		rnd = rand.New(src).Intn
    90  	}
    91  	for {
    92  		l := newUndirectedLocalMover(c, c.communities, resolution)
    93  		if l == nil {
    94  			return c
    95  		}
    96  		if done := l.localMovingHeuristic(rnd); done {
    97  			return c
    98  		}
    99  		c = reduceUndirected(c, l.communities)
   100  	}
   101  }
   102  
   103  // ReducedUndirected is an undirected graph of communities derived from a
   104  // parent graph by reduction.
   105  type ReducedUndirected struct {
   106  	// nodes is the set of nodes held
   107  	// by the graph. In a ReducedUndirected
   108  	// the node ID is the index into
   109  	// nodes.
   110  	nodes []community
   111  	undirectedEdges
   112  
   113  	// communities is the community
   114  	// structure of the graph.
   115  	communities [][]graph.Node
   116  
   117  	parent *ReducedUndirected
   118  }
   119  
   120  var (
   121  	reducedUndirected = (*ReducedUndirected)(nil)
   122  
   123  	_ graph.WeightedUndirected = reducedUndirected
   124  	_ ReducedGraph             = reducedUndirected
   125  )
   126  
   127  // Communities returns the community memberships of the nodes in the
   128  // graph used to generate the reduced graph.
   129  func (g *ReducedUndirected) Communities() [][]graph.Node {
   130  	communities := make([][]graph.Node, len(g.communities))
   131  	if g.parent == nil {
   132  		for i, members := range g.communities {
   133  			comm := make([]graph.Node, len(members))
   134  			for j, n := range members {
   135  				nodes := g.nodes[n.ID()].nodes
   136  				if len(nodes) != 1 {
   137  					panic("community: unexpected number of nodes in base graph community")
   138  				}
   139  				comm[j] = nodes[0]
   140  			}
   141  			communities[i] = comm
   142  		}
   143  		return communities
   144  	}
   145  	sub := g.parent.Communities()
   146  	for i, members := range g.communities {
   147  		var comm []graph.Node
   148  		for _, n := range members {
   149  			comm = append(comm, sub[n.ID()]...)
   150  		}
   151  		communities[i] = comm
   152  	}
   153  	return communities
   154  }
   155  
   156  // Structure returns the community structure of the current level of
   157  // the module clustering. The first index of the returned value
   158  // corresponds to the index of the nodes in the next higher level if
   159  // it exists. The returned value should not be mutated.
   160  func (g *ReducedUndirected) Structure() [][]graph.Node {
   161  	return g.communities
   162  }
   163  
   164  // Expanded returns the next lower level of the module clustering or nil
   165  // if at the lowest level.
   166  func (g *ReducedUndirected) Expanded() ReducedGraph {
   167  	return g.parent
   168  }
   169  
   170  // reduceUndirected returns a reduced graph constructed from g divided
   171  // into the given communities. The communities value is mutated
   172  // by the call to reduceUndirected. If communities is nil and g is a
   173  // ReducedUndirected, it is returned unaltered.
   174  func reduceUndirected(g graph.Undirected, communities [][]graph.Node) *ReducedUndirected {
   175  	if communities == nil {
   176  		if r, ok := g.(*ReducedUndirected); ok {
   177  			return r
   178  		}
   179  
   180  		nodes := graph.NodesOf(g.Nodes())
   181  		// TODO(kortschak) This sort is necessary really only
   182  		// for testing. In practice we would not be using the
   183  		// community provided by the user for a Q calculation.
   184  		// Probably we should use a function to map the
   185  		// communities in the test sets to the remapped order.
   186  		sort.Sort(ordered.ByID(nodes))
   187  		communities = make([][]graph.Node, len(nodes))
   188  		for i := range nodes {
   189  			communities[i] = []graph.Node{node(i)}
   190  		}
   191  
   192  		weight := positiveWeightFuncFor(g)
   193  		r := ReducedUndirected{
   194  			nodes: make([]community, len(nodes)),
   195  			undirectedEdges: undirectedEdges{
   196  				edges:   make([][]int, len(nodes)),
   197  				weights: make(map[[2]int]float64),
   198  			},
   199  			communities: communities,
   200  		}
   201  		communityOf := make(map[int64]int, len(nodes))
   202  		for i, n := range nodes {
   203  			r.nodes[i] = community{id: i, nodes: []graph.Node{n}}
   204  			communityOf[n.ID()] = i
   205  		}
   206  		for _, u := range nodes {
   207  			uid := u.ID()
   208  			ucid := communityOf[uid]
   209  			var out []int
   210  			to := g.From(uid)
   211  			for to.Next() {
   212  				vid := to.Node().ID()
   213  				vcid := communityOf[vid]
   214  				if vcid != ucid {
   215  					out = append(out, vcid)
   216  				}
   217  				if ucid < vcid {
   218  					// Only store the weight once.
   219  					r.weights[[2]int{ucid, vcid}] = weight(uid, vid)
   220  				}
   221  			}
   222  			r.edges[ucid] = out
   223  		}
   224  		return &r
   225  	}
   226  
   227  	// Remove zero length communities destructively.
   228  	var commNodes int
   229  	for i := 0; i < len(communities); {
   230  		comm := communities[i]
   231  		if len(comm) == 0 {
   232  			communities[i] = communities[len(communities)-1]
   233  			communities[len(communities)-1] = nil
   234  			communities = communities[:len(communities)-1]
   235  		} else {
   236  			commNodes += len(comm)
   237  			i++
   238  		}
   239  	}
   240  
   241  	r := ReducedUndirected{
   242  		nodes: make([]community, len(communities)),
   243  		undirectedEdges: undirectedEdges{
   244  			edges:   make([][]int, len(communities)),
   245  			weights: make(map[[2]int]float64),
   246  		},
   247  	}
   248  	r.communities = make([][]graph.Node, len(communities))
   249  	for i := range r.communities {
   250  		r.communities[i] = []graph.Node{node(i)}
   251  	}
   252  	if g, ok := g.(*ReducedUndirected); ok {
   253  		// Make sure we retain the truncated
   254  		// community structure.
   255  		g.communities = communities
   256  		r.parent = g
   257  	}
   258  	weight := positiveWeightFuncFor(g)
   259  	communityOf := make(map[int64]int, commNodes)
   260  	for i, comm := range communities {
   261  		r.nodes[i] = community{id: i, nodes: comm}
   262  		for _, n := range comm {
   263  			communityOf[n.ID()] = i
   264  		}
   265  	}
   266  	for ucid, comm := range communities {
   267  		var out []int
   268  		for i, u := range comm {
   269  			uid := u.ID()
   270  			r.nodes[ucid].weight += weight(uid, uid)
   271  			for _, v := range comm[i+1:] {
   272  				r.nodes[ucid].weight += 2 * weight(uid, v.ID())
   273  			}
   274  			to := g.From(uid)
   275  			for to.Next() {
   276  				vid := to.Node().ID()
   277  				vcid := communityOf[vid]
   278  				found := false
   279  				for _, e := range out {
   280  					if e == vcid {
   281  						found = true
   282  						break
   283  					}
   284  				}
   285  				if !found && vcid != ucid {
   286  					out = append(out, vcid)
   287  				}
   288  				if ucid < vcid {
   289  					// Only store the weight once.
   290  					r.weights[[2]int{ucid, vcid}] += weight(uid, vid)
   291  				}
   292  			}
   293  		}
   294  		r.edges[ucid] = out
   295  	}
   296  	return &r
   297  }
   298  
   299  // Node returns the node with the given ID if it exists in the graph,
   300  // and nil otherwise.
   301  func (g *ReducedUndirected) Node(id int64) graph.Node {
   302  	if g.has(id) {
   303  		return g.nodes[id]
   304  	}
   305  	return nil
   306  }
   307  
   308  // has returns whether the node exists within the graph.
   309  func (g *ReducedUndirected) has(id int64) bool {
   310  	return 0 <= id && id < int64(len(g.nodes))
   311  }
   312  
   313  // Nodes returns all the nodes in the graph.
   314  func (g *ReducedUndirected) Nodes() graph.Nodes {
   315  	nodes := make([]graph.Node, len(g.nodes))
   316  	for i := range g.nodes {
   317  		nodes[i] = node(i)
   318  	}
   319  	return iterator.NewOrderedNodes(nodes)
   320  }
   321  
   322  // From returns all nodes in g that can be reached directly from u.
   323  func (g *ReducedUndirected) From(uid int64) graph.Nodes {
   324  	out := g.edges[uid]
   325  	nodes := make([]graph.Node, len(out))
   326  	for i, vid := range out {
   327  		nodes[i] = g.nodes[vid]
   328  	}
   329  	return iterator.NewOrderedNodes(nodes)
   330  }
   331  
   332  // HasEdgeBetween returns whether an edge exists between nodes x and y.
   333  func (g *ReducedUndirected) HasEdgeBetween(xid, yid int64) bool {
   334  	if xid == yid || !isValidID(xid) || !isValidID(yid) {
   335  		return false
   336  	}
   337  	if xid > yid {
   338  		xid, yid = yid, xid
   339  	}
   340  	_, ok := g.weights[[2]int{int(xid), int(yid)}]
   341  	return ok
   342  }
   343  
   344  // Edge returns the edge from u to v if such an edge exists and nil otherwise.
   345  // The node v must be directly reachable from u as defined by the From method.
   346  func (g *ReducedUndirected) Edge(uid, vid int64) graph.Edge {
   347  	return g.WeightedEdgeBetween(uid, vid)
   348  }
   349  
   350  // WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
   351  // The node v must be directly reachable from u as defined by the From method.
   352  func (g *ReducedUndirected) WeightedEdge(uid, vid int64) graph.WeightedEdge {
   353  	return g.WeightedEdgeBetween(uid, vid)
   354  }
   355  
   356  // EdgeBetween returns the edge between nodes x and y.
   357  func (g *ReducedUndirected) EdgeBetween(xid, yid int64) graph.Edge {
   358  	return g.WeightedEdgeBetween(xid, yid)
   359  }
   360  
   361  // WeightedEdgeBetween returns the weighted edge between nodes x and y.
   362  func (g *ReducedUndirected) WeightedEdgeBetween(xid, yid int64) graph.WeightedEdge {
   363  	if xid == yid || !isValidID(xid) || !isValidID(yid) {
   364  		return nil
   365  	}
   366  	if yid < xid {
   367  		xid, yid = yid, xid
   368  	}
   369  	w, ok := g.weights[[2]int{int(xid), int(yid)}]
   370  	if !ok {
   371  		return nil
   372  	}
   373  	return edge{from: g.nodes[xid], to: g.nodes[yid], weight: w}
   374  }
   375  
   376  // Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
   377  // If x and y are the same node the internal node weight is returned. If there is no joining
   378  // edge between the two nodes the weight value returned is zero. Weight returns true if an edge
   379  // exists between x and y or if x and y have the same ID, false otherwise.
   380  func (g *ReducedUndirected) Weight(xid, yid int64) (w float64, ok bool) {
   381  	if !isValidID(xid) || !isValidID(yid) {
   382  		return 0, false
   383  	}
   384  	if xid == yid {
   385  		return g.nodes[xid].weight, true
   386  	}
   387  	if xid > yid {
   388  		xid, yid = yid, xid
   389  	}
   390  	w, ok = g.weights[[2]int{int(xid), int(yid)}]
   391  	return w, ok
   392  }
   393  
   394  // undirectedLocalMover is a step in graph modularity optimization.
   395  type undirectedLocalMover struct {
   396  	g *ReducedUndirected
   397  
   398  	// nodes is the set of working nodes.
   399  	nodes []graph.Node
   400  	// edgeWeightOf is the weighted degree
   401  	// of each node indexed by ID.
   402  	edgeWeightOf []float64
   403  
   404  	// m2 is the total sum of
   405  	// edge weights in g.
   406  	m2 float64
   407  
   408  	// weight is the weight function
   409  	// provided by g or a function
   410  	// that returns the Weight value
   411  	// of the non-nil edge between x
   412  	// and y.
   413  	weight func(xid, yid int64) float64
   414  
   415  	// communities is the current
   416  	// division of g.
   417  	communities [][]graph.Node
   418  	// memberships is a mapping between
   419  	// node ID and community membership.
   420  	memberships []int
   421  
   422  	// resolution is the Reichardt and
   423  	// Bornholdt γ parameter as defined
   424  	// in doi:10.1103/PhysRevE.74.016110.
   425  	resolution float64
   426  
   427  	// moved indicates that a call to
   428  	// move has been made since the last
   429  	// call to shuffle.
   430  	moved bool
   431  
   432  	// changed indicates that a move
   433  	// has been made since the creation
   434  	// of the local mover.
   435  	changed bool
   436  }
   437  
   438  // newUndirectedLocalMover returns a new undirectedLocalMover initialized with
   439  // the graph g, a set of communities and a modularity resolution parameter. The
   440  // node IDs of g must be contiguous in [0,n) where n is the number of nodes.
   441  // If g has a zero edge weight sum, nil is returned.
   442  func newUndirectedLocalMover(g *ReducedUndirected, communities [][]graph.Node, resolution float64) *undirectedLocalMover {
   443  	nodes := graph.NodesOf(g.Nodes())
   444  	l := undirectedLocalMover{
   445  		g:            g,
   446  		nodes:        nodes,
   447  		edgeWeightOf: make([]float64, len(nodes)),
   448  		communities:  communities,
   449  		memberships:  make([]int, len(nodes)),
   450  		resolution:   resolution,
   451  		weight:       positiveWeightFuncFor(g),
   452  	}
   453  
   454  	// Calculate the total edge weight of the graph
   455  	// and degree weights for each node.
   456  	for _, u := range l.nodes {
   457  		uid := u.ID()
   458  		w := l.weight(uid, uid)
   459  		to := g.From(uid)
   460  		for to.Next() {
   461  			w += l.weight(uid, to.Node().ID())
   462  		}
   463  		l.edgeWeightOf[uid] = w
   464  		l.m2 += w
   465  	}
   466  	if l.m2 == 0 {
   467  		return nil
   468  	}
   469  
   470  	// Assign membership mappings.
   471  	for i, c := range communities {
   472  		for _, u := range c {
   473  			l.memberships[u.ID()] = i
   474  		}
   475  	}
   476  
   477  	return &l
   478  }
   479  
   480  // localMovingHeuristic performs the Louvain local moving heuristic until
   481  // no further moves can be made. It returns a boolean indicating that the
   482  // undirectedLocalMover has not made any improvement to the community
   483  // structure and so the Louvain algorithm is done.
   484  func (l *undirectedLocalMover) localMovingHeuristic(rnd func(int) int) (done bool) {
   485  	for {
   486  		l.shuffle(rnd)
   487  		for _, n := range l.nodes {
   488  			dQ, dst, src := l.deltaQ(n)
   489  			if dQ <= deltaQtol {
   490  				continue
   491  			}
   492  			l.move(dst, src)
   493  		}
   494  		if !l.moved {
   495  			return !l.changed
   496  		}
   497  	}
   498  }
   499  
   500  // shuffle performs a Fisher-Yates shuffle on the nodes held by the
   501  // undirectedLocalMover using the random source rnd which should return
   502  // an integer in the range [0,n).
   503  func (l *undirectedLocalMover) shuffle(rnd func(n int) int) {
   504  	l.moved = false
   505  	for i := range l.nodes[:len(l.nodes)-1] {
   506  		j := i + rnd(len(l.nodes)-i)
   507  		l.nodes[i], l.nodes[j] = l.nodes[j], l.nodes[i]
   508  	}
   509  }
   510  
   511  // move moves the node at src to the community at dst.
   512  func (l *undirectedLocalMover) move(dst int, src commIdx) {
   513  	l.moved = true
   514  	l.changed = true
   515  
   516  	srcComm := l.communities[src.community]
   517  	n := srcComm[src.node]
   518  
   519  	l.memberships[n.ID()] = dst
   520  
   521  	l.communities[dst] = append(l.communities[dst], n)
   522  	srcComm[src.node], srcComm[len(srcComm)-1] = srcComm[len(srcComm)-1], nil
   523  	l.communities[src.community] = srcComm[:len(srcComm)-1]
   524  }
   525  
   526  // deltaQ returns the highest gain in modularity attainable by moving
   527  // n from its current community to another connected community and
   528  // the index of the chosen destination. The index into the
   529  // undirectedLocalMover's communities field is returned in src if n
   530  // is in communities.
   531  func (l *undirectedLocalMover) deltaQ(n graph.Node) (deltaQ float64, dst int, src commIdx) {
   532  	id := n.ID()
   533  	a_aa := l.weight(id, id)
   534  	k_a := l.edgeWeightOf[id]
   535  	m2 := l.m2
   536  	gamma := l.resolution
   537  
   538  	// Find communities connected to n.
   539  	connected := make(set.Ints)
   540  	// The following for loop is equivalent to:
   541  	//
   542  	//  for _, v := range l.g.From(n) {
   543  	//  	connected.Add(l.memberships[v.ID()])
   544  	//  }
   545  	//
   546  	// This is done to avoid an allocation.
   547  	for _, vid := range l.g.edges[id] {
   548  		connected.Add(l.memberships[vid])
   549  	}
   550  	// Insert the node's own community.
   551  	connected.Add(l.memberships[id])
   552  
   553  	candidates := make([]int, 0, len(connected))
   554  	for i := range connected {
   555  		candidates = append(candidates, i)
   556  	}
   557  	sort.Ints(candidates)
   558  
   559  	// Calculate the highest modularity gain
   560  	// from moving into another community and
   561  	// keep the index of that community.
   562  	var dQremove float64
   563  	dQadd, dst, src := math.Inf(-1), -1, commIdx{-1, -1}
   564  	for _, i := range candidates {
   565  		c := l.communities[i]
   566  		var k_aC, sigma_totC float64 // C is a substitution for ^𝛼 or ^𝛽.
   567  		var removal bool
   568  		for j, u := range c {
   569  			uid := u.ID()
   570  			if uid == id {
   571  				if src.community != -1 {
   572  					panic("community: multiple sources")
   573  				}
   574  				src = commIdx{i, j}
   575  				removal = true
   576  			}
   577  
   578  			k_aC += l.weight(id, uid)
   579  			// sigma_totC could be kept for each community
   580  			// and updated for moves, changing the calculation
   581  			// of sigma_totC here from O(n_c) to O(1), but
   582  			// in practice the time savings do not appear
   583  			// to be compelling and do not make up for the
   584  			// increase in code complexity and space required.
   585  			sigma_totC += l.edgeWeightOf[uid]
   586  		}
   587  
   588  		// See louvain.tex for a derivation of these equations.
   589  		switch {
   590  		case removal:
   591  			// The community c was the current community,
   592  			// so calculate the change due to removal.
   593  			dQremove = k_aC /*^𝛼*/ - a_aa - gamma*k_a*(sigma_totC /*^𝛼*/ -k_a)/m2
   594  
   595  		default:
   596  			// Otherwise calculate the change due to an addition
   597  			// to c and retain if it is the current best.
   598  			dQ := k_aC /*^𝛽*/ - gamma*k_a*sigma_totC /*^𝛽*/ /m2
   599  			if dQ > dQadd {
   600  				dQadd = dQ
   601  				dst = i
   602  			}
   603  		}
   604  	}
   605  
   606  	return 2 * (dQadd - dQremove) / m2, dst, src
   607  }