github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/graph/community/louvain_common.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package community
     6  
     7  import (
     8  	"fmt"
     9  	"sort"
    10  
    11  	"golang.org/x/exp/rand"
    12  
    13  	"github.com/jingcheng-WU/gonum/graph"
    14  	"github.com/jingcheng-WU/gonum/graph/internal/set"
    15  )
    16  
    17  // Q returns the modularity Q score of the graph g subdivided into the
    18  // given communities at the given resolution. If communities is nil, the
    19  // unclustered modularity score is returned. The resolution parameter
    20  // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110.
    21  // Q will panic if g has any edge with negative edge weight.
    22  //
    23  // If g is undirected, Q is calculated according to
    24  //  Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
    25  // If g is directed, it is calculated according to
    26  //  Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
    27  //
    28  // graph.Undirect may be used as a shim to allow calculation of Q for
    29  // directed graphs with the undirected modularity function.
    30  func Q(g graph.Graph, communities [][]graph.Node, resolution float64) float64 {
    31  	switch g := g.(type) {
    32  	case graph.Undirected:
    33  		return qUndirected(g, communities, resolution)
    34  	case graph.Directed:
    35  		return qDirected(g, communities, resolution)
    36  	default:
    37  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
    38  	}
    39  }
    40  
// ReducedGraph is a modularised graph. It is the result type of
// Modularize and exposes both the community assignment and the
// hierarchy of reductions that produced it.
type ReducedGraph interface {
	graph.Graph

	// Communities returns the community memberships
	// of the nodes in the graph used to generate
	// the reduced graph.
	Communities() [][]graph.Node

	// Structure returns the community structure of
	// the current level of the module clustering.
	// Each slice in the returned value recursively
	// describes the membership of a community at
	// the current level by indexing via the node
	// ID into the structure of the non-nil
	// ReducedGraph returned by Expanded, or when the
	// ReducedGraph is nil, by containing nodes
	// from the original input graph.
	//
	// The returned value should not be mutated.
	Structure() [][]graph.Node

	// Expanded returns the next lower level of the
	// module clustering or nil if at the lowest level.
	//
	// The returned ReducedGraph will be the same
	// concrete type as the receiver.
	Expanded() ReducedGraph
}
    70  
    71  // Modularize returns the hierarchical modularization of g at the given resolution
    72  // using the Louvain algorithm. If src is nil, rand.Intn is used as the random
    73  // generator. Modularize will panic if g has any edge with negative edge weight.
    74  //
    75  // If g is undirected it is modularised to minimise
    76  //  Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
    77  // If g is directed it is modularised to minimise
    78  //  Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
    79  //
    80  // The concrete type of the ReducedGraph will be a pointer to either a
    81  // ReducedUndirected or a ReducedDirected depending on the type of g.
    82  //
    83  // graph.Undirect may be used as a shim to allow modularization of
    84  // directed graphs with the undirected modularity function.
    85  func Modularize(g graph.Graph, resolution float64, src rand.Source) ReducedGraph {
    86  	switch g := g.(type) {
    87  	case graph.Undirected:
    88  		return louvainUndirected(g, resolution, src)
    89  	case graph.Directed:
    90  		return louvainDirected(g, resolution, src)
    91  	default:
    92  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
    93  	}
    94  }
    95  
// Multiplex is a multiplex graph: a fixed set of nodes shared by a
// number of layers. It is the common input type of QMultiplex and
// ModularizeMultiplex, which additionally require the value to be an
// UndirectedMultiplex or a DirectedMultiplex.
type Multiplex interface {
	// Nodes returns the nodes
	// for the multiplex graph.
	// All layers must refer to the same
	// set of nodes.
	Nodes() graph.Nodes

	// Depth returns the number of layers
	// in the multiplex graph.
	Depth() int
}
   108  
   109  // QMultiplex returns the modularity Q score of the multiplex graph layers
   110  // subdivided into the given communities at the given resolutions and weights. Q is
   111  // returned as the vector of weighted Q scores for each layer of the multiplex graph.
   112  // If communities is nil, the unclustered modularity score is returned.
   113  // If weights is nil layers are equally weighted, otherwise the length of
   114  // weights must equal the number of layers. If resolutions is nil, a resolution
   115  // of 1.0 is used for all layers, otherwise either a single element slice may be used
   116  // to specify a global resolution, or the length of resolutions must equal the number
   117  // of layers. The resolution parameter is γ as defined in Reichardt and Bornholdt
   118  // doi:10.1103/PhysRevE.74.016110.
   119  // QMultiplex will panic if the graph has any layer weight-scaled edge with
   120  // negative edge weight.
   121  //
   122  // If g is undirected, Q is calculated according to
   123  //  Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m_{layer} ] \delta(c_i,c_j),
   124  // If g is directed, it is calculated according to
   125  //  Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
   126  //
   127  // Note that Q values for multiplex graphs are not scaled by the total layer edge weight.
   128  //
   129  // graph.Undirect may be used as a shim to allow calculation of Q for
   130  // directed graphs.
   131  func QMultiplex(g Multiplex, communities [][]graph.Node, weights, resolutions []float64) []float64 {
   132  	if weights != nil && len(weights) != g.Depth() {
   133  		panic("community: weights vector length mismatch")
   134  	}
   135  	if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
   136  		panic("community: resolutions vector length mismatch")
   137  	}
   138  
   139  	switch g := g.(type) {
   140  	case UndirectedMultiplex:
   141  		return qUndirectedMultiplex(g, communities, weights, resolutions)
   142  	case DirectedMultiplex:
   143  		return qDirectedMultiplex(g, communities, weights, resolutions)
   144  	default:
   145  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
   146  	}
   147  }
   148  
// ReducedMultiplex is a modularised multiplex graph. It is the result
// type of ModularizeMultiplex.
type ReducedMultiplex interface {
	Multiplex

	// Communities returns the community memberships
	// of the nodes in the graph used to generate
	// the reduced graph.
	Communities() [][]graph.Node

	// Structure returns the community structure of
	// the current level of the module clustering.
	// Each slice in the returned value recursively
	// describes the membership of a community at
	// the current level by indexing via the node
	// ID into the structure of the non-nil
	// ReducedMultiplex returned by Expanded, or when the
	// ReducedMultiplex is nil, by containing nodes
	// from the original input graph.
	//
	// The returned value should not be mutated.
	Structure() [][]graph.Node

	// Expanded returns the next lower level of the
	// module clustering or nil if at the lowest level.
	//
	// The returned ReducedMultiplex will be the same
	// concrete type as the receiver.
	Expanded() ReducedMultiplex
}
   178  
   179  // ModularizeMultiplex returns the hierarchical modularization of g at the given resolution
   180  // using the Louvain algorithm. If all is true and g have negatively weighted layers, all
   181  // communities will be searched during the modularization. If src is nil, rand.Intn is
   182  // used as the random generator. ModularizeMultiplex will panic if g has any edge with
   183  // edge weight that does not sign-match the layer weight.
   184  //
   185  // If g is undirected it is modularised to minimise
   186  //  Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m ] \delta(c_i,c_j).
   187  // If g is directed it is modularised to minimise
   188  //  Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
   189  //
   190  // The concrete type of the ReducedMultiplex will be a pointer to a
   191  // ReducedUndirectedMultiplex.
   192  //
   193  // graph.Undirect may be used as a shim to allow modularization of
   194  // directed graphs with the undirected modularity function.
   195  func ModularizeMultiplex(g Multiplex, weights, resolutions []float64, all bool, src rand.Source) ReducedMultiplex {
   196  	if weights != nil && len(weights) != g.Depth() {
   197  		panic("community: weights vector length mismatch")
   198  	}
   199  	if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
   200  		panic("community: resolutions vector length mismatch")
   201  	}
   202  
   203  	switch g := g.(type) {
   204  	case UndirectedMultiplex:
   205  		return louvainUndirectedMultiplex(g, weights, resolutions, all, src)
   206  	case DirectedMultiplex:
   207  		return louvainDirectedMultiplex(g, weights, resolutions, all, src)
   208  	default:
   209  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
   210  	}
   211  }
   212  
// undirectedEdges is the edge structure of a reduced undirected graph.
type undirectedEdges struct {
	// edges and weights together describe
	// the set of edges between nodes.
	// weights is keyed by a pair of node
	// indices ordered such that the first
	// element of the key is less than the
	// second.
	// NOTE(review): edges is presumably an
	// adjacency list indexed by node; confirm
	// against the code that builds it.
	edges   [][]int
	weights map[[2]int]float64
}
   223  
// directedEdges is the edge structure of a reduced directed graph.
type directedEdges struct {
	// edgesFrom, edgesTo and weights
	// together describe the set of
	// edges between nodes.
	// NOTE(review): edgesFrom and edgesTo are
	// presumably per-node adjacency lists for
	// the two edge directions and weights is
	// keyed by an ordered node pair; confirm
	// against the code that builds them.
	edgesFrom [][]int
	edgesTo   [][]int
	weights   map[[2]int]float64
}
   232  
   233  // isValidID returns whether id is a valid ID for a community,
   234  // multiplexCommunity or node. These are all graph.Node types
   235  // stored in []T with a mapping between their index and their ID
   236  // so IDs must be positive and fit within the int type.
   237  func isValidID(id int64) bool {
   238  	return id == int64(int(id)) && id >= 0
   239  }
   240  
   241  // community is a reduced graph node describing its membership.
   242  type community struct {
   243  	// community graphs are internal, in-memory
   244  	// with dense IDs, so id is always an int.
   245  	id int
   246  
   247  	nodes []graph.Node
   248  
   249  	weight float64
   250  }
   251  
   252  func (n community) ID() int64 { return int64(n.id) }
   253  
   254  // edge is a reduced graph edge.
   255  type edge struct {
   256  	from, to community
   257  	weight   float64
   258  }
   259  
   260  func (e edge) From() graph.Node         { return e.from }
   261  func (e edge) To() graph.Node           { return e.to }
   262  func (e edge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
   263  func (e edge) Weight() float64          { return e.weight }
   264  
   265  // multiplexCommunity is a reduced multiplex graph node describing its membership.
   266  type multiplexCommunity struct {
   267  	// community graphs are internal, in-memory
   268  	// with dense IDs, so id is always an int.
   269  	id int
   270  
   271  	nodes []graph.Node
   272  
   273  	weights []float64
   274  }
   275  
   276  func (n multiplexCommunity) ID() int64 { return int64(n.id) }
   277  
   278  // multiplexEdge is a reduced graph edge for a multiplex graph.
   279  type multiplexEdge struct {
   280  	from, to multiplexCommunity
   281  	weight   float64
   282  }
   283  
   284  func (e multiplexEdge) From() graph.Node         { return e.from }
   285  func (e multiplexEdge) To() graph.Node           { return e.to }
   286  func (e multiplexEdge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
   287  func (e multiplexEdge) Weight() float64          { return e.weight }
   288  
// commIdx is an index of a node in a community held by a localMover.
type commIdx struct {
	// NOTE(review): community presumably selects the community
	// and node the position of the node within that community;
	// confirm against the localMover implementations.
	community int
	node      int
}
   294  
   295  // node is defined to avoid an import of .../graph/simple. node is
   296  // used in in-memory, dense ID graphs and so is always an int.
   297  type node int
   298  
   299  func (n node) ID() int64 { return int64(n) }
   300  
// minTaker is a set iterator. It is implemented in this file by
// *dense and *slice.
type minTaker interface {
	// TakeMin stores the next element of the set in *p and
	// returns true, or returns false when the iterator is
	// exhausted, in which case *p is undefined. The dense and
	// slice implementations yield elements in ascending order.
	TakeMin(p *int) bool
}
   305  
   306  // dense is a dense integer set iterator.
   307  type dense struct {
   308  	pos int
   309  	n   int
   310  }
   311  
   312  // TakeMin mimics intsets.Sparse TakeMin for dense sets. If the dense
   313  // iterator position is less than the iterator size, TakeMin sets *p
   314  // to the iterator position and increments the position and returns
   315  // true.
   316  // Otherwise, it returns false and *p is undefined.
   317  func (d *dense) TakeMin(p *int) bool {
   318  	if d.pos >= d.n {
   319  		return false
   320  	}
   321  	*p = d.pos
   322  	d.pos++
   323  	return true
   324  }
   325  
   326  // slice is a sparse integer set iterator.
   327  type slice struct {
   328  	pos   int
   329  	elems []int
   330  }
   331  
   332  // newSlice returns a new slice of elements from s, sorted ascending.
   333  func newSlice(s set.Ints) *slice {
   334  	elems := make([]int, 0, len(s))
   335  	for i := range s {
   336  		elems = append(elems, i)
   337  	}
   338  	sort.Ints(elems)
   339  	return &slice{elems: elems}
   340  }
   341  
   342  // TakeMin mimics intsets.Sparse TakeMin for a sorted set. If the set
   343  // iterator position is less than the iterator size, TakeMin sets *p
   344  // to the iterator position's element and increments the position
   345  // and returns true.
   346  // Otherwise, it returns false and *p is undefined.
   347  func (s *slice) TakeMin(p *int) bool {
   348  	if s.pos >= len(s.elems) {
   349  		return false
   350  	}
   351  	*p = s.elems[s.pos]
   352  	s.pos++
   353  	return true
   354  }
   355  
const (
	// negativeWeight and positiveWeight are the panic messages used
	// by positiveWeightFuncFor and negativeWeightFuncFor respectively
	// when an edge weight has the wrong sign.
	negativeWeight = "community: unexpected negative edge weight"
	positiveWeight = "community: unexpected positive edge weight"

	// deltaQtol is the tolerance for progression of the local moving heuristic's improvement of Q.
	deltaQtol = 1e-15
)
   363  
   364  // positiveWeightFuncFor returns a constructed weight function for the
   365  // positively weighted g. Unweighted graphs have unit weight for existing
   366  // edges.
   367  func positiveWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
   368  	if wg, ok := g.(graph.Weighted); ok {
   369  		return func(xid, yid int64) float64 {
   370  			w, ok := wg.Weight(xid, yid)
   371  			if !ok {
   372  				return 0
   373  			}
   374  			if w < 0 {
   375  				panic(negativeWeight)
   376  			}
   377  			return w
   378  		}
   379  	}
   380  	return func(xid, yid int64) float64 {
   381  		e := g.Edge(xid, yid)
   382  		if e == nil {
   383  			return 0
   384  		}
   385  		return 1
   386  	}
   387  }
   388  
   389  // negativeWeightFuncFor returns a constructed weight function for the
   390  // negatively weighted g. Unweighted graphs have unit weight for existing
   391  // edges.
   392  func negativeWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
   393  	if wg, ok := g.(graph.Weighted); ok {
   394  		return func(xid, yid int64) float64 {
   395  			w, ok := wg.Weight(xid, yid)
   396  			if !ok {
   397  				return 0
   398  			}
   399  			if w > 0 {
   400  				panic(positiveWeight)
   401  			}
   402  			return -w
   403  		}
   404  	}
   405  	return func(xid, yid int64) float64 {
   406  		e := g.Edge(xid, yid)
   407  		if e == nil {
   408  			return 0
   409  		}
   410  		return 1
   411  	}
   412  }
   413  
   414  // depth returns max(1, len(weights)). It is used to ensure
   415  // that multiplex community weights are properly initialised.
   416  func depth(weights []float64) int {
   417  	if weights == nil {
   418  		return 1
   419  	}
   420  	return len(weights)
   421  }