gonum.org/v1/gonum@v0.14.0/graph/community/louvain_common.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package community
     6  
     7  import (
     8  	"fmt"
     9  	"sort"
    10  
    11  	"golang.org/x/exp/rand"
    12  
    13  	"gonum.org/v1/gonum/graph"
    14  	"gonum.org/v1/gonum/graph/internal/set"
    15  )
    16  
    17  // Q returns the modularity Q score of the graph g subdivided into the
    18  // given communities at the given resolution. If communities is nil, the
    19  // unclustered modularity score is returned. The resolution parameter
    20  // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110.
    21  // Q will panic if g has any edge with negative edge weight.
    22  //
    23  // If g is undirected, Q is calculated according to
    24  //
    25  //	Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
    26  //
    27  // If g is directed, it is calculated according to
    28  //
    29  //	Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
    30  //
    31  // graph.Undirect may be used as a shim to allow calculation of Q for
    32  // directed graphs with the undirected modularity function.
    33  func Q(g graph.Graph, communities [][]graph.Node, resolution float64) float64 {
    34  	switch g := g.(type) {
    35  	case graph.Undirected:
    36  		return qUndirected(g, communities, resolution)
    37  	case graph.Directed:
    38  		return qDirected(g, communities, resolution)
    39  	default:
    40  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
    41  	}
    42  }
    43  
// ReducedGraph is a modularised graph. It is the result type of
// Modularize and describes one level of a hierarchical clustering.
type ReducedGraph interface {
	graph.Graph

	// Communities returns the community memberships
	// of the nodes in the graph used to generate
	// the reduced graph.
	Communities() [][]graph.Node

	// Structure returns the community structure of
	// the current level of the module clustering.
	// Each slice in the returned value recursively
	// describes the membership of a community at
	// the current level by indexing via the node
	// ID into the structure of the non-nil
	// ReducedGraph returned by Expanded, or when the
	// ReducedGraph is nil, by containing nodes
	// from the original input graph.
	//
	// The returned value should not be mutated.
	Structure() [][]graph.Node

	// Expanded returns the next lower level of the
	// module clustering or nil if at the lowest level.
	//
	// The returned ReducedGraph will be the same
	// concrete type as the receiver.
	Expanded() ReducedGraph
}
    73  
    74  // Modularize returns the hierarchical modularization of g at the given resolution
    75  // using the Louvain algorithm. If src is nil, rand.Intn is used as the random
    76  // generator. Modularize will panic if g has any edge with negative edge weight.
    77  //
    78  // If g is undirected it is modularised to minimise
    79  //
    80  //	Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j),
    81  //
    82  // If g is directed it is modularised to minimise
    83  //
    84  //	Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j).
    85  //
    86  // The concrete type of the ReducedGraph will be a pointer to either a
    87  // ReducedUndirected or a ReducedDirected depending on the type of g.
    88  //
    89  // graph.Undirect may be used as a shim to allow modularization of
    90  // directed graphs with the undirected modularity function.
    91  func Modularize(g graph.Graph, resolution float64, src rand.Source) ReducedGraph {
    92  	switch g := g.(type) {
    93  	case graph.Undirected:
    94  		return louvainUndirected(g, resolution, src)
    95  	case graph.Directed:
    96  		return louvainDirected(g, resolution, src)
    97  	default:
    98  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
    99  	}
   100  }
   101  
// Multiplex is a multiplex graph: a collection of graph layers
// that share a single set of nodes.
type Multiplex interface {
	// Nodes returns the nodes
	// for the multiplex graph.
	// All layers must refer to the same
	// set of nodes.
	Nodes() graph.Nodes

	// Depth returns the number of layers
	// in the multiplex graph.
	Depth() int
}
   114  
   115  // QMultiplex returns the modularity Q score of the multiplex graph layers
   116  // subdivided into the given communities at the given resolutions and weights. Q is
   117  // returned as the vector of weighted Q scores for each layer of the multiplex graph.
   118  // If communities is nil, the unclustered modularity score is returned.
   119  // If weights is nil layers are equally weighted, otherwise the length of
   120  // weights must equal the number of layers. If resolutions is nil, a resolution
   121  // of 1.0 is used for all layers, otherwise either a single element slice may be used
   122  // to specify a global resolution, or the length of resolutions must equal the number
   123  // of layers. The resolution parameter is γ as defined in Reichardt and Bornholdt
   124  // doi:10.1103/PhysRevE.74.016110.
   125  // QMultiplex will panic if the graph has any layer weight-scaled edge with
   126  // negative edge weight.
   127  //
   128  // If g is undirected, Q is calculated according to
   129  //
   130  //	Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m_{layer} ] \delta(c_i,c_j),
   131  //
   132  // If g is directed, it is calculated according to
   133  //
   134  //	Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
   135  //
   136  // Note that Q values for multiplex graphs are not scaled by the total layer edge weight.
   137  //
   138  // graph.Undirect may be used as a shim to allow calculation of Q for
   139  // directed graphs.
   140  func QMultiplex(g Multiplex, communities [][]graph.Node, weights, resolutions []float64) []float64 {
   141  	if weights != nil && len(weights) != g.Depth() {
   142  		panic("community: weights vector length mismatch")
   143  	}
   144  	if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
   145  		panic("community: resolutions vector length mismatch")
   146  	}
   147  
   148  	switch g := g.(type) {
   149  	case UndirectedMultiplex:
   150  		return qUndirectedMultiplex(g, communities, weights, resolutions)
   151  	case DirectedMultiplex:
   152  		return qDirectedMultiplex(g, communities, weights, resolutions)
   153  	default:
   154  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
   155  	}
   156  }
   157  
// ReducedMultiplex is a modularised multiplex graph. It is the result
// type of ModularizeMultiplex and describes one level of a hierarchical
// clustering.
type ReducedMultiplex interface {
	Multiplex

	// Communities returns the community memberships
	// of the nodes in the graph used to generate
	// the reduced graph.
	Communities() [][]graph.Node

	// Structure returns the community structure of
	// the current level of the module clustering.
	// Each slice in the returned value recursively
	// describes the membership of a community at
	// the current level by indexing via the node
	// ID into the structure of the non-nil
	// ReducedMultiplex returned by Expanded, or when the
	// ReducedMultiplex is nil, by containing nodes
	// from the original input graph.
	//
	// The returned value should not be mutated.
	Structure() [][]graph.Node

	// Expanded returns the next lower level of the
	// module clustering or nil if at the lowest level.
	//
	// The returned ReducedMultiplex will be the same
	// concrete type as the receiver.
	Expanded() ReducedMultiplex
}
   187  
   188  // ModularizeMultiplex returns the hierarchical modularization of g at the given resolution
   189  // using the Louvain algorithm. If all is true and g have negatively weighted layers, all
   190  // communities will be searched during the modularization. If src is nil, rand.Intn is
   191  // used as the random generator. ModularizeMultiplex will panic if g has any edge with
   192  // edge weight that does not sign-match the layer weight.
   193  //
   194  // If g is undirected it is modularised to minimise
   195  //
   196  //	Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m ] \delta(c_i,c_j).
   197  //
   198  // If g is directed it is modularised to minimise
   199  //
   200  //	Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j).
   201  //
   202  // The concrete type of the ReducedMultiplex will be a pointer to a
   203  // ReducedUndirectedMultiplex.
   204  //
   205  // graph.Undirect may be used as a shim to allow modularization of
   206  // directed graphs with the undirected modularity function.
   207  func ModularizeMultiplex(g Multiplex, weights, resolutions []float64, all bool, src rand.Source) ReducedMultiplex {
   208  	if weights != nil && len(weights) != g.Depth() {
   209  		panic("community: weights vector length mismatch")
   210  	}
   211  	if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() {
   212  		panic("community: resolutions vector length mismatch")
   213  	}
   214  
   215  	switch g := g.(type) {
   216  	case UndirectedMultiplex:
   217  		return louvainUndirectedMultiplex(g, weights, resolutions, all, src)
   218  	case DirectedMultiplex:
   219  		return louvainDirectedMultiplex(g, weights, resolutions, all, src)
   220  	default:
   221  		panic(fmt.Sprintf("community: invalid graph type: %T", g))
   222  	}
   223  }
   224  
// undirectedEdges is the edge structure of a reduced undirected graph.
type undirectedEdges struct {
	// edges and weights together describe
	// the set of edges between nodes.
	// weights is keyed by node pairs
	// such that the first element of
	// the key is less than the second.
	edges   [][]int
	weights map[[2]int]float64
}
   235  
// directedEdges is the edge structure of a reduced directed graph.
type directedEdges struct {
	// edgesFrom, edgesTo and weights
	// together describe the set of
	// edges between nodes.
	edgesFrom [][]int
	edgesTo   [][]int
	weights   map[[2]int]float64
}
   244  
   245  // isValidID returns whether id is a valid ID for a community,
   246  // multiplexCommunity or node. These are all graph.Node types
   247  // stored in []T with a mapping between their index and their ID
   248  // so IDs must be positive and fit within the int type.
   249  func isValidID(id int64) bool {
   250  	return id == int64(int(id)) && id >= 0
   251  }
   252  
   253  // community is a reduced graph node describing its membership.
   254  type community struct {
   255  	// community graphs are internal, in-memory
   256  	// with dense IDs, so id is always an int.
   257  	id int
   258  
   259  	nodes []graph.Node
   260  
   261  	weight float64
   262  }
   263  
   264  func (n community) ID() int64 { return int64(n.id) }
   265  
   266  // edge is a reduced graph edge.
   267  type edge struct {
   268  	from, to community
   269  	weight   float64
   270  }
   271  
   272  func (e edge) From() graph.Node         { return e.from }
   273  func (e edge) To() graph.Node           { return e.to }
   274  func (e edge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
   275  func (e edge) Weight() float64          { return e.weight }
   276  
   277  // multiplexCommunity is a reduced multiplex graph node describing its membership.
   278  type multiplexCommunity struct {
   279  	// community graphs are internal, in-memory
   280  	// with dense IDs, so id is always an int.
   281  	id int
   282  
   283  	nodes []graph.Node
   284  
   285  	weights []float64
   286  }
   287  
   288  func (n multiplexCommunity) ID() int64 { return int64(n.id) }
   289  
   290  // multiplexEdge is a reduced graph edge for a multiplex graph.
   291  type multiplexEdge struct {
   292  	from, to multiplexCommunity
   293  	weight   float64
   294  }
   295  
   296  func (e multiplexEdge) From() graph.Node         { return e.from }
   297  func (e multiplexEdge) To() graph.Node           { return e.to }
   298  func (e multiplexEdge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e }
   299  func (e multiplexEdge) Weight() float64          { return e.weight }
   300  
// commIdx is an index of a node in a community held by a localMover.
type commIdx struct {
	// community is the index of the community.
	community int
	// node is the index of the node within that community.
	node int
}
   306  
   307  // node is defined to avoid an import of .../graph/simple. node is
   308  // used in in-memory, dense ID graphs and so is always an int.
   309  type node int
   310  
   311  func (n node) ID() int64 { return int64(n) }
   312  
// minTaker is a set iterator.
type minTaker interface {
	// TakeMin stores the next element of the set in *p and
	// returns true, or returns false when no elements remain,
	// in which case *p is undefined.
	TakeMin(p *int) bool
}
   317  
   318  // dense is a dense integer set iterator.
   319  type dense struct {
   320  	pos int
   321  	n   int
   322  }
   323  
   324  // TakeMin mimics intsets.Sparse TakeMin for dense sets. If the dense
   325  // iterator position is less than the iterator size, TakeMin sets *p
   326  // to the iterator position and increments the position and returns
   327  // true.
   328  // Otherwise, it returns false and *p is undefined.
   329  func (d *dense) TakeMin(p *int) bool {
   330  	if d.pos >= d.n {
   331  		return false
   332  	}
   333  	*p = d.pos
   334  	d.pos++
   335  	return true
   336  }
   337  
// slice is a sparse integer set iterator.
type slice struct {
	// pos is the index into elems of the
	// next element to be taken.
	pos int
	// elems holds the elements of the set.
	elems []int
}
   343  
   344  // newSlice returns a new slice of elements from s, sorted ascending.
   345  func newSlice(s set.Ints) *slice {
   346  	elems := make([]int, 0, len(s))
   347  	for i := range s {
   348  		elems = append(elems, i)
   349  	}
   350  	sort.Ints(elems)
   351  	return &slice{elems: elems}
   352  }
   353  
   354  // TakeMin mimics intsets.Sparse TakeMin for a sorted set. If the set
   355  // iterator position is less than the iterator size, TakeMin sets *p
   356  // to the iterator position's element and increments the position
   357  // and returns true.
   358  // Otherwise, it returns false and *p is undefined.
   359  func (s *slice) TakeMin(p *int) bool {
   360  	if s.pos >= len(s.elems) {
   361  		return false
   362  	}
   363  	*p = s.elems[s.pos]
   364  	s.pos++
   365  	return true
   366  }
   367  
   368  const (
   369  	negativeWeight = "community: unexpected negative edge weight"
   370  	positiveWeight = "community: unexpected positive edge weight"
   371  
   372  	// deltaQtol is the tolerance for progression of the local moving heuristic's improvement of Q.
   373  	deltaQtol = 1e-15
   374  )
   375  
   376  // positiveWeightFuncFor returns a constructed weight function for the
   377  // positively weighted g. Unweighted graphs have unit weight for existing
   378  // edges.
   379  func positiveWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
   380  	if wg, ok := g.(graph.Weighted); ok {
   381  		return func(xid, yid int64) float64 {
   382  			w, ok := wg.Weight(xid, yid)
   383  			if !ok {
   384  				return 0
   385  			}
   386  			if w < 0 {
   387  				panic(negativeWeight)
   388  			}
   389  			return w
   390  		}
   391  	}
   392  	return func(xid, yid int64) float64 {
   393  		e := g.Edge(xid, yid)
   394  		if e == nil {
   395  			return 0
   396  		}
   397  		return 1
   398  	}
   399  }
   400  
   401  // negativeWeightFuncFor returns a constructed weight function for the
   402  // negatively weighted g. Unweighted graphs have unit weight for existing
   403  // edges.
   404  func negativeWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 {
   405  	if wg, ok := g.(graph.Weighted); ok {
   406  		return func(xid, yid int64) float64 {
   407  			w, ok := wg.Weight(xid, yid)
   408  			if !ok {
   409  				return 0
   410  			}
   411  			if w > 0 {
   412  				panic(positiveWeight)
   413  			}
   414  			return -w
   415  		}
   416  	}
   417  	return func(xid, yid int64) float64 {
   418  		e := g.Edge(xid, yid)
   419  		if e == nil {
   420  			return 0
   421  		}
   422  		return 1
   423  	}
   424  }
   425  
   426  // depth returns max(1, len(weights)). It is used to ensure
   427  // that multiplex community weights are properly initialised.
   428  func depth(weights []float64) int {
   429  	if weights == nil {
   430  		return 1
   431  	}
   432  	return len(weights)
   433  }