github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/graph/community/louvain_common.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package community 6 7 import ( 8 "fmt" 9 "sort" 10 11 "golang.org/x/exp/rand" 12 13 "github.com/jingcheng-WU/gonum/graph" 14 "github.com/jingcheng-WU/gonum/graph/internal/set" 15 ) 16 17 // Q returns the modularity Q score of the graph g subdivided into the 18 // given communities at the given resolution. If communities is nil, the 19 // unclustered modularity score is returned. The resolution parameter 20 // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110. 21 // Q will panic if g has any edge with negative edge weight. 22 // 23 // If g is undirected, Q is calculated according to 24 // Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j), 25 // If g is directed, it is calculated according to 26 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j). 27 // 28 // graph.Undirect may be used as a shim to allow calculation of Q for 29 // directed graphs with the undirected modularity function. 30 func Q(g graph.Graph, communities [][]graph.Node, resolution float64) float64 { 31 switch g := g.(type) { 32 case graph.Undirected: 33 return qUndirected(g, communities, resolution) 34 case graph.Directed: 35 return qDirected(g, communities, resolution) 36 default: 37 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 38 } 39 } 40 41 // ReducedGraph is a modularised graph. 42 type ReducedGraph interface { 43 graph.Graph 44 45 // Communities returns the community memberships 46 // of the nodes in the graph used to generate 47 // the reduced graph. 48 Communities() [][]graph.Node 49 50 // Structure returns the community structure of 51 // the current level of the module clustering. 52 // Each slice in the returned value recursively 53 // describes the membership of a community at 54 // the current level by indexing via the node 55 // ID into the structure of the non-nil 56 // ReducedGraph returned by Expanded, or when the 57 // ReducedGraph is nil, by containing nodes 58 // from the original input graph. 59 // 60 // The returned value should not be mutated. 61 Structure() [][]graph.Node 62 63 // Expanded returns the next lower level of the 64 // module clustering or nil if at the lowest level. 65 // 66 // The returned ReducedGraph will be the same 67 // concrete type as the receiver. 68 Expanded() ReducedGraph 69 } 70 71 // Modularize returns the hierarchical modularization of g at the given resolution 72 // using the Louvain algorithm. If src is nil, rand.Intn is used as the random 73 // generator. Modularize will panic if g has any edge with negative edge weight. 74 // 75 // If g is undirected it is modularised to minimise 76 // Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j), 77 // If g is directed it is modularised to minimise 78 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j). 79 // 80 // The concrete type of the ReducedGraph will be a pointer to either a 81 // ReducedUndirected or a ReducedDirected depending on the type of g. 82 // 83 // graph.Undirect may be used as a shim to allow modularization of 84 // directed graphs with the undirected modularity function. 85 func Modularize(g graph.Graph, resolution float64, src rand.Source) ReducedGraph { 86 switch g := g.(type) { 87 case graph.Undirected: 88 return louvainUndirected(g, resolution, src) 89 case graph.Directed: 90 return louvainDirected(g, resolution, src) 91 default: 92 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 93 } 94 } 95 96 // Multiplex is a multiplex graph. 97 type Multiplex interface { 98 // Nodes returns the nodes 99 // for the multiplex graph. 100 // All layers must refer to the same 101 // set of nodes. 102 Nodes() graph.Nodes 103 104 // Depth returns the number of layers 105 // in the multiplex graph. 106 Depth() int 107 } 108 109 // QMultiplex returns the modularity Q score of the multiplex graph layers 110 // subdivided into the given communities at the given resolutions and weights. Q is 111 // returned as the vector of weighted Q scores for each layer of the multiplex graph. 112 // If communities is nil, the unclustered modularity score is returned. 113 // If weights is nil layers are equally weighted, otherwise the length of 114 // weights must equal the number of layers. If resolutions is nil, a resolution 115 // of 1.0 is used for all layers, otherwise either a single element slice may be used 116 // to specify a global resolution, or the length of resolutions must equal the number 117 // of layers. The resolution parameter is γ as defined in Reichardt and Bornholdt 118 // doi:10.1103/PhysRevE.74.016110. 119 // QMultiplex will panic if the graph has any layer weight-scaled edge with 120 // negative edge weight. 121 // 122 // If g is undirected, Q is calculated according to 123 // Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m_{layer} ] \delta(c_i,c_j), 124 // If g is directed, it is calculated according to 125 // Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j). 126 // 127 // Note that Q values for multiplex graphs are not scaled by the total layer edge weight. 128 // 129 // graph.Undirect may be used as a shim to allow calculation of Q for 130 // directed graphs. 131 func QMultiplex(g Multiplex, communities [][]graph.Node, weights, resolutions []float64) []float64 { 132 if weights != nil && len(weights) != g.Depth() { 133 panic("community: weights vector length mismatch") 134 } 135 if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() { 136 panic("community: resolutions vector length mismatch") 137 } 138 139 switch g := g.(type) { 140 case UndirectedMultiplex: 141 return qUndirectedMultiplex(g, communities, weights, resolutions) 142 case DirectedMultiplex: 143 return qDirectedMultiplex(g, communities, weights, resolutions) 144 default: 145 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 146 } 147 } 148 149 // ReducedMultiplex is a modularised multiplex graph. 150 type ReducedMultiplex interface { 151 Multiplex 152 153 // Communities returns the community memberships 154 // of the nodes in the graph used to generate 155 // the reduced graph. 156 Communities() [][]graph.Node 157 158 // Structure returns the community structure of 159 // the current level of the module clustering. 160 // Each slice in the returned value recursively 161 // describes the membership of a community at 162 // the current level by indexing via the node 163 // ID into the structure of the non-nil 164 // ReducedGraph returned by Expanded, or when the 165 // ReducedGraph is nil, by containing nodes 166 // from the original input graph. 167 // 168 // The returned value should not be mutated. 169 Structure() [][]graph.Node 170 171 // Expanded returns the next lower level of the 172 // module clustering or nil if at the lowest level. 173 // 174 // The returned ReducedGraph will be the same 175 // concrete type as the receiver. 176 Expanded() ReducedMultiplex 177 } 178 179 // ModularizeMultiplex returns the hierarchical modularization of g at the given resolution 180 // using the Louvain algorithm. If all is true and g have negatively weighted layers, all 181 // communities will be searched during the modularization. If src is nil, rand.Intn is 182 // used as the random generator. ModularizeMultiplex will panic if g has any edge with 183 // edge weight that does not sign-match the layer weight. 184 // 185 // If g is undirected it is modularised to minimise 186 // Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m ] \delta(c_i,c_j). 187 // If g is directed it is modularised to minimise 188 // Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j). 189 // 190 // The concrete type of the ReducedMultiplex will be a pointer to a 191 // ReducedUndirectedMultiplex. 192 // 193 // graph.Undirect may be used as a shim to allow modularization of 194 // directed graphs with the undirected modularity function. 195 func ModularizeMultiplex(g Multiplex, weights, resolutions []float64, all bool, src rand.Source) ReducedMultiplex { 196 if weights != nil && len(weights) != g.Depth() { 197 panic("community: weights vector length mismatch") 198 } 199 if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() { 200 panic("community: resolutions vector length mismatch") 201 } 202 203 switch g := g.(type) { 204 case UndirectedMultiplex: 205 return louvainUndirectedMultiplex(g, weights, resolutions, all, src) 206 case DirectedMultiplex: 207 return louvainDirectedMultiplex(g, weights, resolutions, all, src) 208 default: 209 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 210 } 211 } 212 213 // undirectedEdges is the edge structure of a reduced undirected graph. 214 type undirectedEdges struct { 215 // edges and weights is the set 216 // of edges between nodes. 217 // weights is keyed such that 218 // the first element of the key 219 // is less than the second. 220 edges [][]int 221 weights map[[2]int]float64 222 } 223 224 // directedEdges is the edge structure of a reduced directed graph. 225 type directedEdges struct { 226 // edgesFrom, edgesTo and weights 227 // is the set of edges between nodes. 228 edgesFrom [][]int 229 edgesTo [][]int 230 weights map[[2]int]float64 231 } 232 233 // isValidID returns whether id is a valid ID for a community, 234 // multiplexCommunity or node. These are all graph.Node types 235 // stored in []T with a mapping between their index and their ID 236 // so IDs must be positive and fit within the int type. 237 func isValidID(id int64) bool { 238 return id == int64(int(id)) && id >= 0 239 } 240 241 // community is a reduced graph node describing its membership. 242 type community struct { 243 // community graphs are internal, in-memory 244 // with dense IDs, so id is always an int. 245 id int 246 247 nodes []graph.Node 248 249 weight float64 250 } 251 252 func (n community) ID() int64 { return int64(n.id) } 253 254 // edge is a reduced graph edge. 255 type edge struct { 256 from, to community 257 weight float64 258 } 259 260 func (e edge) From() graph.Node { return e.from } 261 func (e edge) To() graph.Node { return e.to } 262 func (e edge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e } 263 func (e edge) Weight() float64 { return e.weight } 264 265 // multiplexCommunity is a reduced multiplex graph node describing its membership. 266 type multiplexCommunity struct { 267 // community graphs are internal, in-memory 268 // with dense IDs, so id is always an int. 269 id int 270 271 nodes []graph.Node 272 273 weights []float64 274 } 275 276 func (n multiplexCommunity) ID() int64 { return int64(n.id) } 277 278 // multiplexEdge is a reduced graph edge for a multiplex graph. 279 type multiplexEdge struct { 280 from, to multiplexCommunity 281 weight float64 282 } 283 284 func (e multiplexEdge) From() graph.Node { return e.from } 285 func (e multiplexEdge) To() graph.Node { return e.to } 286 func (e multiplexEdge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e } 287 func (e multiplexEdge) Weight() float64 { return e.weight } 288 289 // commIdx is an index of a node in a community held by a localMover. 290 type commIdx struct { 291 community int 292 node int 293 } 294 295 // node is defined to avoid an import of .../graph/simple. node is 296 // used in in-memory, dense ID graphs and so is always an int. 297 type node int 298 299 func (n node) ID() int64 { return int64(n) } 300 301 // minTaker is a set iterator. 302 type minTaker interface { 303 TakeMin(p *int) bool 304 } 305 306 // dense is a dense integer set iterator. 307 type dense struct { 308 pos int 309 n int 310 } 311 312 // TakeMin mimics intsets.Sparse TakeMin for dense sets. If the dense 313 // iterator position is less than the iterator size, TakeMin sets *p 314 // to the iterator position and increments the position and returns 315 // true. 316 // Otherwise, it returns false and *p is undefined. 317 func (d *dense) TakeMin(p *int) bool { 318 if d.pos >= d.n { 319 return false 320 } 321 *p = d.pos 322 d.pos++ 323 return true 324 } 325 326 // slice is a sparse integer set iterator. 327 type slice struct { 328 pos int 329 elems []int 330 } 331 332 // newSlice returns a new slice of elements from s, sorted ascending. 333 func newSlice(s set.Ints) *slice { 334 elems := make([]int, 0, len(s)) 335 for i := range s { 336 elems = append(elems, i) 337 } 338 sort.Ints(elems) 339 return &slice{elems: elems} 340 } 341 342 // TakeMin mimics intsets.Sparse TakeMin for a sorted set. If the set 343 // iterator position is less than the iterator size, TakeMin sets *p 344 // to the iterator position's element and increments the position 345 // and returns true. 346 // Otherwise, it returns false and *p is undefined. 347 func (s *slice) TakeMin(p *int) bool { 348 if s.pos >= len(s.elems) { 349 return false 350 } 351 *p = s.elems[s.pos] 352 s.pos++ 353 return true 354 } 355 356 const ( 357 negativeWeight = "community: unexpected negative edge weight" 358 positiveWeight = "community: unexpected positive edge weight" 359 360 // deltaQtol is the tolerance for progression of the local moving heuristic's improvement of Q. 361 deltaQtol = 1e-15 362 ) 363 364 // positiveWeightFuncFor returns a constructed weight function for the 365 // positively weighted g. Unweighted graphs have unit weight for existing 366 // edges. 367 func positiveWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 { 368 if wg, ok := g.(graph.Weighted); ok { 369 return func(xid, yid int64) float64 { 370 w, ok := wg.Weight(xid, yid) 371 if !ok { 372 return 0 373 } 374 if w < 0 { 375 panic(negativeWeight) 376 } 377 return w 378 } 379 } 380 return func(xid, yid int64) float64 { 381 e := g.Edge(xid, yid) 382 if e == nil { 383 return 0 384 } 385 return 1 386 } 387 } 388 389 // negativeWeightFuncFor returns a constructed weight function for the 390 // negatively weighted g. Unweighted graphs have unit weight for existing 391 // edges. 392 func negativeWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 { 393 if wg, ok := g.(graph.Weighted); ok { 394 return func(xid, yid int64) float64 { 395 w, ok := wg.Weight(xid, yid) 396 if !ok { 397 return 0 398 } 399 if w > 0 { 400 panic(positiveWeight) 401 } 402 return -w 403 } 404 } 405 return func(xid, yid int64) float64 { 406 e := g.Edge(xid, yid) 407 if e == nil { 408 return 0 409 } 410 return 1 411 } 412 } 413 414 // depth returns max(1, len(weights)). It is used to ensure 415 // that multiplex community weights are properly initialised. 416 func depth(weights []float64) int { 417 if weights == nil { 418 return 1 419 } 420 return len(weights) 421 }