gonum.org/v1/gonum@v0.14.0/graph/community/louvain_common.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package community 6 7 import ( 8 "fmt" 9 "sort" 10 11 "golang.org/x/exp/rand" 12 13 "gonum.org/v1/gonum/graph" 14 "gonum.org/v1/gonum/graph/internal/set" 15 ) 16 17 // Q returns the modularity Q score of the graph g subdivided into the 18 // given communities at the given resolution. If communities is nil, the 19 // unclustered modularity score is returned. The resolution parameter 20 // is γ as defined in Reichardt and Bornholdt doi:10.1103/PhysRevE.74.016110. 21 // Q will panic if g has any edge with negative edge weight. 22 // 23 // If g is undirected, Q is calculated according to 24 // 25 // Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j), 26 // 27 // If g is directed, it is calculated according to 28 // 29 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j). 30 // 31 // graph.Undirect may be used as a shim to allow calculation of Q for 32 // directed graphs with the undirected modularity function. 33 func Q(g graph.Graph, communities [][]graph.Node, resolution float64) float64 { 34 switch g := g.(type) { 35 case graph.Undirected: 36 return qUndirected(g, communities, resolution) 37 case graph.Directed: 38 return qDirected(g, communities, resolution) 39 default: 40 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 41 } 42 } 43 44 // ReducedGraph is a modularised graph. 45 type ReducedGraph interface { 46 graph.Graph 47 48 // Communities returns the community memberships 49 // of the nodes in the graph used to generate 50 // the reduced graph. 51 Communities() [][]graph.Node 52 53 // Structure returns the community structure of 54 // the current level of the module clustering. 55 // Each slice in the returned value recursively 56 // describes the membership of a community at 57 // the current level by indexing via the node 58 // ID into the structure of the non-nil 59 // ReducedGraph returned by Expanded, or when the 60 // ReducedGraph is nil, by containing nodes 61 // from the original input graph. 62 // 63 // The returned value should not be mutated. 64 Structure() [][]graph.Node 65 66 // Expanded returns the next lower level of the 67 // module clustering or nil if at the lowest level. 68 // 69 // The returned ReducedGraph will be the same 70 // concrete type as the receiver. 71 Expanded() ReducedGraph 72 } 73 74 // Modularize returns the hierarchical modularization of g at the given resolution 75 // using the Louvain algorithm. If src is nil, rand.Intn is used as the random 76 // generator. Modularize will panic if g has any edge with negative edge weight. 77 // 78 // If g is undirected it is modularised to minimise 79 // 80 // Q = 1/2m \sum_{ij} [ A_{ij} - (\gamma k_i k_j)/2m ] \delta(c_i,c_j), 81 // 82 // If g is directed it is modularised to minimise 83 // 84 // Q = 1/m \sum_{ij} [ A_{ij} - (\gamma k_i^in k_j^out)/m ] \delta(c_i,c_j). 85 // 86 // The concrete type of the ReducedGraph will be a pointer to either a 87 // ReducedUndirected or a ReducedDirected depending on the type of g. 88 // 89 // graph.Undirect may be used as a shim to allow modularization of 90 // directed graphs with the undirected modularity function. 91 func Modularize(g graph.Graph, resolution float64, src rand.Source) ReducedGraph { 92 switch g := g.(type) { 93 case graph.Undirected: 94 return louvainUndirected(g, resolution, src) 95 case graph.Directed: 96 return louvainDirected(g, resolution, src) 97 default: 98 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 99 } 100 } 101 102 // Multiplex is a multiplex graph. 103 type Multiplex interface { 104 // Nodes returns the nodes 105 // for the multiplex graph. 106 // All layers must refer to the same 107 // set of nodes. 108 Nodes() graph.Nodes 109 110 // Depth returns the number of layers 111 // in the multiplex graph. 112 Depth() int 113 } 114 115 // QMultiplex returns the modularity Q score of the multiplex graph layers 116 // subdivided into the given communities at the given resolutions and weights. Q is 117 // returned as the vector of weighted Q scores for each layer of the multiplex graph. 118 // If communities is nil, the unclustered modularity score is returned. 119 // If weights is nil layers are equally weighted, otherwise the length of 120 // weights must equal the number of layers. If resolutions is nil, a resolution 121 // of 1.0 is used for all layers, otherwise either a single element slice may be used 122 // to specify a global resolution, or the length of resolutions must equal the number 123 // of layers. The resolution parameter is γ as defined in Reichardt and Bornholdt 124 // doi:10.1103/PhysRevE.74.016110. 125 // QMultiplex will panic if the graph has any layer weight-scaled edge with 126 // negative edge weight. 127 // 128 // If g is undirected, Q is calculated according to 129 // 130 // Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m_{layer} ] \delta(c_i,c_j), 131 // 132 // If g is directed, it is calculated according to 133 // 134 // Q_{layer} = w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j). 135 // 136 // Note that Q values for multiplex graphs are not scaled by the total layer edge weight. 137 // 138 // graph.Undirect may be used as a shim to allow calculation of Q for 139 // directed graphs. 140 func QMultiplex(g Multiplex, communities [][]graph.Node, weights, resolutions []float64) []float64 { 141 if weights != nil && len(weights) != g.Depth() { 142 panic("community: weights vector length mismatch") 143 } 144 if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() { 145 panic("community: resolutions vector length mismatch") 146 } 147 148 switch g := g.(type) { 149 case UndirectedMultiplex: 150 return qUndirectedMultiplex(g, communities, weights, resolutions) 151 case DirectedMultiplex: 152 return qDirectedMultiplex(g, communities, weights, resolutions) 153 default: 154 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 155 } 156 } 157 158 // ReducedMultiplex is a modularised multiplex graph. 159 type ReducedMultiplex interface { 160 Multiplex 161 162 // Communities returns the community memberships 163 // of the nodes in the graph used to generate 164 // the reduced graph. 165 Communities() [][]graph.Node 166 167 // Structure returns the community structure of 168 // the current level of the module clustering. 169 // Each slice in the returned value recursively 170 // describes the membership of a community at 171 // the current level by indexing via the node 172 // ID into the structure of the non-nil 173 // ReducedGraph returned by Expanded, or when the 174 // ReducedGraph is nil, by containing nodes 175 // from the original input graph. 176 // 177 // The returned value should not be mutated. 178 Structure() [][]graph.Node 179 180 // Expanded returns the next lower level of the 181 // module clustering or nil if at the lowest level. 182 // 183 // The returned ReducedGraph will be the same 184 // concrete type as the receiver. 185 Expanded() ReducedMultiplex 186 } 187 188 // ModularizeMultiplex returns the hierarchical modularization of g at the given resolution 189 // using the Louvain algorithm. If all is true and g have negatively weighted layers, all 190 // communities will be searched during the modularization. If src is nil, rand.Intn is 191 // used as the random generator. ModularizeMultiplex will panic if g has any edge with 192 // edge weight that does not sign-match the layer weight. 193 // 194 // If g is undirected it is modularised to minimise 195 // 196 // Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i k_j)/2m ] \delta(c_i,c_j). 197 // 198 // If g is directed it is modularised to minimise 199 // 200 // Q = \sum w_{layer} \sum_{ij} [ A_{layer}*_{ij} - (\gamma_{layer} k_i^in k_j^out)/m_{layer} ] \delta(c_i,c_j). 201 // 202 // The concrete type of the ReducedMultiplex will be a pointer to a 203 // ReducedUndirectedMultiplex. 204 // 205 // graph.Undirect may be used as a shim to allow modularization of 206 // directed graphs with the undirected modularity function. 207 func ModularizeMultiplex(g Multiplex, weights, resolutions []float64, all bool, src rand.Source) ReducedMultiplex { 208 if weights != nil && len(weights) != g.Depth() { 209 panic("community: weights vector length mismatch") 210 } 211 if resolutions != nil && len(resolutions) != 1 && len(resolutions) != g.Depth() { 212 panic("community: resolutions vector length mismatch") 213 } 214 215 switch g := g.(type) { 216 case UndirectedMultiplex: 217 return louvainUndirectedMultiplex(g, weights, resolutions, all, src) 218 case DirectedMultiplex: 219 return louvainDirectedMultiplex(g, weights, resolutions, all, src) 220 default: 221 panic(fmt.Sprintf("community: invalid graph type: %T", g)) 222 } 223 } 224 225 // undirectedEdges is the edge structure of a reduced undirected graph. 226 type undirectedEdges struct { 227 // edges and weights is the set 228 // of edges between nodes. 229 // weights is keyed such that 230 // the first element of the key 231 // is less than the second. 232 edges [][]int 233 weights map[[2]int]float64 234 } 235 236 // directedEdges is the edge structure of a reduced directed graph. 237 type directedEdges struct { 238 // edgesFrom, edgesTo and weights 239 // is the set of edges between nodes. 240 edgesFrom [][]int 241 edgesTo [][]int 242 weights map[[2]int]float64 243 } 244 245 // isValidID returns whether id is a valid ID for a community, 246 // multiplexCommunity or node. These are all graph.Node types 247 // stored in []T with a mapping between their index and their ID 248 // so IDs must be positive and fit within the int type. 249 func isValidID(id int64) bool { 250 return id == int64(int(id)) && id >= 0 251 } 252 253 // community is a reduced graph node describing its membership. 254 type community struct { 255 // community graphs are internal, in-memory 256 // with dense IDs, so id is always an int. 257 id int 258 259 nodes []graph.Node 260 261 weight float64 262 } 263 264 func (n community) ID() int64 { return int64(n.id) } 265 266 // edge is a reduced graph edge. 267 type edge struct { 268 from, to community 269 weight float64 270 } 271 272 func (e edge) From() graph.Node { return e.from } 273 func (e edge) To() graph.Node { return e.to } 274 func (e edge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e } 275 func (e edge) Weight() float64 { return e.weight } 276 277 // multiplexCommunity is a reduced multiplex graph node describing its membership. 278 type multiplexCommunity struct { 279 // community graphs are internal, in-memory 280 // with dense IDs, so id is always an int. 281 id int 282 283 nodes []graph.Node 284 285 weights []float64 286 } 287 288 func (n multiplexCommunity) ID() int64 { return int64(n.id) } 289 290 // multiplexEdge is a reduced graph edge for a multiplex graph. 291 type multiplexEdge struct { 292 from, to multiplexCommunity 293 weight float64 294 } 295 296 func (e multiplexEdge) From() graph.Node { return e.from } 297 func (e multiplexEdge) To() graph.Node { return e.to } 298 func (e multiplexEdge) ReversedEdge() graph.Edge { e.from, e.to = e.to, e.from; return e } 299 func (e multiplexEdge) Weight() float64 { return e.weight } 300 301 // commIdx is an index of a node in a community held by a localMover. 302 type commIdx struct { 303 community int 304 node int 305 } 306 307 // node is defined to avoid an import of .../graph/simple. node is 308 // used in in-memory, dense ID graphs and so is always an int. 309 type node int 310 311 func (n node) ID() int64 { return int64(n) } 312 313 // minTaker is a set iterator. 314 type minTaker interface { 315 TakeMin(p *int) bool 316 } 317 318 // dense is a dense integer set iterator. 319 type dense struct { 320 pos int 321 n int 322 } 323 324 // TakeMin mimics intsets.Sparse TakeMin for dense sets. If the dense 325 // iterator position is less than the iterator size, TakeMin sets *p 326 // to the iterator position and increments the position and returns 327 // true. 328 // Otherwise, it returns false and *p is undefined. 329 func (d *dense) TakeMin(p *int) bool { 330 if d.pos >= d.n { 331 return false 332 } 333 *p = d.pos 334 d.pos++ 335 return true 336 } 337 338 // slice is a sparse integer set iterator. 339 type slice struct { 340 pos int 341 elems []int 342 } 343 344 // newSlice returns a new slice of elements from s, sorted ascending. 345 func newSlice(s set.Ints) *slice { 346 elems := make([]int, 0, len(s)) 347 for i := range s { 348 elems = append(elems, i) 349 } 350 sort.Ints(elems) 351 return &slice{elems: elems} 352 } 353 354 // TakeMin mimics intsets.Sparse TakeMin for a sorted set. If the set 355 // iterator position is less than the iterator size, TakeMin sets *p 356 // to the iterator position's element and increments the position 357 // and returns true. 358 // Otherwise, it returns false and *p is undefined. 359 func (s *slice) TakeMin(p *int) bool { 360 if s.pos >= len(s.elems) { 361 return false 362 } 363 *p = s.elems[s.pos] 364 s.pos++ 365 return true 366 } 367 368 const ( 369 negativeWeight = "community: unexpected negative edge weight" 370 positiveWeight = "community: unexpected positive edge weight" 371 372 // deltaQtol is the tolerance for progression of the local moving heuristic's improvement of Q. 373 deltaQtol = 1e-15 374 ) 375 376 // positiveWeightFuncFor returns a constructed weight function for the 377 // positively weighted g. Unweighted graphs have unit weight for existing 378 // edges. 379 func positiveWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 { 380 if wg, ok := g.(graph.Weighted); ok { 381 return func(xid, yid int64) float64 { 382 w, ok := wg.Weight(xid, yid) 383 if !ok { 384 return 0 385 } 386 if w < 0 { 387 panic(negativeWeight) 388 } 389 return w 390 } 391 } 392 return func(xid, yid int64) float64 { 393 e := g.Edge(xid, yid) 394 if e == nil { 395 return 0 396 } 397 return 1 398 } 399 } 400 401 // negativeWeightFuncFor returns a constructed weight function for the 402 // negatively weighted g. Unweighted graphs have unit weight for existing 403 // edges. 404 func negativeWeightFuncFor(g graph.Graph) func(xid, yid int64) float64 { 405 if wg, ok := g.(graph.Weighted); ok { 406 return func(xid, yid int64) float64 { 407 w, ok := wg.Weight(xid, yid) 408 if !ok { 409 return 0 410 } 411 if w > 0 { 412 panic(positiveWeight) 413 } 414 return -w 415 } 416 } 417 return func(xid, yid int64) float64 { 418 e := g.Edge(xid, yid) 419 if e == nil { 420 return 0 421 } 422 return 1 423 } 424 } 425 426 // depth returns max(1, len(weights)). It is used to ensure 427 // that multiplex community weights are properly initialised. 428 func depth(weights []float64) int { 429 if weights == nil { 430 return 1 431 } 432 return len(weights) 433 }