gonum.org/v1/gonum@v0.14.0/graph/network/page.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package network 6 7 import ( 8 "math" 9 10 "golang.org/x/exp/rand" 11 12 "gonum.org/v1/gonum/floats" 13 "gonum.org/v1/gonum/graph" 14 "gonum.org/v1/gonum/mat" 15 ) 16 17 // PageRank returns the PageRank weights for nodes of the directed graph g 18 // using the given damping factor and terminating when the 2-norm of the 19 // vector difference between iterations is below tol. The returned map is 20 // keyed on the graph node IDs. 21 // If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated. 22 func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 { 23 if g, ok := g.(graph.WeightedDirected); ok { 24 return edgeWeightedPageRank(g, damp, tol) 25 } 26 return pageRank(g, damp, tol) 27 } 28 29 // PageRankSparse returns the PageRank weights for nodes of the sparse directed 30 // graph g using the given damping factor and terminating when the 2-norm of the 31 // vector difference between iterations is below tol. The returned map is 32 // keyed on the graph node IDs. 33 // If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated. 34 func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 { 35 if g, ok := g.(graph.WeightedDirected); ok { 36 return edgeWeightedPageRankSparse(g, damp, tol) 37 } 38 return pageRankSparse(g, damp, tol) 39 } 40 41 // edgeWeightedPageRank returns the PageRank weights for nodes of the weighted directed graph g 42 // using the given damping factor and terminating when the 2-norm of the 43 // vector difference between iterations is below tol. The returned map is 44 // keyed on the graph node IDs. 45 func edgeWeightedPageRank(g graph.WeightedDirected, damp, tol float64) map[int64]float64 { 46 // edgeWeightedPageRank is implemented according to "How Google Finds Your Needle 47 // in the Web's Haystack" with the modification that 48 // the columns of hyperlink matrix H are calculated with edge weights. 49 // 50 // G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k 51 // 52 // http://www.ams.org/samplings/feature-column/fcarc-pagerank 53 54 nodes := graph.NodesOf(g.Nodes()) 55 indexOf := make(map[int64]int, len(nodes)) 56 for i, n := range nodes { 57 indexOf[n.ID()] = i 58 } 59 60 m := mat.NewDense(len(nodes), len(nodes), nil) 61 dangling := damp / float64(len(nodes)) 62 for j, u := range nodes { 63 to := graph.NodesOf(g.From(u.ID())) 64 var z float64 65 for _, v := range to { 66 if w, ok := g.Weight(u.ID(), v.ID()); ok { 67 z += w 68 } 69 } 70 if z != 0 { 71 for _, v := range to { 72 if w, ok := g.Weight(u.ID(), v.ID()); ok { 73 m.Set(indexOf[v.ID()], j, (w*damp)/z) 74 } 75 } 76 } else { 77 for i := range nodes { 78 m.Set(i, j, dangling) 79 } 80 } 81 } 82 83 matrix := m.RawMatrix().Data 84 dt := (1 - damp) / float64(len(nodes)) 85 for i := range matrix { 86 matrix[i] += dt 87 } 88 89 last := make([]float64, len(nodes)) 90 for i := range last { 91 last[i] = 1 92 } 93 lastV := mat.NewVecDense(len(nodes), last) 94 95 vec := make([]float64, len(nodes)) 96 var sum float64 97 for i := range vec { 98 r := rand.NormFloat64() 99 sum += r 100 vec[i] = r 101 } 102 f := 1 / sum 103 for i := range vec { 104 vec[i] *= f 105 } 106 v := mat.NewVecDense(len(nodes), vec) 107 108 for { 109 lastV, v = v, lastV 110 v.MulVec(m, lastV) 111 if normDiff(vec, last) < tol { 112 break 113 } 114 } 115 116 ranks := make(map[int64]float64, len(nodes)) 117 for i, r := range v.RawVector().Data { 118 ranks[nodes[i].ID()] = r 119 } 120 121 return ranks 122 } 123 124 // edgeWeightedPageRankSparse returns the PageRank weights for nodes of the sparse weighted directed 125 // graph g using the given damping factor and terminating when the 2-norm of the 126 // vector difference between iterations is below tol. The returned map is 127 // keyed on the graph node IDs. 128 func edgeWeightedPageRankSparse(g graph.WeightedDirected, damp, tol float64) map[int64]float64 { 129 // edgeWeightedPageRankSparse is implemented according to "How Google Finds Your Needle 130 // in the Web's Haystack" with the modification that 131 // the columns of hyperlink matrix H are calculated with edge weights. 132 // 133 // G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k 134 // 135 // http://www.ams.org/samplings/feature-column/fcarc-pagerank 136 137 nodes := graph.NodesOf(g.Nodes()) 138 indexOf := make(map[int64]int, len(nodes)) 139 for i, n := range nodes { 140 indexOf[n.ID()] = i 141 } 142 143 m := make(rowCompressedMatrix, len(nodes)) 144 var dangling compressedRow 145 df := damp / float64(len(nodes)) 146 for j, u := range nodes { 147 to := graph.NodesOf(g.From(u.ID())) 148 var z float64 149 for _, v := range to { 150 if w, ok := g.Weight(u.ID(), v.ID()); ok { 151 z += w 152 } 153 } 154 if z != 0 { 155 for _, v := range to { 156 if w, ok := g.Weight(u.ID(), v.ID()); ok { 157 m.addTo(indexOf[v.ID()], j, (w*damp)/z) 158 } 159 } 160 } else { 161 dangling.addTo(j, df) 162 } 163 } 164 165 last := make([]float64, len(nodes)) 166 for i := range last { 167 last[i] = 1 168 } 169 lastV := mat.NewVecDense(len(nodes), last) 170 171 vec := make([]float64, len(nodes)) 172 var sum float64 173 for i := range vec { 174 r := rand.NormFloat64() 175 sum += r 176 vec[i] = r 177 } 178 f := 1 / sum 179 for i := range vec { 180 vec[i] *= f 181 } 182 v := mat.NewVecDense(len(nodes), vec) 183 184 dt := (1 - damp) / float64(len(nodes)) 185 for { 186 lastV, v = v, lastV 187 188 m.mulVecUnitary(v, lastV) // First term of the G matrix equation; 189 with := dangling.dotUnitary(lastV) // Second term; 190 away := onesDotUnitary(dt, lastV) // Last term. 191 192 floats.AddConst(with+away, v.RawVector().Data) 193 if normDiff(vec, last) < tol { 194 break 195 } 196 } 197 198 ranks := make(map[int64]float64, len(nodes)) 199 for i, r := range v.RawVector().Data { 200 ranks[nodes[i].ID()] = r 201 } 202 203 return ranks 204 } 205 206 // pageRank returns the PageRank weights for nodes of the directed graph g 207 // using the given damping factor and terminating when the 2-norm of the 208 // vector difference between iterations is below tol. The returned map is 209 // keyed on the graph node IDs. 210 func pageRank(g graph.Directed, damp, tol float64) map[int64]float64 { 211 // pageRank is implemented according to "How Google Finds Your Needle 212 // in the Web's Haystack". 213 // 214 // G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k 215 // 216 // http://www.ams.org/samplings/feature-column/fcarc-pagerank 217 218 nodes := graph.NodesOf(g.Nodes()) 219 indexOf := make(map[int64]int, len(nodes)) 220 for i, n := range nodes { 221 indexOf[n.ID()] = i 222 } 223 224 m := mat.NewDense(len(nodes), len(nodes), nil) 225 dangling := damp / float64(len(nodes)) 226 for j, u := range nodes { 227 to := graph.NodesOf(g.From(u.ID())) 228 f := damp / float64(len(to)) 229 for _, v := range to { 230 m.Set(indexOf[v.ID()], j, f) 231 } 232 if len(to) == 0 { 233 for i := range nodes { 234 m.Set(i, j, dangling) 235 } 236 } 237 } 238 matrix := m.RawMatrix().Data 239 dt := (1 - damp) / float64(len(nodes)) 240 for i := range matrix { 241 matrix[i] += dt 242 } 243 244 last := make([]float64, len(nodes)) 245 for i := range last { 246 last[i] = 1 247 } 248 lastV := mat.NewVecDense(len(nodes), last) 249 250 vec := make([]float64, len(nodes)) 251 var sum float64 252 for i := range vec { 253 r := rand.NormFloat64() 254 sum += r 255 vec[i] = r 256 } 257 f := 1 / sum 258 for i := range vec { 259 vec[i] *= f 260 } 261 v := mat.NewVecDense(len(nodes), vec) 262 263 for { 264 lastV, v = v, lastV 265 v.MulVec(m, lastV) 266 if normDiff(vec, last) < tol { 267 break 268 } 269 } 270 271 ranks := make(map[int64]float64, len(nodes)) 272 for i, r := range v.RawVector().Data { 273 ranks[nodes[i].ID()] = r 274 } 275 276 return ranks 277 } 278 279 // pageRankSparse returns the PageRank weights for nodes of the sparse directed 280 // graph g using the given damping factor and terminating when the 2-norm of the 281 // vector difference between iterations is below tol. The returned map is 282 // keyed on the graph node IDs. 283 func pageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 { 284 // pageRankSparse is implemented according to "How Google Finds Your Needle 285 // in the Web's Haystack". 286 // 287 // G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k 288 // 289 // http://www.ams.org/samplings/feature-column/fcarc-pagerank 290 291 nodes := graph.NodesOf(g.Nodes()) 292 indexOf := make(map[int64]int, len(nodes)) 293 for i, n := range nodes { 294 indexOf[n.ID()] = i 295 } 296 297 m := make(rowCompressedMatrix, len(nodes)) 298 var dangling compressedRow 299 df := damp / float64(len(nodes)) 300 for j, u := range nodes { 301 to := graph.NodesOf(g.From(u.ID())) 302 f := damp / float64(len(to)) 303 for _, v := range to { 304 m.addTo(indexOf[v.ID()], j, f) 305 } 306 if len(to) == 0 { 307 dangling.addTo(j, df) 308 } 309 } 310 311 last := make([]float64, len(nodes)) 312 for i := range last { 313 last[i] = 1 314 } 315 lastV := mat.NewVecDense(len(nodes), last) 316 317 vec := make([]float64, len(nodes)) 318 var sum float64 319 for i := range vec { 320 r := rand.NormFloat64() 321 sum += r 322 vec[i] = r 323 } 324 f := 1 / sum 325 for i := range vec { 326 vec[i] *= f 327 } 328 v := mat.NewVecDense(len(nodes), vec) 329 330 dt := (1 - damp) / float64(len(nodes)) 331 for { 332 lastV, v = v, lastV 333 334 m.mulVecUnitary(v, lastV) // First term of the G matrix equation; 335 with := dangling.dotUnitary(lastV) // Second term; 336 away := onesDotUnitary(dt, lastV) // Last term. 337 338 floats.AddConst(with+away, v.RawVector().Data) 339 if normDiff(vec, last) < tol { 340 break 341 } 342 } 343 344 ranks := make(map[int64]float64, len(nodes)) 345 for i, r := range v.RawVector().Data { 346 ranks[nodes[i].ID()] = r 347 } 348 349 return ranks 350 } 351 352 // rowCompressedMatrix implements row-compressed 353 // matrix/vector multiplication. 354 type rowCompressedMatrix []compressedRow 355 356 // addTo adds the value v to the matrix element at (i,j). Repeated 357 // calls to addTo with the same column index will result in 358 // non-unique element representation. 359 func (m rowCompressedMatrix) addTo(i, j int, v float64) { m[i].addTo(j, v) } 360 361 // mulVecUnitary multiplies the receiver by the src vector, storing 362 // the result in dst. It assumes src and dst are the same length as m 363 // and that both have unitary vector increments. 364 func (m rowCompressedMatrix) mulVecUnitary(dst, src *mat.VecDense) { 365 dMat := dst.RawVector().Data 366 for i, r := range m { 367 dMat[i] = r.dotUnitary(src) 368 } 369 } 370 371 // compressedRow implements a simplified scatter-based Ddot. 372 type compressedRow []sparseElement 373 374 // addTo adds the value v to the vector element at j. Repeated 375 // calls to addTo with the same vector index will result in 376 // non-unique element representation. 377 func (r *compressedRow) addTo(j int, v float64) { 378 *r = append(*r, sparseElement{index: j, value: v}) 379 } 380 381 // dotUnitary performs a simplified scatter-based Ddot operations on 382 // v and the receiver. v must have a unitary vector increment. 383 func (r compressedRow) dotUnitary(v *mat.VecDense) float64 { 384 var sum float64 385 vec := v.RawVector().Data 386 for _, e := range r { 387 sum += vec[e.index] * e.value 388 } 389 return sum 390 } 391 392 // sparseElement is a sparse vector or matrix element. 393 type sparseElement struct { 394 index int 395 value float64 396 } 397 398 // onesDotUnitary performs the equivalent of a Ddot of v with 399 // a ones vector of equal length. v must have a unitary vector 400 // increment. 401 func onesDotUnitary(alpha float64, v *mat.VecDense) float64 { 402 var sum float64 403 for _, f := range v.RawVector().Data { 404 sum += alpha * f 405 } 406 return sum 407 } 408 409 // normDiff returns the 2-norm of the difference between x and y. 410 // This is a cut down version of gonum/floats.Distance. 411 func normDiff(x, y []float64) float64 { 412 var sum float64 413 for i, v := range x { 414 d := v - y[i] 415 sum += d * d 416 } 417 return math.Sqrt(sum) 418 }