gonum.org/v1/gonum@v0.14.0/graph/network/page.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package network
     6  
     7  import (
     8  	"math"
     9  
    10  	"golang.org/x/exp/rand"
    11  
    12  	"gonum.org/v1/gonum/floats"
    13  	"gonum.org/v1/gonum/graph"
    14  	"gonum.org/v1/gonum/mat"
    15  )
    16  
    17  // PageRank returns the PageRank weights for nodes of the directed graph g
    18  // using the given damping factor and terminating when the 2-norm of the
    19  // vector difference between iterations is below tol. The returned map is
    20  // keyed on the graph node IDs.
    21  // If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
    22  func PageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
    23  	if g, ok := g.(graph.WeightedDirected); ok {
    24  		return edgeWeightedPageRank(g, damp, tol)
    25  	}
    26  	return pageRank(g, damp, tol)
    27  }
    28  
    29  // PageRankSparse returns the PageRank weights for nodes of the sparse directed
    30  // graph g using the given damping factor and terminating when the 2-norm of the
    31  // vector difference between iterations is below tol. The returned map is
    32  // keyed on the graph node IDs.
    33  // If g is a graph.WeightedDirected, an edge-weighted PageRank is calculated.
    34  func PageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
    35  	if g, ok := g.(graph.WeightedDirected); ok {
    36  		return edgeWeightedPageRankSparse(g, damp, tol)
    37  	}
    38  	return pageRankSparse(g, damp, tol)
    39  }
    40  
    41  // edgeWeightedPageRank returns the PageRank weights for nodes of the weighted directed graph g
    42  // using the given damping factor and terminating when the 2-norm of the
    43  // vector difference between iterations is below tol. The returned map is
    44  // keyed on the graph node IDs.
    45  func edgeWeightedPageRank(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
    46  	// edgeWeightedPageRank is implemented according to "How Google Finds Your Needle
    47  	// in the Web's Haystack" with the modification that
    48  	// the columns of hyperlink matrix H are calculated with edge weights.
    49  	//
    50  	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
    51  	//
    52  	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
    53  
    54  	nodes := graph.NodesOf(g.Nodes())
    55  	indexOf := make(map[int64]int, len(nodes))
    56  	for i, n := range nodes {
    57  		indexOf[n.ID()] = i
    58  	}
    59  
    60  	m := mat.NewDense(len(nodes), len(nodes), nil)
    61  	dangling := damp / float64(len(nodes))
    62  	for j, u := range nodes {
    63  		to := graph.NodesOf(g.From(u.ID()))
    64  		var z float64
    65  		for _, v := range to {
    66  			if w, ok := g.Weight(u.ID(), v.ID()); ok {
    67  				z += w
    68  			}
    69  		}
    70  		if z != 0 {
    71  			for _, v := range to {
    72  				if w, ok := g.Weight(u.ID(), v.ID()); ok {
    73  					m.Set(indexOf[v.ID()], j, (w*damp)/z)
    74  				}
    75  			}
    76  		} else {
    77  			for i := range nodes {
    78  				m.Set(i, j, dangling)
    79  			}
    80  		}
    81  	}
    82  
    83  	matrix := m.RawMatrix().Data
    84  	dt := (1 - damp) / float64(len(nodes))
    85  	for i := range matrix {
    86  		matrix[i] += dt
    87  	}
    88  
    89  	last := make([]float64, len(nodes))
    90  	for i := range last {
    91  		last[i] = 1
    92  	}
    93  	lastV := mat.NewVecDense(len(nodes), last)
    94  
    95  	vec := make([]float64, len(nodes))
    96  	var sum float64
    97  	for i := range vec {
    98  		r := rand.NormFloat64()
    99  		sum += r
   100  		vec[i] = r
   101  	}
   102  	f := 1 / sum
   103  	for i := range vec {
   104  		vec[i] *= f
   105  	}
   106  	v := mat.NewVecDense(len(nodes), vec)
   107  
   108  	for {
   109  		lastV, v = v, lastV
   110  		v.MulVec(m, lastV)
   111  		if normDiff(vec, last) < tol {
   112  			break
   113  		}
   114  	}
   115  
   116  	ranks := make(map[int64]float64, len(nodes))
   117  	for i, r := range v.RawVector().Data {
   118  		ranks[nodes[i].ID()] = r
   119  	}
   120  
   121  	return ranks
   122  }
   123  
   124  // edgeWeightedPageRankSparse returns the PageRank weights for nodes of the sparse weighted directed
   125  // graph g using the given damping factor and terminating when the 2-norm of the
   126  // vector difference between iterations is below tol. The returned map is
   127  // keyed on the graph node IDs.
   128  func edgeWeightedPageRankSparse(g graph.WeightedDirected, damp, tol float64) map[int64]float64 {
   129  	// edgeWeightedPageRankSparse is implemented according to "How Google Finds Your Needle
   130  	// in the Web's Haystack" with the modification that
   131  	// the columns of hyperlink matrix H are calculated with edge weights.
   132  	//
   133  	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
   134  	//
   135  	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
   136  
   137  	nodes := graph.NodesOf(g.Nodes())
   138  	indexOf := make(map[int64]int, len(nodes))
   139  	for i, n := range nodes {
   140  		indexOf[n.ID()] = i
   141  	}
   142  
   143  	m := make(rowCompressedMatrix, len(nodes))
   144  	var dangling compressedRow
   145  	df := damp / float64(len(nodes))
   146  	for j, u := range nodes {
   147  		to := graph.NodesOf(g.From(u.ID()))
   148  		var z float64
   149  		for _, v := range to {
   150  			if w, ok := g.Weight(u.ID(), v.ID()); ok {
   151  				z += w
   152  			}
   153  		}
   154  		if z != 0 {
   155  			for _, v := range to {
   156  				if w, ok := g.Weight(u.ID(), v.ID()); ok {
   157  					m.addTo(indexOf[v.ID()], j, (w*damp)/z)
   158  				}
   159  			}
   160  		} else {
   161  			dangling.addTo(j, df)
   162  		}
   163  	}
   164  
   165  	last := make([]float64, len(nodes))
   166  	for i := range last {
   167  		last[i] = 1
   168  	}
   169  	lastV := mat.NewVecDense(len(nodes), last)
   170  
   171  	vec := make([]float64, len(nodes))
   172  	var sum float64
   173  	for i := range vec {
   174  		r := rand.NormFloat64()
   175  		sum += r
   176  		vec[i] = r
   177  	}
   178  	f := 1 / sum
   179  	for i := range vec {
   180  		vec[i] *= f
   181  	}
   182  	v := mat.NewVecDense(len(nodes), vec)
   183  
   184  	dt := (1 - damp) / float64(len(nodes))
   185  	for {
   186  		lastV, v = v, lastV
   187  
   188  		m.mulVecUnitary(v, lastV)          // First term of the G matrix equation;
   189  		with := dangling.dotUnitary(lastV) // Second term;
   190  		away := onesDotUnitary(dt, lastV)  // Last term.
   191  
   192  		floats.AddConst(with+away, v.RawVector().Data)
   193  		if normDiff(vec, last) < tol {
   194  			break
   195  		}
   196  	}
   197  
   198  	ranks := make(map[int64]float64, len(nodes))
   199  	for i, r := range v.RawVector().Data {
   200  		ranks[nodes[i].ID()] = r
   201  	}
   202  
   203  	return ranks
   204  }
   205  
   206  // pageRank returns the PageRank weights for nodes of the directed graph g
   207  // using the given damping factor and terminating when the 2-norm of the
   208  // vector difference between iterations is below tol. The returned map is
   209  // keyed on the graph node IDs.
   210  func pageRank(g graph.Directed, damp, tol float64) map[int64]float64 {
   211  	// pageRank is implemented according to "How Google Finds Your Needle
   212  	// in the Web's Haystack".
   213  	//
   214  	// G.I^k = alpha.S.I^k + (1-alpha).1/n.1.I^k
   215  	//
   216  	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
   217  
   218  	nodes := graph.NodesOf(g.Nodes())
   219  	indexOf := make(map[int64]int, len(nodes))
   220  	for i, n := range nodes {
   221  		indexOf[n.ID()] = i
   222  	}
   223  
   224  	m := mat.NewDense(len(nodes), len(nodes), nil)
   225  	dangling := damp / float64(len(nodes))
   226  	for j, u := range nodes {
   227  		to := graph.NodesOf(g.From(u.ID()))
   228  		f := damp / float64(len(to))
   229  		for _, v := range to {
   230  			m.Set(indexOf[v.ID()], j, f)
   231  		}
   232  		if len(to) == 0 {
   233  			for i := range nodes {
   234  				m.Set(i, j, dangling)
   235  			}
   236  		}
   237  	}
   238  	matrix := m.RawMatrix().Data
   239  	dt := (1 - damp) / float64(len(nodes))
   240  	for i := range matrix {
   241  		matrix[i] += dt
   242  	}
   243  
   244  	last := make([]float64, len(nodes))
   245  	for i := range last {
   246  		last[i] = 1
   247  	}
   248  	lastV := mat.NewVecDense(len(nodes), last)
   249  
   250  	vec := make([]float64, len(nodes))
   251  	var sum float64
   252  	for i := range vec {
   253  		r := rand.NormFloat64()
   254  		sum += r
   255  		vec[i] = r
   256  	}
   257  	f := 1 / sum
   258  	for i := range vec {
   259  		vec[i] *= f
   260  	}
   261  	v := mat.NewVecDense(len(nodes), vec)
   262  
   263  	for {
   264  		lastV, v = v, lastV
   265  		v.MulVec(m, lastV)
   266  		if normDiff(vec, last) < tol {
   267  			break
   268  		}
   269  	}
   270  
   271  	ranks := make(map[int64]float64, len(nodes))
   272  	for i, r := range v.RawVector().Data {
   273  		ranks[nodes[i].ID()] = r
   274  	}
   275  
   276  	return ranks
   277  }
   278  
   279  // pageRankSparse returns the PageRank weights for nodes of the sparse directed
   280  // graph g using the given damping factor and terminating when the 2-norm of the
   281  // vector difference between iterations is below tol. The returned map is
   282  // keyed on the graph node IDs.
   283  func pageRankSparse(g graph.Directed, damp, tol float64) map[int64]float64 {
   284  	// pageRankSparse is implemented according to "How Google Finds Your Needle
   285  	// in the Web's Haystack".
   286  	//
   287  	// G.I^k = alpha.H.I^k + alpha.A.I^k + (1-alpha).1/n.1.I^k
   288  	//
   289  	// http://www.ams.org/samplings/feature-column/fcarc-pagerank
   290  
   291  	nodes := graph.NodesOf(g.Nodes())
   292  	indexOf := make(map[int64]int, len(nodes))
   293  	for i, n := range nodes {
   294  		indexOf[n.ID()] = i
   295  	}
   296  
   297  	m := make(rowCompressedMatrix, len(nodes))
   298  	var dangling compressedRow
   299  	df := damp / float64(len(nodes))
   300  	for j, u := range nodes {
   301  		to := graph.NodesOf(g.From(u.ID()))
   302  		f := damp / float64(len(to))
   303  		for _, v := range to {
   304  			m.addTo(indexOf[v.ID()], j, f)
   305  		}
   306  		if len(to) == 0 {
   307  			dangling.addTo(j, df)
   308  		}
   309  	}
   310  
   311  	last := make([]float64, len(nodes))
   312  	for i := range last {
   313  		last[i] = 1
   314  	}
   315  	lastV := mat.NewVecDense(len(nodes), last)
   316  
   317  	vec := make([]float64, len(nodes))
   318  	var sum float64
   319  	for i := range vec {
   320  		r := rand.NormFloat64()
   321  		sum += r
   322  		vec[i] = r
   323  	}
   324  	f := 1 / sum
   325  	for i := range vec {
   326  		vec[i] *= f
   327  	}
   328  	v := mat.NewVecDense(len(nodes), vec)
   329  
   330  	dt := (1 - damp) / float64(len(nodes))
   331  	for {
   332  		lastV, v = v, lastV
   333  
   334  		m.mulVecUnitary(v, lastV)          // First term of the G matrix equation;
   335  		with := dangling.dotUnitary(lastV) // Second term;
   336  		away := onesDotUnitary(dt, lastV)  // Last term.
   337  
   338  		floats.AddConst(with+away, v.RawVector().Data)
   339  		if normDiff(vec, last) < tol {
   340  			break
   341  		}
   342  	}
   343  
   344  	ranks := make(map[int64]float64, len(nodes))
   345  	for i, r := range v.RawVector().Data {
   346  		ranks[nodes[i].ID()] = r
   347  	}
   348  
   349  	return ranks
   350  }
   351  
// rowCompressedMatrix implements row-compressed
// matrix/vector multiplication. Element i of the slice is the
// compressedRow holding the explicitly stored elements of matrix row i.
type rowCompressedMatrix []compressedRow
   355  
   356  // addTo adds the value v to the matrix element at (i,j). Repeated
   357  // calls to addTo with the same column index will result in
   358  // non-unique element representation.
   359  func (m rowCompressedMatrix) addTo(i, j int, v float64) { m[i].addTo(j, v) }
   360  
   361  // mulVecUnitary multiplies the receiver by the src vector, storing
   362  // the result in dst. It assumes src and dst are the same length as m
   363  // and that both have unitary vector increments.
   364  func (m rowCompressedMatrix) mulVecUnitary(dst, src *mat.VecDense) {
   365  	dMat := dst.RawVector().Data
   366  	for i, r := range m {
   367  		dMat[i] = r.dotUnitary(src)
   368  	}
   369  }
   370  
// compressedRow implements a simplified scatter-based Ddot.
// It is a sparse vector stored as a list of index/value elements.
type compressedRow []sparseElement
   373  
   374  // addTo adds the value v to the vector element at j. Repeated
   375  // calls to addTo with the same vector index will result in
   376  // non-unique element representation.
   377  func (r *compressedRow) addTo(j int, v float64) {
   378  	*r = append(*r, sparseElement{index: j, value: v})
   379  }
   380  
   381  // dotUnitary performs a simplified scatter-based Ddot operations on
   382  // v and the receiver. v must have a unitary vector increment.
   383  func (r compressedRow) dotUnitary(v *mat.VecDense) float64 {
   384  	var sum float64
   385  	vec := v.RawVector().Data
   386  	for _, e := range r {
   387  		sum += vec[e.index] * e.value
   388  	}
   389  	return sum
   390  }
   391  
// sparseElement is a sparse vector or matrix element.
type sparseElement struct {
	// index is the position of the element within its vector, or its
	// column when the element belongs to a matrix row.
	index int
	// value is the value stored at index.
	value float64
}
   397  
   398  // onesDotUnitary performs the equivalent of a Ddot of v with
   399  // a ones vector of equal length. v must have a unitary vector
   400  // increment.
   401  func onesDotUnitary(alpha float64, v *mat.VecDense) float64 {
   402  	var sum float64
   403  	for _, f := range v.RawVector().Data {
   404  		sum += alpha * f
   405  	}
   406  	return sum
   407  }
   408  
   409  // normDiff returns the 2-norm of the difference between x and y.
   410  // This is a cut down version of gonum/floats.Distance.
   411  func normDiff(x, y []float64) float64 {
   412  	var sum float64
   413  	for i, v := range x {
   414  		d := v - y[i]
   415  		sum += d * d
   416  	}
   417  	return math.Sqrt(sum)
   418  }