github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/stat/statmat.go (about)

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package stat
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/jingcheng-WU/gonum/floats"
    11  	"github.com/jingcheng-WU/gonum/mat"
    12  )
    13  
    14  // CovarianceMatrix calculates the covariance matrix (also known as the
    15  // variance-covariance matrix) calculated from a matrix of data, x, using
    16  // a two-pass algorithm. The result is stored in dst.
    17  //
    18  // If weights is not nil the weighted covariance of x is calculated. weights
    19  // must have length equal to the number of rows in input data matrix and
    20  // must not contain negative elements.
    21  // The dst matrix must either be empty or have the same number of
    22  // columns as the input data matrix.
    23  func CovarianceMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) {
    24  	// This is the matrix version of the two-pass algorithm. It doesn't use the
    25  	// additional floating point error correction that the Covariance function uses
    26  	// to reduce the impact of rounding during centering.
    27  
    28  	r, c := x.Dims()
    29  
    30  	if dst.IsEmpty() {
    31  		*dst = *(dst.GrowSym(c).(*mat.SymDense))
    32  	} else if n := dst.Symmetric(); n != c {
    33  		panic(mat.ErrShape)
    34  	}
    35  
    36  	var xt mat.Dense
    37  	xt.CloneFrom(x.T())
    38  	// Subtract the mean of each of the columns.
    39  	for i := 0; i < c; i++ {
    40  		v := xt.RawRowView(i)
    41  		// This will panic with ErrShape if len(weights) != len(v), so
    42  		// we don't have to check the size later.
    43  		mean := Mean(v, weights)
    44  		floats.AddConst(-mean, v)
    45  	}
    46  
    47  	if weights == nil {
    48  		// Calculate the normalization factor
    49  		// scaled by the sample size.
    50  		dst.SymOuterK(1/(float64(r)-1), &xt)
    51  		return
    52  	}
    53  
    54  	// Multiply by the sqrt of the weights, so that multiplication is symmetric.
    55  	sqrtwts := make([]float64, r)
    56  	for i, w := range weights {
    57  		if w < 0 {
    58  			panic("stat: negative covariance matrix weights")
    59  		}
    60  		sqrtwts[i] = math.Sqrt(w)
    61  	}
    62  	// Weight the rows.
    63  	for i := 0; i < c; i++ {
    64  		v := xt.RawRowView(i)
    65  		floats.Mul(v, sqrtwts)
    66  	}
    67  
    68  	// Calculate the normalization factor
    69  	// scaled by the weighted sample size.
    70  	dst.SymOuterK(1/(floats.Sum(weights)-1), &xt)
    71  }
    72  
    73  // CorrelationMatrix returns the correlation matrix calculated from a matrix
    74  // of data, x, using a two-pass algorithm. The result is stored in dst.
    75  //
    76  // If weights is not nil the weighted correlation of x is calculated. weights
    77  // must have length equal to the number of rows in input data matrix and
    78  // must not contain negative elements.
    79  // The dst matrix must either be empty or have the same number of
    80  // columns as the input data matrix.
    81  func CorrelationMatrix(dst *mat.SymDense, x mat.Matrix, weights []float64) {
    82  	// This will panic if the sizes don't match, or if weights is the wrong size.
    83  	CovarianceMatrix(dst, x, weights)
    84  	covToCorr(dst)
    85  }
    86  
    87  // covToCorr converts a covariance matrix to a correlation matrix.
    88  func covToCorr(c *mat.SymDense) {
    89  	r := c.Symmetric()
    90  
    91  	s := make([]float64, r)
    92  	for i := 0; i < r; i++ {
    93  		s[i] = 1 / math.Sqrt(c.At(i, i))
    94  	}
    95  	for i, sx := range s {
    96  		// Ensure that the diagonal has exactly ones.
    97  		c.SetSym(i, i, 1)
    98  		for j := i + 1; j < r; j++ {
    99  			v := c.At(i, j)
   100  			c.SetSym(i, j, v*sx*s[j])
   101  		}
   102  	}
   103  }
   104  
   105  // corrToCov converts a correlation matrix to a covariance matrix.
   106  // The input sigma should be vector of standard deviations corresponding
   107  // to the covariance.  It will panic if len(sigma) is not equal to the
   108  // number of rows in the correlation matrix.
   109  func corrToCov(c *mat.SymDense, sigma []float64) {
   110  	r, _ := c.Dims()
   111  
   112  	if r != len(sigma) {
   113  		panic(mat.ErrShape)
   114  	}
   115  	for i, sx := range sigma {
   116  		// Ensure that the diagonal has exactly sigma squared.
   117  		c.SetSym(i, i, sx*sx)
   118  		for j := i + 1; j < r; j++ {
   119  			v := c.At(i, j)
   120  			c.SetSym(i, j, v*sx*sigma[j])
   121  		}
   122  	}
   123  }
   124  
   125  // Mahalanobis computes the Mahalanobis distance
   126  //  D = sqrt((x-y)ᵀ * Σ^-1 * (x-y))
   127  // between the column vectors x and y given the cholesky decomposition of Σ.
   128  // Mahalanobis returns NaN if the linear solve fails.
   129  //
   130  // See https://en.wikipedia.org/wiki/Mahalanobis_distance for more information.
   131  func Mahalanobis(x, y mat.Vector, chol *mat.Cholesky) float64 {
   132  	var diff mat.VecDense
   133  	diff.SubVec(x, y)
   134  	var tmp mat.VecDense
   135  	err := chol.SolveVecTo(&tmp, &diff)
   136  	if err != nil {
   137  		return math.NaN()
   138  	}
   139  	return math.Sqrt(mat.Dot(&tmp, &diff))
   140  }